summaryrefslogtreecommitdiff
path: root/ext/mcpat/cacti
diff options
context:
space:
mode:
authorYasuko Eckert <yasuko.eckert@amd.com>2014-06-03 13:32:59 -0700
committerYasuko Eckert <yasuko.eckert@amd.com>2014-06-03 13:32:59 -0700
commit0deef376d96bfe0a3a2496714ac22471d9ee818a (patch)
tree43d383a5bc4315863240dd61f7a4077ce2ac86e7 /ext/mcpat/cacti
parent1104199115a6ff5ed04f92ba6391f18728765014 (diff)
downloadgem5-0deef376d96bfe0a3a2496714ac22471d9ee818a.tar.xz
ext: McPAT interface changes and fixes
This patch includes software engineering changes and some generic bug fixes Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known issues/concerns we did not have a chance to address in this patch. High-level changes in this patch include: 1) Making XML parsing modular and hierarchical: - Shift parsing responsibility into the components - Read XML in a (mostly) context-free recursive manner so that McPAT input files can contain arbitrary component hierarchies 2) Making power, energy, and area calculations a hierarchical and recursive process - Components track their subcomponents and recursively call compute functions in stages - Make C++ object hierarchy reflect inheritance of classes of components with similar structures - Simplify computeArea() and computeEnergy() functions to eliminate successive calls to calculate separate TDP vs. runtime energy - Remove Processor component (now unnecessary) and introduce a more abstract System component 3) Standardizing McPAT output across all components - Use a single, common data structure for storing and printing McPAT output - Recursively call print functions through component hierarchy 4) For caches, allow splitting data array and tag array reads and writes for better accuracy 5) Improving the usability of CACTI by printing more helpful warning and error messages 6) Minor: Impose more rigorous code style for clarity (more work still to be done) Overall, these changes greatly reduce the amount of replicated code, and they improve McPAT runtime and decrease memory footprint.
Diffstat (limited to 'ext/mcpat/cacti')
-rw-r--r--ext/mcpat/cacti/Ucache.cc1404
-rw-r--r--ext/mcpat/cacti/Ucache.h52
-rw-r--r--ext/mcpat/cacti/arbiter.cc119
-rw-r--r--[-rwxr-xr-x]ext/mcpat/cacti/bank.cc274
-rwxr-xr-xext/mcpat/cacti/bank.h6
-rw-r--r--ext/mcpat/cacti/basic_circuit.cc1001
-rw-r--r--ext/mcpat/cacti/basic_circuit.h51
-rw-r--r--ext/mcpat/cacti/cacti_interface.cc183
-rw-r--r--ext/mcpat/cacti/cacti_interface.h641
-rw-r--r--ext/mcpat/cacti/component.cc253
-rw-r--r--ext/mcpat/cacti/component.h44
-rw-r--r--ext/mcpat/cacti/const.h28
-rw-r--r--ext/mcpat/cacti/crossbar.cc220
-rw-r--r--ext/mcpat/cacti/crossbar.h40
-rw-r--r--ext/mcpat/cacti/decoder.cc2241
-rw-r--r--ext/mcpat/cacti/decoder.h260
-rw-r--r--ext/mcpat/cacti/htree2.cc1077
-rw-r--r--ext/mcpat/cacti/htree2.h27
-rw-r--r--ext/mcpat/cacti/io.cc3274
-rw-r--r--[-rwxr-xr-x]ext/mcpat/cacti/mat.cc3282
-rwxr-xr-xext/mcpat/cacti/mat.h14
-rw-r--r--ext/mcpat/cacti/nuca.cc1007
-rw-r--r--ext/mcpat/cacti/nuca.h16
-rw-r--r--ext/mcpat/cacti/parameter.cc1162
-rw-r--r--ext/mcpat/cacti/parameter.h450
-rw-r--r--ext/mcpat/cacti/router.cc386
-rw-r--r--ext/mcpat/cacti/router.h14
-rwxr-xr-xext/mcpat/cacti/subarray.cc257
-rwxr-xr-xext/mcpat/cacti/subarray.h10
-rw-r--r--ext/mcpat/cacti/technology.cc5177
-rwxr-xr-xext/mcpat/cacti/uca.cc723
-rwxr-xr-xext/mcpat/cacti/uca.h13
-rw-r--r--ext/mcpat/cacti/wire.cc1368
-rw-r--r--ext/mcpat/cacti/wire.h29
34 files changed, 12191 insertions, 12912 deletions
diff --git a/ext/mcpat/cacti/Ucache.cc b/ext/mcpat/cacti/Ucache.cc
index f3e1227df..ada9c5aa1 100644
--- a/ext/mcpat/cacti/Ucache.cc
+++ b/ext/mcpat/cacti/Ucache.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -54,176 +55,163 @@ using namespace std;
const uint32_t nthreads = NTHREADS;
-void min_values_t::update_min_values(const min_values_t * val)
-{
- min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
- min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
- min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
- min_area = (min_area > val->min_area) ? val->min_area : min_area;
- min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
+void min_values_t::update_min_values(const min_values_t * val) {
+ min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
+ min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
+ min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
+ min_area = (min_area > val->min_area) ? val->min_area : min_area;
+ min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
}
-void min_values_t::update_min_values(const uca_org_t & res)
-{
- min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
- min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
- min_area = (min_area > res.area) ? res.area : min_area;
- min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
+void min_values_t::update_min_values(const uca_org_t & res) {
+ min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
+ min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
+ min_area = (min_area > res.area) ? res.area : min_area;
+ min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
}
-void min_values_t::update_min_values(const nuca_org_t * res)
-{
- min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
- min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
- min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
- min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
+void min_values_t::update_min_values(const nuca_org_t * res) {
+ min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
+ min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
+ min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
+ min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
}
-void min_values_t::update_min_values(const mem_array * res)
-{
- min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
- min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
- min_area = (min_area > res->area) ? res->area : min_area;
- min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
+void min_values_t::update_min_values(const mem_array * res) {
+ min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
+ min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
+ min_area = (min_area > res->area) ? res->area : min_area;
+ min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
}
-void * calc_time_mt_wrapper(void * void_obj)
-{
- calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
- uint32_t tid = calc_obj->tid;
- list<mem_array *> & data_arr = calc_obj->data_arr;
- list<mem_array *> & tag_arr = calc_obj->tag_arr;
- bool is_tag = calc_obj->is_tag;
- bool pure_ram = calc_obj->pure_ram;
- bool pure_cam = calc_obj->pure_cam;
- bool is_main_mem = calc_obj->is_main_mem;
- double Nspd_min = calc_obj->Nspd_min;
- min_values_t * data_res = calc_obj->data_res;
- min_values_t * tag_res = calc_obj->tag_res;
-
- data_arr.clear();
- data_arr.push_back(new mem_array);
- tag_arr.clear();
- tag_arr.push_back(new mem_array);
-
- uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
- uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
- uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
- uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
-
-
- bool is_valid_partition;
- int wt_min, wt_max;
-
- if (g_ip->force_wiretype) {
- if (g_ip->wt == 0) {
- wt_min = Low_swing;
- wt_max = Low_swing;
- }
- else {
- wt_min = Global;
- wt_max = Low_swing-1;
- }
- }
- else {
- wt_min = Global;
- wt_max = Low_swing;
- }
+void * calc_time_mt_wrapper(void * void_obj) {
+ calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
+ uint32_t tid = calc_obj->tid;
+ list<mem_array *> & data_arr = calc_obj->data_arr;
+ list<mem_array *> & tag_arr = calc_obj->tag_arr;
+ bool is_tag = calc_obj->is_tag;
+ bool pure_ram = calc_obj->pure_ram;
+ bool pure_cam = calc_obj->pure_cam;
+ bool is_main_mem = calc_obj->is_main_mem;
+ double Nspd_min = calc_obj->Nspd_min;
+ min_values_t * data_res = calc_obj->data_res;
+ min_values_t * tag_res = calc_obj->tag_res;
- for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
- {
- for (int wr = wt_min; wr <= wt_max; wr++)
- {
- for (uint32_t iter = tid; iter < niter; iter += nthreads)
- {
- // reconstruct Ndwl, Ndbl, Ndcm
- unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
- unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter);
- unsigned int Ndcm = 1 << (iter % Ndcm_niter);
- for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
- {
- for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
- {
- //for debuging
- if (g_ip->force_cache_config && is_tag == false)
- {
- wr = g_ip->wt;
- Ndwl = g_ip->ndwl;
- Ndbl = g_ip->ndbl;
- Ndcm = g_ip->ndcm;
- if(g_ip->nspd != 0) {
- Nspd = g_ip->nspd;
- }
- if(g_ip->ndsam1 != 0) {
- Ndsam_lev_1 = g_ip->ndsam1;
- Ndsam_lev_2 = g_ip->ndsam2;
- }
- }
-
- if (is_tag == true)
- {
- is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
- Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
- tag_arr.back(), 0, NULL, NULL,
- is_main_mem);
- }
- // If it's a fully-associative cache, the data array partition parameters are identical to that of
- // the tag array, so compute data array partition properties also here.
- if (is_tag == false || g_ip->fully_assoc)
- {
- is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
- Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
- data_arr.back(), 0, NULL, NULL,
- is_main_mem);
- }
-
- if (is_valid_partition)
- {
- if (is_tag == true)
- {
- tag_arr.back()->wt = (enum Wire_type) wr;
- tag_res->update_min_values(tag_arr.back());
- tag_arr.push_back(new mem_array);
- }
- if (is_tag == false || g_ip->fully_assoc)
- {
- data_arr.back()->wt = (enum Wire_type) wr;
- data_res->update_min_values(data_arr.back());
- data_arr.push_back(new mem_array);
- }
- }
+ data_arr.clear();
+ data_arr.push_back(new mem_array);
+ tag_arr.clear();
+ tag_arr.push_back(new mem_array);
+
+ uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
+ uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
+
+
+ bool is_valid_partition;
+ int wt_min, wt_max;
+
+ if (g_ip->force_wiretype) {
+ if (g_ip->wt == 0) {
+ wt_min = Low_swing;
+ wt_max = Low_swing;
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing - 1;
+ }
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing;
+ }
- if (g_ip->force_cache_config && is_tag == false)
- {
- wr = wt_max;
- iter = niter;
- if(g_ip->nspd != 0) {
- Nspd = MAXDATASPD;
- }
- if (g_ip->ndsam1 != 0) {
- Ndsam_lev_1 = MAX_COL_MUX+1;
- Ndsam_lev_2 = MAX_COL_MUX+1;
+ for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) {
+ for (int wr = wt_min; wr <= wt_max; wr++) {
+ for (uint32_t iter = tid; iter < niter; iter += nthreads) {
+ // reconstruct Ndwl, Ndbl, Ndcm
+ unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
+ unsigned int Ndbl = 1 << ((iter / (Ndcm_niter)) % Ndbl_niter);
+ unsigned int Ndcm = 1 << (iter % Ndcm_niter);
+ for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX;
+ Ndsam_lev_1 *= 2) {
+ for (unsigned int Ndsam_lev_2 = 1;
+ Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) {
+ //for debuging
+ if (g_ip->force_cache_config && is_tag == false) {
+ wr = g_ip->wt;
+ Ndwl = g_ip->ndwl;
+ Ndbl = g_ip->ndbl;
+ Ndcm = g_ip->ndcm;
+ if (g_ip->nspd != 0) {
+ Nspd = g_ip->nspd;
+ }
+ if (g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = g_ip->ndsam1;
+ Ndsam_lev_2 = g_ip->ndsam2;
+ }
+ }
+
+ if (is_tag == true) {
+ is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ tag_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+ // If it's a fully-associative cache, the data array partition parameters are identical to that of
+ // the tag array, so compute data array partition properties also here.
+ if (is_tag == false || g_ip->fully_assoc) {
+ is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ data_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+
+ if (is_valid_partition) {
+ if (is_tag == true) {
+ tag_arr.back()->wt = (enum Wire_type) wr;
+ tag_res->update_min_values(tag_arr.back());
+ tag_arr.push_back(new mem_array);
+ }
+ if (is_tag == false || g_ip->fully_assoc) {
+ data_arr.back()->wt = (enum Wire_type) wr;
+ data_res->update_min_values(data_arr.back());
+ data_arr.push_back(new mem_array);
+ }
+ }
+
+ if (g_ip->force_cache_config && is_tag == false) {
+ wr = wt_max;
+ iter = niter;
+ if (g_ip->nspd != 0) {
+ Nspd = MAXDATASPD;
+ }
+ if (g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = MAX_COL_MUX + 1;
+ Ndsam_lev_2 = MAX_COL_MUX + 1;
+ }
+ }
+ }
}
}
- }
}
- }
}
- }
- delete data_arr.back();
- delete tag_arr.back();
- data_arr.pop_back();
- tag_arr.pop_back();
+ delete data_arr.back();
+ delete tag_arr.back();
+ data_arr.pop_back();
+ tag_arr.pop_back();
- pthread_exit(NULL);
+#ifndef DEBUG
+ pthread_exit(NULL);
+#else
+ return NULL;
+#endif
}
@@ -242,423 +230,448 @@ bool calculate_time(
int flag_results_populate,
results_mem_array *ptr_results,
uca_org_t *ptr_fin_res,
- bool is_main_mem)
-{
- DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
+ bool is_main_mem) {
+ DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
- if (dyn_p.is_valid == false)
- {
- return false;
- }
+ if (dyn_p.is_valid == false) {
+ return false;
+ }
- UCA * uca = new UCA(dyn_p);
+ UCA * uca = new UCA(dyn_p);
- if (flag_results_populate)
- { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables
- }
- else
- {
- int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
- int num_mats = uca->bank.dp.num_mats;
- bool is_fa = uca->bank.dp.fully_assoc;
- bool pure_cam = uca->bank.dp.pure_cam;
+ //For the final solution, populate the ptr_results data structure
+ //-- TODO: copy only necessary variables
+ if (flag_results_populate) {
+ } else {
+ int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
+ int num_mats = uca->bank.dp.num_mats;
+ bool is_fa = uca->bank.dp.fully_assoc;
+ bool pure_cam = uca->bank.dp.pure_cam;
ptr_array->Ndwl = Ndwl;
- ptr_array->Ndbl = Ndbl;
- ptr_array->Nspd = Nspd;
- ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
- ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
- ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
- ptr_array->access_time = uca->access_time;
- ptr_array->cycle_time = uca->cycle_time;
- ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
- ptr_array->area_ram_cells = uca->area_all_dataramcells;
- ptr_array->area = uca->area.get_area();
- ptr_array->height = uca->area.h;
- ptr_array->width = uca->area.w;
- ptr_array->mat_height = uca->bank.mat.area.h;
- ptr_array->mat_length = uca->bank.mat.area.w;
- ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
- ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
- ptr_array->power = uca->power;
- ptr_array->delay_senseamp_mux_decoder =
- MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
- uca->delay_array_to_sa_mux_lev_2_decoder);
- ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver;
- ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;
-
- ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
- ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
- ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
- ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
- ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
- ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
- ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
- ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
- ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
- ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
-
- ptr_array->all_banks_height = uca->area.h;
- ptr_array->all_banks_width = uca->area.w;
- ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());
-
- ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
- ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
- ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
-// cout<<"power_data_input_htree"<<uca->bank.htree_in_data->power.readOp.leakage<<endl;
- ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
-// cout<<"power_data_output_htree"<<uca->bank.htree_out_data->power.readOp.leakage<<endl;
- ptr_array->power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
- ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
- ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
- ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
- ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power;
- ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
- ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
- ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
- ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bitlines = uca->bank.mat.power_bitline;
- ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_sense_amps = uca->bank.mat.power_sa;
- ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
- ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
- ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_comparators = uca->bank.mat.power_comparator;
- ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
-
-// cout << " num of mats: " << dyn_p.num_mats << endl;
- if (is_fa || pure_cam)
- {
- ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
-// cout<<"power_htree_in_search"<<uca->bank.htree_in_search->power.readOp.leakage<<endl;
- ptr_array->power_htree_out_search = uca->bank.htree_out_search->power;
-// cout<<"power_htree_out_search"<<uca->bank.htree_out_search->power.readOp.leakage<<endl;
- ptr_array->power_searchline = uca->bank.mat.power_searchline;
-// cout<<"power_searchlineh"<<uca->bank.mat.power_searchline.readOp.leakage<<endl;
- ptr_array->power_searchline.searchOp.dynamic *= num_mats;
- ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
- ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
- ptr_array->power_matchlines = uca->bank.mat.power_matchline;
- ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
- ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
- ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
- ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
-// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.power_matchline.searchOp.leakage<<endl;
- }
-
- ptr_array->activate_energy = uca->activate_energy;
- ptr_array->read_energy = uca->read_energy;
- ptr_array->write_energy = uca->write_energy;
- ptr_array->precharge_energy = uca->precharge_energy;
- ptr_array->refresh_power = uca->refresh_power;
- ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
- ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
- ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;
-
- ptr_array->precharge_delay = uca->precharge_delay;
-
-
-// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.<<endl;
-//
-// if (!(is_fa || pure_cam))
-// {
-// cout << " num of cols: " << dyn_p.num_c_subarray << endl;
-// }
-// else if (is_fa)
-// {
-// cout << " num of cols: " << dyn_p.tag_num_c_subarray+ dyn_p.data_num_c_subarray<< endl;
-// } else
-// cout << " num of cols: " << dyn_p.tag_num_c_subarray<< endl;
-// cout << uca->bank.mat.subarray.get_total_cell_area()<<endl;
- }
+ ptr_array->Ndbl = Ndbl;
+ ptr_array->Nspd = Nspd;
+ ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
+ ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
+ ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
+ ptr_array->access_time = uca->access_time;
+ ptr_array->cycle_time = uca->cycle_time;
+ ptr_array->multisubbank_interleave_cycle_time =
+ uca->multisubbank_interleave_cycle_time;
+ ptr_array->area_ram_cells = uca->area_all_dataramcells;
+ ptr_array->area = uca->area.get_area();
+ ptr_array->height = uca->area.h;
+ ptr_array->width = uca->area.w;
+ ptr_array->mat_height = uca->bank.mat.area.h;
+ ptr_array->mat_length = uca->bank.mat.area.w;
+ ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
+ ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
+ ptr_array->power = uca->power;
+ ptr_array->delay_senseamp_mux_decoder =
+ MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
+ uca->delay_array_to_sa_mux_lev_2_decoder);
+ ptr_array->delay_before_subarray_output_driver =
+ uca->delay_before_subarray_output_driver;
+ ptr_array->delay_from_subarray_output_driver_to_output =
+ uca->delay_from_subarray_out_drv_to_out;
+
+ ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
+ ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
+ ptr_array->delay_row_predecode_driver_and_block =
+ uca->bank.mat.r_predec->delay;
+ ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
+ ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
+ ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
+ ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
+ ptr_array->delay_subarray_output_driver =
+ uca->bank.mat.delay_subarray_out_drv_htree;
+ ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
+ ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
+
+ ptr_array->all_banks_height = uca->area.h;
+ ptr_array->all_banks_width = uca->area.w;
+ ptr_array->area_efficiency = uca->area_all_dataramcells * 100 /
+ (uca->area.get_area());
+
+ ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
+ ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
+ ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
+ ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
+
+ ptr_array->power_row_predecoder_drivers =
+ uca->bank.mat.r_predec->driver_power;
+ ptr_array->power_row_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_row_predecoder_blocks =
+ uca->bank.mat.r_predec->block_power;
+ ptr_array->power_row_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
+ ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_drivers =
+ uca->bank.mat.b_mux_predec->driver_power;
+ ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_blocks =
+ uca->bank.mat.b_mux_predec->block_power;
+ ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
+ ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers =
+ uca->bank.mat.sa_mux_lev_1_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks =
+ uca->bank.mat.sa_mux_lev_1_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_decoders =
+ uca->bank.mat.power_sa_mux_lev_1_decoders;
+ ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers =
+ uca->bank.mat.sa_mux_lev_2_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks =
+ uca->bank.mat.sa_mux_lev_2_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_decoders =
+ uca->bank.mat.power_sa_mux_lev_2_decoders;
+ ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bitlines = uca->bank.mat.power_bitline;
+ ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_sense_amps = uca->bank.mat.power_sa;
+ ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_prechg_eq_drivers =
+ uca->bank.mat.power_bl_precharge_eq_drv;
+ ptr_array->power_prechg_eq_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_output_drivers_at_subarray =
+ uca->bank.mat.power_subarray_out_drv;
+ ptr_array->power_output_drivers_at_subarray.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_comparators = uca->bank.mat.power_comparator;
+ ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ if (is_fa || pure_cam) {
+ ptr_array->power_htree_in_search =
+ uca->bank.htree_in_search->power;
+ ptr_array->power_htree_out_search =
+ uca->bank.htree_out_search->power;
+ ptr_array->power_searchline = uca->bank.mat.power_searchline;
+ ptr_array->power_searchline.searchOp.dynamic *= num_mats;
+ ptr_array->power_searchline_precharge =
+ uca->bank.mat.power_searchline_precharge;
+ ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchlines = uca->bank.mat.power_matchline;
+ ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_precharge =
+ uca->bank.mat.power_matchline_precharge;
+ ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_to_wordline_drv =
+ uca->bank.mat.power_ml_to_ram_wl_drv;
+ }
+
+ ptr_array->activate_energy = uca->activate_energy;
+ ptr_array->read_energy = uca->read_energy;
+ ptr_array->write_energy = uca->write_energy;
+ ptr_array->precharge_energy = uca->precharge_energy;
+ ptr_array->refresh_power = uca->refresh_power;
+ ptr_array->leak_power_subbank_closed_page =
+ uca->leak_power_subbank_closed_page;
+ ptr_array->leak_power_subbank_open_page =
+ uca->leak_power_subbank_open_page;
+ ptr_array->leak_power_request_and_reply_networks =
+ uca->leak_power_request_and_reply_networks;
+
+ ptr_array->precharge_delay = uca->precharge_delay;
+ }
- delete uca;
- return true;
+ delete uca;
+ return true;
}
-bool check_uca_org(uca_org_t & u, min_values_t *minval)
-{
- if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
- return false;
- }
- if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev) {
- return false;
- }
- if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev) {
- return false;
- }
- if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev) {
- return false;
- }
- if (((u.area - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev) {
- return false;
- }
- return true;
+bool check_uca_org(uca_org_t & u, min_values_t *minval) {
+ if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
+ g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage) /
+ minval->min_leakage) * 100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area) / minval->min_area)*100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
}
-bool check_mem_org(mem_array & u, const min_values_t *minval)
-{
- if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
- return false;
- }
- if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev) {
- return false;
- }
- if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev) {
- return false;
- }
- if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev) {
- return false;
- }
- if (((u.area - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev) {
- return false;
- }
- return true;
+bool check_mem_org(mem_array & u, const min_values_t *minval) {
+ if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
+ g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage) /
+ minval->min_leakage) * 100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area) / minval->min_area) * 100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
}
-void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & ulist)
-{
- double cost = 0;
- double min_cost = BIGNUM;
- float d, a, dp, lp, c;
-
- dp = g_ip->dynamic_power_wt;
- lp = g_ip->leakage_power_wt;
- a = g_ip->area_wt;
- d = g_ip->delay_wt;
- c = g_ip->cycle_time_wt;
+void find_optimal_uca(uca_org_t *res, min_values_t * minval,
+ list<uca_org_t> & ulist) {
+ double cost = 0;
+ double min_cost = BIGNUM;
+ float d, a, dp, lp, c;
- if (ulist.empty() == true)
- {
- cout << "ERROR: no valid cache organizations found" << endl;
- exit(0);
- }
+ dp = g_ip->dynamic_power_wt;
+ lp = g_ip->leakage_power_wt;
+ a = g_ip->area_wt;
+ d = g_ip->delay_wt;
+ c = g_ip->cycle_time_wt;
- for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
- {
- if (g_ip->ed == 1)
- {
- cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost)
- {
- min_cost = cost;
- *res = (*(niter));
- }
- }
- else if (g_ip->ed == 2)
- {
- cost = ((niter)->access_time/minval->min_delay)*
- ((niter)->access_time/minval->min_delay)*
- ((niter)->power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost)
- {
- min_cost = cost;
- *res = (*(niter));
- }
+ if (ulist.empty() == true) {
+ cout << "ERROR: no valid cache organizations found" << endl;
+ exit(0);
}
- else
- {
- /*
- * check whether the current organization
- * meets the input deviation constraints
- */
- bool v = check_uca_org(*niter, minval);
- //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
-
- if (v)
- {
- cost = (d * ((niter)->access_time/minval->min_delay) +
- c * ((niter)->cycle_time/minval->min_cyc) +
- dp * ((niter)->power.readOp.dynamic/minval->min_dyn) +
- lp * ((niter)->power.readOp.leakage/minval->min_leakage) +
- a * ((niter)->area/minval->min_area));
- //fprintf(stderr, "cost = %g\n", cost);
-
- if (min_cost > cost) {
- min_cost = cost;
- *res = (*(niter));
- niter = ulist.erase(niter);
- if (niter!=ulist.begin())
- niter--;
+
+ for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end();
+ niter++) {
+ if (g_ip->ed == 1) {
+ cost = ((niter)->access_time / minval->min_delay) *
+ ((niter)->power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ } else if (g_ip->ed == 2) {
+ cost = ((niter)->access_time / minval->min_delay) *
+ ((niter)->access_time / minval->min_delay) *
+ ((niter)->power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ } else {
+ /*
+ * check whether the current organization
+ * meets the input deviation constraints
+ */
+ bool v = check_uca_org(*niter, minval);
+
+ if (v) {
+ cost = (d * ((niter)->access_time / minval->min_delay) +
+ c * ((niter)->cycle_time / minval->min_cyc) +
+ dp * ((niter)->power.readOp.dynamic / minval->min_dyn) +
+ lp *
+ ((niter)->power.readOp.leakage / minval->min_leakage) +
+ a * ((niter)->area / minval->min_area));
+
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ niter = ulist.erase(niter);
+ if (niter != ulist.begin())
+ niter--;
+ }
+ } else {
+ niter = ulist.erase(niter);
+ if (niter != ulist.begin())
+ niter--;
+ }
}
- }
- else {
- niter = ulist.erase(niter);
- if (niter!=ulist.begin())
- niter--;
- }
}
- }
- if (min_cost == BIGNUM)
- {
- cout << "ERROR: no cache organizations met optimization criteria" << endl;
- exit(0);
- }
+ if (min_cost == BIGNUM) {
+ cout << "ERROR: no cache organizations met optimization criteria"
+ << endl;
+ exit(0);
+ }
}
-void filter_tag_arr(const min_values_t * min, list<mem_array *> & list)
-{
- double cost = BIGNUM;
- double cur_cost;
- double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt;
- mem_array * res = NULL;
+void filter_tag_arr(const min_values_t * min, list<mem_array *> & list) {
+ double cost = BIGNUM;
+ double cur_cost;
+ double wt_delay = g_ip->delay_wt;
+ double wt_dyn = g_ip->dynamic_power_wt;
+ double wt_leakage = g_ip->leakage_power_wt;
+ double wt_cyc = g_ip->cycle_time_wt;
+ double wt_area = g_ip->area_wt;
+ mem_array * res = NULL;
- if (list.empty() == true)
- {
- cout << "ERROR: no valid tag organizations found" << endl;
- exit(1);
- }
+ if (list.empty() == true) {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(1);
+ }
- while (list.empty() != true)
- {
- bool v = check_mem_org(*list.back(), min);
- if (v)
- {
- cur_cost = wt_delay * (list.back()->access_time/min->min_delay) +
- wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) +
- wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) +
- wt_area * (list.back()->area/min->min_area) +
- wt_cyc * (list.back()->cycle_time/min->min_cyc);
- }
- else
- {
- cur_cost = BIGNUM;
- }
- if (cur_cost < cost)
- {
- if (res != NULL)
- {
- delete res;
- }
- cost = cur_cost;
- res = list.back();
+ while (list.empty() != true) {
+ bool v = check_mem_org(*list.back(), min);
+ if (v) {
+ cur_cost = wt_delay * (list.back()->access_time / min->min_delay) +
+ wt_dyn * (list.back()->power.readOp.dynamic /
+ min->min_dyn) +
+ wt_leakage * (list.back()->power.readOp.leakage /
+ min->min_leakage) +
+ wt_area * (list.back()->area / min->min_area) +
+ wt_cyc * (list.back()->cycle_time / min->min_cyc);
+ } else {
+ cur_cost = BIGNUM;
+ }
+ if (cur_cost < cost) {
+ if (res != NULL) {
+ delete res;
+ }
+ cost = cur_cost;
+ res = list.back();
+ } else {
+ delete list.back();
+ }
+ list.pop_back();
}
- else
- {
- delete list.back();
+ if (!res) {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(0);
}
- list.pop_back();
- }
- if(!res)
- {
- cout << "ERROR: no valid tag organizations found" << endl;
- exit(0);
- }
- list.push_back(res);
+ list.push_back(res);
}
-void filter_data_arr(list<mem_array *> & curr_list)
-{
- if (curr_list.empty() == true)
- {
- cout << "ERROR: no valid data array organizations found" << endl;
- exit(1);
- }
+void filter_data_arr(list<mem_array *> & curr_list) {
+ if (curr_list.empty() == true) {
+ cout << "ERROR: no valid data array organizations found" << endl;
+ exit(1);
+ }
- list<mem_array *>::iterator iter;
+ list<mem_array *>::iterator iter;
- for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
- {
- mem_array * m = *iter;
+ for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) {
+ mem_array * m = *iter;
- if (m == NULL) exit(1);
+ if (m == NULL) exit(1);
- if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) &&
- ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5))
- {
- delete m;
- iter = curr_list.erase(iter);
- iter --;
+ if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay >
+ 0.5) &&
+ ((m->power.readOp.dynamic - m->arr_min->min_dyn) /
+ m->arr_min->min_dyn > 0.5)) {
+ delete m;
+ iter = curr_list.erase(iter);
+ iter --;
+ }
}
- }
}
@@ -675,210 +688,199 @@ void filter_data_arr(list<mem_array *> & curr_list)
* above results
* 4. Cache model with least cost is picked from sol_list
*/
-void solve(uca_org_t *fin_res)
-{
- bool is_dram = false;
- int pure_ram = g_ip->pure_ram;
- bool pure_cam = g_ip->pure_cam;
-
- init_tech_params(g_ip->F_sz_um, false);
-
-
- list<mem_array *> tag_arr (0);
- list<mem_array *> data_arr(0);
- list<mem_array *>::iterator miter;
- list<uca_org_t> sol_list(1, uca_org_t());
-
- fin_res->tag_array.access_time = 0;
- fin_res->tag_array.Ndwl = 0;
- fin_res->tag_array.Ndbl = 0;
- fin_res->tag_array.Nspd = 0;
- fin_res->tag_array.deg_bl_muxing = 0;
- fin_res->tag_array.Ndsam_lev_1 = 0;
- fin_res->tag_array.Ndsam_lev_2 = 0;
-
-
- // distribute calculate_time() execution to multiple threads
- calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
- pthread_t threads[nthreads];
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].tid = t;
- calc_array[t].pure_ram = pure_ram;
- calc_array[t].pure_cam = pure_cam;
- calc_array[t].data_res = new min_values_t();
- calc_array[t].tag_res = new min_values_t();
- }
-
- bool is_tag;
- uint32_t ram_cell_tech_type;
-
- // If it's a cache, first calculate the area, delay and power for all tag array partitions.
- if (!(pure_ram||pure_cam||g_ip->fully_assoc))
- { //cache
- is_tag = true;
- ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
- is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
- init_tech_params(g_ip->F_sz_um, is_tag);
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].is_tag = is_tag;
- calc_array[t].is_main_mem = false;
- calc_array[t].Nspd_min = 0.125;
- pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+void solve(uca_org_t *fin_res) {
+ bool is_dram = false;
+ int pure_ram = g_ip->pure_ram;
+ bool pure_cam = g_ip->pure_cam;
+
+ init_tech_params(g_ip->F_sz_um, false);
+
+
+ list<mem_array *> tag_arr (0);
+ list<mem_array *> data_arr(0);
+ list<mem_array *>::iterator miter;
+ list<uca_org_t> sol_list(1, uca_org_t());
+
+ fin_res->tag_array.access_time = 0;
+ fin_res->tag_array.Ndwl = 0;
+ fin_res->tag_array.Ndbl = 0;
+ fin_res->tag_array.Nspd = 0;
+ fin_res->tag_array.deg_bl_muxing = 0;
+ fin_res->tag_array.Ndsam_lev_1 = 0;
+ fin_res->tag_array.Ndsam_lev_2 = 0;
+
+
+ // distribute calculate_time() execution to multiple threads
+ calc_time_mt_wrapper_struct * calc_array =
+ new calc_time_mt_wrapper_struct[nthreads];
+ pthread_t threads[nthreads];
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].tid = t;
+ calc_array[t].pure_ram = pure_ram;
+ calc_array[t].pure_cam = pure_cam;
+ calc_array[t].data_res = new min_values_t();
+ calc_array[t].tag_res = new min_values_t();
}
- for (uint32_t t = 0; t < nthreads; t++)
- {
- pthread_join(threads[t], NULL);
- }
+ bool is_tag;
+ uint32_t ram_cell_tech_type;
+
+ // If it's a cache, first calculate the area, delay and power for all tag array partitions.
+ if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { //cache
+ is_tag = true;
+ ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) ||
+ (ram_cell_tech_type == comm_dram));
+ init_tech_params(g_ip->F_sz_um, is_tag);
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = false;
+ calc_array[t].Nspd_min = 0.125;
+#ifndef DEBUG
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
+ (void *)(&(calc_array[t])));
+#else
+ calc_time_mt_wrapper((void *)(&(calc_array[t])));
+#endif
+ }
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].data_arr.sort(mem_array::lt);
- data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- calc_array[t].tag_arr.sort(mem_array::lt);
- tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
+#ifndef DEBUG
+ for (uint32_t t = 0; t < nthreads; t++) {
+ pthread_join(threads[t], NULL);
+ }
+#endif
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
+ calc_array[t].tag_arr.sort(mem_array::lt);
+ tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
+ }
}
- }
- // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
-// if (!g_ip->fully_assoc)
-// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion
+ // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
+ // in the new cacti, cam, fully_associative cache are processed as single array in the data portion
is_tag = false;
ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
init_tech_params(g_ip->F_sz_um, is_tag);
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].is_tag = is_tag;
- calc_array[t].is_main_mem = g_ip->is_main_mem;
- if (!(pure_cam||g_ip->fully_assoc))
- {
- calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8);
- }
- else
- {
- calc_array[t].Nspd_min = 1;
- }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = g_ip->is_main_mem;
+ if (!(pure_cam || g_ip->fully_assoc)) {
+ calc_array[t].Nspd_min = (double)(g_ip->out_w) /
+ (double)(g_ip->block_sz * 8);
+ } else {
+ calc_array[t].Nspd_min = 1;
+ }
- pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+#ifndef DEBUG
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
+ (void *)(&(calc_array[t])));
+#else
+ calc_time_mt_wrapper((void *)(&(calc_array[t])));
+#endif
}
- for (uint32_t t = 0; t < nthreads; t++)
- {
- pthread_join(threads[t], NULL);
+#ifndef DEBUG
+ for (uint32_t t = 0; t < nthreads; t++) {
+ pthread_join(threads[t], NULL);
}
+#endif
data_arr.clear();
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].data_arr.sort(mem_array::lt);
- data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- }
-// }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- min_values_t * d_min = new min_values_t();
- min_values_t * t_min = new min_values_t();
- min_values_t * cache_min = new min_values_t();
+ }
- for (uint32_t t = 0; t < nthreads; t++)
- {
- d_min->update_min_values(calc_array[t].data_res);
- t_min->update_min_values(calc_array[t].tag_res);
- }
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- (*miter)->arr_min = d_min;
- }
+ min_values_t * d_min = new min_values_t();
+ min_values_t * t_min = new min_values_t();
+ min_values_t * cache_min = new min_values_t();
- //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n";
- filter_data_arr(data_arr);
- if(!(pure_ram||pure_cam||g_ip->fully_assoc))
- {
- filter_tag_arr(t_min, tag_arr);
- }
- //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n";
+ for (uint32_t t = 0; t < nthreads; t++) {
+ d_min->update_min_values(calc_array[t].data_res);
+ t_min->update_min_values(calc_array[t].tag_res);
+ }
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ (*miter)->arr_min = d_min;
+ }
- if (pure_ram||pure_cam||g_ip->fully_assoc)
- {
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- uca_org_t & curr_org = sol_list.back();
- curr_org.tag_array2 = NULL;
- curr_org.data_array2 = (*miter);
+ filter_data_arr(data_arr);
+ if (!(pure_ram || pure_cam || g_ip->fully_assoc)) {
+ filter_tag_arr(t_min, tag_arr);
+ }
- curr_org.find_delay();
- curr_org.find_energy();
- curr_org.find_area();
- curr_org.find_cyc();
+ if (pure_ram || pure_cam || g_ip->fully_assoc) {
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = NULL;
+ curr_org.data_array2 = (*miter);
- //update min values for the entire cache
- cache_min->update_min_values(curr_org);
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
- sol_list.push_back(uca_org_t());
- }
- }
- else
- {
- while (tag_arr.empty() != true)
- {
- mem_array * arr_temp = (tag_arr.back());
- //delete tag_arr.back();
- tag_arr.pop_back();
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- uca_org_t & curr_org = sol_list.back();
- curr_org.tag_array2 = arr_temp;
- curr_org.data_array2 = (*miter);
+ sol_list.push_back(uca_org_t());
+ }
+ } else {
+ while (tag_arr.empty() != true) {
+ mem_array * arr_temp = (tag_arr.back());
+ tag_arr.pop_back();
- curr_org.find_delay();
- curr_org.find_energy();
- curr_org.find_area();
- curr_org.find_cyc();
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = arr_temp;
+ curr_org.data_array2 = (*miter);
- //update min values for the entire cache
- cache_min->update_min_values(curr_org);
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
- sol_list.push_back(uca_org_t());
- }
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
+
+ sol_list.push_back(uca_org_t());
+ }
+ }
}
- }
- sol_list.pop_back();
+ sol_list.pop_back();
- find_optimal_uca(fin_res, cache_min, sol_list);
+ find_optimal_uca(fin_res, cache_min, sol_list);
- sol_list.clear();
+ sol_list.clear();
- for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
- {
- if (*miter != fin_res->data_array2)
- {
- delete *miter;
+ for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) {
+ if (*miter != fin_res->data_array2) {
+ delete *miter;
+ }
}
- }
- data_arr.clear();
+ data_arr.clear();
- for (uint32_t t = 0; t < nthreads; t++)
- {
- delete calc_array[t].data_res;
- delete calc_array[t].tag_res;
- }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ delete calc_array[t].data_res;
+ delete calc_array[t].tag_res;
+ }
- delete [] calc_array;
- delete cache_min;
- delete d_min;
- delete t_min;
+ delete [] calc_array;
+ delete cache_min;
+ delete d_min;
+ delete t_min;
}
void update(uca_org_t *fin_res)
@@ -886,7 +888,14 @@ void update(uca_org_t *fin_res)
if(fin_res->tag_array2)
{
init_tech_params(g_ip->F_sz_um,true);
- DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam,
+ fin_res->tag_array2->Nspd,
+ fin_res->tag_array2->Ndwl,
+ fin_res->tag_array2->Ndbl,
+ fin_res->tag_array2->Ndcm,
+ fin_res->tag_array2->Ndsam_lev_1,
+ fin_res->tag_array2->Ndsam_lev_2,
+ g_ip->is_main_mem);
if(tag_arr_dyn_p.is_valid)
{
UCA * tag_arr = new UCA(tag_arr_dyn_p);
@@ -894,12 +903,20 @@ void update(uca_org_t *fin_res)
}
else
{
- cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback"
+ << endl;
exit(1);
}
}
init_tech_params(g_ip->F_sz_um,false);
- DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam,
+ fin_res->data_array2->Nspd,
+ fin_res->data_array2->Ndwl,
+ fin_res->data_array2->Ndbl,
+ fin_res->data_array2->Ndcm,
+ fin_res->data_array2->Ndsam_lev_1,
+ fin_res->data_array2->Ndsam_lev_2,
+ g_ip->is_main_mem);
if(data_arr_dyn_p.is_valid)
{
UCA * data_arr = new UCA(data_arr_dyn_p);
@@ -907,7 +924,8 @@ void update(uca_org_t *fin_res)
}
else
{
- cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback"
+ << endl;
exit(1);
}
diff --git a/ext/mcpat/cacti/Ucache.h b/ext/mcpat/cacti/Ucache.h
index 20985fff1..87836adcd 100644
--- a/ext/mcpat/cacti/Ucache.h
+++ b/ext/mcpat/cacti/Ucache.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,9 +40,8 @@
#include "nuca.h"
#include "router.h"
-class min_values_t
-{
- public:
+class min_values_t {
+public:
double min_delay;
double min_dyn;
double min_leakage;
@@ -58,17 +58,16 @@ class min_values_t
-struct solution
-{
- int tag_array_index;
- int data_array_index;
- list<mem_array *>::iterator tag_array_iter;
- list<mem_array *>::iterator data_array_iter;
- double access_time;
- double cycle_time;
- double area;
- double efficiency;
- powerDef total_power;
+struct solution {
+ int tag_array_index;
+ int data_array_index;
+ list<mem_array *>::iterator tag_array_iter;
+ list<mem_array *>::iterator data_array_iter;
+ double access_time;
+ double cycle_time;
+ double area;
+ double efficiency;
+ powerDef total_power;
};
@@ -94,20 +93,19 @@ void solve(uca_org_t *fin_res);
void init_tech_params(double tech, bool is_tag);
-struct calc_time_mt_wrapper_struct
-{
- uint32_t tid;
- bool is_tag;
- bool pure_ram;
- bool pure_cam;
- bool is_main_mem;
- double Nspd_min;
+struct calc_time_mt_wrapper_struct {
+ uint32_t tid;
+ bool is_tag;
+ bool pure_ram;
+ bool pure_cam;
+ bool is_main_mem;
+ double Nspd_min;
- min_values_t * data_res;
- min_values_t * tag_res;
+ min_values_t * data_res;
+ min_values_t * tag_res;
- list<mem_array *> data_arr;
- list<mem_array *> tag_arr;
+ list<mem_array *> data_arr;
+ list<mem_array *> tag_arr;
};
void *calc_time_mt_wrapper(void * void_obj);
diff --git a/ext/mcpat/cacti/arbiter.cc b/ext/mcpat/cacti/arbiter.cc
index 6664abf13..8106d2025 100644
--- a/ext/mcpat/cacti/arbiter.cc
+++ b/ext/mcpat/cacti/arbiter.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,95 +37,107 @@ Arbiter::Arbiter(
double flit_size_,
double output_len,
TechnologyParameter::DeviceType *dt
- ):R(n_req), flit_size(flit_size_),
- o_len (output_len), deviceType(dt)
-{
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- Vdd = dt->Vdd;
- double technology = g_ip->F_sz_um;
- NTn1 = 13.5*technology/2;
- PTn1 = 76*technology/2;
- NTn2 = 13.5*technology/2;
- PTn2 = 76*technology/2;
- NTi = 12.5*technology/2;
- PTi = 25*technology/2;
- NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/
- PTtr = 20*technology/2; /* pmos tr. length*/
+ ): R(n_req), flit_size(flit_size_),
+ o_len (output_len), deviceType(dt) {
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ Vdd = dt->Vdd;
+ double technology = g_ip->F_sz_um;
+ NTn1 = 13.5 * technology / 2;
+ PTn1 = 76 * technology / 2;
+ NTn2 = 13.5 * technology / 2;
+ PTn2 = 76 * technology / 2;
+ NTi = 12.5 * technology / 2;
+ PTi = 25 * technology / 2;
+ NTtr = 10 * technology / 2; /*Transmission gate's nmos tr. length*/
+ PTtr = 20 * technology / 2; /* pmos tr. length*/
}
-Arbiter::~Arbiter(){}
+Arbiter::~Arbiter() {}
double
Arbiter::arb_req() {
- double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
- gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
- drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
- return temp;
+ double temp = ((R - 1) * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)) + 2 *
+ gate_C(NTn2, 0) +
+ gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
+ drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) +
+ drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
+ return temp;
}
double
Arbiter::arb_pri() {
- double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
- of flip-flop is ignored */
- return temp;
+ /* switching capacitance of flip-flop is ignored */
+ double temp = 2 * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0));
+ return temp;
}
double
Arbiter::arb_grant() {
- double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
- return temp;
+ double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
+ drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
+ return temp;
}
double
Arbiter::arb_int() {
- double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
- 2*gate_C(NTn2, 0) + gate_C(PTn2, 0));
- return temp;
+ double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
+ drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
+ 2 * gate_C(NTn2, 0) + gate_C(PTn2, 0));
+ return temp;
}
void
Arbiter::compute_power() {
- power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
- arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
- double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
- double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
- double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
- double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
- double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
- double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
- power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
- power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
+ power.readOp.dynamic = (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() *
+ Vdd * Vdd / 2 +
+ arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd *
+ Vdd);
+ double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
+ min_w_pmos * PTn1 * 2, 2, nor);
+ double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R,
+ min_w_pmos * PTn2 * R, 2, nor);
+ double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi,
+ min_w_pmos * PTi, 1, inv);
+ double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
+ min_w_pmos * PTn1 * 2, 2, nor);
+ double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R,
+ min_w_pmos * PTn2 * R, 2, nor);
+ double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi,
+ min_w_pmos * PTi, 1, inv);
+ //FIXME include priority table leakage
+ power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * Vdd;
+ power.readOp.gate_leakage = nor1_leak_gate * Vdd + nor2_leak_gate * Vdd +
+ not_leak_gate * Vdd;
}
double //wire cap with triple spacing
Arbiter::Cw3(double length) {
- Wire wc(g_ip->wt, length, 1, 3, 3);
- double temp = (wc.wire_cap(length,true));
- return temp;
+ Wire wc(g_ip->wt, length, 1, 3, 3);
+ double temp = (wc.wire_cap(length, true));
+ return temp;
}
double
Arbiter::crossbar_ctrline() {
- double temp = (Cw3(o_len * 1e-6 /* m */) +
- drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
- gate_C(NTi, 0) + gate_C(PTi, 0));
- return temp;
+ double temp = (Cw3(o_len * 1e-6 /* m */) +
+ drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
+ gate_C(NTi, 0) + gate_C(PTi, 0));
+ return temp;
}
double
Arbiter::transmission_buf_ctrcap() {
- double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0);
- return temp;
+ double temp = gate_C(NTtr, 0) + gate_C(PTtr, 0);
+ return temp;
}
-void Arbiter::print_arbiter()
-{
- cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
- cout << "Flit size : " << flit_size << " bits" << endl;
- cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
- cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
+void Arbiter::print_arbiter() {
+ cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
+ cout << "Flit size : " << flit_size << " bits" << endl;
+ cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
+ cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
}
diff --git a/ext/mcpat/cacti/bank.cc b/ext/mcpat/cacti/bank.cc
index a18c7f1ed..b4fd95090 100755..100644
--- a/ext/mcpat/cacti/bank.cc
+++ b/ext/mcpat/cacti/bank.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,163 +37,174 @@
#include "bank.h"
Bank::Bank(const DynamicParameter & dyn_p):
- dp(dyn_p), mat(dp),
- num_addr_b_mat(dyn_p.number_addr_bits_mat),
- num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir)
-{
- int RWP;
- int ERP;
- int EWP;
- int SCHP;
-
- if (dp.use_inp_params)
- {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else
- {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
- }
-
- int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
- int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
- int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
- int searchinbits;
- int searchoutbits;
-
- if (dp.fully_assoc || dp.pure_cam)
- {
- datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
- dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
- searchinbits = dp.num_si_b_bank_per_port * SCHP;
- searchoutbits = dp.num_so_b_bank_per_port * SCHP;
- }
-
- if (!(dp.fully_assoc || dp.pure_cam))
- {
- if (g_ip->fast_access && dp.is_tag == false)
- {
- dataoutbits *= g_ip->data_assoc;
+ dp(dyn_p), mat(dp),
+ num_addr_b_mat(dyn_p.number_addr_bits_mat),
+ num_mats_hor_dir(dyn_p.num_mats_h_dir),
+ num_mats_ver_dir(dyn_p.num_mats_v_dir) {
+ int RWP;
+ int ERP;
+ int EWP;
+ int SCHP;
+
+ if (dp.use_inp_params) {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ } else {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
}
- htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
- htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
- htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+ int total_addrbits = (dp.number_addr_bits_mat +
+ dp.number_subbanks_decode) * (RWP + ERP + EWP);
+ int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
+ int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
+ int searchinbits;
+ int searchoutbits;
+
+ if (dp.fully_assoc || dp.pure_cam) {
+ datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
+ dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
+ searchinbits = dp.num_si_b_bank_per_port * SCHP;
+ searchoutbits = dp.num_so_b_bank_per_port * SCHP;
+ }
+
+ if (!(dp.fully_assoc || dp.pure_cam)) {
+ if (g_ip->fast_access && dp.is_tag == false) {
+ dataoutbits *= g_ip->data_assoc;
+ }
+
+ htree_in_add = new Htree2(g_ip->wt, (double) mat.area.w,
+ (double)mat.area.h,
+ total_addrbits, datainbits, 0, dataoutbits,
+ 0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
+ Add_htree);
+ htree_in_data = new Htree2(g_ip->wt, (double) mat.area.w,
+ (double)mat.area.h,
+ total_addrbits, datainbits, 0, dataoutbits,
+ 0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
+ Data_in_htree);
+ htree_out_data = new Htree2(g_ip->wt, (double) mat.area.w,
+ (double)mat.area.h,
+ total_addrbits, datainbits, 0, dataoutbits,
+ 0, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_out_htree);
// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
-// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
- }
- else
- {
- htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
- htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
- htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
- htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true);
- htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true);
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
- }
-
- num_addr_b_row_dec = _log2(mat.subarray.num_rows);
- num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
- num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
+// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+ } else {
+ htree_in_add =
+ new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Add_htree);
+ htree_in_data =
+ new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_in_htree);
+ htree_out_data =
+ new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_out_htree);
+ htree_in_search =
+ new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_in_htree, true, true);
+ htree_out_search =
+ new Htree2 (g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_out_htree, true);
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+ }
+
+ num_addr_b_row_dec = _log2(mat.subarray.num_rows);
+ num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
+ num_addr_b_routed_to_mat_for_rd_or_wr =
+ num_addr_b_mat - num_addr_b_row_dec;
}
-Bank::~Bank()
-{
- delete htree_in_add;
- delete htree_out_data;
- delete htree_in_data;
- if (dp.fully_assoc || dp.pure_cam)
- {
- delete htree_in_search;
- delete htree_out_search;
- }
+Bank::~Bank() {
+ delete htree_in_add;
+ delete htree_out_data;
+ delete htree_in_data;
+ if (dp.fully_assoc || dp.pure_cam) {
+ delete htree_in_search;
+ delete htree_out_search;
+ }
}
-double Bank::compute_delays(double inrisetime)
-{
- return mat.compute_delays(inrisetime);
+double Bank::compute_delays(double inrisetime) {
+ return mat.compute_delays(inrisetime);
}
-void Bank::compute_power_energy()
-{
- mat.compute_power_energy();
+void Bank::compute_power_energy() {
+ mat.compute_power_energy();
- if (!(dp.fully_assoc || dp.pure_cam))
- {
- power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
- power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
- power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
+ if (!(dp.fully_assoc || dp.pure_cam)) {
+ power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
+ power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
+ power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
- power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
- power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
+ power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
+ power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
- power.readOp.leakage += htree_in_add->power.readOp.leakage;
- power.readOp.leakage += htree_in_data->power.readOp.leakage;
- power.readOp.leakage += htree_out_data->power.readOp.leakage;
- power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
- }
- else
- {
+ power.readOp.leakage += htree_in_add->power.readOp.leakage;
+ power.readOp.leakage += htree_in_data->power.readOp.leakage;
+ power.readOp.leakage += htree_out_data->power.readOp.leakage;
+ power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
+ } else {
- power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
- power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
- power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
+ power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
+ power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
+ power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
- power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
- power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
- mat.power_sa.searchOp.dynamic +
- mat.power_bitline.searchOp.dynamic +
- mat.power_subarray_out_drv.searchOp.dynamic+
- mat.ml_to_ram_wl_drv->power.readOp.dynamic;
+ power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
+ power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
+ mat.power_sa.searchOp.dynamic +
+ mat.power_bitline.searchOp.dynamic +
+ mat.power_subarray_out_drv.searchOp.dynamic +
+ mat.ml_to_ram_wl_drv->power.readOp.dynamic;
- power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
- power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
+ power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
+ power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
- power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
- power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
+ power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
+ power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
- power.readOp.leakage += htree_in_add->power.readOp.leakage;
- power.readOp.leakage += htree_in_data->power.readOp.leakage;
- power.readOp.leakage += htree_out_data->power.readOp.leakage;
- power.readOp.leakage += htree_in_search->power.readOp.leakage;
- power.readOp.leakage += htree_out_search->power.readOp.leakage;
+ power.readOp.leakage += htree_in_add->power.readOp.leakage;
+ power.readOp.leakage += htree_in_data->power.readOp.leakage;
+ power.readOp.leakage += htree_out_data->power.readOp.leakage;
+ power.readOp.leakage += htree_in_search->power.readOp.leakage;
+ power.readOp.leakage += htree_out_search->power.readOp.leakage;
- power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
- }
+ }
}
diff --git a/ext/mcpat/cacti/bank.h b/ext/mcpat/cacti/bank.h
index 153609ab0..49151f050 100755
--- a/ext/mcpat/cacti/bank.h
+++ b/ext/mcpat/cacti/bank.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -39,9 +40,8 @@
#include "htree2.h"
#include "mat.h"
-class Bank : public Component
-{
- public:
+class Bank : public Component {
+public:
Bank(const DynamicParameter & dyn_p);
~Bank();
double compute_delays(double inrisetime); // return outrisetime
diff --git a/ext/mcpat/cacti/basic_circuit.cc b/ext/mcpat/cacti/basic_circuit.cc
index 6efd5dd27..00ea3ce9d 100644
--- a/ext/mcpat/cacti/basic_circuit.cc
+++ b/ext/mcpat/cacti/basic_circuit.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,59 +40,48 @@
#include "basic_circuit.h"
#include "parameter.h"
-uint32_t _log2(uint64_t num)
-{
- uint32_t log2 = 0;
+uint32_t _log2(uint64_t num) {
+ uint32_t log2 = 0;
- if (num == 0)
- {
- std::cerr << "log0?" << std::endl;
- exit(1);
- }
+ if (num == 0) {
+ std::cerr << "log0?" << std::endl;
+ exit(1);
+ }
- while (num > 1)
- {
- num = (num >> 1);
- log2++;
- }
+ while (num > 1) {
+ num = (num >> 1);
+ log2++;
+ }
- return log2;
+ return log2;
}
-bool is_pow2(int64_t val)
-{
- if (val <= 0)
- {
- return false;
- }
- else if (val == 1)
- {
- return true;
- }
- else
- {
- return (_log2(val) != _log2(val-1));
- }
+bool is_pow2(int64_t val) {
+ if (val <= 0) {
+ return false;
+ } else if (val == 1) {
+ return true;
+ } else {
+ return (_log2(val) != _log2(val - 1));
+ }
}
-int powers (int base, int n)
-{
- int i, p;
+int powers (int base, int n) {
+ int i, p;
- p = 1;
- for (i = 1; i <= n; ++i)
- p *= base;
- return p;
+ p = 1;
+ for (i = 1; i <= n; ++i)
+ p *= base;
+ return p;
}
/*----------------------------------------------------------------------*/
-double logtwo (double x)
-{
- assert(x > 0);
- return ((double) (log (x) / log (2.0)));
+double logtwo (double x) {
+ assert(x > 0);
+ return ((double) (log (x) / log (2.0)));
}
/*----------------------------------------------------------------------*/
@@ -102,28 +92,20 @@ double gate_C(
double wirelength,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if (_is_dram && _is_cell)
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if (_is_dram && _is_wl_tr)
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if (!_is_dram && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+ bool _is_wl_tr) {
+ const TechnologyParameter::DeviceType * dt;
+
+ if (_is_dram && _is_cell) {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ } else if (_is_dram && _is_wl_tr) {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ } else if (!_is_dram && _is_cell) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
}
@@ -134,29 +116,21 @@ double gate_C_pass(
double wirelength, // poly wire length going to gate in lambda
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- // v5.0
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+ bool _is_wl_tr) {
+ // v5.0
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell)) {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ } else if ((_is_dram) && (_is_wl_tr)) {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ } else if ((!_is_dram) && _is_cell) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
}
@@ -169,83 +143,67 @@ double drain_C_(
double fold_dimension,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- double w_folded_tr;
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; // DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; // DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double c_junc_area = dt->C_junc;
- double c_junc_sidewall = dt->C_junc_sidewall;
- double c_fringe = 2*dt->C_fringe;
- double c_overlap = 2*dt->C_overlap;
- double drain_C_metal_connecting_folded_tr = 0;
-
- // determine the width of the transistor after folding (if it is getting folded)
- if (next_arg_thresh_folding_width_or_height_cell == 0)
- { // interpret fold_dimension as the the folding width threshold
- // i.e. the value of transistor width above which the transistor gets folded
- w_folded_tr = fold_dimension;
- }
- else
- { // interpret fold_dimension as the height of the cell that this transistor is part of.
- double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
- // TODO : w_folded_tr must come from Component::compute_gate_area()
- double ratio_p_to_n = 2.0 / (2.0 + 1.0);
- if (nchannel)
- {
- w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ bool _is_wl_tr) {
+ double w_folded_tr;
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell)) {
+ dt = &g_tp.dram_acc; // DRAM cell access transistor
+ } else if ((_is_dram) && (_is_wl_tr)) {
+ dt = &g_tp.dram_wl; // DRAM wordline transistor
+ } else if ((!_is_dram) && _is_cell) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ double c_junc_area = dt->C_junc;
+ double c_junc_sidewall = dt->C_junc_sidewall;
+ double c_fringe = 2 * dt->C_fringe;
+ double c_overlap = 2 * dt->C_overlap;
+ double drain_C_metal_connecting_folded_tr = 0;
+
+ // determine the width of the transistor after folding (if it is getting folded)
+ if (next_arg_thresh_folding_width_or_height_cell == 0) {
+ // interpret fold_dimension as the folding width threshold
+ // i.e. the value of transistor width above which the transistor gets folded
+ w_folded_tr = fold_dimension;
+ } else { // interpret fold_dimension as the height of the cell that this transistor is part of.
+ double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
+ // TODO : w_folded_tr must come from Component::compute_gate_area()
+ double ratio_p_to_n = 2.0 / (2.0 + 1.0);
+ if (nchannel) {
+ w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ } else {
+ w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ }
}
- else
- {
- w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ int num_folded_tr = (int) (ceil(width / w_folded_tr));
+
+ if (num_folded_tr < 2) {
+ w_folded_tr = width;
}
- }
- int num_folded_tr = (int) (ceil(width / w_folded_tr));
-
- if (num_folded_tr < 2)
- {
- w_folded_tr = width;
- }
-
- double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
- (stack - 1) * g_tp.spacing_poly_to_poly;
- double drain_h_for_sidewall = w_folded_tr;
- double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
- if (num_folded_tr > 1)
- {
- total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
- (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
-
- if (num_folded_tr%2 == 0)
- {
- drain_h_for_sidewall = 0;
+
+ double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
+ (stack - 1) * g_tp.spacing_poly_to_poly;
+ double drain_h_for_sidewall = w_folded_tr;
+ double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
+ if (num_folded_tr > 1) {
+ total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
+ (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
+
+ if (num_folded_tr % 2 == 0) {
+ drain_h_for_sidewall = 0;
+ }
+ total_drain_height_for_cap_wrt_gate *= num_folded_tr;
+ drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
}
- total_drain_height_for_cap_wrt_gate *= num_folded_tr;
- drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
- }
- double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
- double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
- double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
+ double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
+ double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
+ double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
- return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
+ return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
}
@@ -255,29 +213,21 @@ double tr_R_on(
int stack,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
- return (stack * restrans / width);
+ bool _is_wl_tr) {
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell)) {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ } else if ((_is_dram) && (_is_wl_tr)) {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ } else if ((!_is_dram) && _is_cell) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+ return (stack * restrans / width);
}
@@ -291,46 +241,34 @@ double R_to_w(
int nchannel,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && (_is_cell))
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
- return (restrans / res);
+ bool _is_wl_tr) {
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell)) {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ } else if ((_is_dram) && (_is_wl_tr)) {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ } else if ((!_is_dram) && (_is_cell)) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+ return (restrans / res);
}
double pmos_to_nmos_sz_ratio(
bool _is_dram,
- bool _is_wl_tr)
-{
- double p_to_n_sizing_ratio;
- if ((_is_dram) && (_is_wl_tr))
- { //DRAM wordline transistor
- p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
- }
- else
- { //DRAM or SRAM all other transistors
- p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
- }
- return p_to_n_sizing_ratio;
+ bool _is_wl_tr) {
+ double p_to_n_sizing_ratio;
+ if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
+ } else { //DRAM or SRAM all other transistors
+ p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
+ }
+ return p_to_n_sizing_ratio;
}
@@ -340,26 +278,23 @@ double horowitz(
double tf, // time constant of gate
double vs1, // threshold voltage
double vs2, // threshold voltage
- int rise) // whether input rises or fall
-{
- if (inputramptime == 0 && vs1 == vs2)
- {
- return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
- }
- double a, b, td;
-
- a = inputramptime / tf;
- if (rise == RISE)
- {
- b = 0.5;
- td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2));
- }
- else
- {
- b = 0.4;
- td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2));
- }
- return (td);
+ int rise) { // whether input rises or falls
+ if (inputramptime == 0 && vs1 == vs2) {
+ return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
+ }
+ double a, b, td;
+
+ a = inputramptime / tf;
+ if (rise == RISE) {
+ b = 0.5;
+ td = tf * sqrt(log(vs1) * log(vs1) + 2 * a * b * (1.0 - vs1)) +
+ tf * (log(vs1) - log(vs2));
+ } else {
+ b = 0.4;
+ td = tf * sqrt(log(1.0 - vs1) * log(1.0 - vs1) + 2 * a * b * (vs1)) +
+ tf * (log(1.0 - vs1) - log(1.0 - vs2));
+ }
+ return (td);
}
double cmos_Ileak(
@@ -367,23 +302,17 @@ double cmos_Ileak(
double pWidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
}
@@ -391,107 +320,81 @@ double simplified_nmos_leakage(
double nwidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nwidth * dt->I_off_n;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nwidth * dt->I_off_n;
}
-int factorial(int n, int m)
-{
- int fa = m, i;
- for (i=m+1; i<=n; i++)
- fa *=i;
- return fa;
+int factorial(int n, int m) {
+ int fa = m, i;
+ for (i = m + 1; i <= n; i++)
+ fa *= i;
+ return fa;
}
-int combination(int n, int m)
-{
- int ret;
- ret = factorial(n, m+1) / factorial(n - m);
- return ret;
+int combination(int n, int m) {
+ int ret;
+ ret = factorial(n, m + 1) / factorial(n - m);
+ return ret;
}
double simplified_pmos_leakage(
double pwidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return pwidth * dt->I_off_p;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return pwidth * dt->I_off_p;
}
double cmos_Ig_n(
double nWidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nWidth*dt->I_g_on_n;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nWidth*dt->I_g_on_n;
}
double cmos_Ig_p(
double pWidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return pWidth*dt->I_g_on_p;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return pWidth*dt->I_g_on_p;
}
double cmos_Isub_leakage(
@@ -502,98 +405,93 @@ double cmos_Isub_leakage(
bool _is_dram,
bool _is_cell,
bool _is_wl_tr,
- enum Half_net_topology topo)
-{
- assert (fanin>=1);
- double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
- double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
- double Isub=0;
+ enum Half_net_topology topo) {
+ assert (fanin >= 1);
+ double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
+ double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
+ double Isub = 0;
int num_states;
int num_off_tx;
num_states = int(pow(2.0, fanin));
- switch (g_type)
- {
+ switch (g_type) {
case nmos:
- if (fanin==1)
- {
- Isub = nmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
- }
- else
- {
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
- {
- //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
- }
- Isub /=num_states;
+ if (fanin == 1) {
+ Isub = nmos_leak / num_states;
+ } else {
+ if (topo == parallel) {
+ //only when all tx are off, leakage power is non-zero.
+ //The probability of this state is 1/num_states
+ Isub = nmos_leak * fanin / num_states;
+ } else {
+ for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
+ //when num_off_tx ==0 there is no leakage power
+ Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
+ (num_off_tx - 1)) *
+ combination(fanin, num_off_tx);
}
+ Isub /= num_states;
+ }
}
break;
case pmos:
- if (fanin==1)
- {
- Isub = pmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
- }
- else
- {
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
- {
- //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
- }
- Isub /=num_states;
+ if (fanin == 1) {
+ Isub = pmos_leak / num_states;
+ } else {
+ if (topo == parallel) {
+ //only when all tx are off, leakage power is non-zero.
+ //The probability of this state is 1/num_states
+ Isub = pmos_leak * fanin / num_states;
+ } else {
+ for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
+ //when num_off_tx ==0 there is no leakage power
+ Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
+ (num_off_tx - 1)) *
+ combination(fanin, num_off_tx);
}
+ Isub /= num_states;
+ }
}
break;
case inv:
- Isub = (nmos_leak + pmos_leak)/2;
+ Isub = (nmos_leak + pmos_leak) / 2;
break;
case nand:
- Isub += fanin*pmos_leak;//the pullup network
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network
- {
- //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ Isub += fanin * pmos_leak;//the pullup network
+ for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
+ // the pulldown network
+ Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
+ (num_off_tx - 1)) *
+ combination(fanin, num_off_tx);
}
- Isub /=num_states;
+ Isub /= num_states;
break;
case nor:
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network
- {
- //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
+ // the pullup network
+ Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
+ (num_off_tx - 1)) *
+ combination(fanin, num_off_tx);
}
- Isub += fanin*nmos_leak;//the pulldown network
- Isub /=num_states;
+ Isub += fanin * nmos_leak;//the pulldown network
+ Isub /= num_states;
break;
case tri:
- Isub += (nmos_leak + pmos_leak)/2;//enabled
- Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power
- Isub /=2;
+ Isub += (nmos_leak + pmos_leak) / 2;//enabled
+ //disabled upper bound of leakage power
+ Isub += nmos_leak * UNI_LEAK_STACK_FACTOR;
+ Isub /= 2;
break;
case tg:
- Isub = (nmos_leak + pmos_leak)/2;
+ Isub = (nmos_leak + pmos_leak) / 2;
break;
default:
assert(0);
break;
- }
+ }
return Isub;
}
@@ -607,120 +505,116 @@ double cmos_Ig_leakage(
bool _is_dram,
bool _is_cell,
bool _is_wl_tr,
- enum Half_net_topology topo)
-{
- assert (fanin>=1);
- double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
- double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
- double Ig_on=0;
- int num_states;
- int num_on_tx;
-
- num_states = int(pow(2.0, fanin));
-
- switch (g_type)
- {
- case nmos:
- if (fanin==1)
- {
- Ig_on = nmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
- }
- }
- else
- {
- Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
- //num_on_tx is the number of on tx
- for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
- }
- Ig_on /=num_states;
- }
- }
- break;
- case pmos:
- if (fanin==1)
- {
- Ig_on = pmos_leak/num_states;
+ enum Half_net_topology topo) {
+ assert (fanin >= 1);
+ double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
+ double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
+ double Ig_on = 0;
+ int num_states;
+ int num_on_tx;
+
+ num_states = int(pow(2.0, fanin));
+
+ switch (g_type) {
+ case nmos:
+ if (fanin == 1) {
+ Ig_on = nmos_leak / num_states;
+ } else {
+ if (topo == parallel) {
+ for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
+ Ig_on += nmos_leak * combination(fanin, num_on_tx) *
+ num_on_tx;
}
- else
- {
- if (topo==parallel)
- {
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
- {
- Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
- }
- }
- else
- {
- Ig_on += pmos_leak * fanin;//pull down network when all TXs are on.
- //num_on_tx is the number of on tx
- for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
- {
- Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
- }
- Ig_on /=num_states;
- }
+ } else {
+ //pull down network when all TXs are on.
+ Ig_on += nmos_leak * fanin;
+ //num_on_tx is the number of on tx
+ for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
+ //when num_on_tx=[1,n-1]
+ //TODO: this is an approximation now, a precise computation
+ //will be very complicated.
+ Ig_on += nmos_leak * combination(fanin, num_on_tx) *
+ num_on_tx / 2;
}
- break;
-
- case inv:
- Ig_on = (nmos_leak + pmos_leak)/2;
- break;
- case nand:
- //pull up network
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
- {
- Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+ Ig_on /= num_states;
+ }
+ }
+ break;
+ case pmos:
+ if (fanin == 1) {
+ Ig_on = pmos_leak / num_states;
+ } else {
+ if (topo == parallel) {
+ for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
+ Ig_on += pmos_leak * combination(fanin, num_on_tx) *
+ num_on_tx;
}
-
- //pull down network
- Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+ } else {
+ //pull down network when all TXs are on.
+ Ig_on += pmos_leak * fanin;
//num_on_tx is the number of on tx
- for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
+ for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
+ //when num_on_tx=[1,n-1]
+ //TODO: this is an approximation now, a precise computation
+ //will be very complicated.
+ Ig_on += pmos_leak * combination(fanin, num_on_tx) *
+ num_on_tx / 2;
}
- Ig_on /=num_states;
- break;
- case nor:
- // num_on_tx is the number of on tx in pull up network
- Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
- for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)
- {
- Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;
+ Ig_on /= num_states;
+ }
+ }
+ break;
- }
- //pull down network
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
- }
- Ig_on /=num_states;
- break;
- case tri:
- Ig_on += (2*nmos_leak + 2*pmos_leak)/2;//enabled
- Ig_on += (nmos_leak + pmos_leak)/2; //disabled upper bound of leakage power
- Ig_on /=2;
- break;
- case tg:
- Ig_on = (nmos_leak + pmos_leak)/2;
- break;
- default:
- assert(0);
- break;
- }
-
- return Ig_on;
+ case inv:
+ Ig_on = (nmos_leak + pmos_leak) / 2;
+ break;
+ case nand:
+ //pull up network
+ //when num_on_tx=[1,n]
+ for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
+ Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx;
+ }
+
+ //pull down network
+ Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+ //num_on_tx is the number of on tx
+ for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
+ //when num_on_tx=[1,n-1]
+ //TODO: this is an approximation now, a precise computation will be
+ //very complicated.
+ Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
+ }
+ Ig_on /= num_states;
+ break;
+ case nor:
+ // num_on_tx is the number of on tx in pull up network
+ Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
+ for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
+ Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
+
+ }
+ //pull down network
+ for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
+ //when num_on_tx=[1,n]
+ Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx;
+ }
+ Ig_on /= num_states;
+ break;
+ case tri:
+ Ig_on += (2 * nmos_leak + 2 * pmos_leak) / 2;//enabled
+ //disabled upper bound of leakage power
+ Ig_on += (nmos_leak + pmos_leak) / 2;
+ Ig_on /= 2;
+ break;
+ case tg:
+ Ig_on = (nmos_leak + pmos_leak) / 2;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ return Ig_on;
}
double shortcircuit_simple(
@@ -734,21 +628,28 @@ double shortcircuit_simple(
double i_on_p,
double i_on_n_in,
double i_on_p_in,
- double vdd)
-{
-
- double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
- double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
-
- fo_n = i_on_n/i_on_n_in;
- fo_p = i_on_p/i_on_p_in;
- fanout = c_out/c_in;
- beta_ratio = i_on_p/i_on_n;
- vt_to_vdd_ratio = vt/vdd;
-
- //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
- p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
- p_short_circuit_charge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
+ double vdd) {
+
+ double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
+ double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
+
+ fo_n = i_on_n / i_on_n_in;
+ fo_p = i_on_p / i_on_p_in;
+ fanout = c_out / c_in;
+ beta_ratio = i_on_p / i_on_n;
+ vt_to_vdd_ratio = vt / vdd;
+
+ //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+ p_short_circuit_discharge_low =
+ 10 / 3 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
+ pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio *
+ vt_to_vdd_ratio)) * c_in *
+ vdd * vdd * fo_p * fo_p / fanout / beta_ratio;
+ p_short_circuit_charge_low =
+ 10 / 3 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
+ pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio *
+ vt_to_vdd_ratio)) * c_in *
+ vdd * vdd * fo_n * fo_n / fanout * beta_ratio;
// double t1, t2, t3, t4, t5;
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
// t2=pow(velocity_index,2.0);
@@ -756,8 +657,12 @@ double shortcircuit_simple(
// t4=t1/t2/t3;
// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
- p_short_circuit_discharge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_p/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
- p_short_circuit_charge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_n/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+ p_short_circuit_discharge_high =
+ pow(((vdd - vt) - vt_to_vdd_ratio), 1.5) * c_in * vdd * vdd *
+ fo_p / 10 / pow(2, 3 * vt_to_vdd_ratio + 2 * velocity_index);
+ p_short_circuit_charge_high = pow(((vdd - vt) - vt_to_vdd_ratio), 1.5) *
+ c_in * vdd * vdd * fo_n / 10 / pow(2, 3 * vt_to_vdd_ratio + 2 *
+ velocity_index);
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),1.5);
// t2=pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
@@ -766,11 +671,11 @@ double shortcircuit_simple(
// p_short_circuit_discharge = 1.0/(1.0/p_short_circuit_discharge_low + 1.0/p_short_circuit_discharge_high);
// p_short_circuit_charge = 1/(1/p_short_circuit_charge_low + 1/p_short_circuit_charge_high); //harmmoic mean cannot be applied simple formulas.
- p_short_circuit_discharge = p_short_circuit_discharge_low;
- p_short_circuit_charge = p_short_circuit_charge_low;
- p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge)/2;
+ p_short_circuit_discharge = p_short_circuit_discharge_low;
+ p_short_circuit_charge = p_short_circuit_charge_low;
+ p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge) / 2;
- return (p_short_circuit);
+ return (p_short_circuit);
}
double shortcircuit(
@@ -784,25 +689,33 @@ double shortcircuit(
double i_on_p,
double i_on_n_in,
double i_on_p_in,
- double vdd)
-{
-
- double p_short_circuit=0, p_short_circuit_discharge;//, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
- double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
- double f_alpha, k_v, e, g_v_alpha, h_v_alpha;
-
- fo_n = i_on_n/i_on_n_in;
- fo_p = i_on_p/i_on_p_in;
- fanout = 1;
- beta_ratio = i_on_p/i_on_n;
- vt_to_vdd_ratio = vt/vdd;
- e = 2.71828;
- f_alpha = 1/(velocity_index+2) -velocity_index/(2*(velocity_index+3)) +velocity_index/(velocity_index+4)*(velocity_index/2-1);
- k_v = 0.9/0.8+(vdd-vt)/0.8*log(10*(vdd-vt)/e);
- g_v_alpha = (velocity_index + 1)*pow((1-velocity_index),velocity_index)*pow((1-velocity_index),velocity_index/2)/f_alpha/pow((1-velocity_index-velocity_index),(velocity_index/2+velocity_index+2));
- h_v_alpha = pow(2, velocity_index)*(velocity_index+1)*pow((1-velocity_index),velocity_index)/pow((1-velocity_index-velocity_index),(velocity_index+1));
-
- //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+ double vdd) {
+
+ //this is actually energy
+ double p_short_circuit = 0, p_short_circuit_discharge;
+ double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
+ double f_alpha, k_v, e, g_v_alpha, h_v_alpha;
+
+ fo_n = i_on_n / i_on_n_in;
+ fo_p = i_on_p / i_on_p_in;
+ fanout = 1;
+ beta_ratio = i_on_p / i_on_n;
+ vt_to_vdd_ratio = vt / vdd;
+ e = 2.71828;
+ f_alpha = 1 / (velocity_index + 2) - velocity_index /
+ (2 * (velocity_index + 3)) + velocity_index / (velocity_index + 4) *
+ (velocity_index / 2 - 1);
+ k_v = 0.9 / 0.8 + (vdd - vt) / 0.8 * log(10 * (vdd - vt) / e);
+ g_v_alpha = (velocity_index + 1) *
+ pow((1 - velocity_index), velocity_index) *
+ pow((1 - velocity_index), velocity_index / 2) / f_alpha /
+ pow((1 - velocity_index - velocity_index),
+ (velocity_index / 2 + velocity_index + 2));
+ h_v_alpha = pow(2, velocity_index) * (velocity_index + 1) *
+ pow((1 - velocity_index), velocity_index) /
+ pow((1 - velocity_index - velocity_index), (velocity_index + 1));
+
+ //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
// p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
// p_short_circuit_charge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
// double t1, t2, t3, t4, t5;
@@ -824,6 +737,8 @@ double shortcircuit(
//
// p_short_circuit = p_short_circuit_discharge;
- p_short_circuit_discharge = k_v*vdd*vdd*c_in*fo_p*fo_p/((vdd-vt)*g_v_alpha*fanout*beta_ratio/2/k_v + h_v_alpha*fo_p);
- return (p_short_circuit);
+ p_short_circuit_discharge = k_v * vdd * vdd * c_in * fo_p * fo_p /
+ ((vdd - vt) * g_v_alpha * fanout * beta_ratio / 2 / k_v + h_v_alpha *
+ fo_p);
+ return (p_short_circuit);
}
diff --git a/ext/mcpat/cacti/basic_circuit.h b/ext/mcpat/cacti/basic_circuit.h
index aaab6c0ea..e4bb5760a 100644
--- a/ext/mcpat/cacti/basic_circuit.h
+++ b/ext/mcpat/cacti/basic_circuit.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -49,10 +50,10 @@ int combination(int n, int m);
//#define DBG
#ifdef DBG
- #define PRINTDW(a);\
+#define PRINTDW(a);\
a;
#else
- #define PRINTDW(a);\
+#define PRINTDW(a);\
#endif
@@ -76,7 +77,7 @@ enum Htree_type {
enum Gate_type {
nmos,
pmos,
- inv,
+ inv,
nand,
nor,
tri,
@@ -164,13 +165,13 @@ double cmos_Ig_n(
double nWidth,
bool _is_dram = false,
bool _is_cell = false,
- bool _is_wl_tr= false);
+ bool _is_wl_tr = false);
double cmos_Ig_p(
double pWidth,
bool _is_dram = false,
bool _is_cell = false,
- bool _is_wl_tr= false);
+ bool _is_wl_tr = false);
double cmos_Isub_leakage(
@@ -220,29 +221,29 @@ double shortcircuit_simple(
double vdd);
//set power point product mask; strictly speaking this is not real point product
inline void set_pppm(
- double * pppv,
- double a=1,
- double b=1,
- double c=1,
- double d=1
- ){
- pppv[0]= a;
- pppv[1]= b;
- pppv[2]= c;
- pppv[3]= d;
+ double * pppv,
+ double a = 1,
+ double b = 1,
+ double c = 1,
+ double d = 1
+) {
+ pppv[0] = a;
+ pppv[1] = b;
+ pppv[2] = c;
+ pppv[3] = d;
}
inline void set_sppm(
- double * sppv,
- double a=1,
- double b=1,
- double c=1,
- double d=1
- ){
- sppv[0]= a;
- sppv[1]= b;
- sppv[2]= c;
+ double * sppv,
+ double a = 1,
+ double b = 1,
+ double c = 1,
+ double d = 1
+) {
+ sppv[0] = a;
+ sppv[1] = b;
+ sppv[2] = c;
}
#endif
diff --git a/ext/mcpat/cacti/cacti_interface.cc b/ext/mcpat/cacti/cacti_interface.cc
index b6d0d13de..b397db897 100644
--- a/ext/mcpat/cacti/cacti_interface.cc
+++ b/ext/mcpat/cacti/cacti_interface.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -47,127 +48,107 @@
using namespace std;
-bool mem_array::lt(const mem_array * m1, const mem_array * m2)
-{
- if (m1->Nspd < m2->Nspd) return true;
- else if (m1->Nspd > m2->Nspd) return false;
- else if (m1->Ndwl < m2->Ndwl) return true;
- else if (m1->Ndwl > m2->Ndwl) return false;
- else if (m1->Ndbl < m2->Ndbl) return true;
- else if (m1->Ndbl > m2->Ndbl) return false;
- else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
- else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
- else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
- else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
- else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
- else return false;
+bool mem_array::lt(const mem_array * m1, const mem_array * m2) {
+ if (m1->Nspd < m2->Nspd) return true;
+ else if (m1->Nspd > m2->Nspd) return false;
+ else if (m1->Ndwl < m2->Ndwl) return true;
+ else if (m1->Ndwl > m2->Ndwl) return false;
+ else if (m1->Ndbl < m2->Ndbl) return true;
+ else if (m1->Ndbl > m2->Ndbl) return false;
+ else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
+ else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
+ else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
+ else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
+ else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
+ else return false;
}
-void uca_org_t::find_delay()
-{
- mem_array * data_arr = data_array2;
- mem_array * tag_arr = tag_array2;
-
- // check whether it is a regular cache or scratch ram
- if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
- {
- access_time = data_arr->access_time;
- }
- // Both tag and data lookup happen in parallel
- // and the entire set is sent over the data array h-tree without
- // waiting for the way-select signal --TODO add the corresponding
- // power overhead Nav
- else if (g_ip->fast_access == true)
- {
- access_time = MAX(tag_arr->access_time, data_arr->access_time);
- }
- // Tag is accessed first. On a hit, way-select signal along with the
- // address is sent to read/write the appropriate block in the data
- // array
- else if (g_ip->is_seq_acc == true)
- {
- access_time = tag_arr->access_time + data_arr->access_time;
- }
- // Normal access: tag array access and data array access happen in parallel.
- // But, the data array will wait for the way-select and transfer only the
- // appropriate block over the h-tree.
- else
- {
- access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
- data_arr->delay_before_subarray_output_driver) +
- data_arr->delay_from_subarray_output_driver_to_output;
- }
+void uca_org_t::find_delay() {
+ mem_array * data_arr = data_array2;
+ mem_array * tag_arr = tag_array2;
+
+ // check whether it is a regular cache or scratch ram
+ if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
+ access_time = data_arr->access_time;
+ }
+ // Both tag and data lookup happen in parallel
+ // and the entire set is sent over the data array h-tree without
+ // waiting for the way-select signal --TODO add the corresponding
+ // power overhead Nav
+ else if (g_ip->fast_access == true) {
+ access_time = MAX(tag_arr->access_time, data_arr->access_time);
+ }
+ // Tag is accessed first. On a hit, way-select signal along with the
+ // address is sent to read/write the appropriate block in the data
+ // array
+ else if (g_ip->is_seq_acc == true) {
+ access_time = tag_arr->access_time + data_arr->access_time;
+ }
+ // Normal access: tag array access and data array access happen in parallel.
+ // But, the data array will wait for the way-select and transfer only the
+ // appropriate block over the h-tree.
+ else {
+ access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
+ data_arr->delay_before_subarray_output_driver) +
+ data_arr->delay_from_subarray_output_driver_to_output;
+ }
}
-void uca_org_t::find_energy()
-{
- if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache)
- power = data_array2->power + tag_array2->power;
- else
- power = data_array2->power;
+void uca_org_t::find_energy() {
+ if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc))
+ power = data_array2->power + tag_array2->power;
+ else
+ power = data_array2->power;
}
-void uca_org_t::find_area()
-{
- if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false)
- {
- cache_ht = data_array2->height;
- cache_len = data_array2->width;
- }
- else
- {
- cache_ht = MAX(tag_array2->height, data_array2->height);
- cache_len = tag_array2->width + data_array2->width;
- }
- area = cache_ht * cache_len;
+void uca_org_t::find_area() {
+ if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
+ cache_ht = data_array2->height;
+ cache_len = data_array2->width;
+ } else {
+ cache_ht = MAX(tag_array2->height, data_array2->height);
+ cache_len = tag_array2->width + data_array2->width;
+ }
+ area = cache_ht * cache_len;
}
-void uca_org_t::adjust_area()
-{
- double area_adjust;
- if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
- {
- if (data_array2->area_efficiency/100.0<0.2)
- {
- //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
- area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0));
- cache_ht = cache_ht/area_adjust;
- cache_len = cache_len/area_adjust;
+void uca_org_t::adjust_area() {
+ double area_adjust;
+ if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
+ if (data_array2->area_efficiency / 100.0 < 0.2) {
+ //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
+ area_adjust = sqrt(0.2 / (data_array2->area_efficiency / 100.0));
+ cache_ht = cache_ht / area_adjust;
+ cache_len = cache_len / area_adjust;
+ }
}
- }
- area = cache_ht * cache_len;
+ area = cache_ht * cache_len;
}
-void uca_org_t::find_cyc()
-{
- if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false)
- {
- cycle_time = data_array2->cycle_time;
- }
- else
- {
- cycle_time = MAX(tag_array2->cycle_time,
- data_array2->cycle_time);
- }
+void uca_org_t::find_cyc() {
+ if ((g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
+ cycle_time = data_array2->cycle_time;
+ } else {
+ cycle_time = MAX(tag_array2->cycle_time,
+ data_array2->cycle_time);
+ }
}
uca_org_t :: uca_org_t()
-:tag_array2(0),
- data_array2(0)
-{
+ : tag_array2(0),
+ data_array2(0) {
}
-void uca_org_t :: cleanup()
-{
- if (data_array2!=0)
- delete data_array2;
- if (tag_array2!=0)
- delete tag_array2;
+void uca_org_t :: cleanup() {
+ if (data_array2 != 0)
+ delete data_array2;
+ if (tag_array2 != 0)
+ delete tag_array2;
}
diff --git a/ext/mcpat/cacti/cacti_interface.h b/ext/mcpat/cacti/cacti_interface.h
index f37596554..a2bddd819 100644
--- a/ext/mcpat/cacti/cacti_interface.h
+++ b/ext/mcpat/cacti/cacti_interface.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -50,9 +51,8 @@ class mem_array;
class uca_org_t;
-class powerComponents
-{
- public:
+class powerComponents {
+public:
double dynamic;
double leakage;
double gate_leakage;
@@ -60,17 +60,24 @@ class powerComponents
double longer_channel_leakage;
powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { }
- powerComponents(const powerComponents & obj) { *this = obj; }
- powerComponents & operator=(const powerComponents & rhs)
- {
- dynamic = rhs.dynamic;
- leakage = rhs.leakage;
- gate_leakage = rhs.gate_leakage;
- short_circuit = rhs.short_circuit;
- longer_channel_leakage = rhs.longer_channel_leakage;
- return *this;
+ powerComponents(const powerComponents & obj) {
+ *this = obj;
+ }
+ powerComponents & operator=(const powerComponents & rhs) {
+ dynamic = rhs.dynamic;
+ leakage = rhs.leakage;
+ gate_leakage = rhs.gate_leakage;
+ short_circuit = rhs.short_circuit;
+ longer_channel_leakage = rhs.longer_channel_leakage;
+ return *this;
+ }
+ void reset() {
+ dynamic = 0;
+ leakage = 0;
+ gate_leakage = 0;
+ short_circuit = 0;
+ longer_channel_leakage = 0;
}
- void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;}
friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
friend powerComponents operator*(const powerComponents & x, double const * const y);
@@ -78,22 +85,24 @@ class powerComponents
-class powerDef
-{
- public:
+class powerDef {
+public:
powerComponents readOp;
powerComponents writeOp;
powerComponents searchOp;//Sheng: for CAM and FA
powerDef() : readOp(), writeOp(), searchOp() { }
- void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
+ void reset() {
+ readOp.reset();
+ writeOp.reset();
+ searchOp.reset();
+ }
friend powerDef operator+(const powerDef & x, const powerDef & y);
friend powerDef operator*(const powerDef & x, double const * const y);
};
-enum Wire_type
-{
+enum Wire_type {
Global /* gloabl wires with repeaters */,
Global_5 /* 5% delay penalty */,
Global_10 /* 10% delay penalty */,
@@ -108,12 +117,12 @@ enum Wire_type
-class InputParameter
-{
- public:
+class InputParameter {
+public:
void parse_cfg(const string & infile);
- bool error_checking(); // return false if the input parameters are problematic
+ // return false if the input parameters are problematic
+ bool error_checking(string name = "CACTI");
void display_ip();
unsigned int cache_sz; // in bytes
@@ -172,14 +181,14 @@ class InputParameter
int force_nuca_bank;
int delay_wt, dynamic_power_wt, leakage_power_wt,
- cycle_time_wt, area_wt;
+ cycle_time_wt, area_wt;
int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
- cycle_time_wt_nuca, area_wt_nuca;
+ cycle_time_wt_nuca, area_wt_nuca;
int delay_dev, dynamic_power_dev, leakage_power_dev,
- cycle_time_dev, area_dev;
+ cycle_time_dev, area_dev;
int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
- cycle_time_dev_nuca, area_dev_nuca;
+ cycle_time_dev_nuca, area_dev_nuca;
int ed; //ED or ED2 optimization
int nuca;
@@ -194,167 +203,113 @@ class InputParameter
bool add_ecc_b_;
- //parameters for design constraint
- double throughput;
- double latency;
- bool pipelinable;
- int pipeline_stages;
- int per_stage_vector;
- bool with_clock_grid;
+ //parameters for design constraint
+ double throughput;
+ double latency;
+ bool pipelinable;
+ int pipeline_stages;
+ int per_stage_vector;
+ bool with_clock_grid;
};
-typedef struct{
- int Ndwl;
- int Ndbl;
- double Nspd;
- int deg_bl_muxing;
- int Ndsam_lev_1;
- int Ndsam_lev_2;
- int number_activated_mats_horizontal_direction;
- int number_subbanks;
- int page_size_in_bits;
- double delay_route_to_bank;
- double delay_crossbar;
- double delay_addr_din_horizontal_htree;
- double delay_addr_din_vertical_htree;
- double delay_row_predecode_driver_and_block;
- double delay_row_decoder;
- double delay_bitlines;
- double delay_sense_amp;
- double delay_subarray_output_driver;
- double delay_bit_mux_predecode_driver_and_block;
- double delay_bit_mux_decoder;
- double delay_senseamp_mux_lev_1_predecode_driver_and_block;
- double delay_senseamp_mux_lev_1_decoder;
- double delay_senseamp_mux_lev_2_predecode_driver_and_block;
- double delay_senseamp_mux_lev_2_decoder;
- double delay_input_htree;
- double delay_output_htree;
- double delay_dout_vertical_htree;
- double delay_dout_horizontal_htree;
- double delay_comparator;
- double access_time;
- double cycle_time;
- double multisubbank_interleave_cycle_time;
- double delay_request_network;
- double delay_inside_mat;
- double delay_reply_network;
- double trcd;
- double cas_latency;
- double precharge_delay;
- powerDef power_routing_to_bank;
- powerDef power_addr_input_htree;
- powerDef power_data_input_htree;
- powerDef power_data_output_htree;
- powerDef power_addr_horizontal_htree;
- powerDef power_datain_horizontal_htree;
- powerDef power_dataout_horizontal_htree;
- powerDef power_addr_vertical_htree;
- powerDef power_datain_vertical_htree;
- powerDef power_row_predecoder_drivers;
- powerDef power_row_predecoder_blocks;
- powerDef power_row_decoders;
- powerDef power_bit_mux_predecoder_drivers;
- powerDef power_bit_mux_predecoder_blocks;
- powerDef power_bit_mux_decoders;
- powerDef power_senseamp_mux_lev_1_predecoder_drivers;
- powerDef power_senseamp_mux_lev_1_predecoder_blocks;
- powerDef power_senseamp_mux_lev_1_decoders;
- powerDef power_senseamp_mux_lev_2_predecoder_drivers;
- powerDef power_senseamp_mux_lev_2_predecoder_blocks;
- powerDef power_senseamp_mux_lev_2_decoders;
- powerDef power_bitlines;
- powerDef power_sense_amps;
- powerDef power_prechg_eq_drivers;
- powerDef power_output_drivers_at_subarray;
- powerDef power_dataout_vertical_htree;
- powerDef power_comparators;
- powerDef power_crossbar;
- powerDef total_power;
- double area;
- double all_banks_height;
- double all_banks_width;
- double bank_height;
- double bank_width;
- double subarray_memory_cell_area_height;
- double subarray_memory_cell_area_width;
- double mat_height;
- double mat_width;
- double routing_area_height_within_bank;
- double routing_area_width_within_bank;
- double area_efficiency;
-// double perc_power_dyn_routing_to_bank;
-// double perc_power_dyn_addr_horizontal_htree;
-// double perc_power_dyn_datain_horizontal_htree;
-// double perc_power_dyn_dataout_horizontal_htree;
-// double perc_power_dyn_addr_vertical_htree;
-// double perc_power_dyn_datain_vertical_htree;
-// double perc_power_dyn_row_predecoder_drivers;
-// double perc_power_dyn_row_predecoder_blocks;
-// double perc_power_dyn_row_decoders;
-// double perc_power_dyn_bit_mux_predecoder_drivers;
-// double perc_power_dyn_bit_mux_predecoder_blocks;
-// double perc_power_dyn_bit_mux_decoders;
-// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
-// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
-// double perc_power_dyn_senseamp_mux_lev_1_decoders;
-// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
-// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
-// double perc_power_dyn_senseamp_mux_lev_2_decoders;
-// double perc_power_dyn_bitlines;
-// double perc_power_dyn_sense_amps;
-// double perc_power_dyn_prechg_eq_drivers;
-// double perc_power_dyn_subarray_output_drivers;
-// double perc_power_dyn_dataout_vertical_htree;
-// double perc_power_dyn_comparators;
-// double perc_power_dyn_crossbar;
-// double perc_power_dyn_spent_outside_mats;
-// double perc_power_leak_routing_to_bank;
-// double perc_power_leak_addr_horizontal_htree;
-// double perc_power_leak_datain_horizontal_htree;
-// double perc_power_leak_dataout_horizontal_htree;
-// double perc_power_leak_addr_vertical_htree;
-// double perc_power_leak_datain_vertical_htree;
-// double perc_power_leak_row_predecoder_drivers;
-// double perc_power_leak_row_predecoder_blocks;
-// double perc_power_leak_row_decoders;
-// double perc_power_leak_bit_mux_predecoder_drivers;
-// double perc_power_leak_bit_mux_predecoder_blocks;
-// double perc_power_leak_bit_mux_decoders;
-// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
-// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
-// double perc_power_leak_senseamp_mux_lev_1_decoders;
-// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
-// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
-// double perc_power_leak_senseamp_mux_lev_2_decoders;
-// double perc_power_leak_bitlines;
-// double perc_power_leak_sense_amps;
-// double perc_power_leak_prechg_eq_drivers;
-// double perc_power_leak_subarray_output_drivers;
-// double perc_power_leak_dataout_vertical_htree;
-// double perc_power_leak_comparators;
-// double perc_power_leak_crossbar;
-// double perc_leak_mats;
-// double perc_active_mats;
- double refresh_power;
- double dram_refresh_period;
- double dram_array_availability;
- double dyn_read_energy_from_closed_page;
- double dyn_read_energy_from_open_page;
- double leak_power_subbank_closed_page;
- double leak_power_subbank_open_page;
- double leak_power_request_and_reply_networks;
- double activate_energy;
- double read_energy;
- double write_energy;
- double precharge_energy;
+typedef struct {
+ int Ndwl;
+ int Ndbl;
+ double Nspd;
+ int deg_bl_muxing;
+ int Ndsam_lev_1;
+ int Ndsam_lev_2;
+ int number_activated_mats_horizontal_direction;
+ int number_subbanks;
+ int page_size_in_bits;
+ double delay_route_to_bank;
+ double delay_crossbar;
+ double delay_addr_din_horizontal_htree;
+ double delay_addr_din_vertical_htree;
+ double delay_row_predecode_driver_and_block;
+ double delay_row_decoder;
+ double delay_bitlines;
+ double delay_sense_amp;
+ double delay_subarray_output_driver;
+ double delay_bit_mux_predecode_driver_and_block;
+ double delay_bit_mux_decoder;
+ double delay_senseamp_mux_lev_1_predecode_driver_and_block;
+ double delay_senseamp_mux_lev_1_decoder;
+ double delay_senseamp_mux_lev_2_predecode_driver_and_block;
+ double delay_senseamp_mux_lev_2_decoder;
+ double delay_input_htree;
+ double delay_output_htree;
+ double delay_dout_vertical_htree;
+ double delay_dout_horizontal_htree;
+ double delay_comparator;
+ double access_time;
+ double cycle_time;
+ double multisubbank_interleave_cycle_time;
+ double delay_request_network;
+ double delay_inside_mat;
+ double delay_reply_network;
+ double trcd;
+ double cas_latency;
+ double precharge_delay;
+ powerDef power_routing_to_bank;
+ powerDef power_addr_input_htree;
+ powerDef power_data_input_htree;
+ powerDef power_data_output_htree;
+ powerDef power_addr_horizontal_htree;
+ powerDef power_datain_horizontal_htree;
+ powerDef power_dataout_horizontal_htree;
+ powerDef power_addr_vertical_htree;
+ powerDef power_datain_vertical_htree;
+ powerDef power_row_predecoder_drivers;
+ powerDef power_row_predecoder_blocks;
+ powerDef power_row_decoders;
+ powerDef power_bit_mux_predecoder_drivers;
+ powerDef power_bit_mux_predecoder_blocks;
+ powerDef power_bit_mux_decoders;
+ powerDef power_senseamp_mux_lev_1_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_1_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_1_decoders;
+ powerDef power_senseamp_mux_lev_2_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_2_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_2_decoders;
+ powerDef power_bitlines;
+ powerDef power_sense_amps;
+ powerDef power_prechg_eq_drivers;
+ powerDef power_output_drivers_at_subarray;
+ powerDef power_dataout_vertical_htree;
+ powerDef power_comparators;
+ powerDef power_crossbar;
+ powerDef total_power;
+ double area;
+ double all_banks_height;
+ double all_banks_width;
+ double bank_height;
+ double bank_width;
+ double subarray_memory_cell_area_height;
+ double subarray_memory_cell_area_width;
+ double mat_height;
+ double mat_width;
+ double routing_area_height_within_bank;
+ double routing_area_width_within_bank;
+ double area_efficiency;
+ double refresh_power;
+ double dram_refresh_period;
+ double dram_array_availability;
+ double dyn_read_energy_from_closed_page;
+ double dyn_read_energy_from_open_page;
+ double leak_power_subbank_closed_page;
+ double leak_power_subbank_open_page;
+ double leak_power_request_and_reply_networks;
+ double activate_energy;
+ double read_energy;
+ double write_energy;
+ double precharge_energy;
} results_mem_array;
-class uca_org_t
-{
- public:
+class uca_org_t {
+public:
mem_array * tag_array2;
mem_array * data_array2;
double access_time;
@@ -378,7 +333,7 @@ class uca_org_t
void find_cyc();
void adjust_area();//for McPAT only to adjust routing overhead
void cleanup();
- ~uca_org_t(){};
+ ~uca_org_t() {};
};
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
@@ -387,103 +342,62 @@ uca_org_t cacti_interface(const string & infile_name);
//McPAT's plain interface, please keep !!!
uca_org_t cacti_interface(InputParameter * const local_interface);
//McPAT's plain interface, please keep !!!
-uca_org_t init_interface(InputParameter * const local_interface);
+uca_org_t init_interface(InputParameter * const local_interface,
+ const string &name);
//McPAT's plain interface, please keep !!!
uca_org_t cacti_interface(
- int cache_size,
- int line_size,
- int associativity,
- int rw_ports,
- int excl_read_ports,
- int excl_write_ports,
- int single_ended_read_ports,
- int search_ports,
- int banks,
- double tech_node,
- int output_width,
- int specific_tag,
- int tag_width,
- int access_mode,
- int cache,
- int main_mem,
- int obj_func_delay,
- int obj_func_dynamic_power,
- int obj_func_leakage_power,
- int obj_func_cycle_time,
- int obj_func_area,
- int dev_func_delay,
- int dev_func_dynamic_power,
- int dev_func_leakage_power,
- int dev_func_area,
- int dev_func_cycle_time,
- int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
- int temp,
- int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
- int data_arr_ram_cell_tech_flavor_in,
- int data_arr_peri_global_tech_flavor_in,
- int tag_arr_ram_cell_tech_flavor_in,
- int tag_arr_peri_global_tech_flavor_in,
- int interconnect_projection_type_in,
- int wire_inside_mat_type_in,
- int wire_outside_mat_type_in,
- int REPEATERS_IN_HTREE_SEGMENTS_in,
- int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
- int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
- int PAGE_SIZE_BITS_in,
- int BURST_LENGTH_in,
- int INTERNAL_PREFETCH_WIDTH_in,
- int force_wiretype,
- int wiretype,
- int force_config,
- int ndwl,
- int ndbl,
- int nspd,
- int ndcm,
- int ndsam1,
- int ndsam2,
- int ecc);
-// int cache_size,
-// int line_size,
-// int associativity,
-// int rw_ports,
-// int excl_read_ports,
-// int excl_write_ports,
-// int single_ended_read_ports,
-// int banks,
-// double tech_node,
-// int output_width,
-// int specific_tag,
-// int tag_width,
-// int access_mode,
-// int cache,
-// int main_mem,
-// int obj_func_delay,
-// int obj_func_dynamic_power,
-// int obj_func_leakage_power,
-// int obj_func_area,
-// int obj_func_cycle_time,
-// int dev_func_delay,
-// int dev_func_dynamic_power,
-// int dev_func_leakage_power,
-// int dev_func_area,
-// int dev_func_cycle_time,
-// int temp,
-// int data_arr_ram_cell_tech_flavor_in,
-// int data_arr_peri_global_tech_flavor_in,
-// int tag_arr_ram_cell_tech_flavor_in,
-// int tag_arr_peri_global_tech_flavor_in,
-// int interconnect_projection_type_in,
-// int wire_inside_mat_type_in,
-// int wire_outside_mat_type_in,
-// int REPEATERS_IN_HTREE_SEGMENTS_in,
-// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
-// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
-//// double MAXAREACONSTRAINT_PERC_in,
-//// double MAXACCTIMECONSTRAINT_PERC_in,
-//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in,
-// int PAGE_SIZE_BITS_in,
-// int BURST_LENGTH_in,
-// int INTERNAL_PREFETCH_WIDTH_in);
+ int cache_size,
+ int line_size,
+ int associativity,
+ int rw_ports,
+ int excl_read_ports,
+ int excl_write_ports,
+ int single_ended_read_ports,
+ int search_ports,
+ int banks,
+ double tech_node,
+ int output_width,
+ int specific_tag,
+ int tag_width,
+ int access_mode,
+ int cache,
+ int main_mem,
+ int obj_func_delay,
+ int obj_func_dynamic_power,
+ int obj_func_leakage_power,
+ int obj_func_cycle_time,
+ int obj_func_area,
+ int dev_func_delay,
+ int dev_func_dynamic_power,
+ int dev_func_leakage_power,
+ int dev_func_area,
+ int dev_func_cycle_time,
+ int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
+ int temp,
+ int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
+ int data_arr_ram_cell_tech_flavor_in,
+ int data_arr_peri_global_tech_flavor_in,
+ int tag_arr_ram_cell_tech_flavor_in,
+ int tag_arr_peri_global_tech_flavor_in,
+ int interconnect_projection_type_in,
+ int wire_inside_mat_type_in,
+ int wire_outside_mat_type_in,
+ int REPEATERS_IN_HTREE_SEGMENTS_in,
+ int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
+ int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
+ int PAGE_SIZE_BITS_in,
+ int BURST_LENGTH_in,
+ int INTERNAL_PREFETCH_WIDTH_in,
+ int force_wiretype,
+ int wiretype,
+ int force_config,
+ int ndwl,
+ int ndbl,
+ int nspd,
+ int ndcm,
+ int ndsam1,
+ int ndsam2,
+ int ecc);
//Naveen's interface
uca_org_t cacti_interface(
@@ -542,91 +456,90 @@ uca_org_t cacti_interface(
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
int p_input);
-class mem_array
-{
- public:
- int Ndcm;
- int Ndwl;
- int Ndbl;
- double Nspd;
- int deg_bl_muxing;
- int Ndsam_lev_1;
- int Ndsam_lev_2;
- double access_time;
- double cycle_time;
- double multisubbank_interleave_cycle_time;
- double area_ram_cells;
- double area;
- powerDef power;
- double delay_senseamp_mux_decoder;
- double delay_before_subarray_output_driver;
- double delay_from_subarray_output_driver_to_output;
- double height;
- double width;
-
- double mat_height;
- double mat_length;
- double subarray_length;
- double subarray_height;
-
- double delay_route_to_bank,
- delay_input_htree,
- delay_row_predecode_driver_and_block,
- delay_row_decoder,
- delay_bitlines,
- delay_sense_amp,
- delay_subarray_output_driver,
- delay_dout_htree,
- delay_comparator,
- delay_matchlines;
-
- double all_banks_height,
- all_banks_width,
- area_efficiency;
-
- powerDef power_routing_to_bank;
- powerDef power_addr_input_htree;
- powerDef power_data_input_htree;
- powerDef power_data_output_htree;
- powerDef power_htree_in_search;
- powerDef power_htree_out_search;
- powerDef power_row_predecoder_drivers;
- powerDef power_row_predecoder_blocks;
- powerDef power_row_decoders;
- powerDef power_bit_mux_predecoder_drivers;
- powerDef power_bit_mux_predecoder_blocks;
- powerDef power_bit_mux_decoders;
- powerDef power_senseamp_mux_lev_1_predecoder_drivers;
- powerDef power_senseamp_mux_lev_1_predecoder_blocks;
- powerDef power_senseamp_mux_lev_1_decoders;
- powerDef power_senseamp_mux_lev_2_predecoder_drivers;
- powerDef power_senseamp_mux_lev_2_predecoder_blocks;
- powerDef power_senseamp_mux_lev_2_decoders;
- powerDef power_bitlines;
- powerDef power_sense_amps;
- powerDef power_prechg_eq_drivers;
- powerDef power_output_drivers_at_subarray;
- powerDef power_dataout_vertical_htree;
- powerDef power_comparators;
-
- powerDef power_cam_bitline_precharge_eq_drv;
- powerDef power_searchline;
- powerDef power_searchline_precharge;
- powerDef power_matchlines;
- powerDef power_matchline_precharge;
- powerDef power_matchline_to_wordline_drv;
-
- min_values_t *arr_min;
- enum Wire_type wt;
-
- // dram stats
- double activate_energy, read_energy, write_energy, precharge_energy,
- refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
- leak_power_request_and_reply_networks;
-
- double precharge_delay;
-
- static bool lt(const mem_array * m1, const mem_array * m2);
+class mem_array {
+public:
+ int Ndcm;
+ int Ndwl;
+ int Ndbl;
+ double Nspd;
+ int deg_bl_muxing;
+ int Ndsam_lev_1;
+ int Ndsam_lev_2;
+ double access_time;
+ double cycle_time;
+ double multisubbank_interleave_cycle_time;
+ double area_ram_cells;
+ double area;
+ powerDef power;
+ double delay_senseamp_mux_decoder;
+ double delay_before_subarray_output_driver;
+ double delay_from_subarray_output_driver_to_output;
+ double height;
+ double width;
+
+ double mat_height;
+ double mat_length;
+ double subarray_length;
+ double subarray_height;
+
+ double delay_route_to_bank,
+ delay_input_htree,
+ delay_row_predecode_driver_and_block,
+ delay_row_decoder,
+ delay_bitlines,
+ delay_sense_amp,
+ delay_subarray_output_driver,
+ delay_dout_htree,
+ delay_comparator,
+ delay_matchlines;
+
+ double all_banks_height,
+ all_banks_width,
+ area_efficiency;
+
+ powerDef power_routing_to_bank;
+ powerDef power_addr_input_htree;
+ powerDef power_data_input_htree;
+ powerDef power_data_output_htree;
+ powerDef power_htree_in_search;
+ powerDef power_htree_out_search;
+ powerDef power_row_predecoder_drivers;
+ powerDef power_row_predecoder_blocks;
+ powerDef power_row_decoders;
+ powerDef power_bit_mux_predecoder_drivers;
+ powerDef power_bit_mux_predecoder_blocks;
+ powerDef power_bit_mux_decoders;
+ powerDef power_senseamp_mux_lev_1_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_1_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_1_decoders;
+ powerDef power_senseamp_mux_lev_2_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_2_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_2_decoders;
+ powerDef power_bitlines;
+ powerDef power_sense_amps;
+ powerDef power_prechg_eq_drivers;
+ powerDef power_output_drivers_at_subarray;
+ powerDef power_dataout_vertical_htree;
+ powerDef power_comparators;
+
+ powerDef power_cam_bitline_precharge_eq_drv;
+ powerDef power_searchline;
+ powerDef power_searchline_precharge;
+ powerDef power_matchlines;
+ powerDef power_matchline_precharge;
+ powerDef power_matchline_to_wordline_drv;
+
+ min_values_t *arr_min;
+ enum Wire_type wt;
+
+ // dram stats
+ double activate_energy, read_energy, write_energy, precharge_energy,
+ refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
+ leak_power_request_and_reply_networks;
+
+ double precharge_delay;
+
+ static bool lt(const mem_array * m1, const mem_array * m2);
};
diff --git a/ext/mcpat/cacti/component.cc b/ext/mcpat/cacti/component.cc
index 733108407..90e9baedf 100644
--- a/ext/mcpat/cacti/component.cc
+++ b/ext/mcpat/cacti/component.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -45,34 +46,30 @@ using namespace std;
Component::Component()
- :area(), power(), rt_power(),delay(0)
-{
+ : area(), power(), rt_power(), delay(0) {
}
-Component::~Component()
-{
+Component::~Component() {
}
-double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr)
-{
- double w_poly = g_ip->F_sz_um;
- double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
- double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
- num_stacked_in * w_poly +
- (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
+double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) {
+ double w_poly = g_ip->F_sz_um;
+ double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
+ double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
+ num_stacked_in * w_poly +
+ (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
- if (num_folded_tr > 1)
- {
- total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
- (num_folded_tr - 1) * num_stacked_in * w_poly +
- (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
- }
+ if (num_folded_tr > 1) {
+ total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
+ (num_folded_tr - 1) * num_stacked_in * w_poly +
+ (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
+ }
- return total_diff_w;
+ return total_diff_w;
}
@@ -82,105 +79,96 @@ double Component::compute_gate_area(
int num_inputs,
double w_pmos,
double w_nmos,
- double h_gate)
-{
- if (w_pmos <= 0.0 || w_nmos <= 0.0)
- {
- return 0.0;
- }
-
- double w_folded_pmos, w_folded_nmos;
- int num_folded_pmos, num_folded_nmos;
- double total_ndiff_w, total_pdiff_w;
- Area gate;
-
- double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
- double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
-
- if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0)
- {
- return 0.0;
- }
-
- w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
- w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
- assert(w_folded_pmos > 0);
-
- num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
- num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
-
- switch (gate_type)
- {
+ double h_gate) {
+ if (w_pmos <= 0.0 || w_nmos <= 0.0) {
+ return 0.0;
+ }
+
+ double w_folded_pmos, w_folded_nmos;
+ int num_folded_pmos, num_folded_nmos;
+ double total_ndiff_w, total_pdiff_w;
+ Area gate;
+
+ double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
+ double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
+
+ if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) {
+ return 0.0;
+ }
+
+ w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
+ w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
+ assert(w_folded_pmos > 0);
+
+ num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
+ num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
+
+ switch (gate_type) {
case INV:
- total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
- total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
- break;
+ total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
+ break;
case NOR:
- total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
- total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
- break;
+ total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
+ break;
case NAND:
- total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
- total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
- break;
+ total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
+ break;
default:
- cout << "Unknown gate type: " << gate_type << endl;
- exit(1);
- }
-
- gate.w = MAX(total_ndiff_w, total_pdiff_w);
-
- if (w_folded_nmos > w_nmos)
- {
- //means that the height of the gate can
- //be made smaller than the input height specified, so calculate the height of the gate.
- gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
- }
- else
- {
- gate.h = h_gate;
- }
- return gate.get_area();
+ cout << "Unknown gate type: " << gate_type << endl;
+ exit(1);
+ }
+
+ gate.w = MAX(total_ndiff_w, total_pdiff_w);
+
+ if (w_folded_nmos > w_nmos) {
+ //means that the height of the gate can
+ //be made smaller than the input height specified, so calculate the height of the gate.
+ gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
+ } else {
+ gate.h = h_gate;
+ }
+ return gate.get_area();
}
double Component::compute_tr_width_after_folding(
double input_width,
- double threshold_folding_width)
-{//This is actually the width of the cell not the width of a device.
-//The width of a cell and the width of a device is orthogonal.
- if (input_width <= 0)
- {
- return 0;
- }
-
- int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
- double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
- double width_poly = g_ip->F_sz_um;
- double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
-
- return total_diff_width;
+ double threshold_folding_width) {
+ //This is actually the width of the cell, not the width of a device.
+ //The width of a cell and the width of a device are orthogonal.
+ if (input_width <= 0) {
+ return 0;
+ }
+
+ int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
+ double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
+ double width_poly = g_ip->F_sz_um;
+ double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
+
+ return total_diff_width;
}
-double Component::height_sense_amplifier(double pitch_sense_amp)
-{
- // compute the height occupied by all PMOS transistors
- double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
- compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
- 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
+double Component::height_sense_amplifier(double pitch_sense_amp) {
+ // compute the height occupied by all PMOS transistors
+ double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
+ compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
+ 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
- // compute the height occupied by all NMOS transistors
- double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
- compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
- 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
+ // compute the height occupied by all NMOS transistors
+ double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
+ compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
+ 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
- // compute total height by considering gap between the p and n diffusion areas
- return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
+ // compute total height by considering gap between the p and n diffusion areas
+ return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
}
@@ -195,42 +183,39 @@ int Component::logical_effort(
double p_to_n_sz_ratio,
bool is_dram_,
bool is_wl_tr_,
- double max_w_nmos)
-{
- int num_gates = (int) (log(F) / log(fopt));
-
- // check if num_gates is odd. if so, add 1 to make it even
- num_gates+= (num_gates % 2) ? 1 : 0;
- num_gates = MAX(num_gates, num_gates_min);
-
- // recalculate the effective fanout of each stage
- double f = pow(F, 1.0 / num_gates);
- int i = num_gates - 1;
- double C_in = C_load / f;
- w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
- w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
- w_p[i] = p_to_n_sz_ratio * w_n[i];
-
- if (w_n[i] > max_w_nmos)
- {
- double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
- F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
- num_gates = (int) (log(F) / log(fopt)) + 1;
- num_gates+= (num_gates % 2) ? 1 : 0;
+ double max_w_nmos) {
+ int num_gates = (int) (log(F) / log(fopt));
+
+ // check if num_gates is odd. if so, add 1 to make it even
+ num_gates += (num_gates % 2) ? 1 : 0;
num_gates = MAX(num_gates, num_gates_min);
- f = pow(F, 1.0 / (num_gates - 1));
- i = num_gates - 1;
- w_n[i] = max_w_nmos;
- w_p[i] = p_to_n_sz_ratio * w_n[i];
- }
- for (i = num_gates - 2; i >= 1; i--)
- {
- w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
- w_p[i] = p_to_n_sz_ratio * w_n[i];
- }
+ // recalculate the effective fanout of each stage
+ double f = pow(F, 1.0 / num_gates);
+ int i = num_gates - 1;
+ double C_in = C_load / f;
+ w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
+ w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
- assert(num_gates <= MAX_NUMBER_GATES_STAGE);
- return num_gates;
+ if (w_n[i] > max_w_nmos) {
+ double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
+ F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
+ num_gates = (int) (log(F) / log(fopt)) + 1;
+ num_gates += (num_gates % 2) ? 1 : 0;
+ num_gates = MAX(num_gates, num_gates_min);
+ f = pow(F, 1.0 / (num_gates - 1));
+ i = num_gates - 1;
+ w_n[i] = max_w_nmos;
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
+ }
+
+ for (i = num_gates - 2; i >= 1; i--) {
+ w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
+ }
+
+ assert(num_gates <= MAX_NUMBER_GATES_STAGE);
+ return num_gates;
}
diff --git a/ext/mcpat/cacti/component.h b/ext/mcpat/cacti/component.h
index 75e2cb075..416e4e8e5 100644
--- a/ext/mcpat/cacti/component.h
+++ b/ext/mcpat/cacti/component.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -42,41 +43,32 @@ using namespace std;
class Crossbar;
class Bank;
-class Component
-{
- public:
+class Component {
+public:
Component();
~Component();
Area area;
- powerDef power,rt_power;
+ // TODO: THERE IS LITTLE AGREEMENT THROUGHOUT THIS CODE ABOUT HOW THESE
+ // VARIABLES SHOULD BE USED. PART OF THE PROBLEM IS NAMING. SO THAT THIS
+ // MAKES MORE SENSE, ENERGY CALCULATIONS SHOULD BE SPLIT FROM POWER
+ // CALCULATIONS. THIS IS THE WORST DESIGN PROBLEM THAT STILL EXISTS
+ powerDef power, rt_power;
double delay;
double cycle_time;
- double compute_gate_area(
- int gate_type,
- int num_inputs,
- double w_pmos,
- double w_nmos,
- double h_gate);
-
- double compute_tr_width_after_folding(double input_width, double threshold_folding_width);
+ double compute_gate_area(int gate_type, int num_inputs, double w_pmos,
+ double w_nmos, double h_gate);
+ double compute_tr_width_after_folding(double input_width,
+ double threshold_folding_width);
double height_sense_amplifier(double pitch_sense_amp);
- protected:
- int logical_effort(
- int num_gates_min,
- double g,
- double F,
- double * w_n,
- double * w_p,
- double C_load,
- double p_to_n_sz_ratio,
- bool is_dram_,
- bool is_wl_tr_,
- double max_w_nmos);
+protected:
+ int logical_effort(int num_gates_min, double g, double F, double * w_n,
+ double * w_p, double C_load, double p_to_n_sz_ratio,
+ bool is_dram_, bool is_wl_tr_, double max_w_nmos);
- private:
+private:
double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
};
diff --git a/ext/mcpat/cacti/const.h b/ext/mcpat/cacti/const.h
index aef7d019b..c9b3905bf 100644
--- a/ext/mcpat/cacti/const.h
+++ b/ext/mcpat/cacti/const.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -249,21 +250,20 @@ const double bit_to_byte = 8.0;
// v : vertical or velocity
-enum ram_cell_tech_type_num
-{
- itrs_hp = 0,
- itrs_lstp = 1,
- itrs_lop = 2,
- lp_dram = 3,
- comm_dram = 4
+enum ram_cell_tech_type_num {
+ itrs_hp = 0,
+ itrs_lstp = 1,
+ itrs_lop = 2,
+ lp_dram = 3,
+ comm_dram = 4
};
-const double pppm[4] = {1,1,1,1};
-const double pppm_lkg[4] = {0,1,1,0};
-const double pppm_dyn[4] = {1,0,0,0};
-const double pppm_Isub[4] = {0,1,0,0};
-const double pppm_Ig[4] = {0,0,1,0};
-const double pppm_sc[4] = {0,0,0,1};
+const double pppm[4] = {1, 1, 1, 1};
+const double pppm_lkg[4] = {0, 1, 1, 0};
+const double pppm_dyn[4] = {1, 0, 0, 0};
+const double pppm_Isub[4] = {0, 1, 0, 0};
+const double pppm_Ig[4] = {0, 0, 1, 0};
+const double pppm_sc[4] = {0, 0, 0, 1};
diff --git a/ext/mcpat/cacti/crossbar.cc b/ext/mcpat/cacti/crossbar.cc
index a3d8532d5..ef2a373d6 100644
--- a/ext/mcpat/cacti/crossbar.cc
+++ b/ext/mcpat/cacti/crossbar.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,123 +40,140 @@ Crossbar::Crossbar(
double n_out_,
double flit_size_,
TechnologyParameter::DeviceType *dt
- ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
-{
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- Vdd = dt->Vdd;
- CB_ADJ = 1;
+): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) {
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ Vdd = dt->Vdd;
+ CB_ADJ = 1;
}
-Crossbar::~Crossbar(){}
+Crossbar::~Crossbar() {}
-double Crossbar::output_buffer()
-{
+double Crossbar::output_buffer() {
- //Wire winit(4, 4);
- double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
- Wire w1(g_ip->wt, l_eff);
- //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
- double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
- double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
- // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
- TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
- TriS2 = s1; //driver transistor
+ //Wire winit(4, 4);
+ double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
+ Wire w1(g_ip->wt, l_eff);
+ //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
+ double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ?
+ l_eff * ADJ / w1.repeater_spacing : ADJ);
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
+ TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
+ TriS2 = s1; //driver transistor
- if (TriS1 < 1)
- TriS1 = 1;
+ if (TriS1 < 1)
+ TriS1 = 1;
- double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
- gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
+ double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) +
+ gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0);
// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
// gate_C(TriS2*min_w_pmos, 0);
- tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
- gate_C(TriS2*g_tp.min_w_nmos_, 0)+
- drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
- drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(TriS2*min_w_pmos, 0);
- double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
- double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
-
- tri_inp_cap = input_cap;
- tri_out_cap = output_cap;
- tri_ctr_cap = ctr_cap;
- return input_cap + output_cap + ctr_cap;
+ tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
+ gate_C(TriS2 * g_tp.min_w_nmos_, 0) +
+ drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
+ drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(TriS2 * min_w_pmos, 0);
+ double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1,
+ g_tp.cell_h_def) +
+ drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
+ double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0);
+
+ tri_inp_cap = input_cap;
+ tri_out_cap = output_cap;
+ tri_ctr_cap = ctr_cap;
+ return input_cap + output_cap + ctr_cap;
}
-void Crossbar::compute_power()
-{
-
- Wire winit(4, 4);
- double tri_cap = output_buffer();
- assert(tri_cap > 0);
- //area of a tristate logic
- double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
- g_area *= 2; // to model area of output transistors
- g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
- g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
- double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
- // effective no. of tristate buffers that need to be laid side by side
- int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
- double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
- Wire w1(g_ip->wt, wire_len);
-
- area.w = wire_len;
- area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
- Wire w2(g_ip->wt, area.h);
-
- double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
- if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
-
- if (aspect_ratio_cb < ASPECT_THRESHOLD) {
- if (n_out > 2 && n_inp > 2) {
- CB_ADJ+=0.2;
- //cout << "CB ADJ " << CB_ADJ << endl;
- if (CB_ADJ < 4) {
- this->compute_power();
- }
+/*
+ * Compute the crossbar's area (area.w, area.h), power.readOp.{dynamic,
+ * leakage, gate_leakage} and delay.
+ *
+ * While the aspect ratio of the crossbar is below ASPECT_THRESHOLD (and both
+ * n_inp and n_out exceed 2), CB_ADJ is raised in steps of 0.2 and the whole
+ * computation is redone through a recursive call, until CB_ADJ reaches 4.
+ * The frame that made the recursive call returns immediately afterwards:
+ * its local wire models w1/w2 were built for the previous, smaller CB_ADJ,
+ * so letting it continue would overwrite the power and delay just stored by
+ * the nested call with values that no longer match area.w/area.h.
+ */
+void Crossbar::compute_power() {
+    // NOTE(review): winit is never read. It is kept because constructing a
+    // Wire may have side effects that are not visible from here -- confirm.
+    Wire winit(4, 4);
+    double tri_cap = output_buffer();
+    assert(tri_cap > 0);
+    //area of a tristate logic
+    // NOTE(review): the declaration in component.h orders the width
+    // parameters as (w_pmos, w_nmos); the calls below pass the NMOS-based
+    // width first. Left unchanged -- confirm which order is intended.
+    double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_,
+                                      TriS2 * min_w_pmos, g_tp.cell_h_def);
+    g_area *= 2; // to model area of output transistors
+    g_area += compute_gate_area(NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_,
+                                TriS1 * min_w_pmos, g_tp.cell_h_def);
+    g_area += compute_gate_area(NOR, 2, TriS1 * g_tp.min_w_nmos_,
+                                TriS1 * 2 * min_w_pmos, g_tp.cell_h_def);
+    double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def);
+    // effective no. of tristate buffers that need to be laid side by side
+    int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch));
+    double wire_len = MAX(width * ntri * n_out,
+                          flit_size * g_tp.wire_outside_mat.pitch * n_out);
+    Wire w1(g_ip->wt, wire_len);
+
+    area.w = wire_len;
+    area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ;
+    Wire w2(g_ip->wt, area.h);
+
+    double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp);
+    if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb;
+
+    if (aspect_ratio_cb < ASPECT_THRESHOLD) {
+        if (n_out > 2 && n_inp > 2) {
+            CB_ADJ += 0.2;
+            //cout << "CB ADJ " << CB_ADJ << endl;
+            if (CB_ADJ < 4) {
+                this->compute_power();
+                // The nested call stored area, power and delay for the
+                // adjusted geometry; w1 and w2 in this frame are stale.
+                return;
+            }
+        }
+    }
- }
-
-
-
- power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
- power.readOp.leakage = n_inp * n_out * flit_size * (
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
- w1.power.readOp.leakage + w2.power.readOp.leakage);
- power.readOp.gate_leakage = n_inp * n_out * flit_size * (
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
- w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
-
- // delay calculation
- double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
- Wire wdriver(g_ip->wt, l_eff);
- double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
- double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
- delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
-
- Wire wreset();
+
+    // Dynamic term: both wires plus the tristate capacitances switched at
+    // Vdd, scaled by the flit width.
+    power.readOp.dynamic =
+        (w1.power.readOp.dynamic + w2.power.readOp.dynamic +
+         (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap +
+          tri_int_cap) * Vdd * Vdd) * flit_size;
+    power.readOp.leakage = n_inp * n_out * flit_size * (
+        cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
+                          1, inv) * Vdd +
+        cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
+                          2, nand) * Vdd +
+        cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
+                          2, nor) * Vdd +
+        w1.power.readOp.leakage + w2.power.readOp.leakage);
+    power.readOp.gate_leakage = n_inp * n_out * flit_size * (
+        cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
+                        1, inv) * Vdd +
+        cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
+                        2, nand) * Vdd +
+        cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
+                        2, nor) * Vdd +
+        w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
+
+    // delay calculation
+    double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
+    Wire wdriver(g_ip->wt, l_eff);
+    double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) +
+        tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1);
+    double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out *
+        tri_inp_cap + n_inp * tri_out_cap;
+    delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth /
+                     deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE);
+    // The former trailing "Wire wreset();" declared a function rather than
+    // constructing an object, so it had no effect and has been dropped.
}
-void Crossbar::print_crossbar()
-{
- cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
- cout << "Flit size : " << flit_size << " bits" << endl;
- cout << "Width : " << area.w << " u" << endl;
- cout << "Height : " << area.h << " u" << endl;
- cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
- cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
- cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
- cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
+// Write the crossbar's dimensions, energy/leakage figures and delay to
+// stdout as a single chained stream expression.
+void Crossbar::print_crossbar() {
+    cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"
+         << "Flit size : " << flit_size << " bits" << endl
+         << "Width : " << area.w << " u" << endl
+         << "Height : " << area.h << " u" << endl
+         // NOTE(review): labelled "Power" but reported in nJ.
+         << "Dynamic Power : "
+         << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl
+         << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl
+         << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3
+         << " (mW)" << endl
+         << "Crossbar Delay : " << delay*1e12 << " ps\n";
}
diff --git a/ext/mcpat/cacti/crossbar.h b/ext/mcpat/cacti/crossbar.h
index 3b926517c..b8de7547b 100644
--- a/ext/mcpat/cacti/crossbar.h
+++ b/ext/mcpat/cacti/crossbar.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -44,14 +45,13 @@
#include "parameter.h"
#include "wire.h"
-class Crossbar : public Component
-{
- public:
+class Crossbar : public Component {
+public:
Crossbar(
- double in,
- double out,
- double flit_sz,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ double in,
+ double out,
+ double flit_sz,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
~Crossbar();
void print_crossbar();
@@ -62,18 +62,18 @@ class Crossbar : public Component
double flit_size;
double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap;
- private:
- double CB_ADJ;
- /*
- * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar
- * buffer is adjusted to get an aspect ratio of whole cross bar close to one;
- * when adjust the ratio, the number of wires route over the tri-state buffers does not change,
- * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase
- * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch
- * will increase. As a result, the height of the crossbar (area.h) will increase.
- */
-
- TechnologyParameter::DeviceType *deviceType;
+private:
+ double CB_ADJ;
+ /*
+ * Adjustment factor for the height of the cross-point (tri-state buffer) cell (layout) in the crossbar.
+ * The buffer is adjusted to bring the aspect ratio of the whole crossbar close to one.
+ * While the ratio is being adjusted, the number of wires routed over the tri-state buffers does not change;
+ * however, the effective wiring pitch changes. Specifically, since CB_ADJ increases
+ * during the adjustment, the tri-state buffer becomes taller and thinner, and the effective wiring pitch
+ * increases. As a result, the height of the crossbar (area.h) increases.
+ */
+
+ TechnologyParameter::DeviceType *deviceType;
double TriS1, TriS2;
double min_w_pmos, Vdd;
diff --git a/ext/mcpat/cacti/decoder.cc b/ext/mcpat/cacti/decoder.cc
index 0de6f6157..7fa66b4ff 100644
--- a/ext/mcpat/cacti/decoder.cc
+++ b/ext/mcpat/cacti/decoder.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -51,207 +52,184 @@ Decoder::Decoder(
bool is_dram_,
bool is_wl_tr_,
const Area & cell_)
-:exist(false),
- C_ld_dec_out(_C_ld_dec_out),
- R_wire_dec_out(_R_wire_dec_out),
- num_gates(0), num_gates_min(2),
- delay(0),
- //power(),
- fully_assoc(fully_assoc_), is_dram(is_dram_),
- is_wl_tr(is_wl_tr_), cell(cell_)
-{
-
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- w_dec_n[i] = 0;
- w_dec_p[i] = 0;
- }
-
- /*
- * _num_dec_signals is the number of decoded signal as output
- * num_addr_bits_dec is the number of signal to be decoded
- * as the decoders input.
- */
- int num_addr_bits_dec = _log2(_num_dec_signals);
-
- if (num_addr_bits_dec < 4)
- {
- if (flag_way_select)
- {
- exist = true;
- num_in_signals = 2;
+ : exist(false),
+ C_ld_dec_out(_C_ld_dec_out),
+ R_wire_dec_out(_R_wire_dec_out),
+ num_gates(0), num_gates_min(2),
+ delay(0),
+ //power(),
+ fully_assoc(fully_assoc_), is_dram(is_dram_),
+ is_wl_tr(is_wl_tr_), cell(cell_) {
+
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
+ w_dec_n[i] = 0;
+ w_dec_p[i] = 0;
}
- else
- {
- num_in_signals = 0;
- }
- }
- else
- {
- exist = true;
- if (flag_way_select)
- {
- num_in_signals = 3;
- }
- else
- {
- num_in_signals = 2;
+ /*
+ * _num_dec_signals is the number of decoded signal as output
+ * num_addr_bits_dec is the number of signal to be decoded
+ * as the decoders input.
+ */
+ int num_addr_bits_dec = _log2(_num_dec_signals);
+
+ if (num_addr_bits_dec < 4) {
+ if (flag_way_select) {
+ exist = true;
+ num_in_signals = 2;
+ } else {
+ num_in_signals = 0;
+ }
+ } else {
+ exist = true;
+
+ if (flag_way_select) {
+ num_in_signals = 3;
+ } else {
+ num_in_signals = 2;
+ }
}
- }
- assert(cell.h>0);
- assert(cell.w>0);
- // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
- //area.h = 4 * cell.h;
- area.h = g_tp.h_dec * cell.h;
+ assert(cell.h > 0);
+ assert(cell.w > 0);
+ // the height of a row-decoder-driver cell is g_tp.h_dec * cell.h (it was formerly fixed at 4 * cell.h);
+ //area.h = 4 * cell.h;
+ area.h = g_tp.h_dec * cell.h;
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void Decoder::compute_widths()
-{
- double F;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
- double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
-
- if (exist)
- {
- if (num_in_signals == 2 || fully_assoc)
- {
- w_dec_n[0] = 2 * g_tp.min_w_nmos_;
- w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2;
+// Size the first gate of the decoder chain and hand w_dec_n/w_dec_p to
+// logical_effort(), whose return value becomes num_gates. Does nothing
+// beyond evaluating the P/N sizing ratio when the decoder does not exist.
+void Decoder::compute_widths() {
+    const double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
+    if (!exist) {
+        return;
+    }
+
+    // Effort factors of a 2-input and a 3-input NAND.
+    const double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+    const double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+
+    // NOTE(review): the first gate is sized as a NAND2 when fully_assoc is
+    // set, yet the effort handed to logical_effort() below depends only on
+    // num_in_signals -- confirm that this difference is intended.
+    const bool size_as_nand2 = (num_in_signals == 2 || fully_assoc);
+    w_dec_n[0] = (size_as_nand2 ? 2 : 3) * g_tp.min_w_nmos_;
+    w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+
+    // Path effort: first-gate effort times load over input capacitance.
+    const double F = (size_as_nand2 ? gnand2 : gnand3) *
+        (C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
+                         gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)));
+
+    const double g_first = (num_in_signals == 2) ? gnand2 : gnand3;
+    num_gates = logical_effort(num_gates_min, g_first, F, w_dec_n, w_dec_p,
+                               C_ld_dec_out, p_to_n_sz_ratio, is_dram,
+                               is_wl_tr, g_tp.max_w_nmos_dec);
- else
- {
- w_dec_n[0] = 3 * g_tp.min_w_nmos_;
- w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3;
- }
-
- F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
- gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
- num_gates = logical_effort(
- num_gates_min,
- num_in_signals == 2 ? gnand2 : gnand3,
- F,
- w_dec_n,
- w_dec_p,
- C_ld_dec_out,
- p_to_n_sz_ratio,
- is_dram,
- is_wl_tr,
- g_tp.max_w_nmos_dec);
- }
}
-void Decoder::compute_area()
-{
- double cumulative_area = 0;
- double cumulative_curr = 0; // cumulative leakage current
- double cumulative_curr_Ig = 0; // cumulative leakage current
-
- if (exist)
- { // First check if this decoder exists
- if (num_in_signals == 2)
- {
- cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
- cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
- }
- else if (num_in_signals == 3)
- {
- cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
- cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
- }
-
- for (int i = 1; i < num_gates; i++)
- {
- cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
- cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+/*
+ * Accumulate the area and the leakage currents of every gate in the decoder
+ * chain, then store power.readOp.leakage, power.readOp.gate_leakage and
+ * area.w. Stage 0 is a NAND with num_in_signals inputs (2 or 3); stages
+ * 1 .. num_gates-1 are inverters. Nothing is written when the decoder does
+ * not exist.
+ *
+ * The gate-leakage sum now uses += inside the inverter loop. It used to be
+ * assigned with '=', which kept only the last inverter's contribution and
+ * discarded the NAND stage and all earlier inverters.
+ */
+void Decoder::compute_area() {
+    if (!exist) { // First check if this decoder exists
+        return;
+    }
+
+    double cumulative_area = 0;
+    double cumulative_curr = 0;    // cumulative Isub leakage current
+    double cumulative_curr_Ig = 0; // cumulative gate (Ig) leakage current
+
+    // First stage. Any other num_in_signals value contributes nothing.
+    if (num_in_signals == 2 || num_in_signals == 3) {
+        cumulative_area = compute_gate_area(NAND, num_in_signals, w_dec_p[0],
+                                            w_dec_n[0], area.h);
+        cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0],
+                                            num_in_signals, nand, is_dram);
+        cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0],
+                                             num_in_signals, nand, is_dram);
+    }
+
+    // Inverter stages.
+    for (int i = 1; i < num_gates; i++) {
+        cumulative_area +=
+            compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
+        cumulative_curr +=
+            cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+        cumulative_curr_Ig +=
+            cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+    }
+
+    power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
+    power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
+
+    // area.h was fixed by the constructor; derive the width from it.
+    area.w = (cumulative_area / area.h);
- power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
- power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
-
- area.w = (cumulative_area / area.h);
- }
}
-double Decoder::compute_delays(double inrisetime)
-{
- if (exist)
- {
- double ret_val = 0; // outrisetime
- int i;
- double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
- double Vdd = g_tp.peri_global.Vdd;
+double Decoder::compute_delays(double inrisetime) {
+ if (exist) {
+ double ret_val = 0; // outrisetime
+ int i;
+ double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
+ double Vdd = g_tp.peri_global.Vdd;
- if ((is_wl_tr) && (is_dram))
- {
- Vpp = g_tp.vpp;
- }
- else if (is_wl_tr)
- {
- Vpp = g_tp.sram_cell.Vdd;
- }
- else
- {
- Vpp = g_tp.peri_global.Vdd;
- }
+ if ((is_wl_tr) && (is_dram)) {
+ Vpp = g_tp.vpp;
+ } else if (is_wl_tr) {
+ Vpp = g_tp.sram_cell.Vdd;
+ } else {
+ Vpp = g_tp.peri_global.Vdd;
+ }
- // first check whether a decoder is required at all
- rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
- c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
- drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
-
- for (i = 1; i < num_gates - 1; ++i)
- {
- rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
- c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
- drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+ // delay of the first stage: the num_in_signals-input gate driving the first inverter
+ rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
+ c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
+ drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+
+ for (i = 1; i < num_gates - 1; ++i) {
+ rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
+ c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
+ drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+ }
+
+ // add delay of final inverter that drives the wordline
+ i = num_gates - 1;
+ c_load = C_ld_dec_out;
+ rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
+ drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ ret_val = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
+
+ return ret_val;
+ } else {
+ return 0.0;
}
-
- // add delay of final inverter that drives the wordline
- i = num_gates - 1;
- c_load = C_ld_dec_out;
- rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
- drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- ret_val = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
-
- return ret_val;
- }
- else
- {
- return 0.0;
- }
}
void Decoder::leakage_feedback(double temperature)
@@ -291,610 +269,568 @@ PredecBlk::PredecBlk(
int num_dec_per_predec,
bool is_dram,
bool is_blk1)
- :dec(dec_),
- exist(false),
- number_input_addr_bits(0),
- C_ld_predec_blk_out(0),
- R_wire_predec_blk_out(0),
- branch_effort_nand2_gate_output(1),
- branch_effort_nand3_gate_output(1),
- flag_two_unique_paths(false),
- flag_L2_gate(0),
- number_inputs_L1_gate(0),
- number_gates_L1_nand2_path(0),
- number_gates_L1_nand3_path(0),
- number_gates_L2(0),
- min_number_gates_L1(2),
- min_number_gates_L2(2),
- num_L1_active_nand2_path(0),
- num_L1_active_nand3_path(0),
- delay_nand2_path(0),
- delay_nand3_path(0),
- power_nand2_path(),
- power_nand3_path(),
- power_L2(),
- is_dram_(is_dram)
-{
- int branch_effort_predec_out;
- double C_ld_dec_gate;
- int num_addr_bits_dec = _log2(num_dec_signals);
- int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
- int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
-
- w_L1_nand2_n[0] = 0;
- w_L1_nand2_p[0] = 0;
- w_L1_nand3_n[0] = 0;
- w_L1_nand3_p[0] = 0;
-
- if (is_blk1 == true)
- {
- if (num_addr_bits_dec <= 0)
- {
- return;
+ : dec(dec_),
+ exist(false),
+ number_input_addr_bits(0),
+ C_ld_predec_blk_out(0),
+ R_wire_predec_blk_out(0),
+ branch_effort_nand2_gate_output(1),
+ branch_effort_nand3_gate_output(1),
+ flag_two_unique_paths(false),
+ flag_L2_gate(0),
+ number_inputs_L1_gate(0),
+ number_gates_L1_nand2_path(0),
+ number_gates_L1_nand3_path(0),
+ number_gates_L2(0),
+ min_number_gates_L1(2),
+ min_number_gates_L2(2),
+ num_L1_active_nand2_path(0),
+ num_L1_active_nand3_path(0),
+ delay_nand2_path(0),
+ delay_nand3_path(0),
+ power_nand2_path(),
+ power_nand3_path(),
+ power_L2(),
+ is_dram_(is_dram) {
+ int branch_effort_predec_out;
+ double C_ld_dec_gate;
+ int num_addr_bits_dec = _log2(num_dec_signals);
+ int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
+ int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
+
+ w_L1_nand2_n[0] = 0;
+ w_L1_nand2_p[0] = 0;
+ w_L1_nand3_n[0] = 0;
+ w_L1_nand3_p[0] = 0;
+
+ if (is_blk1 == true) {
+ if (num_addr_bits_dec <= 0) {
+ return;
+ } else if (num_addr_bits_dec < 4) {
+ // Just one predecoder block is required with NAND2 gates. No decoder required.
+ // The first level of predecoding directly drives the decoder output load
+ exist = true;
+ number_input_addr_bits = num_addr_bits_dec;
+ R_wire_predec_blk_out = dec->R_wire_dec_out;
+ C_ld_predec_blk_out = dec->C_ld_dec_out;
+ } else {
+ exist = true;
+ number_input_addr_bits = blk1_num_input_addr_bits;
+ branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
+ C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+ R_wire_predec_blk_out = R_wire_predec_blk_out_;
+ C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+ }
+ } else {
+ if (num_addr_bits_dec >= 4) {
+ exist = true;
+ number_input_addr_bits = blk2_num_input_addr_bits;
+ branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
+ C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+ R_wire_predec_blk_out = R_wire_predec_blk_out_;
+ C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+ }
}
- else if (num_addr_bits_dec < 4)
- {
- // Just one predecoder block is required with NAND2 gates. No decoder required.
- // The first level of predecoding directly drives the decoder output load
- exist = true;
- number_input_addr_bits = num_addr_bits_dec;
- R_wire_predec_blk_out = dec->R_wire_dec_out;
- C_ld_predec_blk_out = dec->C_ld_dec_out;
- }
- else
- {
- exist = true;
- number_input_addr_bits = blk1_num_input_addr_bits;
- branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
- C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
- R_wire_predec_blk_out = R_wire_predec_blk_out_;
- C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
- }
- }
- else
- {
- if (num_addr_bits_dec >= 4)
- {
- exist = true;
- number_input_addr_bits = blk2_num_input_addr_bits;
- branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
- C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
- R_wire_predec_blk_out = R_wire_predec_blk_out_;
- C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
- }
- }
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void PredecBlk::compute_widths()
-{
- double F, c_load_nand3_path, c_load_nand2_path;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
- double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+void PredecBlk::compute_widths() { // Picks the gate topology for this predecoder block and sizes each gate chain with logical_effort()
+ double F, c_load_nand3_path, c_load_nand2_path;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+ double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); // passed to logical_effort() for NAND2-headed chains; presumably the NAND2 logical effort
+ double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); // same, for NAND3-headed chains
- if (exist == false) return;
+ if (exist == false) return; // nothing to size when the constructor decided no block is needed
- switch (number_input_addr_bits)
- {
+ switch (number_input_addr_bits) { // topology is selected purely by the number of address bits this block handles
case 1:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 0;
- break;
- case 2:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 0;
- break;
- case 3:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 0;
- break;
- case 4:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 2;
- branch_effort_nand2_gate_output = 4;
- break;
- case 5:
- flag_two_unique_paths = true;
- flag_L2_gate = 2;
- branch_effort_nand2_gate_output = 8;
- branch_effort_nand3_gate_output = 4;
- break;
- case 6:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 2;
- branch_effort_nand3_gate_output = 8;
- break;
- case 7:
- flag_two_unique_paths = true;
- flag_L2_gate = 3;
- branch_effort_nand2_gate_output = 32;
- branch_effort_nand3_gate_output = 16;
- break;
- case 8:
- flag_two_unique_paths = true;
- flag_L2_gate = 3;
- branch_effort_nand2_gate_output = 64;
- branch_effort_nand3_gate_output = 32;
- break;
- case 9:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 3;
- branch_effort_nand3_gate_output = 64;
- break;
- default:
- assert(0);
- break;
- }
-
- // find the number of gates and sizing in second level of predecoder (if there is a second level)
- if (flag_L2_gate)
- {
- if (flag_L2_gate == 2)
- { // 2nd level is a NAND2 gate
- w_L2_n[0] = 2 * g_tp.min_w_nmos_;
- F = gnand2;
- }
- else
- { // 2nd level is a NAND3 gate
- w_L2_n[0] = 3 * g_tp.min_w_nmos_;
- F = gnand3;
- }
- w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- number_gates_L2 = logical_effort(
- min_number_gates_L2,
- flag_L2_gate == 2 ? gnand2 : gnand3,
- F,
- w_L2_n,
- w_L2_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
-
- // Now find the number of gates and widths in first level of predecoder
- if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2))
- { // Whenever flag_two_unique_paths is true, it means first level of decoder employs
- // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means
- // a NAND2 gate is used in the first level of the predecoder
- c_load_nand2_path = branch_effort_nand2_gate_output *
- (gate_C(w_L2_n[0], 0, is_dram_) +
- gate_C(w_L2_p[0], 0, is_dram_));
- w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
- w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2 * c_load_nand2_path /
- (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
- gate_C(w_L1_nand2_p[0], 0, is_dram_));
- number_gates_L1_nand2_path = logical_effort(
- min_number_gates_L1,
- gnand2,
- F,
- w_L1_nand2_n,
- w_L1_nand2_p,
- c_load_nand2_path,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
-
- //Now find widths of gates along path in which first gate is a NAND3
- if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3))
- { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
- // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
- // a NAND3 gate is used in the first level of the predecoder
- c_load_nand3_path = branch_effort_nand3_gate_output *
- (gate_C(w_L2_n[0], 0, is_dram_) +
- gate_C(w_L2_p[0], 0, is_dram_));
- w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
- w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3 * c_load_nand3_path /
- (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
- gate_C(w_L1_nand3_p[0], 0, is_dram_));
- number_gates_L1_nand3_path = logical_effort(
- min_number_gates_L1,
- gnand3,
- F,
- w_L1_nand3_n,
- w_L1_nand3_p,
- c_load_nand3_path,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- }
- else
- { // find number of gates and widths in first level of predecoder block when there is no second level
- if (number_inputs_L1_gate == 2)
- {
- w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
- w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2*C_ld_predec_blk_out /
- (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
- gate_C(w_L1_nand2_p[0], 0, is_dram_));
- number_gates_L1_nand2_path = logical_effort(
- min_number_gates_L1,
- gnand2,
- F,
- w_L1_nand2_n,
- w_L1_nand2_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- else if (number_inputs_L1_gate == 3)
- {
- w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
- w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3*C_ld_predec_blk_out /
- (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
- gate_C(w_L1_nand3_p[0], 0, is_dram_));
- number_gates_L1_nand3_path = logical_effort(
- min_number_gates_L1,
- gnand3,
- F,
- w_L1_nand3_n,
- w_L1_nand3_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- }
-}
-
-
-
-void PredecBlk::compute_area()
-{
- if (exist)
- { // First check whether a predecoder block is needed
- int num_L1_nand2 = 0;
- int num_L1_nand3 = 0;
- int num_L2 = 0;
- double tot_area_L1_nand3 =0;
- double leak_L1_nand3 =0;
- double gate_leak_L1_nand3 =0;
-
- double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
- double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
- double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
- if (number_inputs_L1_gate != 3) {
- tot_area_L1_nand3 = 0;
- leak_L1_nand3 = 0;
- gate_leak_L1_nand3 =0;
- }
- else {
- tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
- leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
- gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
- }
-
- switch (number_input_addr_bits)
- {
- case 1: //2 NAND2 gates
- num_L1_nand2 = 2;
- num_L2 = 0;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =0;
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 0; // 0 means there is no second level
break;
- case 2: //4 NAND2 gates
- num_L1_nand2 = 4;
- num_L2 = 0;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =0;
+ case 2:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 0;
break;
- case 3: //8 NAND3 gates
- num_L1_nand3 = 8;
- num_L2 = 0;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =1;
+ case 3:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 0;
break;
- case 4: //4 + 4 NAND2 gates
- num_L1_nand2 = 8;
- num_L2 = 16;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =0;
+ case 4:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 2; // 2 means the second level starts with a NAND2 gate
+ branch_effort_nand2_gate_output = 4;
break;
- case 5: //4 NAND2 gates, 8 NAND3 gates
- num_L1_nand2 = 4;
- num_L1_nand3 = 8;
- num_L2 = 32;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =1;
+ case 5:
+ flag_two_unique_paths = true; // first level mixes NAND2 and NAND3 gates; number_inputs_L1_gate is not assigned in this case
+ flag_L2_gate = 2;
+ branch_effort_nand2_gate_output = 8;
+ branch_effort_nand3_gate_output = 4;
break;
- case 6: //8 + 8 NAND3 gates
- num_L1_nand3 = 16;
- num_L2 = 64;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =2;
+ case 6:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 2;
+ branch_effort_nand3_gate_output = 8;
break;
- case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
- num_L1_nand2 = 8;
- num_L1_nand3 = 8;
- num_L2 = 128;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =1;
+ case 7:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 3; // 3 means the second level starts with a NAND3 gate
+ branch_effort_nand2_gate_output = 32;
+ branch_effort_nand3_gate_output = 16;
break;
- case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
- num_L1_nand2 = 4;
- num_L1_nand3 = 16;
- num_L2 = 256;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =2;
+ case 8:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 3;
+ branch_effort_nand2_gate_output = 64;
+ branch_effort_nand3_gate_output = 32;
break;
- case 9: //8 + 8 + 8 NAND3 gates
- num_L1_nand3 = 24;
- num_L2 = 512;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =3;
+ case 9:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 3;
+ branch_effort_nand3_gate_output = 64;
break;
- default:
+ default:
+ assert(0); // only 1 to 9 input address bits have a topology defined above
break;
}
- for (int i = 1; i < number_gates_L1_nand2_path; ++i)
- {
- tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
- leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
- gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ // Find the number of gates and sizing in the second level of the predecoder (if there is a second level)
+ if (flag_L2_gate) {
+ if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate
+ w_L2_n[0] = 2 * g_tp.min_w_nmos_;
+ F = gnand2;
+ } else { // 2nd level is a NAND3 gate
+ w_L2_n[0] = 3 * g_tp.min_w_nmos_;
+ F = gnand3;
+ }
+ w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); // block output load relative to the first L2 gate's input capacitance
+ number_gates_L2 = logical_effort(
+ min_number_gates_L2,
+ flag_L2_gate == 2 ? gnand2 : gnand3,
+ F,
+ w_L2_n,
+ w_L2_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+
+ // Now find the number of gates and widths in first level of predecoder
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
+ // Whenever flag_two_unique_paths is true, the first level of the
+ // predecoder employs both NAND2 and NAND3 gates. When
+ // number_inputs_L1_gate is 2, a NAND2 gate is used in the first
+ // level of the predecoder. In both cases the NAND2 path is sized
+ // against the input load of the second-level gates it fans out to.
+ c_load_nand2_path = branch_effort_nand2_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+ w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2 * c_load_nand2_path /
+ (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand2_p[0], 0, is_dram_));
+ number_gates_L1_nand2_path = logical_effort(
+ min_number_gates_L1,
+ gnand2,
+ F,
+ w_L1_nand2_n,
+ w_L1_nand2_p,
+ c_load_nand2_path,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+
+ // Now find widths of gates along the path in which the first gate is a NAND3
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is true, the first level of the predecoder employs
+ // both NAND2 and NAND3 gates. When number_inputs_L1_gate is 3,
+ // a NAND3 gate is used in the first level of the predecoder
+ c_load_nand3_path = branch_effort_nand3_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+ w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3 * c_load_nand3_path /
+ (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand3_p[0], 0, is_dram_));
+ number_gates_L1_nand3_path = logical_effort(
+ min_number_gates_L1,
+ gnand3,
+ F,
+ w_L1_nand3_n,
+ w_L1_nand3_p,
+ c_load_nand3_path,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+ } else { // No second level: the first level is sized directly against C_ld_predec_blk_out
+ if (number_inputs_L1_gate == 2) {
+ w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+ w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2 * C_ld_predec_blk_out /
+ (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand2_p[0], 0, is_dram_));
+ number_gates_L1_nand2_path = logical_effort(
+ min_number_gates_L1,
+ gnand2,
+ F,
+ w_L1_nand2_n,
+ w_L1_nand2_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ } else if (number_inputs_L1_gate == 3) {
+ w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+ w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3 * C_ld_predec_blk_out /
+ (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand3_p[0], 0, is_dram_));
+ number_gates_L1_nand3_path = logical_effort(
+ min_number_gates_L1,
+ gnand3,
+ F,
+ w_L1_nand3_n,
+ w_L1_nand3_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
}
- tot_area_L1_nand2 *= num_L1_nand2;
- leak_L1_nand2 *= num_L1_nand2;
- gate_leak_L1_nand2 *= num_L1_nand2;
-
- for (int i = 1; i < number_gates_L1_nand3_path; ++i)
- {
- tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
- leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
- gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
- }
- tot_area_L1_nand3 *= num_L1_nand3;
- leak_L1_nand3 *= num_L1_nand3;
- gate_leak_L1_nand3 *= num_L1_nand3;
+}
- double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
- double cumulative_area_L2 = 0.0;
- double leakage_L2 = 0.0;
- double gate_leakage_L2 = 0.0;
- if (flag_L2_gate == 2)
- {
- cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
- leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
- gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
- }
- else if (flag_L2_gate == 3)
- {
- cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
- leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
- gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
- }
- for (int i = 1; i < number_gates_L2; ++i)
- {
- cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
- leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
- gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+void PredecBlk::compute_area() { // Sets this predecoder block's area and its subthreshold/gate leakage power; does nothing when exist is false
+ if (exist) { // First check whether a predecoder block is needed
+ int num_L1_nand2 = 0;
+ int num_L1_nand3 = 0;
+ int num_L2 = 0;
+ double tot_area_L1_nand3 = 0;
+ double leak_L1_nand3 = 0;
+ double gate_leak_L1_nand3 = 0;
+
+ double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); // first gate of the NAND2 path; multiplied by num_L1_nand2 below, which stays 0 when the block has no NAND2 gates
+ double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ if (number_inputs_L1_gate != 3) { // NOTE(review): compute_widths() does not set number_inputs_L1_gate for the two-path cases (5, 7, 8 bits), so their first NAND3 gate appears to be skipped here - confirm intended
+ tot_area_L1_nand3 = 0;
+ leak_L1_nand3 = 0;
+ gate_leak_L1_nand3 = 0;
+ } else {
+ tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
+ leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand, is_dram_); // pass is_dram_ like every other leakage call in this function
+ gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand, is_dram_); // pass is_dram_ like every other leakage call in this function
+ }
+
+ switch (number_input_addr_bits) { // gate counts and active-path counts per number of input address bits
+ case 1: //2 NAND2 gates
+ num_L1_nand2 = 2;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 2: //4 NAND2 gates
+ num_L1_nand2 = 4;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 3: //8 NAND3 gates
+ num_L1_nand3 = 8;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 4: //4 + 4 NAND2 gates
+ num_L1_nand2 = 8;
+ num_L2 = 16;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 5: //4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 8;
+ num_L2 = 32;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 6: //8 + 8 NAND3 gates
+ num_L1_nand3 = 16;
+ num_L2 = 64;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 2;
+ break;
+ case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 8;
+ num_L1_nand3 = 8;
+ num_L2 = 128;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 16;
+ num_L2 = 256;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 2;
+ break;
+ case 9: //8 + 8 + 8 NAND3 gates
+ num_L1_nand3 = 24;
+ num_L2 = 512;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 3;
+ break;
+ default: // any other value leaves the local gate counts at 0
+ break;
+ }
+
+ for (int i = 1; i < number_gates_L1_nand2_path; ++i) { // stages after the first gate of the NAND2 path are added as inverters (INV area)
+ tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
+ leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); // NOTE(review): area uses INV but leakage uses (2, nand) for these stages - confirm intended
+ gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ }
+ tot_area_L1_nand2 *= num_L1_nand2; // per-path totals are replicated for every L1 NAND2 gate
+ leak_L1_nand2 *= num_L1_nand2;
+ gate_leak_L1_nand2 *= num_L1_nand2;
+
+ for (int i = 1; i < number_gates_L1_nand3_path; ++i) { // same accumulation for the NAND3 path
+ tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
+ leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); // NOTE(review): area uses INV but leakage uses (3, nand) for these stages - confirm intended
+ gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ }
+ tot_area_L1_nand3 *= num_L1_nand3;
+ leak_L1_nand3 *= num_L1_nand3;
+ gate_leak_L1_nand3 *= num_L1_nand3;
+
+ double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
+ double cumulative_area_L2 = 0.0;
+ double leakage_L2 = 0.0;
+ double gate_leakage_L2 = 0.0;
+
+ if (flag_L2_gate == 2) { // second level starts with a NAND2 gate
+ cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ } else if (flag_L2_gate == 3) { // second level starts with a NAND3 gate
+ cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ }
+
+ for (int i = 1; i < number_gates_L2; ++i) { // remaining second-level stages are inverters
+ cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
+ leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); // NOTE(review): fan-in argument is 2 for an inverter - confirm intended
+ gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ }
+ cumulative_area_L2 *= num_L2; // replicate one second-level chain for every L2 gate
+ leakage_L2 *= num_L2;
+ gate_leakage_L2 *= num_L2;
+
+ power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; // leakage terms are scaled by the peripheral global Vdd before being stored
+ power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
+ area.set_area(cumulative_area_L1 + cumulative_area_L2); // block area is the sum of both levels
+ power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
}
- cumulative_area_L2 *= num_L2;
- leakage_L2 *= num_L2;
- gate_leakage_L2 *= num_L2;
-
- power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
- power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
- area.set_area(cumulative_area_L1 + cumulative_area_L2);
- power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
- power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
- }
}
pair<double, double> PredecBlk::compute_delays(
- pair<double, double> inrisetime) // <nand2, nand3>
-{
- pair<double, double> ret_val;
- ret_val.first = 0; // outrisetime_nand2_path
- ret_val.second = 0; // outrisetime_nand3_path
-
- double inrisetime_nand2_path = inrisetime.first;
- double inrisetime_nand3_path = inrisetime.second;
- int i;
- double rd, c_load, c_intrinsic, tf, this_delay;
- double Vdd = g_tp.peri_global.Vdd;
-
- // TODO: following delay calculation part can be greatly simplified.
- // first check whether a predecoder block is required
- if (exist)
- {
- //Find delay in first level of predecoder block
- //First find delay in path
- if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2))
- {
- //First gate is a NAND2 gate
- rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
- c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
- c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
-
- //Add delays of all but the last inverter in the chain
- for (i = 1; i < number_gates_L1_nand2_path - 1; ++i)
- {
- rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of the last inverter
- i = number_gates_L1_nand2_path - 1;
- rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
- if (flag_L2_gate)
- {
- c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { //First level directly drives decoder output load
- c_load = C_ld_predec_blk_out;
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- }
+ pair<double, double> inrisetime) { // <nand2, nand3>
+ pair<double, double> ret_val;
+ ret_val.first = 0; // outrisetime_nand2_path
+ ret_val.second = 0; // outrisetime_nand3_path
- if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3))
- { //Check if the number of gates in the first level is more than 1.
- //First gate is a NAND3 gate
- rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
- c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
- c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
-
- //Add delays of all but the last inverter in the chain
- for (i = 1; i < number_gates_L1_nand3_path - 1; ++i)
- {
- rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of the last inverter
- i = number_gates_L1_nand3_path - 1;
- rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
- if (flag_L2_gate)
- {
- c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { //First level directly drives decoder output load
- c_load = C_ld_predec_blk_out;
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- }
+ double inrisetime_nand2_path = inrisetime.first;
+ double inrisetime_nand3_path = inrisetime.second;
+ int i;
+ double rd, c_load, c_intrinsic, tf, this_delay;
+ double Vdd = g_tp.peri_global.Vdd;
- // Find delay through second level
- if (flag_L2_gate)
- {
- if (flag_L2_gate == 2)
- {
- rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
- c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
- c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { // flag_L2_gate = 3
- rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
- c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
- c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- for (i = 1; i < number_gates_L2 - 1; ++i)
- {
- rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of final inverter that drives the wordline decoders
- i = number_gates_L2 - 1;
- c_load = C_ld_predec_blk_out;
- rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ // TODO: following delay calculation part can be greatly simplified.
+ // first check whether a predecoder block is required
+ if (exist) {
+ //Find delay in first level of predecoder block
+ //First find delay in path
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
+ //First gate is a NAND2 gate
+ rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
+ c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
+ c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+
+ //Add delays of all but the last inverter in the chain
+ for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) {
+ rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of the last inverter
+ i = number_gates_L1_nand2_path - 1;
+ rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
+ if (flag_L2_gate) {
+ c_load = branch_effort_nand2_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { //First level directly drives decoder output load
+ c_load = C_ld_predec_blk_out;
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
+ //Check if the number of gates in the first level is more than 1.
+ //First gate is a NAND3 gate
+ rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
+ c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
+ c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+
+ //Add delays of all but the last inverter in the chain
+ for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) {
+ rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of the last inverter
+ i = number_gates_L1_nand3_path - 1;
+ rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
+ if (flag_L2_gate) {
+ c_load = branch_effort_nand3_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0,
+ is_dram_));
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { //First level directly drives decoder output load
+ c_load = C_ld_predec_blk_out;
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ // Find delay through second level
+ if (flag_L2_gate) {
+ if (flag_L2_gate == 2) {
+ rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
+ c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
+ c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { // flag_L2_gate = 3
+ rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
+ c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
+ c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ for (i = 1; i < number_gates_L2 - 1; ++i) {
+ rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of final inverter that drives the wordline decoders
+ i = number_gates_L2 - 1;
+ c_load = C_ld_predec_blk_out;
+ rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
}
- }
- delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
- return ret_val;
+ delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
+ return ret_val;
}
void PredecBlk::leakage_feedback(double temperature)
@@ -1033,302 +969,287 @@ PredecBlkDrv::PredecBlkDrv(
int way_select_,
PredecBlk * blk_,
bool is_dram)
- :flag_driver_exists(0),
- number_gates_nand2_path(0),
- number_gates_nand3_path(0),
- min_number_gates(2),
- num_buffers_driving_1_nand2_load(0),
- num_buffers_driving_2_nand2_load(0),
- num_buffers_driving_4_nand2_load(0),
- num_buffers_driving_2_nand3_load(0),
- num_buffers_driving_8_nand3_load(0),
- num_buffers_nand3_path(0),
- c_load_nand2_path_out(0),
- c_load_nand3_path_out(0),
- r_load_nand2_path_out(0),
- r_load_nand3_path_out(0),
- delay_nand2_path(0),
- delay_nand3_path(0),
- power_nand2_path(),
- power_nand3_path(),
- blk(blk_), dec(blk->dec),
- is_dram_(is_dram),
- way_select(way_select_)
-{
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- width_nand2_path_n[i] = 0;
- width_nand2_path_p[i] = 0;
- width_nand3_path_n[i] = 0;
- width_nand3_path_p[i] = 0;
- }
-
- number_input_addr_bits = blk->number_input_addr_bits;
-
- if (way_select > 1)
- {
- flag_driver_exists = 1;
- number_input_addr_bits = way_select;
- if (dec->num_in_signals == 2)
- {
- c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
- num_buffers_driving_2_nand2_load = number_input_addr_bits;
- }
- else if (dec->num_in_signals == 3)
- {
- c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
- num_buffers_driving_2_nand3_load = number_input_addr_bits;
+ : flag_driver_exists(0),
+ number_gates_nand2_path(0),
+ number_gates_nand3_path(0),
+ min_number_gates(2),
+ num_buffers_driving_1_nand2_load(0),
+ num_buffers_driving_2_nand2_load(0),
+ num_buffers_driving_4_nand2_load(0),
+ num_buffers_driving_2_nand3_load(0),
+ num_buffers_driving_8_nand3_load(0),
+ num_buffers_nand3_path(0),
+ c_load_nand2_path_out(0),
+ c_load_nand3_path_out(0),
+ r_load_nand2_path_out(0),
+ r_load_nand3_path_out(0),
+ delay_nand2_path(0),
+ delay_nand3_path(0),
+ power_nand2_path(),
+ power_nand3_path(),
+ blk(blk_), dec(blk->dec),
+ is_dram_(is_dram),
+ way_select(way_select_) {
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
+ width_nand2_path_n[i] = 0;
+ width_nand2_path_p[i] = 0;
+ width_nand3_path_n[i] = 0;
+ width_nand3_path_p[i] = 0;
}
- }
- else if (way_select == 0)
- {
- if (blk->exist)
- {
- flag_driver_exists = 1;
+
+ number_input_addr_bits = blk->number_input_addr_bits;
+
+ if (way_select > 1) {
+ flag_driver_exists = 1;
+ number_input_addr_bits = way_select;
+ if (dec->num_in_signals == 2) {
+ c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
+ num_buffers_driving_2_nand2_load = number_input_addr_bits;
+ } else if (dec->num_in_signals == 3) {
+ c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
+ num_buffers_driving_2_nand3_load = number_input_addr_bits;
+ }
+ } else if (way_select == 0) {
+ if (blk->exist) {
+ flag_driver_exists = 1;
+ }
}
- }
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void PredecBlkDrv::compute_widths()
-{
- // The predecode block driver accepts as input the address bits from the h-tree network. For
- // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
- // inversion to generate addrbar and simply treat addrbar as addr.
-
- double F;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
-
- if (flag_driver_exists)
- {
- double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
- double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
-
- if (way_select == 0)
- {
- if (blk->number_input_addr_bits == 1)
- { //2 NAND2 gates
- num_buffers_driving_2_nand2_load = 1;
- c_load_nand2_path_out = 2 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 2)
- { //4 NAND2 gates one 2-4 decoder
- num_buffers_driving_4_nand2_load = 2;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 3)
- { //8 NAND3 gates one 3-8 decoder
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 4)
- { //4 + 4 NAND2 gates two 2-4 decoder
- num_buffers_driving_4_nand2_load = 4;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 5)
- { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
- num_buffers_driving_4_nand2_load = 2;
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 6)
- { //8 + 8 NAND3 gates two 3-8 decoder
- num_buffers_driving_8_nand3_load = 6;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 7)
- { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
- num_buffers_driving_4_nand2_load = 4;
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 8)
- { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
- num_buffers_driving_4_nand2_load = 2;
- num_buffers_driving_8_nand3_load = 6;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 9)
- { //8 + 8 + 8 NAND3 gates three 3-8 decoder
- num_buffers_driving_8_nand3_load = 9;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- }
-
- if ((blk->flag_two_unique_paths) ||
- (blk->number_inputs_L1_gate == 2) ||
- (number_input_addr_bits == 0) ||
- ((way_select)&&(dec->num_in_signals == 2)))
- { //this means that way_select is driving NAND2 in decoder.
- width_nand2_path_n[0] = g_tp.min_w_nmos_;
- width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
- F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
- number_gates_nand2_path = logical_effort(
- min_number_gates,
- 1,
- F,
- width_nand2_path_n,
- width_nand2_path_p,
- c_load_nand2_path_out,
- p_to_n_sz_ratio,
- is_dram_, false, g_tp.max_w_nmos_);
- }
-
- if ((blk->flag_two_unique_paths) ||
- (blk->number_inputs_L1_gate == 3) ||
- ((way_select)&&(dec->num_in_signals == 3)))
- { //this means that way_select is driving NAND3 in decoder.
- width_nand3_path_n[0] = g_tp.min_w_nmos_;
- width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
- F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
- number_gates_nand3_path = logical_effort(
- min_number_gates,
- 1,
- F,
- width_nand3_path_n,
- width_nand3_path_p,
- c_load_nand3_path_out,
- p_to_n_sz_ratio,
- is_dram_, false, g_tp.max_w_nmos_);
+void PredecBlkDrv::compute_widths() {
+ // The predecode block driver accepts as input the address bits from the h-tree network. For
+ // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
+ // inversion to generate addrbar and simply treat addrbar as addr.
+
+ double F;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+
+ if (flag_driver_exists) {
+ double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
+ double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
+
+ if (way_select == 0) {
+ if (blk->number_input_addr_bits == 1) {
+ //2 NAND2 gates
+ num_buffers_driving_2_nand2_load = 1;
+ c_load_nand2_path_out = 2 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 2) {
+ //4 NAND2 gates one 2-4 decoder
+ num_buffers_driving_4_nand2_load = 2;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 3) {
+ //8 NAND3 gates one 3-8 decoder
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 4) {
+ //4 + 4 NAND2 gates two 2-4 decoder
+ num_buffers_driving_4_nand2_load = 4;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 5) {
+ //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 2;
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 6) {
+ //8 + 8 NAND3 gates two 3-8 decoder
+ num_buffers_driving_8_nand3_load = 6;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 7) {
+ //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 4;
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 8) {
+ //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 2;
+ num_buffers_driving_8_nand3_load = 6;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 9) {
+ //8 + 8 + 8 NAND3 gates three 3-8 decoder
+ num_buffers_driving_8_nand3_load = 9;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ }
+
+ if ((blk->flag_two_unique_paths) ||
+ (blk->number_inputs_L1_gate == 2) ||
+ (number_input_addr_bits == 0) ||
+ ((way_select) && (dec->num_in_signals == 2))) {
+ //this means that way_select is driving NAND2 in decoder.
+ width_nand2_path_n[0] = g_tp.min_w_nmos_;
+ width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
+ F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
+ number_gates_nand2_path = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_nand2_path_n,
+ width_nand2_path_p,
+ c_load_nand2_path_out,
+ p_to_n_sz_ratio,
+ is_dram_, false, g_tp.max_w_nmos_);
+ }
+
+ if ((blk->flag_two_unique_paths) ||
+ (blk->number_inputs_L1_gate == 3) ||
+ ((way_select) && (dec->num_in_signals == 3))) {
+ //this means that way_select is driving NAND3 in decoder.
+ width_nand3_path_n[0] = g_tp.min_w_nmos_;
+ width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
+ F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
+ number_gates_nand3_path = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_nand3_path_n,
+ width_nand3_path_p,
+ c_load_nand3_path_out,
+ p_to_n_sz_ratio,
+ is_dram_, false, g_tp.max_w_nmos_);
+ }
}
- }
}
-void PredecBlkDrv::compute_area()
-{
- double area_nand2_path = 0;
- double area_nand3_path = 0;
- double leak_nand2_path = 0;
- double leak_nand3_path = 0;
- double gate_leak_nand2_path = 0;
- double gate_leak_nand3_path = 0;
-
- if (flag_driver_exists)
- { // first check whether a predecoder block driver is needed
- for (int i = 0; i < number_gates_nand2_path; ++i)
- {
- area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
- leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
- gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
- }
- area_nand2_path *= (num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load);
- leak_nand2_path *= (num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load);
- gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+void PredecBlkDrv::compute_area() {
+ double area_nand2_path = 0;
+ double area_nand3_path = 0;
+ double leak_nand2_path = 0;
+ double leak_nand3_path = 0;
+ double gate_leak_nand2_path = 0;
+ double gate_leak_nand3_path = 0;
+
+ if (flag_driver_exists) {
+ // first check whether a predecoder block driver is needed
+ for (int i = 0; i < number_gates_nand2_path; ++i) {
+ area_nand2_path +=
+ compute_gate_area(INV, 1, width_nand2_path_p[i],
+ width_nand2_path_n[i], g_tp.cell_h_def);
+ leak_nand2_path +=
+ cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
+ 1, inv, is_dram_);
+ gate_leak_nand2_path +=
+ cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
+ 1, inv, is_dram_);
+ }
+ area_nand2_path *= (num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load);
-
- for (int i = 0; i < number_gates_nand3_path; ++i)
- {
- area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
- leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
- gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
+ leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+ gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+
+ for (int i = 0; i < number_gates_nand3_path; ++i) {
+ area_nand3_path +=
+ compute_gate_area(INV, 1, width_nand3_path_p[i],
+ width_nand3_path_n[i], g_tp.cell_h_def);
+ leak_nand3_path +=
+ cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
+ 1, inv, is_dram_);
+ gate_leak_nand3_path +=
+ cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
+ 1, inv, is_dram_);
+ }
+ area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+
+ power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
+ power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
+ area.set_area(area_nand2_path + area_nand3_path);
}
- area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
- leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
- gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
-
- power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
- power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
- area.set_area(area_nand2_path + area_nand3_path);
- }
}
pair<double, double> PredecBlkDrv::compute_delays(
double inrisetime_nand2_path,
- double inrisetime_nand3_path)
-{
- pair<double, double> ret_val;
- ret_val.first = 0; // outrisetime_nand2_path
- ret_val.second = 0; // outrisetime_nand3_path
- int i;
- double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
- double Vdd = g_tp.peri_global.Vdd;
-
- if (flag_driver_exists)
- {
- for (i = 0; i < number_gates_nand2_path - 1; ++i)
- {
- rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
- c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
- }
+ double inrisetime_nand3_path) {
+ pair<double, double> ret_val;
+ ret_val.first = 0; // outrisetime_nand2_path
+ ret_val.second = 0; // outrisetime_nand3_path
+ int i;
+ double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
+ double Vdd = g_tp.peri_global.Vdd;
- // Final inverter drives the predecoder block or the decoder output load
- if (number_gates_nand2_path != 0)
- {
- i = number_gates_nand2_path - 1;
- rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- c_load = c_load_nand2_path_out;
- tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
+ if (flag_driver_exists) {
+ for (i = 0; i < number_gates_nand2_path - 1; ++i) {
+ rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
+ c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
+ }
+
+ // Final inverter drives the predecoder block or the decoder output load
+ if (number_gates_nand2_path != 0) {
+ i = number_gates_nand2_path - 1;
+ rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ c_load = c_load_nand2_path_out;
+ tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
+ }
+
+ for (i = 0; i < number_gates_nand3_path - 1; ++i) {
+ rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
+ c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
+ }
+
+ // Final inverter drives the predecoder block or the decoder output load
+ if (number_gates_nand3_path != 0) {
+ i = number_gates_nand3_path - 1;
+ rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ c_load = c_load_nand3_path_out;
+ tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
+ }
}
-
- for (i = 0; i < number_gates_nand3_path - 1; ++i)
- {
- rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
- c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
- }
-
- // Final inverter drives the predecoder block or the decoder output load
- if (number_gates_nand3_path != 0)
- {
- i = number_gates_nand3_path - 1;
- rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- c_load = c_load_nand3_path_out;
- tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2;
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
- }
- }
- return ret_val;
+ return ret_val;
}
-double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
-{
- return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
- num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
+double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) {
+ return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
+ num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
}
@@ -1336,31 +1257,30 @@ double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
Predec::Predec(
PredecBlkDrv * drv1_,
PredecBlkDrv * drv2_)
-:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
-{
- driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
- drv1->power_nand3_path.readOp.leakage +
- drv2->power_nand2_path.readOp.leakage +
- drv2->power_nand3_path.readOp.leakage;
- block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
- blk1->power_nand3_path.readOp.leakage +
- blk1->power_L2.readOp.leakage +
- blk2->power_nand2_path.readOp.leakage +
- blk2->power_nand3_path.readOp.leakage +
- blk2->power_L2.readOp.leakage;
- power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
-
- driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
- drv1->power_nand3_path.readOp.gate_leakage +
- drv2->power_nand2_path.readOp.gate_leakage +
- drv2->power_nand3_path.readOp.gate_leakage;
- block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
- blk1->power_nand3_path.readOp.gate_leakage +
- blk1->power_L2.readOp.gate_leakage +
- blk2->power_nand2_path.readOp.gate_leakage +
- blk2->power_nand3_path.readOp.gate_leakage +
- blk2->power_L2.readOp.gate_leakage;
- power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
+ : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) {
+ driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
+ drv1->power_nand3_path.readOp.leakage +
+ drv2->power_nand2_path.readOp.leakage +
+ drv2->power_nand3_path.readOp.leakage;
+ block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
+ blk1->power_nand3_path.readOp.leakage +
+ blk1->power_L2.readOp.leakage +
+ blk2->power_nand2_path.readOp.leakage +
+ blk2->power_nand3_path.readOp.leakage +
+ blk2->power_L2.readOp.leakage;
+ power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
+
+ driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
+ drv1->power_nand3_path.readOp.gate_leakage +
+ drv2->power_nand2_path.readOp.gate_leakage +
+ drv2->power_nand3_path.readOp.gate_leakage;
+ block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
+ blk1->power_nand3_path.readOp.gate_leakage +
+ blk1->power_L2.readOp.gate_leakage +
+ blk2->power_nand2_path.readOp.gate_leakage +
+ blk2->power_nand3_path.readOp.gate_leakage +
+ blk2->power_L2.readOp.gate_leakage;
+ power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
}
void PredecBlkDrv::leakage_feedback(double temperature)
@@ -1399,37 +1319,35 @@ void PredecBlkDrv::leakage_feedback(double temperature)
}
}
-double Predec::compute_delays(double inrisetime)
-{
- // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
- pair<double, double> tmp_pair1, tmp_pair2;
- tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
- tmp_pair1 = blk1->compute_delays(tmp_pair1);
- tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
- tmp_pair2 = blk2->compute_delays(tmp_pair2);
- tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
-
- driver_power.readOp.dynamic =
- drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
- drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
- drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
- drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
-
- block_power.readOp.dynamic =
- blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
- blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
- blk1->power_L2.readOp.dynamic +
- blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
- blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
- blk2->power_L2.readOp.dynamic;
-
- power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
-
- delay = tmp_pair1.first;
- return tmp_pair1.second;
+// Evaluates both predecode chains (drv1 -> blk1 and drv2 -> blk2) and records
+// the predecoder's delay and dynamic read-operation totals.
+//
+// Writes driver_power.readOp.dynamic, block_power.readOp.dynamic,
+// power.readOp.dynamic and delay.
+//
+// inrisetime: input rise time fed to the nand2 and nand3 paths of both drivers.
+// Returns the rise time of the slowest driver + block path, as selected by
+// get_max_delay_before_decoder().
+double Predec::compute_delays(double inrisetime) {
+ // TODO: Jung Ho thinks that the predecoder block driver is located between the decoder and the predecoder block.
+ pair<double, double> tmp_pair1, tmp_pair2;
+ // Each chain: the driver's <nand2, nand3> output rise times feed its block.
+ tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
+ tmp_pair1 = blk1->compute_delays(tmp_pair1);
+ tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
+ tmp_pair2 = blk2->compute_delays(tmp_pair2);
+ // From here on tmp_pair1 holds <delay, risetime> of the slowest path.
+ tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
+
+ // Driver contribution: per-path value weighted by the number of address
+ // bits each driver routes through that path.
+ driver_power.readOp.dynamic =
+ drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
+ drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
+ drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
+ drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
+
+ // Block contribution: each block's L1 path value is weighted by that
+ // block's own active L1 gate counts (the blk2 terms previously used
+ // blk1's counts).
+ block_power.readOp.dynamic =
+ blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
+ blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
+ blk1->power_L2.readOp.dynamic +
+ blk2->power_nand2_path.readOp.dynamic * blk2->num_L1_active_nand2_path +
+ blk2->power_nand3_path.readOp.dynamic * blk2->num_L1_active_nand3_path +
+ blk2->power_L2.readOp.dynamic;
+
+ power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
+
+ delay = tmp_pair1.first;
+ return tmp_pair1.second;
}
-
void Predec::leakage_feedback(double temperature)
{
drv1->leakage_feedback(temperature);
@@ -1465,113 +1383,116 @@ void Predec::leakage_feedback(double temperature)
// returns <delay, risetime>
pair<double, double> Predec::get_max_delay_before_decoder( // picks the slowest of the four driver + block paths (chain 1/2 x nand2/nand3)
pair<double, double> input_pair1, // <nand2, nand3> output rise times of chain 1 (drv1 + blk1)
- pair<double, double> input_pair2)
-{
- pair<double, double> ret_val;
- double delay;
-
- delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
- ret_val.first = delay;
- ret_val.second = input_pair1.first;
- delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair1.second;
- }
- delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair2.first;
- }
- delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
- if (ret_val.first < delay)
- {
+ pair<double, double> input_pair2) { // <nand2, nand3> output rise times of chain 2 (drv2 + blk2)
+ pair<double, double> ret_val; // <delay, risetime> of the slowest path seen so far
+ double delay; // local candidate delay; hides the class-level delay that compute_delays() assigns
+
+ delay = drv1->delay_nand2_path + blk1->delay_nand2_path; // chain 1, nand2 path is the initial candidate
ret_val.first = delay;
- ret_val.second = input_pair2.second;
- }
+ ret_val.second = input_pair1.first;
+ delay = drv1->delay_nand3_path + blk1->delay_nand3_path; // chain 1, nand3 path
+ if (ret_val.first < delay) { // strict '<': on a tie the earlier candidate is kept
+ ret_val.first = delay;
+ ret_val.second = input_pair1.second;
+ }
+ delay = drv2->delay_nand2_path + blk2->delay_nand2_path; // chain 2, nand2 path
+ if (ret_val.first < delay) {
+ ret_val.first = delay;
+ ret_val.second = input_pair2.first;
+ }
+ delay = drv2->delay_nand3_path + blk2->delay_nand3_path; // chain 2, nand3 path
+ if (ret_val.first < delay) {
+ ret_val.first = delay;
+ ret_val.second = input_pair2.second;
+ }
- return ret_val;
+ return ret_val; // <largest driver + block delay, rise time of that same path>
}
-Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram)
-:number_gates(0),
- min_number_gates(2),
- c_gate_load(c_gate_load_),
- c_wire_load(c_wire_load_),
- r_wire_load(r_wire_load_),
- delay(0),
- power(),
- is_dram_(is_dram)
-{
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- width_n[i] = 0;
- width_p[i] = 0;
- }
+Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, // stores the load parameters, clears the stage widths, then sizes the chain
+ bool is_dram)
+ : number_gates(0), // overwritten by compute_widths() with the result of logical_effort()
+ min_number_gates(2), // passed to logical_effort() as its first argument
+ c_gate_load(c_gate_load_),
+ c_wire_load(c_wire_load_),
+ r_wire_load(r_wire_load_),
+ delay(0), // compute_delay() adds each stage's delay to this
+ power(), // compute_delay() adds dynamic and leakage terms to this
+ is_dram_(is_dram) {
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { // zero every stage slot before sizing
+ width_n[i] = 0;
+ width_p[i] = 0;
+ }
- compute_widths();
+ compute_widths(); // sets width_n[0]/width_p[0] and number_gates
}
-void Driver::compute_widths()
-{
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
- double c_load = c_gate_load + c_wire_load;
- width_n[0] = g_tp.min_w_nmos_;
- width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
-
- double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
- number_gates = logical_effort(
- min_number_gates,
- 1,
- F,
- width_n,
- width_p,
- c_load,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
+// Sizes the driver chain for its gate + wire load: fixes the first stage at
+// minimum width and asks logical_effort() for the stage count.
+void Driver::compute_widths() {
+ double pn_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+ double total_load = c_gate_load + c_wire_load;
+
+ // First stage: minimum-width NMOS, PMOS scaled by the P-to-N ratio.
+ width_n[0] = g_tp.min_w_nmos_;
+ width_p[0] = pn_ratio * g_tp.min_w_nmos_;
+
+ // Total load relative to the first stage's gate capacitance.
+ double first_stage_cap = gate_C(width_n[0] + width_p[0], 0, is_dram_);
+ double load_ratio = total_load / first_stage_cap;
+
+ // logical_effort() receives the width arrays (presumably filling the later
+ // stages) and returns the number of gates in the chain.
+ number_gates = logical_effort(min_number_gates, 1, load_ratio, width_n,
+ width_p, total_load, pn_ratio, is_dram_, false,
+ g_tp.max_w_nmos_);
}
-double Driver::compute_delay(double inrisetime)
-{
- int i;
- double rd, c_load, c_intrinsic, tf;
- double this_delay = 0;
+double Driver::compute_delay(double inrisetime) { // walks the gate chain stage by stage; returns the rise time at the last stage's output
+ int i;
+ double rd, c_load, c_intrinsic, tf;
+ double this_delay = 0;
+
+ for (i = 0; i < number_gates - 1; ++i) { // every stage except the last: its load is the next stage's gate
+ rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay; // NOTE(review): accumulates into the member, so a second call would add on top - confirm it is called once per Driver
+ inrisetime = this_delay / (1.0 - 0.5); // this stage's output rise time feeds the next stage
+ power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
+ g_tp.peri_global.Vdd;
+ power.readOp.leakage +=
+ cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ }
- for (i = 0; i < number_gates - 1; ++i)
- {
+ i = number_gates - 1; // last stage: drives the external gate + wire load
+ c_load = c_gate_load + c_wire_load;
rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
- c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
+ drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + r_wire_load * // second term: wire resistance times (half the wire cap + the gate load)
+ (c_wire_load / 2 + c_gate_load);
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
- power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
- }
-
- i = number_gates - 1;
- c_load = c_gate_load + c_wire_load;
- rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
- power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
-
- return this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
+ g_tp.peri_global.Vdd;
+ power.readOp.leakage +=
+ cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+
+ return this_delay / (1.0 - 0.5); // same delay-to-rise-time conversion as inside the loop
}
diff --git a/ext/mcpat/cacti/decoder.h b/ext/mcpat/cacti/decoder.h
index 35631e84b..a2ddf722c 100644
--- a/ext/mcpat/cacti/decoder.h
+++ b/ext/mcpat/cacti/decoder.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -42,9 +43,8 @@
using namespace std;
-class Decoder : public Component
-{
- public:
+class Decoder : public Component {
+public:
Decoder(
int _num_dec_signals,
bool flag_way_select,
@@ -80,125 +80,120 @@ class Decoder : public Component
-class PredecBlk : public Component
-{
- public:
- PredecBlk(
- int num_dec_signals,
- Decoder * dec,
- double C_wire_predec_blk_out,
- double R_wire_predec_blk_out,
- int num_dec_per_predec,
- bool is_dram_,
- bool is_blk1);
-
- Decoder * dec;
- bool exist;
- int number_input_addr_bits;
- double C_ld_predec_blk_out;
- double R_wire_predec_blk_out;
- int branch_effort_nand2_gate_output;
- int branch_effort_nand3_gate_output;
- bool flag_two_unique_paths;
- int flag_L2_gate;
- int number_inputs_L1_gate;
- int number_gates_L1_nand2_path;
- int number_gates_L1_nand3_path;
- int number_gates_L2;
- int min_number_gates_L1;
- int min_number_gates_L2;
- int num_L1_active_nand2_path;
- int num_L1_active_nand3_path;
- double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
- double w_L2_n[MAX_NUMBER_GATES_STAGE];
- double w_L2_p[MAX_NUMBER_GATES_STAGE];
- double delay_nand2_path;
- double delay_nand3_path;
- powerDef power_nand2_path;
- powerDef power_nand3_path;
- powerDef power_L2;
-
- bool is_dram_;
-
- void compute_widths();
- void compute_area();
-
- void leakage_feedback(double temperature);
-
- pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
- // return <outrise_nand2, outrise_nand3>
+class PredecBlk : public Component { // one predecoder block; Predec combines two of these with their PredecBlkDrv drivers
+public:
+ PredecBlk(
+ int num_dec_signals,
+ Decoder * dec,
+ double C_wire_predec_blk_out,
+ double R_wire_predec_blk_out,
+ int num_dec_per_predec,
+ bool is_dram_,
+ bool is_blk1);
+
+ Decoder * dec;
+ bool exist;
+ int number_input_addr_bits;
+ double C_ld_predec_blk_out;
+ double R_wire_predec_blk_out;
+ int branch_effort_nand2_gate_output;
+ int branch_effort_nand3_gate_output;
+ bool flag_two_unique_paths;
+ int flag_L2_gate;
+ int number_inputs_L1_gate;
+ int number_gates_L1_nand2_path;
+ int number_gates_L1_nand3_path;
+ int number_gates_L2;
+ int min_number_gates_L1;
+ int min_number_gates_L2;
+ int num_L1_active_nand2_path; // weight applied to power_nand2_path.readOp.dynamic in Predec::compute_delays()
+ int num_L1_active_nand3_path; // weight applied to power_nand3_path.readOp.dynamic in Predec::compute_delays()
+ double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
+ double w_L2_n[MAX_NUMBER_GATES_STAGE];
+ double w_L2_p[MAX_NUMBER_GATES_STAGE];
+ double delay_nand2_path; // added to the driver's nand2 delay in Predec::get_max_delay_before_decoder()
+ double delay_nand3_path; // added to the driver's nand3 delay in Predec::get_max_delay_before_decoder()
+ powerDef power_nand2_path;
+ powerDef power_nand3_path;
+ powerDef power_L2; // enters Predec's block_power totals without a multiplier
+
+ bool is_dram_;
+
+ void compute_widths();
+ void compute_area();
+
+ void leakage_feedback(double temperature);
+
+ pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
+ // return <outrise_nand2, outrise_nand3>
};
-class PredecBlkDrv : public Component
-{
- public:
- PredecBlkDrv(
- int way_select,
- PredecBlk * blk_,
- bool is_dram);
-
- int flag_driver_exists;
- int number_input_addr_bits;
- int number_gates_nand2_path;
- int number_gates_nand3_path;
- int min_number_gates;
- int num_buffers_driving_1_nand2_load;
- int num_buffers_driving_2_nand2_load;
- int num_buffers_driving_4_nand2_load;
- int num_buffers_driving_2_nand3_load;
- int num_buffers_driving_8_nand3_load;
- int num_buffers_nand3_path;
- double c_load_nand2_path_out;
- double c_load_nand3_path_out;
- double r_load_nand2_path_out;
- double r_load_nand3_path_out;
- double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
- double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
- double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
- double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
- double delay_nand2_path;
- double delay_nand3_path;
- powerDef power_nand2_path;
- powerDef power_nand3_path;
-
- PredecBlk * blk;
- Decoder * dec;
- bool is_dram_;
- int way_select;
-
- void compute_widths();
- void compute_area();
-
- void leakage_feedback(double temperature);
-
-
- pair<double, double> compute_delays(
- double inrisetime_nand2_path,
- double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
-
- inline int num_addr_bits_nand2_path()
- {
- return num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load;
- }
- inline int num_addr_bits_nand3_path()
- {
- return num_buffers_driving_2_nand3_load +
- num_buffers_driving_8_nand3_load;
- }
- double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
+class PredecBlkDrv : public Component { // driver in front of one PredecBlk; Predec evaluates the driver first, then its block
+public:
+ PredecBlkDrv(
+ int way_select,
+ PredecBlk * blk_,
+ bool is_dram);
+
+ int flag_driver_exists;
+ int number_input_addr_bits;
+ int number_gates_nand2_path;
+ int number_gates_nand3_path;
+ int min_number_gates;
+ int num_buffers_driving_1_nand2_load; // summed by num_addr_bits_nand2_path()
+ int num_buffers_driving_2_nand2_load; // summed by num_addr_bits_nand2_path()
+ int num_buffers_driving_4_nand2_load; // summed by num_addr_bits_nand2_path()
+ int num_buffers_driving_2_nand3_load; // summed by num_addr_bits_nand3_path()
+ int num_buffers_driving_8_nand3_load; // summed by num_addr_bits_nand3_path()
+ int num_buffers_nand3_path;
+ double c_load_nand2_path_out;
+ double c_load_nand3_path_out;
+ double r_load_nand2_path_out;
+ double r_load_nand3_path_out;
+ double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
+ double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
+ double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
+ double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
+ double delay_nand2_path; // added to the block's nand2 delay in Predec::get_max_delay_before_decoder()
+ double delay_nand3_path; // added to the block's nand3 delay in Predec::get_max_delay_before_decoder()
+ powerDef power_nand2_path; // readOp.dynamic is weighted by num_addr_bits_nand2_path() in Predec::compute_delays()
+ powerDef power_nand3_path; // readOp.dynamic is weighted by num_addr_bits_nand3_path() in Predec::compute_delays()
+
+ PredecBlk * blk;
+ Decoder * dec;
+ bool is_dram_;
+ int way_select;
+
+ void compute_widths();
+ void compute_area();
+
+ void leakage_feedback(double temperature);
+
+
+ pair<double, double> compute_delays(
+ double inrisetime_nand2_path,
+ double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
+
+ inline int num_addr_bits_nand2_path() { // total of the 1-, 2- and 4-load nand2 buffer counts
+ return num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load;
+ }
+ inline int num_addr_bits_nand3_path() { // total of the 2- and 8-load nand3 buffer counts
+ return num_buffers_driving_2_nand3_load +
+ num_buffers_driving_8_nand3_load;
+ }
+ double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
};
-class Predec : public Component
-{
- public:
+class Predec : public Component {
+public:
Predec(
PredecBlkDrv * drv1,
PredecBlkDrv * drv2);
@@ -214,7 +209,7 @@ class Predec : public Component
powerDef block_power;
powerDef driver_power;
- private:
+private:
// returns <delay, risetime>
pair<double, double> get_max_delay_before_decoder(
pair<double, double> input_pair1,
@@ -223,24 +218,23 @@ class Predec : public Component
-class Driver : public Component
-{
- public:
- Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
+class Driver : public Component { // gate chain sized for a gate + wire load; the constructor calls compute_widths()
+public:
+ Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
- int number_gates;
- int min_number_gates;
- double width_n[MAX_NUMBER_GATES_STAGE];
- double width_p[MAX_NUMBER_GATES_STAGE];
- double c_gate_load;
- double c_wire_load;
- double r_wire_load;
- double delay;
- powerDef power;
- bool is_dram_;
+ int number_gates; // stage count returned by logical_effort() in compute_widths()
+ int min_number_gates; // initialised to 2 by the constructor
+ double width_n[MAX_NUMBER_GATES_STAGE]; // NMOS width per stage; [0] is g_tp.min_w_nmos_
+ double width_p[MAX_NUMBER_GATES_STAGE]; // PMOS width per stage; [0] is the P-to-N ratio times g_tp.min_w_nmos_
+ double c_gate_load; // gate capacitance at the driver output
+ double c_wire_load; // wire capacitance at the driver output
+ double r_wire_load; // wire resistance, used only in the last stage's time constant
+ double delay; // running total accumulated by compute_delay()
+ powerDef power; // running totals accumulated by compute_delay()
+ bool is_dram_;
- void compute_widths();
- double compute_delay(double inrisetime);
+ void compute_widths();
+ double compute_delay(double inrisetime); // returns the rise time at the last stage's output
};
diff --git a/ext/mcpat/cacti/htree2.cc b/ext/mcpat/cacti/htree2.cc
index 817ea6a7c..55724c397 100644
--- a/ext/mcpat/cacti/htree2.cc
+++ b/ext/mcpat/cacti/htree2.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,15 +40,17 @@
Htree2::Htree2(
enum Wire_type wire_model, double mat_w, double mat_h,
- int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type,
+ int a_bits, int d_inbits, int search_data_in, int d_outbits,
+ int search_data_out, int bl, int wl, enum Htree_type htree_type,
bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
- :in_rise_time(0), out_rise_time(0),
- tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
- add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits),
- search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
- uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt)
-{
- assert(ndbl >= 2 && ndwl >= 2);
+ : in_rise_time(0), out_rise_time(0),
+ tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
+ add_bits(a_bits), data_in_bits(d_inbits),
+ search_data_in_bits(search_data_in), data_out_bits(d_outbits),
+ search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
+ uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model),
+ deviceType(dt) {
+ assert(ndbl >= 2 && ndwl >= 2);
// if (ndbl == 1 && ndwl == 1)
// {
@@ -61,177 +64,211 @@ Htree2::Htree2(
// if (ndwl == 1) ndwl++;
// if (ndbl == 1) ndbl++;
- max_unpipelined_link_delay = 0; //TODO
- min_w_nmos = g_tp.min_w_nmos_;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
+ max_unpipelined_link_delay = 0; //TODO
+ min_w_nmos = g_tp.min_w_nmos_;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
- switch (htree_type)
- {
+ switch (htree_type) {
case Add_htree:
- wire_bw = init_wire_bw = add_bits;
- in_htree();
- break;
+ wire_bw = init_wire_bw = add_bits;
+ in_htree();
+ break;
case Data_in_htree:
- wire_bw = init_wire_bw = data_in_bits;
- in_htree();
- break;
+ wire_bw = init_wire_bw = data_in_bits;
+ in_htree();
+ break;
case Data_out_htree:
- wire_bw = init_wire_bw = data_out_bits;
- out_htree();
- break;
+ wire_bw = init_wire_bw = data_out_bits;
+ out_htree();
+ break;
case Search_in_htree:
- wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
- in_htree();
- break;
+ wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
+ in_htree();
+ break;
case Search_out_htree:
- wire_bw = init_wire_bw = search_data_out_bits;
- out_htree();
- break;
+ wire_bw = init_wire_bw = search_data_out_bits;
+ out_htree();
+ break;
default:
- assert(0);
- break;
- }
+ assert(0);
+ break;
+ }
- power_bit = power;
- power.readOp.dynamic *= init_wire_bw;
+ power_bit = power;
+ power.readOp.dynamic *= init_wire_bw;
- assert(power.readOp.dynamic >= 0);
- assert(power.readOp.leakage >= 0);
+ assert(power.readOp.dynamic >= 0);
+ assert(power.readOp.leakage >= 0);
}
// nand gate sizing calculation
-void Htree2::input_nand(double s1, double s2, double l_eff)
-{
- Wire w1(wt, l_eff);
- double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
- // input capacitance of a repeater = input capacitance of nand.
- double nsize = s1*(1 + pton_size)/(2 + pton_size);
- nsize = (nsize < 1) ? 1 : nsize;
-
- double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) *
- (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
- 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0));
- delay+= horowitz (w1.out_rise_time, tc,
- deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
- power.readOp.dynamic += 0.5 *
- (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+void Htree2::input_nand(double s1, double s2, double l_eff) { // adds one nand gate's delay, dynamic and leakage terms to this tree
+ Wire w1(wt, l_eff); // only its out_rise_time is used below, as the gate's input rise time
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // input capacitance of a repeater = input capacitance of nand.
+ double nsize = s1 * (1 + pton_size) / (2 + pton_size);
+ nsize = (nsize < 1) ? 1 : nsize; // never size the nand below one unit
+
+ double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) *
+ (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
+ 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0));
+ delay += horowitz(w1.out_rise_time, tc,
+ deviceType->Vth / deviceType->Vdd, deviceType->Vth /
+ deviceType->Vdd, RISE);
+ power.readOp.dynamic += 0.5 * // per-bit here; the constructor multiplies readOp.dynamic by init_wire_bw afterwards
+ (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
+ (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * wire_bw ; // same capacitance term as readOp, scaled by wire_bw here
+ power.readOp.leakage += (wire_bw * // leakage is scaled by wire_bw at accumulation time
+ cmos_Isub_leakage(min_w_nmos * (nsize * 2),
+ min_w_pmos * nsize * 2, 2,
+ nand)) * deviceType->Vdd;
+ power.readOp.gate_leakage += (wire_bw *
+ cmos_Ig_leakage(min_w_nmos * (nsize * 2),
+ min_w_pmos * nsize * 2, 2,
+ nand)) * deviceType->Vdd;
}
// tristate buffer model consisting of not, nand, nor, and driver transistors
-void Htree2::output_buffer(double s1, double s2, double l_eff)
-{
- Wire w1(wt, l_eff);
- double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
- // input capacitance of repeater = input capacitance of nand + nor.
- double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
- double s_eff = //stage eff of a repeater in a wire
- (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/
- gate_C(s2*(min_w_nmos + min_w_pmos), 0);
- double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0));
- size = (size < 1) ? 1 : size;
-
- double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1);
- double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1);
- double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
- gate_C(tr_size*min_w_pmos, 0);
- double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
- gate_C(s1*(min_w_nmos + min_w_pmos), 0);
-
- double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
-
-
- delay += horowitz (w1.out_rise_time, tc,
- deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
-
- //nand
- power.readOp.dynamic += 0.5 *
- (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(tr_size*(min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+// Adds one tristate output buffer (not, nand, nor and driver transistors) to
+// this H-tree: increments delay, power.readOp.dynamic, power.searchOp.dynamic,
+// power.readOp.leakage and power.readOp.gate_leakage.
+//
+// s1: size factor (multiple of min_w_nmos + min_w_pmos) of the gate load the
+// buffer drives; the nand/nor size is derived from it.
+// s2: size factor of the repeater gate used in the stage-effort estimate.
+// l_eff: wire length handed to Wire and, scaled by 1e-6, to wire_cap().
+//
+// readOp.dynamic is accumulated per bit (the constructor multiplies it by
+// init_wire_bw afterwards); searchOp.dynamic is scaled by init_wire_bw and the
+// leakage terms by wire_bw as they are accumulated.
+void Htree2::output_buffer(double s1, double s2, double l_eff) {
+ Wire w1(wt, l_eff);
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // input capacitance of repeater = input capacitance of nand + nor.
+ double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
+ double s_eff = //stage eff of a repeater in a wire
+ (gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6,
+ true)) /
+ gate_C(s2 * (min_w_nmos + min_w_pmos), 0);
+ double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 /
+ (s_eff * gate_C(min_w_pmos, 0));
+ // Clamp the nand/nor size to at least one unit.
+ size = (size < 1) ? 1 : size;
+
+ double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1);
+ double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1);
+ double cap_nand_out =
+ drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
+ gate_C(tr_size * min_w_pmos, 0);
+ double cap_ptrans_out = 2 *
+ (drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
+ gate_C(s1 * (min_w_nmos + min_w_pmos), 0);
+
+ // Two-stage time constant: the nor output node, then the driver output node.
+ double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
+
+
+ delay += horowitz(w1.out_rise_time, tc,
+ deviceType->Vth / deviceType->Vdd, deviceType->Vth /
+ deviceType->Vdd, RISE);
+
+ //nand
+ power.readOp.dynamic += 0.5 *
+ (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(tr_size * (min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(tr_size*(min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //not
- power.readOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+ (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(tr_size * (min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * init_wire_bw;
+
+ //not
+ power.readOp.dynamic += 0.5 *
+ (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + gate_C(size * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //nor
- power.readOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+ (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + gate_C(size * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * init_wire_bw;
+
+ //nor
+ power.readOp.dynamic += 0.5 *
+ (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //output transistor
- power.readOp.dynamic += 0.5 *
- ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
- + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+ (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * init_wire_bw;
+
+ //output transistor
+ power.readOp.dynamic += 0.5 *
+ ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2
+ + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
- + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- if(uca_tree) {
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
-
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
- //power.readOp.gate_leakage *=;
- }
- else {
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
-
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
- //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw;
- }
+ ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2
+ + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * init_wire_bw;
+
+ // The uca_tree and non-uca_tree branches computed exactly the same
+ // leakage terms, so they are accumulated once, unconditionally.
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
+ 2, 1, inv) *
+ deviceType->Vdd * wire_bw;/*inverter + output tr*/
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nand) * deviceType->Vdd * wire_bw;//nand
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nor) * deviceType->Vdd * wire_bw;//nor
+
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
+ 1, inv) *
+ deviceType->Vdd * wire_bw;/*inverter + output tr*/
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nand) * deviceType->Vdd * wire_bw;//nand
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nor) * deviceType->Vdd * wire_bw;//nor
}
@@ -250,192 +287,200 @@ void Htree2::output_buffer(double s1, double s2, double l_eff)
* hor. links left. After this it goes through the remaining vertical
* links.
*/
- void
-Htree2::in_htree()
-{
- //temp var
- double s1 = 0, s2 = 0, s3 = 0;
- double l_eff = 0;
- Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
- double len = 0, ht = 0;
- int option = 0;
-
- int h = (int) _log2(ndwl/2); // horizontal nodes
- int v = (int) _log2(ndbl/2); // vertical nodes
- double len_temp;
- double ht_temp;
- if (uca_tree)
- {//Sheng: this computation do not consider the wires that route from edge to middle.
- ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,h))))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,v))))/2;
- }
- else
- {
- if (ndwl == ndbl) {
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
- }
- else if (ndwl > ndbl) {
- double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
- (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
+void
+Htree2::in_htree() {
+ //temp var
+ double s1 = 0, s2 = 0, s3 = 0;
+ double l_eff = 0;
+ Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
+ double len = 0, ht = 0;
+ int option = 0;
+
+ int h = (int) _log2(ndwl / 2); // horizontal nodes
+ int v = (int) _log2(ndbl / 2); // vertical nodes
+ double len_temp;
+ double ht_temp;
+ if (uca_tree) {
+ //Sheng: this computation does not consider the wires that route from
+ //edge to middle.
+ ht_temp = (mat_height * ndbl / 2 +
+ /* since uca_tree models interbank tree,
+ mat_height => bank height */
+ ((add_bits + data_in_bits + data_out_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ g_tp.wire_outside_mat.pitch *
+ 2 * (1 - pow(0.5, h)))) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + data_in_bits + data_out_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ g_tp.wire_outside_mat.pitch *
+ 2 * (1 - pow(0.5, v)))) / 2;
+ } else {
+ if (ndwl == ndbl) {
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) * (ndbl / 2 - 1) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * h)
+ ) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) * (ndwl / 2 - 1) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * v)) / 2;
+ } else if (ndwl > ndbl) {
+ double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits + + (search_data_in_bits +
+ search_data_out_bits)) *
+ ((ndbl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch *
+ (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * v)) / 2;
+ } else {
+ double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ ((ndwl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * h)
+ ) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ ((ndwl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch *
+ (h + 2 * (1 - pow(0.5, v - h)))) / 2;
+ }
}
- else {
- double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
+
+ area.h = ht_temp * 2;
+ area.w = len_temp * 2;
+ delay = 0;
+ power.readOp.dynamic = 0;
+ power.readOp.leakage = 0;
+ power.searchOp.dynamic = 0;
+ len = len_temp;
+ ht = ht_temp / 2;
+
+ while (v > 0 || h > 0) {
+ if (wtemp1) delete wtemp1;
+ if (wtemp2) delete wtemp2;
+ if (wtemp3) delete wtemp3;
+
+ if (h > v) {
+ //the iteration considers only one horizontal link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, len / 2); // ver
+ len_temp = len;
+ len /= 2;
+ wtemp3 = 0;
+ h--;
+ option = 0;
+ } else if (v > 0 && h > 0) {
+ //considers one horizontal link and one vertical link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, ht); // ver
+ wtemp3 = new Wire(wt, len / 2); // next hor
+ len_temp = len;
+ ht_temp = ht;
+ len /= 2;
+ ht /= 2;
+ v--;
+ h--;
+ option = 1;
+ } else {
+ // considers only one vertical link
+ assert(h == 0);
+ wtemp1 = new Wire(wt, ht); // ver
+ wtemp2 = new Wire(wt, ht / 2); // hor
+ ht_temp = ht;
+ ht /= 2;
+ wtemp3 = 0;
+ v--;
+ option = 2;
+ }
+
+ delay += wtemp1->delay;
+ power.readOp.dynamic += wtemp1->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw;
+ power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
+ power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
+ if ((uca_tree == false && option == 2) || search_tree == true) {
+ wire_bw *= 2; // wire bandwidth doubles only for vertical branches
+ }
+
+ if (uca_tree == false) {
+ if (len_temp > wtemp1->repeater_spacing) {
+ s1 = wtemp1->repeater_size;
+ l_eff = wtemp1->repeater_spacing;
+ } else {
+ s1 = (len_temp / wtemp1->repeater_spacing) *
+ wtemp1->repeater_size;
+ l_eff = len_temp;
+ }
+
+ if (ht_temp > wtemp2->repeater_spacing) {
+ s2 = wtemp2->repeater_size;
+ } else {
+ s2 = (len_temp / wtemp2->repeater_spacing) *
+ wtemp2->repeater_size;
+ }
+ // first level
+ input_nand(s1, s2, l_eff);
+ }
+
+
+ if (option != 1) {
+ continue;
+ }
+
+ // second level
+ delay += wtemp2->delay;
+ power.readOp.dynamic += wtemp2->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp2->power.readOp.dynamic * wire_bw;
+ power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
+
+ if (uca_tree) {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
+ power.readOp.gate_leakage +=
+ wtemp2->power.readOp.gate_leakage * wire_bw;
+ } else {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
+ power.readOp.gate_leakage +=
+ wtemp2->power.readOp.gate_leakage * wire_bw;
+ wire_bw *= 2;
+
+ if (ht_temp > wtemp3->repeater_spacing) {
+ s3 = wtemp3->repeater_size;
+ l_eff = wtemp3->repeater_spacing;
+ } else {
+ s3 = (len_temp / wtemp3->repeater_spacing) *
+ wtemp3->repeater_size;
+ l_eff = ht_temp;
+ }
+
+ input_nand(s2, s3, l_eff);
+ }
}
- }
-
- area.h = ht_temp * 2;
- area.w = len_temp * 2;
- delay = 0;
- power.readOp.dynamic = 0;
- power.readOp.leakage = 0;
- power.searchOp.dynamic =0;
- len = len_temp;
- ht = ht_temp/2;
-
- while (v > 0 || h > 0)
- {
+
if (wtemp1) delete wtemp1;
if (wtemp2) delete wtemp2;
if (wtemp3) delete wtemp3;
-
- if (h > v)
- {
- //the iteration considers only one horizontal link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, len/2); // ver
- len_temp = len;
- len /= 2;
- wtemp3 = 0;
- h--;
- option = 0;
- }
- else if (v>0 && h>0)
- {
- //considers one horizontal link and one vertical link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, ht); // ver
- wtemp3 = new Wire(wt, len/2); // next hor
- len_temp = len;
- ht_temp = ht;
- len /= 2;
- ht /= 2;
- v--;
- h--;
- option = 1;
- }
- else
- {
- // considers only one vertical link
- assert(h == 0);
- wtemp1 = new Wire(wt, ht); // ver
- wtemp2 = new Wire(wt, ht/2); // hor
- ht_temp = ht;
- ht /= 2;
- wtemp3 = 0;
- v--;
- option = 2;
- }
-
- delay += wtemp1->delay;
- power.readOp.dynamic += wtemp1->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw;
- power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
- power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
- if ((uca_tree == false && option == 2) || search_tree==true)
- {
- wire_bw*=2; // wire bandwidth doubles only for vertical branches
- }
-
- if (uca_tree == false)
- {
- if (len_temp > wtemp1->repeater_spacing)
- {
- s1 = wtemp1->repeater_size;
- l_eff = wtemp1->repeater_spacing;
- }
- else
- {
- s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
- l_eff = len_temp;
- }
-
- if (ht_temp > wtemp2->repeater_spacing)
- {
- s2 = wtemp2->repeater_size;
- }
- else
- {
- s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
- }
- // first level
- input_nand(s1, s2, l_eff);
- }
-
-
- if (option != 1)
- {
- continue;
- }
-
- // second level
- delay += wtemp2->delay;
- power.readOp.dynamic += wtemp2->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw;
- power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
-
- if (uca_tree)
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- }
- else
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- wire_bw*=2;
-
- if (ht_temp > wtemp3->repeater_spacing)
- {
- s3 = wtemp3->repeater_size;
- l_eff = wtemp3->repeater_spacing;
- }
- else
- {
- s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
- l_eff = ht_temp;
- }
-
- input_nand(s2, s3, l_eff);
- }
- }
-
- if (wtemp1) delete wtemp1;
- if (wtemp2) delete wtemp2;
- if (wtemp3) delete wtemp3;
}
@@ -452,190 +497,198 @@ Htree2::in_htree()
* hor. links left. After this it goes through the remaining vertical
* links.
*/
-void Htree2::out_htree()
-{
- //temp var
- double s1 = 0, s2 = 0, s3 = 0;
- double l_eff = 0;
- Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
- double len = 0, ht = 0;
- int option = 0;
-
- int h = (int) _log2(ndwl/2);
- int v = (int) _log2(ndbl/2);
- double len_temp;
- double ht_temp;
- if (uca_tree)
- {
- ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,h))))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,v))))/2;
- }
- else
- {
- if (ndwl == ndbl) {
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
-
+void Htree2::out_htree() {
+ //temp var
+ double s1 = 0, s2 = 0, s3 = 0;
+ double l_eff = 0;
+ Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
+ double len = 0, ht = 0;
+ int option = 0;
+
+ int h = (int) _log2(ndwl / 2);
+ int v = (int) _log2(ndbl / 2);
+ double len_temp;
+ double ht_temp;
+ if (uca_tree) {
+ ht_temp = (mat_height * ndbl / 2 +
+ /* since uca_tree models interbank tree,
+ mat_height => bank height */
+ ((add_bits + data_in_bits + data_out_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ g_tp.wire_outside_mat.pitch *
+ 2 * (1 - pow(0.5, h)))) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + data_in_bits + data_out_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ g_tp.wire_outside_mat.pitch *
+ 2 * (1 - pow(0.5, v)))) / 2;
+ } else {
+ if (ndwl == ndbl) {
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) *
+ (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * h)
+ ) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) * (ndwl / 2 - 1) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * v)) / 2;
+
+ } else if (ndwl > ndbl) {
+ double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ ((ndbl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch *
+ (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * v)) / 2;
+ } else {
+ double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ ((ndwl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * h)
+ ) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) *
+ ((ndwl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch *
+ (h + 2 * (1 - pow(0.5, v - h)))) / 2;
+ }
}
- else if (ndwl > ndbl) {
- double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
- (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
- }
- else {
- double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
+ area.h = ht_temp * 2;
+ area.w = len_temp * 2;
+ delay = 0;
+ power.readOp.dynamic = 0;
+ power.readOp.leakage = 0;
+ power.readOp.gate_leakage = 0;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ len = len_temp;
+ ht = ht_temp / 2;
+
+ while (v > 0 || h > 0) { //finds delay/power of each link in the tree
+ if (wtemp1) delete wtemp1;
+ if (wtemp2) delete wtemp2;
+ if (wtemp3) delete wtemp3;
+
+ if (h > v) {
+ //the iteration considers only one horizontal link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, len / 2); // ver
+ len_temp = len;
+ len /= 2;
+ wtemp3 = 0;
+ h--;
+ option = 0;
+ } else if (v > 0 && h > 0) {
+ //considers one horizontal link and one vertical link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, ht); // ver
+ wtemp3 = new Wire(wt, len / 2); // next hor
+ len_temp = len;
+ ht_temp = ht;
+ len /= 2;
+ ht /= 2;
+ v--;
+ h--;
+ option = 1;
+ } else {
+ // considers only one vertical link
+ assert(h == 0);
+ wtemp1 = new Wire(wt, ht); // ver
+ wtemp2 = new Wire(wt, ht / 2); // hor
+ ht_temp = ht;
+ ht /= 2;
+ wtemp3 = 0;
+ v--;
+ option = 2;
+ }
+ delay += wtemp1->delay;
+ power.readOp.dynamic += wtemp1->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp1->power.readOp.dynamic * init_wire_bw;
+ power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
+ power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
+ if ((uca_tree == false && option == 2) || search_tree == true) {
+ wire_bw *= 2;
+ }
+
+ if (uca_tree == false) {
+ if (len_temp > wtemp1->repeater_spacing) {
+ s1 = wtemp1->repeater_size;
+ l_eff = wtemp1->repeater_spacing;
+ } else {
+ s1 = (len_temp / wtemp1->repeater_spacing) *
+ wtemp1->repeater_size;
+ l_eff = len_temp;
+ }
+ if (ht_temp > wtemp2->repeater_spacing) {
+ s2 = wtemp2->repeater_size;
+ } else {
+ s2 = (len_temp / wtemp2->repeater_spacing) *
+ wtemp2->repeater_size;
+ }
+ // first level
+ output_buffer(s1, s2, l_eff);
+ }
+
+
+ if (option != 1) {
+ continue;
+ }
+
+ // second level
+ delay += wtemp2->delay;
+ power.readOp.dynamic += wtemp2->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp2->power.readOp.dynamic * init_wire_bw;
+ power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ if (uca_tree) {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
+ power.readOp.gate_leakage +=
+ wtemp2->power.readOp.gate_leakage * wire_bw;
+ } else {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
+ power.readOp.gate_leakage +=
+ wtemp2->power.readOp.gate_leakage * wire_bw;
+ wire_bw *= 2;
+
+ if (ht_temp > wtemp3->repeater_spacing) {
+ s3 = wtemp3->repeater_size;
+ l_eff = wtemp3->repeater_spacing;
+ } else {
+ s3 = (len_temp / wtemp3->repeater_spacing) *
+ wtemp3->repeater_size;
+ l_eff = ht_temp;
+ }
+
+ output_buffer(s2, s3, l_eff);
+ }
+ //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
}
- }
- area.h = ht_temp * 2;
- area.w = len_temp * 2;
- delay = 0;
- power.readOp.dynamic = 0;
- power.readOp.leakage = 0;
- power.readOp.gate_leakage = 0;
- //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
- len = len_temp;
- ht = ht_temp/2;
-
- while (v > 0 || h > 0)
- { //finds delay/power of each link in the tree
+
if (wtemp1) delete wtemp1;
if (wtemp2) delete wtemp2;
if (wtemp3) delete wtemp3;
-
- if(h > v) {
- //the iteration considers only one horizontal link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, len/2); // ver
- len_temp = len;
- len /= 2;
- wtemp3 = 0;
- h--;
- option = 0;
- }
- else if (v>0 && h>0) {
- //considers one horizontal link and one vertical link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, ht); // ver
- wtemp3 = new Wire(wt, len/2); // next hor
- len_temp = len;
- ht_temp = ht;
- len /= 2;
- ht /= 2;
- v--;
- h--;
- option = 1;
- }
- else {
- // considers only one vertical link
- assert(h == 0);
- wtemp1 = new Wire(wt, ht); // hor
- wtemp2 = new Wire(wt, ht/2); // ver
- ht_temp = ht;
- ht /= 2;
- wtemp3 = 0;
- v--;
- option = 2;
- }
- delay += wtemp1->delay;
- power.readOp.dynamic += wtemp1->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw;
- power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
- power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
- //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
- if ((uca_tree == false && option == 2) || search_tree==true)
- {
- wire_bw*=2;
- }
-
- if (uca_tree == false)
- {
- if (len_temp > wtemp1->repeater_spacing)
- {
- s1 = wtemp1->repeater_size;
- l_eff = wtemp1->repeater_spacing;
- }
- else
- {
- s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
- l_eff = len_temp;
- }
- if (ht_temp > wtemp2->repeater_spacing)
- {
- s2 = wtemp2->repeater_size;
- }
- else
- {
- s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
- }
- // first level
- output_buffer(s1, s2, l_eff);
- }
-
-
- if (option != 1)
- {
- continue;
- }
-
- // second level
- delay += wtemp2->delay;
- power.readOp.dynamic += wtemp2->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw;
- power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
- if (uca_tree)
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- }
- else
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- wire_bw*=2;
-
- if (ht_temp > wtemp3->repeater_spacing)
- {
- s3 = wtemp3->repeater_size;
- l_eff = wtemp3->repeater_spacing;
- }
- else
- {
- s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
- l_eff = ht_temp;
- }
-
- output_buffer(s2, s3, l_eff);
- }
- //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
- //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
- //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
- }
-
- if (wtemp1) delete wtemp1;
- if (wtemp2) delete wtemp2;
- if (wtemp3) delete wtemp3;
}
diff --git a/ext/mcpat/cacti/htree2.h b/ext/mcpat/cacti/htree2.h
index 053e43a27..cae71c62e 100644
--- a/ext/mcpat/cacti/htree2.h
+++ b/ext/mcpat/cacti/htree2.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -44,13 +45,12 @@
// leakge power includes entire htree in a bank (when uca_tree == false)
// leakge power includes only part to one bank when uca_tree == true
-class Htree2 : public Component
-{
- public:
+class Htree2 : public Component {
+public:
Htree2(enum Wire_type wire_model,
- double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
- enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
+ enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
~Htree2() {};
void in_htree();
@@ -64,16 +64,15 @@ class Htree2 : public Component
double in_rise_time, out_rise_time;
- void set_in_rise_time(double rt)
- {
- in_rise_time = rt;
+ void set_in_rise_time(double rt) {
+ in_rise_time = rt;
}
double max_unpipelined_link_delay;
powerDef power_bit;
- private:
+private:
double wire_bw;
double init_wire_bw; // bus width at root
enum Htree_type tree_type;
@@ -81,7 +80,11 @@ class Htree2 : public Component
double htree_vnodes;
double mat_width;
double mat_height;
- int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
+ int add_bits;
+ int data_in_bits;
+ int search_data_in_bits;
+ int data_out_bits;
+ int search_data_out_bits;
int ndbl, ndwl;
bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
bool search_tree;
diff --git a/ext/mcpat/cacti/io.cc b/ext/mcpat/cacti/io.cc
index 56725ab7c..c3035d70f 100644
--- a/ext/mcpat/cacti/io.cc
+++ b/ext/mcpat/cacti/io.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -49,629 +50,578 @@ using namespace std;
/* Parses "cache.cfg" file */
- void
-InputParameter::parse_cfg(const string & in_file)
-{
- FILE *fp = fopen(in_file.c_str(), "r");
- char line[5000];
- char jk[5000];
- char temp_var[5000];
-
- if(!fp) {
- cout << in_file << " is missing!\n";
- exit(-1);
- }
+void
+InputParameter::parse_cfg(const string & in_file) {
+ FILE *fp = fopen(in_file.c_str(), "r");
+ char line[5000];
+ char jk[5000];
+ char temp_var[5000];
+
+ if (!fp) {
+ cout << in_file << " is missing!\n";
+ exit(-1);
+ }
- while(fscanf(fp, "%[^\n]\n", line) != EOF) {
+ while (fscanf(fp, "%[^\n]\n", line) != EOF) {
- if (!strncmp("-size", line, strlen("-size"))) {
- sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz));
- continue;
- }
+ if (!strncmp("-size", line, strlen("-size"))) {
+ sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz));
+ continue;
+ }
- if (!strncmp("-page size", line, strlen("-page size"))) {
- sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits));
- continue;
- }
+ if (!strncmp("-page size", line, strlen("-page size"))) {
+ sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits));
+ continue;
+ }
- if (!strncmp("-burst length", line, strlen("-burst length"))) {
- sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len));
- continue;
- }
+ if (!strncmp("-burst length", line, strlen("-burst length"))) {
+ sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len));
+ continue;
+ }
- if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) {
- sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w));
- continue;
- }
+ if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) {
+ sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w));
+ continue;
+ }
- if (!strncmp("-block", line, strlen("-block"))) {
- sscanf(line, "-block size (bytes) %d", &(line_sz));
- continue;
- }
+ if (!strncmp("-block", line, strlen("-block"))) {
+ sscanf(line, "-block size (bytes) %d", &(line_sz));
+ continue;
+ }
- if (!strncmp("-associativity", line, strlen("-associativity"))) {
- sscanf(line, "-associativity %d", &(assoc));
- continue;
- }
+ if (!strncmp("-associativity", line, strlen("-associativity"))) {
+ sscanf(line, "-associativity %d", &(assoc));
+ continue;
+ }
- if (!strncmp("-read-write", line, strlen("-read-write"))) {
- sscanf(line, "-read-write port %d", &(num_rw_ports));
- continue;
- }
+ if (!strncmp("-read-write", line, strlen("-read-write"))) {
+ sscanf(line, "-read-write port %d", &(num_rw_ports));
+ continue;
+ }
- if (!strncmp("-exclusive read", line, strlen("exclusive read"))) {
- sscanf(line, "-exclusive read port %d", &(num_rd_ports));
- continue;
- }
+ if (!strncmp("-exclusive read", line, strlen("exclusive read"))) {
+ sscanf(line, "-exclusive read port %d", &(num_rd_ports));
+ continue;
+ }
- if(!strncmp("-exclusive write", line, strlen("-exclusive write"))) {
- sscanf(line, "-exclusive write port %d", &(num_wr_ports));
- continue;
- }
+ if (!strncmp("-exclusive write", line, strlen("-exclusive write"))) {
+ sscanf(line, "-exclusive write port %d", &(num_wr_ports));
+ continue;
+ }
- if (!strncmp("-single ended", line, strlen("-single ended"))) {
- sscanf(line, "-single %[(:-~)*]%d", jk,
- &(num_se_rd_ports));
- continue;
- }
+ if (!strncmp("-single ended", line, strlen("-single ended"))) {
+ sscanf(line, "-single %[(:-~)*]%d", jk,
+ &(num_se_rd_ports));
+ continue;
+ }
- if (!strncmp("-search", line, strlen("-search"))) {
- sscanf(line, "-search port %d", &(num_search_ports));
- continue;
- }
+ if (!strncmp("-search", line, strlen("-search"))) {
+ sscanf(line, "-search port %d", &(num_search_ports));
+ continue;
+ }
- if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) {
- sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks));
- continue;
- }
+ if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) {
+ sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks));
+ continue;
+ }
- if (!strncmp("-technology", line, strlen("-technology"))) {
- sscanf(line, "-technology (u) %lf", &(F_sz_um));
- F_sz_nm = F_sz_um*1000;
- continue;
- }
+ if (!strncmp("-technology", line, strlen("-technology"))) {
+ sscanf(line, "-technology (u) %lf", &(F_sz_um));
+ F_sz_nm = F_sz_um * 1000;
+ continue;
+ }
- if (!strncmp("-output/input", line, strlen("-output/input"))) {
- sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w));
- continue;
- }
+ if (!strncmp("-output/input", line, strlen("-output/input"))) {
+ sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w));
+ continue;
+ }
- if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) {
- sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp));
- continue;
- }
+ if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) {
+ sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp));
+ continue;
+ }
- if (!strncmp("-cache type", line, strlen("-cache type"))) {
- sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("cache", temp_var, sizeof("cache"))) {
- is_cache = true;
- }
- else
- {
- is_cache = false;
- }
-
- if (!strncmp("main memory", temp_var, sizeof("main memory"))) {
- is_main_mem = true;
- }
- else {
- is_main_mem = false;
- }
-
- if (!strncmp("cam", temp_var, sizeof("cam"))) {
- pure_cam = true;
- }
- else {
- pure_cam = false;
- }
-
- if (!strncmp("ram", temp_var, sizeof("ram"))) {
- pure_ram = true;
- }
- else {
- if (!is_main_mem)
- pure_ram = false;
- else
- pure_ram = true;
- }
-
- continue;
- }
+ if (!strncmp("-cache type", line, strlen("-cache type"))) {
+ sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("cache", temp_var, sizeof("cache"))) {
+ is_cache = true;
+ } else {
+ is_cache = false;
+ }
+
+ if (!strncmp("main memory", temp_var, sizeof("main memory"))) {
+ is_main_mem = true;
+ } else {
+ is_main_mem = false;
+ }
+
+ if (!strncmp("cam", temp_var, sizeof("cam"))) {
+ pure_cam = true;
+ } else {
+ pure_cam = false;
+ }
+
+ if (!strncmp("ram", temp_var, sizeof("ram"))) {
+ pure_ram = true;
+ } else {
+ if (!is_main_mem)
+ pure_ram = false;
+ else
+ pure_ram = true;
+ }
+
+ continue;
+ }
- if (!strncmp("-tag size", line, strlen("-tag size"))) {
- sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("default", temp_var, sizeof("default"))) {
- specific_tag = false;
- tag_w = 42; /* the acutal value is calculated
+ if (!strncmp("-tag size", line, strlen("-tag size"))) {
+ sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("default", temp_var, sizeof("default"))) {
+ specific_tag = false;
+ tag_w = 42; /* the actual value is calculated
* later based on the cache size, bank count, and associativity
*/
- }
- else {
- specific_tag = true;
- sscanf(line, "-tag size (b) %d", &(tag_w));
- }
- continue;
- }
+ } else {
+ specific_tag = true;
+ sscanf(line, "-tag size (b) %d", &(tag_w));
+ }
+ continue;
+ }
- if (!strncmp("-access mode", line, strlen("-access mode"))) {
- sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("fast", temp_var, strlen("fast"))) {
- access_mode = 2;
- }
- else if (!strncmp("sequential", temp_var, strlen("sequential"))) {
- access_mode = 1;
- }
- else if(!strncmp("normal", temp_var, strlen("normal"))) {
- access_mode = 0;
- }
- else {
- cout << "ERROR: Invalid access mode!\n";
- exit(0);
- }
- continue;
- }
+ if (!strncmp("-access mode", line, strlen("-access mode"))) {
+ sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("fast", temp_var, strlen("fast"))) {
+ access_mode = 2;
+ } else if (!strncmp("sequential", temp_var, strlen("sequential"))) {
+ access_mode = 1;
+ } else if (!strncmp("normal", temp_var, strlen("normal"))) {
+ access_mode = 0;
+ } else {
+ cout << "ERROR: Invalid access mode!\n";
+ exit(0);
+ }
+ continue;
+ }
- if (!strncmp("-Data array cell type", line, strlen("-Data array cell type"))) {
- sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- data_arr_ram_cell_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- data_arr_ram_cell_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- data_arr_ram_cell_tech_type = 2;
- }
- else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
- data_arr_ram_cell_tech_type = 3;
- }
- else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
- data_arr_ram_cell_tech_type = 4;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
+ if (!strncmp("-Data array cell type", line,
+ strlen("-Data array cell type"))) {
+ sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ data_arr_ram_cell_tech_type = 0;
+ } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ data_arr_ram_cell_tech_type = 1;
+ } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ data_arr_ram_cell_tech_type = 2;
+ } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
+ data_arr_ram_cell_tech_type = 3;
+ } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
+ data_arr_ram_cell_tech_type = 4;
+ } else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
- if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) {
- sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- data_arr_peri_global_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- data_arr_peri_global_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- data_arr_peri_global_tech_type = 2;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
+ if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) {
+ sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ data_arr_peri_global_tech_type = 0;
+ } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ data_arr_peri_global_tech_type = 1;
+ } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ data_arr_peri_global_tech_type = 2;
+ } else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
- if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) {
- sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- tag_arr_ram_cell_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- tag_arr_ram_cell_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- tag_arr_ram_cell_tech_type = 2;
- }
- else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
- tag_arr_ram_cell_tech_type = 3;
- }
- else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
- tag_arr_ram_cell_tech_type = 4;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
+ if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) {
+ sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ tag_arr_ram_cell_tech_type = 0;
+ } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ tag_arr_ram_cell_tech_type = 1;
+ } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ tag_arr_ram_cell_tech_type = 2;
+ } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
+ tag_arr_ram_cell_tech_type = 3;
+ } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
+ tag_arr_ram_cell_tech_type = 4;
+ } else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
- if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) {
- sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- tag_arr_peri_global_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- tag_arr_peri_global_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- tag_arr_peri_global_tech_type = 2;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
- if(!strncmp("-design", line, strlen("-design"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_wt), &(dynamic_power_wt),
- &(leakage_power_wt),
- &(cycle_time_wt), &(area_wt));
- continue;
- }
+ if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) {
+ sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ tag_arr_peri_global_tech_type = 0;
+ } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ tag_arr_peri_global_tech_type = 1;
+ } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ tag_arr_peri_global_tech_type = 2;
+ } else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
+ if (!strncmp("-design", line, strlen("-design"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_wt), &(dynamic_power_wt),
+ &(leakage_power_wt),
+ &(cycle_time_wt), &(area_wt));
+ continue;
+ }
- if(!strncmp("-deviate", line, strlen("-deviate"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_dev), &(dynamic_power_dev),
- &(leakage_power_dev),
- &(cycle_time_dev), &(area_dev));
- continue;
- }
+ if (!strncmp("-deviate", line, strlen("-deviate"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_dev), &(dynamic_power_dev),
+ &(leakage_power_dev),
+ &(cycle_time_dev), &(area_dev));
+ continue;
+ }
- if(!strncmp("-Optimize", line, strlen("-Optimize"))) {
- sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("ED^2", temp_var, strlen("ED^2"))) {
- ed = 2;
- }
- else if(!strncmp("ED", temp_var, strlen("ED"))) {
- ed = 1;
- }
- else {
- ed = 0;
- }
- }
+ if (!strncmp("-Optimize", line, strlen("-Optimize"))) {
+ sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var);
- if(!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_wt_nuca), &(dynamic_power_wt_nuca),
- &(leakage_power_wt_nuca),
- &(cycle_time_wt_nuca), &(area_wt_nuca));
- continue;
- }
+ if (!strncmp("ED^2", temp_var, strlen("ED^2"))) {
+ ed = 2;
+ } else if (!strncmp("ED", temp_var, strlen("ED"))) {
+ ed = 1;
+ } else {
+ ed = 0;
+ }
+ }
- if(!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_dev_nuca), &(dynamic_power_dev_nuca),
- &(leakage_power_dev_nuca),
- &(cycle_time_dev_nuca), &(area_dev_nuca));
- continue;
- }
+ if (!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_wt_nuca), &(dynamic_power_wt_nuca),
+ &(leakage_power_wt_nuca),
+ &(cycle_time_wt_nuca), &(area_wt_nuca));
+ continue;
+ }
- if(!strncmp("-Cache model", line, strlen("-cache model"))) {
- sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_dev_nuca), &(dynamic_power_dev_nuca),
+ &(leakage_power_dev_nuca),
+ &(cycle_time_dev_nuca), &(area_dev_nuca));
+ continue;
+ }
- if (!strncmp("UCA", temp_var, strlen("UCA"))) {
- nuca = 0;
- }
- else {
- nuca = 1;
- }
- continue;
- }
+ if (!strncmp("-Cache model", line, strlen("-cache model"))) {
+ sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var);
- if(!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) {
- sscanf(line, "-NUCA bank count %d", &(nuca_bank_count));
+ if (!strncmp("UCA", temp_var, strlen("UCA"))) {
+ nuca = 0;
+ } else {
+ nuca = 1;
+ }
+ continue;
+ }
- if (nuca_bank_count != 0) {
- force_nuca_bank = 1;
- }
- continue;
- }
+ if (!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) {
+ sscanf(line, "-NUCA bank count %d", &(nuca_bank_count));
- if(!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("global", temp_var, strlen("global"))) {
- wire_is_mat_type = 2;
- continue;
- }
- else if (!strncmp("local", temp_var, strlen("local"))) {
- wire_is_mat_type = 0;
- continue;
- }
- else {
- wire_is_mat_type = 1;
- continue;
- }
- }
+ if (nuca_bank_count != 0) {
+ force_nuca_bank = 1;
+ }
+ continue;
+ }
- if(!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("global", temp_var, strlen("global"))) {
+ wire_is_mat_type = 2;
+ continue;
+ } else if (!strncmp("local", temp_var, strlen("local"))) {
+ wire_is_mat_type = 0;
+ continue;
+ } else {
+ wire_is_mat_type = 1;
+ continue;
+ }
+ }
- if (!strncmp("global", temp_var, strlen("global"))) {
- wire_os_mat_type = 2;
- }
- else {
- wire_os_mat_type = 1;
- }
- continue;
- }
+ if (!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
- if(!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) {
- sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("global", temp_var, strlen("global"))) {
+ wire_os_mat_type = 2;
+ } else {
+ wire_os_mat_type = 1;
+ }
+ continue;
+ }
- if (!strncmp("aggressive", temp_var, strlen("aggressive"))) {
- ic_proj_type = 0;
- }
- else {
- ic_proj_type = 1;
- }
- continue;
- }
+ if (!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) {
+ sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var);
- if(!strncmp("-Wire signalling", line, strlen("-wire signalling"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("default", temp_var, strlen("default"))) {
- force_wiretype = 0;
- wt = Global;
- }
- else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) {
- force_wiretype = 1;
- wt = Global_10;
- }
- else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) {
- force_wiretype = 1;
- wt = Global_20;
- }
- else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) {
- force_wiretype = 1;
- wt = Global_30;
- }
- else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) {
- force_wiretype = 1;
- wt = Global_5;
- }
- else if (!(strncmp("Global", temp_var, strlen("Global")))) {
- force_wiretype = 1;
- wt = Global;
- }
- else {
- wt = Low_swing;
- force_wiretype = 1;
- }
- continue;
- }
+ if (!strncmp("aggressive", temp_var, strlen("aggressive"))) {
+ ic_proj_type = 0;
+ } else {
+ ic_proj_type = 1;
+ }
+ continue;
+ }
+ if (!strncmp("-Wire signalling", line, strlen("-wire signalling"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("default", temp_var, strlen("default"))) {
+ force_wiretype = 0;
+ wt = Global;
+ } else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) {
+ force_wiretype = 1;
+ wt = Global_10;
+ } else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) {
+ force_wiretype = 1;
+ wt = Global_20;
+ } else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) {
+ force_wiretype = 1;
+ wt = Global_30;
+ } else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) {
+ force_wiretype = 1;
+ wt = Global_5;
+ } else if (!(strncmp("Global", temp_var, strlen("Global")))) {
+ force_wiretype = 1;
+ wt = Global;
+ } else {
+ wt = Low_swing;
+ force_wiretype = 1;
+ }
+ continue;
+ }
- if(!strncmp("-Core", line, strlen("-Core"))) {
- sscanf(line, "-Core count %d\n", &(cores));
- if (cores > 16) {
- printf("No. of cores should be less than 16!\n");
- }
- continue;
- }
- if(!strncmp("-Cache level", line, strlen("-Cache level"))) {
- sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("L2", temp_var, strlen("L2"))) {
- cache_level = 0;
- }
- else {
- cache_level = 1;
- }
- }
+ if (!strncmp("-Core", line, strlen("-Core"))) {
+ sscanf(line, "-Core count %d\n", &(cores));
+ if (cores > 16) {
+ printf("No. of cores should be less than 16!\n");
+ }
+ continue;
+ }
- if(!strncmp("-Print level", line, strlen("-Print level"))) {
- sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) {
- print_detail = 1;
- }
- else {
- print_detail = 0;
- }
+ if (!strncmp("-Cache level", line, strlen("-Cache level"))) {
+ sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("L2", temp_var, strlen("L2"))) {
+ cache_level = 0;
+ } else {
+ cache_level = 1;
+ }
+ }
- }
- if(!strncmp("-Add ECC", line, strlen("-Add ECC"))) {
- sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- add_ecc_b_ = true;
- }
- else {
- add_ecc_b_ = false;
- }
- }
+ if (!strncmp("-Print level", line, strlen("-Print level"))) {
+ sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) {
+ print_detail = 1;
+ } else {
+ print_detail = 0;
+ }
- if(!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) {
- sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- print_input_args = true;
- }
- else {
- print_input_args = false;
- }
- }
+ }
+ if (!strncmp("-Add ECC", line, strlen("-Add ECC"))) {
+ sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ add_ecc_b_ = true;
+ } else {
+ add_ecc_b_ = false;
+ }
+ }
- if(!strncmp("-Force cache config", line, strlen("-Force cache config"))) {
- sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- force_cache_config = true;
- }
- else {
- force_cache_config = false;
- }
- }
+ if (!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) {
+ sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ print_input_args = true;
+ } else {
+ print_input_args = false;
+ }
+ }
- if(!strncmp("-Ndbl", line, strlen("-Ndbl"))) {
- sscanf(line, "-Ndbl %d\n", &(ndbl));
- continue;
- }
- if(!strncmp("-Ndwl", line, strlen("-Ndwl"))) {
- sscanf(line, "-Ndwl %d\n", &(ndwl));
- continue;
- }
- if(!strncmp("-Nspd", line, strlen("-Nspd"))) {
- sscanf(line, "-Nspd %d\n", &(nspd));
- continue;
- }
- if(!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) {
- sscanf(line, "-Ndsam1 %d\n", &(ndsam1));
- continue;
- }
- if(!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) {
- sscanf(line, "-Ndsam2 %d\n", &(ndsam2));
- continue;
- }
- if(!strncmp("-Ndcm", line, strlen("-Ndcm"))) {
- sscanf(line, "-Ndcm %d\n", &(ndcm));
- continue;
- }
+ if (!strncmp("-Force cache config", line, strlen("-Force cache config"))) {
+ sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ force_cache_config = true;
+ } else {
+ force_cache_config = false;
+ }
+ }
- }
- rpters_in_htree = true;
- fclose(fp);
+ if (!strncmp("-Ndbl", line, strlen("-Ndbl"))) {
+ sscanf(line, "-Ndbl %d\n", &(ndbl));
+ continue;
+ }
+ if (!strncmp("-Ndwl", line, strlen("-Ndwl"))) {
+ sscanf(line, "-Ndwl %d\n", &(ndwl));
+ continue;
+ }
+ if (!strncmp("-Nspd", line, strlen("-Nspd"))) {
+ sscanf(line, "-Nspd %d\n", &(nspd));
+ continue;
+ }
+ if (!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) {
+ sscanf(line, "-Ndsam1 %d\n", &(ndsam1));
+ continue;
+ }
+ if (!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) {
+ sscanf(line, "-Ndsam2 %d\n", &(ndsam2));
+ continue;
+ }
+ if (!strncmp("-Ndcm", line, strlen("-Ndcm"))) {
+ sscanf(line, "-Ndcm %d\n", &(ndcm));
+ continue;
+ }
+
+ }
+ rpters_in_htree = true;
+ fclose(fp);
}
- void
-InputParameter::display_ip()
-{
- cout << "Cache size : " << cache_sz << endl;
- cout << "Block size : " << line_sz << endl;
- cout << "Associativity : " << assoc << endl;
- cout << "Read only ports : " << num_rd_ports << endl;
- cout << "Write only ports : " << num_wr_ports << endl;
- cout << "Read write ports : " << num_rw_ports << endl;
- cout << "Single ended read ports : " << num_se_rd_ports << endl;
- if (fully_assoc||pure_cam)
- {
- cout << "Search ports : " << num_search_ports << endl;
- }
- cout << "Cache banks (UCA) : " << nbanks << endl;
- cout << "Technology : " << F_sz_um << endl;
- cout << "Temperature : " << temp << endl;
- cout << "Tag size : " << tag_w << endl;
- if (is_cache) {
- cout << "array type : " << "Cache" << endl;
- }
- if (pure_ram) {
- cout << "array type : " << "Scratch RAM" << endl;
- }
- if (pure_cam)
- {
- cout << "array type : " << "CAM" << endl;
- }
- cout << "Model as memory : " << is_main_mem << endl;
- cout << "Access mode : " << access_mode << endl;
- cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl;
- cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl;
- cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl;
- cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl;
- cout << "Optimization target : " << ed << endl;
- cout << "Design objective (UCA wt) : " << delay_wt << " "
- << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt
- << " " << area_wt << endl;
- cout << "Design objective (UCA dev) : " << delay_dev << " "
- << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev
- << " " << area_dev << endl;
- if (nuca)
- {
- cout << "Cores : " << cores << endl;
-
-
- cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " "
- << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca
- << " " << area_wt_nuca << endl;
- cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " "
- << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca
- << " " << area_dev_nuca << endl;
+void
+InputParameter::display_ip() {
+ cout << "Cache size : " << cache_sz << endl;
+ cout << "Block size : " << line_sz << endl;
+ cout << "Associativity : " << assoc << endl;
+ cout << "Read only ports : " << num_rd_ports << endl;
+ cout << "Write only ports : " << num_wr_ports << endl;
+ cout << "Read write ports : " << num_rw_ports << endl;
+ cout << "Single ended read ports : " << num_se_rd_ports << endl;
+ if (fully_assoc || pure_cam) {
+ cout << "Search ports : " << num_search_ports << endl;
+ }
+ cout << "Cache banks (UCA) : " << nbanks << endl;
+ cout << "Technology : " << F_sz_um << endl;
+ cout << "Temperature : " << temp << endl;
+ cout << "Tag size : " << tag_w << endl;
+ if (is_cache) {
+ cout << "array type : " << "Cache" << endl;
+ }
+ if (pure_ram) {
+ cout << "array type : " << "Scratch RAM" << endl;
+ }
+ if (pure_cam) {
+ cout << "array type : " << "CAM" << endl;
+ }
+ cout << "Model as memory : " << is_main_mem << endl;
+ cout << "Access mode : " << access_mode << endl;
+ cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl;
+ cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl;
+ cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl;
+ cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl;
+ cout << "Optimization target : " << ed << endl;
+ cout << "Design objective (UCA wt) : " << delay_wt << " "
+ << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt
+ << " " << area_wt << endl;
+ cout << "Design objective (UCA dev) : " << delay_dev << " "
+ << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev
+ << " " << area_dev << endl;
+ if (nuca) {
+ cout << "Cores : " << cores << endl;
+
+
+ cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " "
+ << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca
+ << " " << area_wt_nuca << endl;
+ cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " "
+ << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca
+ << " " << area_dev_nuca << endl;
+ }
+ cout << "Cache model : " << nuca << endl;
+ cout << "Nuca bank : " << nuca_bank_count << endl;
+ cout << "Wire inside mat : " << wire_is_mat_type << endl;
+ cout << "Wire outside mat : " << wire_os_mat_type << endl;
+ cout << "Interconnect projection : " << ic_proj_type << endl;
+ cout << "Wire signalling : " << force_wiretype << endl;
+ cout << "Print level : " << print_detail << endl;
+ cout << "ECC overhead : " << add_ecc_b_ << endl;
+ cout << "Page size : " << page_sz_bits << endl;
+ cout << "Burst length : " << burst_len << endl;
+ cout << "Internal prefetch width : " << int_prefetch_w << endl;
+ cout << "Force cache config : " << g_ip->force_cache_config << endl;
+ if (g_ip->force_cache_config) {
+ cout << "Ndwl : " << g_ip->ndwl << endl;
+ cout << "Ndbl : " << g_ip->ndbl << endl;
+ cout << "Nspd : " << g_ip->nspd << endl;
+ cout << "Ndcm : " << g_ip->ndcm << endl;
+ cout << "Ndsam1 : " << g_ip->ndsam1 << endl;
+ cout << "Ndsam2 : " << g_ip->ndsam2 << endl;
}
- cout << "Cache model : " << nuca << endl;
- cout << "Nuca bank : " << nuca_bank_count << endl;
- cout << "Wire inside mat : " << wire_is_mat_type << endl;
- cout << "Wire outside mat : " << wire_os_mat_type << endl;
- cout << "Interconnect projection : " << ic_proj_type << endl;
- cout << "Wire signalling : " << force_wiretype << endl;
- cout << "Print level : " << print_detail << endl;
- cout << "ECC overhead : " << add_ecc_b_ << endl;
- cout << "Page size : " << page_sz_bits << endl;
- cout << "Burst length : " << burst_len << endl;
- cout << "Internal prefetch width : " << int_prefetch_w << endl;
- cout << "Force cache config : " << g_ip->force_cache_config << endl;
- if (g_ip->force_cache_config) {
- cout << "Ndwl : " << g_ip->ndwl << endl;
- cout << "Ndbl : " << g_ip->ndbl << endl;
- cout << "Nspd : " << g_ip->nspd << endl;
- cout << "Ndcm : " << g_ip->ndcm << endl;
- cout << "Ndsam1 : " << g_ip->ndsam1 << endl;
- cout << "Ndsam2 : " << g_ip->ndsam2 << endl;
- }
}
-powerComponents operator+(const powerComponents & x, const powerComponents & y)
-{
- powerComponents z;
+powerComponents operator+(const powerComponents & x, const powerComponents & y) {
+ powerComponents z;
- z.dynamic = x.dynamic + y.dynamic;
- z.leakage = x.leakage + y.leakage;
- z.gate_leakage = x.gate_leakage + y.gate_leakage;
- z.short_circuit = x.short_circuit + y.short_circuit;
- z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage;
+ z.dynamic = x.dynamic + y.dynamic;
+ z.leakage = x.leakage + y.leakage;
+ z.gate_leakage = x.gate_leakage + y.gate_leakage;
+ z.short_circuit = x.short_circuit + y.short_circuit;
+ z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage;
- return z;
+ return z;
}
-powerComponents operator*(const powerComponents & x, double const * const y)
-{
- powerComponents z;
+powerComponents operator*(const powerComponents & x, double const * const y) {
+ powerComponents z;
- z.dynamic = x.dynamic*y[0];
- z.leakage = x.leakage*y[1];
- z.gate_leakage = x.gate_leakage*y[2];
- z.short_circuit = x.short_circuit*y[3];
- z.longer_channel_leakage = x.longer_channel_leakage*y[1];//longer channel leakage has the same behavior as normal leakage
+ z.dynamic = x.dynamic * y[0];
+ z.leakage = x.leakage * y[1];
+ z.gate_leakage = x.gate_leakage * y[2];
+ z.short_circuit = x.short_circuit * y[3];
+ //longer channel leakage has the same behavior as normal leakage
+ z.longer_channel_leakage = x.longer_channel_leakage * y[1];
- return z;
+ return z;
}
-powerDef operator+(const powerDef & x, const powerDef & y)
-{
- powerDef z;
+powerDef operator+(const powerDef & x, const powerDef & y) {
+ powerDef z;
- z.readOp = x.readOp + y.readOp;
- z.writeOp = x.writeOp + y.writeOp;
- z.searchOp = x.searchOp + y.searchOp;
- return z;
+ z.readOp = x.readOp + y.readOp;
+ z.writeOp = x.writeOp + y.writeOp;
+ z.searchOp = x.searchOp + y.searchOp;
+ return z;
}
-powerDef operator*(const powerDef & x, double const * const y)
-{
- powerDef z;
+powerDef operator*(const powerDef & x, double const * const y) {
+ powerDef z;
- z.readOp = x.readOp*y;
- z.writeOp = x.writeOp*y;
- z.searchOp = x.searchOp*y;
- return z;
+ z.readOp = x.readOp * y;
+ z.writeOp = x.writeOp * y;
+ z.searchOp = x.searchOp * y;
+ return z;
}
-uca_org_t cacti_interface(const string & infile_name)
-{
+uca_org_t cacti_interface(const string & infile_name) {
- uca_org_t fin_res;
- //uca_org_t result;
- fin_res.valid = false;
+ uca_org_t fin_res;
+ //uca_org_t result;
+ fin_res.valid = false;
- g_ip = new InputParameter();
- g_ip->parse_cfg(infile_name);
- if(!g_ip->error_checking())
- exit(0);
- if (g_ip->print_input_args)
- g_ip->display_ip();
+ g_ip = new InputParameter();
+ g_ip->parse_cfg(infile_name);
+ if (!g_ip->error_checking(infile_name))
+ exit(0);
+ if (g_ip->print_input_args)
+ g_ip->display_ip();
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
// For HighRadix Only
@@ -703,19 +653,18 @@ uca_org_t cacti_interface(const string & infile_name)
// exit(0);
// For HighRadix Only End
- if (g_ip->nuca == 1)
- {
- Nuca n(&g_tp.peri_global);
- n.sim_nuca();
- }
- g_ip->display_ip();
- solve(&fin_res);
+ if (g_ip->nuca == 1) {
+ Nuca n(&g_tp.peri_global);
+ n.sim_nuca();
+ }
+ g_ip->display_ip();
+ solve(&fin_res);
- output_UCA(&fin_res);
- output_data_csv(fin_res);
+ output_UCA(&fin_res);
+ output_data_csv(fin_res);
- delete (g_ip);
- return fin_res;
+ delete (g_ip);
+ return fin_res;
}
//cacti6.5's plain interface, please keep !!!
@@ -773,142 +722,139 @@ uca_org_t cacti_interface(
int nuca_dev_func_area,
int nuca_dev_func_cycle_time,
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
- int p_input)
-{
- g_ip = new InputParameter();
- g_ip->add_ecc_b_ = true;
-
- g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
- g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
- g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
- g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-
- g_ip->ic_proj_type = interconnect_projection_type_in;
- g_ip->wire_is_mat_type = wire_inside_mat_type_in;
- g_ip->wire_os_mat_type = wire_outside_mat_type_in;
- g_ip->burst_len = burst_length;
- g_ip->int_prefetch_w = pre_width;
- g_ip->page_sz_bits = page_sz;
-
- g_ip->cache_sz = cache_size;
- g_ip->line_sz = line_size;
- g_ip->assoc = associativity;
- g_ip->nbanks = banks;
- g_ip->out_w = output_width;
- g_ip->specific_tag = specific_tag;
- if (tag_width == 0) {
- g_ip->tag_w = 42;
- }
- else {
- g_ip->tag_w = tag_width;
- }
-
- g_ip->access_mode = access_mode;
- g_ip->delay_wt = obj_func_delay;
- g_ip->dynamic_power_wt = obj_func_dynamic_power;
- g_ip->leakage_power_wt = obj_func_leakage_power;
- g_ip->area_wt = obj_func_area;
- g_ip->cycle_time_wt = obj_func_cycle_time;
- g_ip->delay_dev = dev_func_delay;
- g_ip->dynamic_power_dev = dev_func_dynamic_power;
- g_ip->leakage_power_dev = dev_func_leakage_power;
- g_ip->area_dev = dev_func_area;
- g_ip->cycle_time_dev = dev_func_cycle_time;
- g_ip->ed = ed_ed2_none;
-
- switch(wt) {
+ int p_input) {
+ g_ip = new InputParameter();
+ g_ip->add_ecc_b_ = true;
+
+ g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
+ g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
+ g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
+ g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
+
+ g_ip->ic_proj_type = interconnect_projection_type_in;
+ g_ip->wire_is_mat_type = wire_inside_mat_type_in;
+ g_ip->wire_os_mat_type = wire_outside_mat_type_in;
+ g_ip->burst_len = burst_length;
+ g_ip->int_prefetch_w = pre_width;
+ g_ip->page_sz_bits = page_sz;
+
+ g_ip->cache_sz = cache_size;
+ g_ip->line_sz = line_size;
+ g_ip->assoc = associativity;
+ g_ip->nbanks = banks;
+ g_ip->out_w = output_width;
+ g_ip->specific_tag = specific_tag;
+ if (tag_width == 0) {
+ g_ip->tag_w = 42;
+ } else {
+ g_ip->tag_w = tag_width;
+ }
+
+ g_ip->access_mode = access_mode;
+ g_ip->delay_wt = obj_func_delay;
+ g_ip->dynamic_power_wt = obj_func_dynamic_power;
+ g_ip->leakage_power_wt = obj_func_leakage_power;
+ g_ip->area_wt = obj_func_area;
+ g_ip->cycle_time_wt = obj_func_cycle_time;
+ g_ip->delay_dev = dev_func_delay;
+ g_ip->dynamic_power_dev = dev_func_dynamic_power;
+ g_ip->leakage_power_dev = dev_func_leakage_power;
+ g_ip->area_dev = dev_func_area;
+ g_ip->cycle_time_dev = dev_func_cycle_time;
+ g_ip->ed = ed_ed2_none;
+
+ switch (wt) {
case (0):
- g_ip->force_wiretype = 0;
- g_ip->wt = Global;
- break;
+ g_ip->force_wiretype = 0;
+ g_ip->wt = Global;
+ break;
case (1):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global;
+ break;
case (2):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_5;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_5;
+ break;
case (3):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_10;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_10;
+ break;
case (4):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_20;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_20;
+ break;
case (5):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_30;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_30;
+ break;
case (6):
- g_ip->force_wiretype = 1;
- g_ip->wt = Low_swing;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Low_swing;
+ break;
default:
- cout << "Unknown wire type!\n";
- exit(0);
- }
-
- g_ip->delay_wt_nuca = nuca_obj_func_delay;
- g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power;
- g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power;
- g_ip->area_wt_nuca = nuca_obj_func_area;
- g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time;
- g_ip->delay_dev_nuca = dev_func_delay;
- g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power;
- g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power;
- g_ip->area_dev_nuca = nuca_dev_func_area;
- g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time;
- g_ip->nuca = is_nuca;
- g_ip->nuca_bank_count = nuca_bank_count;
- if(nuca_bank_count > 0) {
- g_ip->force_nuca_bank = 1;
- }
- g_ip->cores = core_count;
- g_ip->cache_level = cache_level;
-
- g_ip->temp = temp;
-
- g_ip->F_sz_nm = tech_node;
- g_ip->F_sz_um = tech_node / 1000;
- g_ip->is_main_mem = (main_mem != 0) ? true : false;
- g_ip->is_cache = (cache != 0) ? true : false;
- g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-
- g_ip->num_rw_ports = rw_ports;
- g_ip->num_rd_ports = excl_read_ports;
- g_ip->num_wr_ports = excl_write_ports;
- g_ip->num_se_rd_ports = single_ended_read_ports;
- g_ip->print_detail = 1;
- g_ip->nuca = 0;
-
- g_ip->wt = Global_5;
- g_ip->force_cache_config = false;
- g_ip->force_wiretype = false;
- g_ip->print_input_args = p_input;
-
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- if (g_ip->error_checking() == false) exit(0);
- if (g_ip->print_input_args)
- g_ip->display_ip();
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
-
- if (g_ip->nuca == 1)
- {
- Nuca n(&g_tp.peri_global);
- n.sim_nuca();
- }
- solve(&fin_res);
-
- output_UCA(&fin_res);
+ cout << "Unknown wire type!\n";
+ exit(0);
+ }
- delete (g_ip);
- return fin_res;
+ g_ip->delay_wt_nuca = nuca_obj_func_delay;
+ g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power;
+ g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power;
+ g_ip->area_wt_nuca = nuca_obj_func_area;
+ g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time;
+ g_ip->delay_dev_nuca = dev_func_delay;
+ g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power;
+ g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power;
+ g_ip->area_dev_nuca = nuca_dev_func_area;
+ g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time;
+ g_ip->nuca = is_nuca;
+ g_ip->nuca_bank_count = nuca_bank_count;
+ if (nuca_bank_count > 0) {
+ g_ip->force_nuca_bank = 1;
+ }
+ g_ip->cores = core_count;
+ g_ip->cache_level = cache_level;
+
+ g_ip->temp = temp;
+
+ g_ip->F_sz_nm = tech_node;
+ g_ip->F_sz_um = tech_node / 1000;
+ g_ip->is_main_mem = (main_mem != 0) ? true : false;
+ g_ip->is_cache = (cache != 0) ? true : false;
+ g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
+
+ g_ip->num_rw_ports = rw_ports;
+ g_ip->num_rd_ports = excl_read_ports;
+ g_ip->num_wr_ports = excl_write_ports;
+ g_ip->num_se_rd_ports = single_ended_read_ports;
+ g_ip->print_detail = 1;
+ g_ip->nuca = 0;
+
+ g_ip->wt = Global_5;
+ g_ip->force_cache_config = false;
+ g_ip->force_wiretype = false;
+ g_ip->print_input_args = p_input;
+
+
+ uca_org_t fin_res;
+ fin_res.valid = false;
+
+ if (g_ip->error_checking() == false) exit(0);
+ if (g_ip->print_input_args)
+ g_ip->display_ip();
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+
+ if (g_ip->nuca == 1) {
+ Nuca n(&g_tp.peri_global);
+ n.sim_nuca();
+ }
+ solve(&fin_res);
+
+ output_UCA(&fin_res);
+
+ delete (g_ip);
+ return fin_res;
}
//McPAT's plain interface, please keep !!!
@@ -964,200 +910,187 @@ uca_org_t cacti_interface(
int ndcm,
int ndsam1,//para50
int ndsam2,
- int ecc)
-{
- g_ip = new InputParameter();
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
- g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
- g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
- g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-
- g_ip->ic_proj_type = interconnect_projection_type_in;
- g_ip->wire_is_mat_type = wire_inside_mat_type_in;
- g_ip->wire_os_mat_type = wire_outside_mat_type_in;
- g_ip->burst_len = BURST_LENGTH_in;
- g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
- g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-
- g_ip->cache_sz = cache_size;
- g_ip->line_sz = line_size;
- g_ip->assoc = associativity;
- g_ip->nbanks = banks;
- g_ip->out_w = output_width;
- g_ip->specific_tag = specific_tag;
- if (specific_tag == 0) {
- g_ip->tag_w = 42;
- }
- else {
- g_ip->tag_w = tag_width;
- }
-
- g_ip->access_mode = access_mode;
- g_ip->delay_wt = obj_func_delay;
- g_ip->dynamic_power_wt = obj_func_dynamic_power;
- g_ip->leakage_power_wt = obj_func_leakage_power;
- g_ip->area_wt = obj_func_area;
- g_ip->cycle_time_wt = obj_func_cycle_time;
- g_ip->delay_dev = dev_func_delay;
- g_ip->dynamic_power_dev = dev_func_dynamic_power;
- g_ip->leakage_power_dev = dev_func_leakage_power;
- g_ip->area_dev = dev_func_area;
- g_ip->cycle_time_dev = dev_func_cycle_time;
- g_ip->temp = temp;
- g_ip->ed = ed_ed2_none;
-
- g_ip->F_sz_nm = tech_node;
- g_ip->F_sz_um = tech_node / 1000;
- g_ip->is_main_mem = (main_mem != 0) ? true : false;
- g_ip->is_cache = (cache ==1) ? true : false;
- g_ip->pure_ram = (cache ==0) ? true : false;
- g_ip->pure_cam = (cache ==2) ? true : false;
- g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
- g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
- g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-
- g_ip->num_rw_ports = rw_ports;
- g_ip->num_rd_ports = excl_read_ports;
- g_ip->num_wr_ports = excl_write_ports;
- g_ip->num_se_rd_ports = single_ended_read_ports;
- g_ip->num_search_ports = search_ports;
-
- g_ip->print_detail = 1;
- g_ip->nuca = 0;
-
- if (force_wiretype == 0)
- {
- g_ip->wt = Global;
- g_ip->force_wiretype = false;
- }
- else
- { g_ip->force_wiretype = true;
- if (wiretype==10) {
- g_ip->wt = Global_10;
- }
- if (wiretype==20) {
- g_ip->wt = Global_20;
- }
- if (wiretype==30) {
- g_ip->wt = Global_30;
- }
- if (wiretype==5) {
- g_ip->wt = Global_5;
- }
- if (wiretype==0) {
- g_ip->wt = Low_swing;
- }
- }
- //g_ip->wt = Global_5;
- if (force_config == 0)
- {
- g_ip->force_cache_config = false;
+ int ecc) {
+ g_ip = new InputParameter();
+
+ uca_org_t fin_res;
+ fin_res.valid = false;
+
+ g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
+ g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
+ g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
+ g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
+
+ g_ip->ic_proj_type = interconnect_projection_type_in;
+ g_ip->wire_is_mat_type = wire_inside_mat_type_in;
+ g_ip->wire_os_mat_type = wire_outside_mat_type_in;
+ g_ip->burst_len = BURST_LENGTH_in;
+ g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
+ g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
+
+ g_ip->cache_sz = cache_size;
+ g_ip->line_sz = line_size;
+ g_ip->assoc = associativity;
+ g_ip->nbanks = banks;
+ g_ip->out_w = output_width;
+ g_ip->specific_tag = specific_tag;
+ if (specific_tag == 0) {
+ g_ip->tag_w = 42;
+ } else {
+ g_ip->tag_w = tag_width;
+ }
+
+ g_ip->access_mode = access_mode;
+ g_ip->delay_wt = obj_func_delay;
+ g_ip->dynamic_power_wt = obj_func_dynamic_power;
+ g_ip->leakage_power_wt = obj_func_leakage_power;
+ g_ip->area_wt = obj_func_area;
+ g_ip->cycle_time_wt = obj_func_cycle_time;
+ g_ip->delay_dev = dev_func_delay;
+ g_ip->dynamic_power_dev = dev_func_dynamic_power;
+ g_ip->leakage_power_dev = dev_func_leakage_power;
+ g_ip->area_dev = dev_func_area;
+ g_ip->cycle_time_dev = dev_func_cycle_time;
+ g_ip->temp = temp;
+ g_ip->ed = ed_ed2_none;
+
+ g_ip->F_sz_nm = tech_node;
+ g_ip->F_sz_um = tech_node / 1000;
+ g_ip->is_main_mem = (main_mem != 0) ? true : false;
+ g_ip->is_cache = (cache == 1) ? true : false;
+ g_ip->pure_ram = (cache == 0) ? true : false;
+ g_ip->pure_cam = (cache == 2) ? true : false;
+ g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
+ g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
+ g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
+
+ g_ip->num_rw_ports = rw_ports;
+ g_ip->num_rd_ports = excl_read_ports;
+ g_ip->num_wr_ports = excl_write_ports;
+ g_ip->num_se_rd_ports = single_ended_read_ports;
+ g_ip->num_search_ports = search_ports;
+
+ g_ip->print_detail = 1;
+ g_ip->nuca = 0;
+
+ if (force_wiretype == 0) {
+ g_ip->wt = Global;
+ g_ip->force_wiretype = false;
+ } else {
+ g_ip->force_wiretype = true;
+ if (wiretype == 10) {
+ g_ip->wt = Global_10;
+ }
+ if (wiretype == 20) {
+ g_ip->wt = Global_20;
+ }
+ if (wiretype == 30) {
+ g_ip->wt = Global_30;
+ }
+ if (wiretype == 5) {
+ g_ip->wt = Global_5;
+ }
+ if (wiretype == 0) {
+ g_ip->wt = Low_swing;
+ }
}
- else
- {
+ //g_ip->wt = Global_5;
+ if (force_config == 0) {
+ g_ip->force_cache_config = false;
+ } else {
g_ip->force_cache_config = true;
- g_ip->ndbl=ndbl;
- g_ip->ndwl=ndwl;
- g_ip->nspd=nspd;
- g_ip->ndcm=ndcm;
- g_ip->ndsam1=ndsam1;
- g_ip->ndsam2=ndsam2;
+ g_ip->ndbl = ndbl;
+ g_ip->ndwl = ndwl;
+ g_ip->nspd = nspd;
+ g_ip->ndcm = ndcm;
+ g_ip->ndsam1 = ndsam1;
+ g_ip->ndsam2 = ndsam2;
}
- if (ecc==0){
- g_ip->add_ecc_b_=false;
- }
- else
- {
- g_ip->add_ecc_b_=true;
- }
+ if (ecc == 0) {
+ g_ip->add_ecc_b_ = false;
+ } else {
+ g_ip->add_ecc_b_ = true;
+ }
- if(!g_ip->error_checking())
- exit(0);
+ if (!g_ip->error_checking())
+ exit(0);
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
- g_ip->display_ip();
- solve(&fin_res);
- output_UCA(&fin_res);
- output_data_csv(fin_res);
- delete (g_ip);
+ g_ip->display_ip();
+ solve(&fin_res);
+ output_UCA(&fin_res);
+ output_data_csv(fin_res);
+ delete (g_ip);
- return fin_res;
+ return fin_res;
}
-bool InputParameter::error_checking()
-{
- int A;
- bool seq_access = false;
- fast_access = true;
+bool InputParameter::error_checking(string name) {
+ int A;
+ bool seq_access = false;
+ fast_access = true;
- switch (access_mode)
- {
+ switch (access_mode) {
case 0:
- seq_access = false;
- fast_access = false;
- break;
+ seq_access = false;
+ fast_access = false;
+ break;
case 1:
- seq_access = true;
- fast_access = false;
- break;
+ seq_access = true;
+ fast_access = false;
+ break;
case 2:
- seq_access = false;
- fast_access = true;
- break;
- }
-
- if(is_main_mem)
- {
- if(ic_proj_type == 0)
- {
- cerr << "DRAM model supports only conservative interconnect projection!\n\n";
- return false;
+ seq_access = false;
+ fast_access = true;
+ break;
}
- }
-
-
- uint32_t B = line_sz;
-
- if (B < 1)
- {
- cerr << "Block size must >= 1" << endl;
- return false;
- }
- else if (B*8 < out_w)
- {
- cerr << "Block size must be at least " << out_w/8 << endl;
- return false;
- }
-
- if (F_sz_um <= 0)
- {
- cerr << "Feature size must be > 0" << endl;
- return false;
- }
- else if (F_sz_um > 0.091)
- {
- cerr << "Feature size must be <= 90 nm" << endl;
- return false;
- }
-
-
- uint32_t RWP = num_rw_ports;
- uint32_t ERP = num_rd_ports;
- uint32_t EWP = num_wr_ports;
- uint32_t NSER = num_se_rd_ports;
- uint32_t SCHP = num_search_ports;
+
+ if (is_main_mem) {
+ if (ic_proj_type == 0) {
+ cerr << name
+ << ": DRAM model supports only conservative interconnect "
+ << "projection but is set to aggressive!\n\n";
+ return false;
+ }
+ }
+
+
+ uint32_t B = line_sz;
+
+ if (B < 1) {
+ cerr << name << ": Block size must be >= 1, but is set to " << B
+ << endl;
+ return false;
+ } else if (B*8 < out_w) {
+ cerr << name << ": Block size must be at least " << out_w / 8
+ << ", but is set to " << B << endl;
+ return false;
+ }
+
+ if (F_sz_um <= 0) {
+ cerr << name << ": Feature size must be > 0, but is set to "
+ << F_sz_um << endl;
+ return false;
+ } else if (F_sz_um > 0.091) {
+ cerr << name << ": Feature size must be <= 90 nm, but is set to "
+ << F_sz_um << endl;
+ return false;
+ }
+
+
+ uint32_t RWP = num_rw_ports;
+ uint32_t ERP = num_rd_ports;
+ uint32_t EWP = num_wr_ports;
+ uint32_t NSER = num_se_rd_ports;
+ uint32_t SCHP = num_search_ports;
//TODO: revisit this. This is an important feature. Sheng thought this should be used
// // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to
@@ -1181,26 +1114,26 @@ bool InputParameter::error_checking()
// return false;
// }
// else if ((RWP+ERP+EWP) < 1)
- // Changed to new implementation:
- // The number of ports specified at input is per bank
- if ((RWP+ERP+EWP) < 1)
- {
- cerr << "Must have at least one port" << endl;
- return false;
- }
-
- if (is_pow2(nbanks) == false)
- {
- cerr << "Number of subbanks should be greater than or equal to 1 and should be a power of 2" << endl;
- return false;
- }
-
- int C = cache_sz/nbanks;
- if (C < 64)
- {
- cerr << "Cache size must >=64" << endl;
- return false;
- }
+ // Changed to new implementation:
+ // The number of ports specified at input is per bank
+ if ((RWP + ERP + EWP) < 1) {
+ cerr << name << ": Must have at least one port" << endl;
+ return false;
+ }
+
+ if (is_pow2(nbanks) == false) {
+ cerr << name << ": Number of subbanks should be greater than or "
+ << "equal to 1 and should be a power of 2, but is set to "
+ << nbanks << endl;
+ return false;
+ }
+
+ int C = cache_sz / nbanks;
+ if (C < 64) {
+ cerr << name << ": Cache size must be >=64, but is set to " << C
+ << endl;
+ return false;
+ }
//TODO: revisit this
// if (pure_ram==true && assoc!=1)
@@ -1210,54 +1143,64 @@ bool InputParameter::error_checking()
// }
//fully assoc and cam check
- if (is_cache && assoc==0)
- fully_assoc =true;
+ if (is_cache && assoc == 0)
+ fully_assoc = true;
else
fully_assoc = false;
- if (pure_cam==true && assoc!=0)
- {
- cerr << "Pure CAM must have associativity as 0" << endl;
- return false;
+ if (pure_cam == true && assoc != 0) {
+ cerr << name
+ << ": Pure CAM must have associativity as 0, but is set to"
+ << assoc << endl;
+ return false;
}
- if (assoc==0 && (pure_cam==false && is_cache ==false))
- {
- cerr << "Only CAM or Fully associative cache can have associativity as 0" << endl;
- return false;
+ if (assoc == 0 && (pure_cam == false && is_cache == false)) {
+ cerr << name
+ << ": Only CAM or Fully associative cache can have associativity "
+ << "as 0" << endl;
+ return false;
}
- if ((fully_assoc==true || pure_cam==true)
- && (data_arr_ram_cell_tech_type!= tag_arr_ram_cell_tech_type
- || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type ))
- {
- cerr << "CAM and fully associative cache must have same device type for both data and tag array" << endl;
- return false;
+ if ((fully_assoc == true || pure_cam == true)
+ && (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type
+ || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type)) {
+ cerr << name
+ << ": CAM and fully associative cache must have same device type "
+ << "for both data and tag array" << endl;
+ cerr << "\tData array RAM cell = " << data_arr_ram_cell_tech_type
+ << ", Tag array RAM cell = " << tag_arr_ram_cell_tech_type << endl
+ << "\tData array peripheral = " << data_arr_peri_global_tech_type
+ << ", Tag array peripheral = " << tag_arr_peri_global_tech_type
+ << endl;
+ return false;
}
- if ((fully_assoc==true || pure_cam==true)
- && (data_arr_ram_cell_tech_type== lp_dram || data_arr_ram_cell_tech_type== comm_dram))
- {
- cerr << "DRAM based CAM and fully associative cache are not supported" << endl;
- return false;
+ if ((fully_assoc == true || pure_cam == true)
+ && (data_arr_ram_cell_tech_type == lp_dram ||
+ data_arr_ram_cell_tech_type == comm_dram)) {
+ cerr << name << ": DRAM based CAM and fully associative cache are not "
+ << "supported" << endl;
+ return false;
}
- if ((fully_assoc==true || pure_cam==true)
- && (is_main_mem==true))
- {
- cerr << "CAM and fully associative cache cannot be as main memory" << endl;
- return false;
+ if ((fully_assoc == true || pure_cam == true)
+ && (is_main_mem == true)) {
+ cerr << name
+ << ": CAM and fully associative cache cannot be as main memory"
+ << endl;
+ return false;
}
- if ((fully_assoc || pure_cam) && SCHP<1)
- {
- cerr << "CAM and fully associative must have at least 1 search port" << endl;
- return false;
+ if ((fully_assoc || pure_cam) && SCHP < 1) {
+ cerr << name
+ << ": CAM and fully associative must have at least 1 search port,"
+ << " but are set to " << SCHP << endl;
+ return false;
}
- if (RWP==0 && ERP==0 && SCHP>0 && ((fully_assoc || pure_cam)))
- {
- ERP=SCHP;
+ if (RWP == 0 && ERP == 0 && SCHP > 0 && ((fully_assoc || pure_cam))) {
+ ERP = SCHP;
}
// if ((!(fully_assoc || pure_cam)) && SCHP>=1)
@@ -1266,140 +1209,112 @@ bool InputParameter::error_checking()
// return false;
// }
- if (assoc == 0)
- {
- A = C/B;
- //fully_assoc = true;
- }
- else
- {
- if (assoc == 1)
- {
- A = 1;
- //fully_assoc = false;
+ if (assoc == 0) {
+ A = C / B;
+ //fully_assoc = true;
+ } else {
+ if (assoc == 1) {
+ A = 1;
+ //fully_assoc = false;
+ } else {
+ //fully_assoc = false;
+ A = assoc;
+ if (is_pow2(A) == false) {
+ cerr << name
+ << ": Associativity must be a power of 2, but is set to "
+ << A << endl;
+ return false;
+ }
+ }
}
- else
- {
- //fully_assoc = false;
- A = assoc;
- if (is_pow2(A) == false)
- {
- cerr << "Associativity must be a power of 2" << endl;
+
+ if (C / (B*A) <= 1 && assoc != 0) {
+ cerr << name << ": Number of sets (" << (C / (B * A))
+ << ") is too small: " << endl;
+ cerr << " Need to either increase cache size, or decrease "
+ << "associativity or block size" << endl;
+ cerr << " (or use fully associative cache)" << endl;
return false;
- }
}
- }
-
- if (C/(B*A) <= 1 && assoc!=0)
- {
- cerr << "Number of sets is too small: " << endl;
- cerr << " Need to either increase cache size, or decrease associativity or block size" << endl;
- cerr << " (or use fully associative cache)" << endl;
- return false;
- }
-
- block_sz = B;
-
- /*dt: testing sequential access mode*/
- if(seq_access)
- {
- tag_assoc = A;
- data_assoc = 1;
- is_seq_acc = true;
- }
- else
- {
- tag_assoc = A;
- data_assoc = A;
- is_seq_acc = false;
- }
-
- if (assoc==0)
- {
- data_assoc = 1;
- }
- num_rw_ports = RWP;
- num_rd_ports = ERP;
- num_wr_ports = EWP;
- num_se_rd_ports = NSER;
- if (!(fully_assoc || pure_cam))
- num_search_ports = 0;
- nsets = C/(B*A);
-
- if (temp < 300 || temp > 400 || temp%10 != 0)
- {
- cerr << temp << " Temperature must be between 300 and 400 Kelvin and multiple of 10." << endl;
- return false;
- }
-
- if (nsets < 1)
- {
- cerr << "Less than one set..." << endl;
- return false;
- }
-
- return true;
+
+ block_sz = B;
+
+ /*dt: testing sequential access mode*/
+ if (seq_access) {
+ tag_assoc = A;
+ data_assoc = 1;
+ is_seq_acc = true;
+ } else {
+ tag_assoc = A;
+ data_assoc = A;
+ is_seq_acc = false;
+ }
+
+ if (assoc == 0) {
+ data_assoc = 1;
+ }
+ num_rw_ports = RWP;
+ num_rd_ports = ERP;
+ num_wr_ports = EWP;
+ num_se_rd_ports = NSER;
+ if (!(fully_assoc || pure_cam))
+ num_search_ports = 0;
+ nsets = C / (B * A);
+
+ if (temp < 300 || temp > 400 || temp % 10 != 0) {
+ cerr << name << ": " << temp
+ << " Temperature must be between 300 and 400 Kelvin and multiple "
+ << "of 10." << endl;
+ return false;
+ }
+
+ if (nsets < 1) {
+ cerr << name << ": Less than one set..." << endl;
+ return false;
+ }
+
+ return true;
}
-void output_data_csv(const uca_org_t & fin_res)
-{
- //TODO: the csv output should remain
- fstream file("out.csv", ios::in);
- bool print_index = file.fail();
- file.close();
-
- file.open("out.csv", ios::out|ios::app);
- if (file.fail() == true)
- {
- cerr << "File out.csv could not be opened successfully" << endl;
- }
- else
- {
- if (print_index == true)
- {
- file << "Tech node (nm), ";
- file << "Capacity (bytes), ";
- file << "Number of banks, ";
- file << "Associativity, ";
- file << "Output width (bits), ";
- file << "Access time (ns), ";
- file << "Random cycle time (ns), ";
-// file << "Multisubbank interleave cycle time (ns), ";
-
-// file << "Delay request network (ns), ";
-// file << "Delay inside mat (ns), ";
-// file << "Delay reply network (ns), ";
-// file << "Tag array access time (ns), ";
-// file << "Data array access time (ns), ";
-// file << "Refresh period (microsec), ";
-// file << "DRAM array availability (%), ";
- file << "Dynamic search energy (nJ), ";
- file << "Dynamic read energy (nJ), ";
- file << "Dynamic write energy (nJ), ";
-// file << "Tag Dynamic read energy (nJ), ";
-// file << "Data Dynamic read energy (nJ), ";
-// file << "Dynamic read power (mW), ";
- file << "Standby leakage per bank(mW), ";
-// file << "Leakage per bank with leak power management (mW), ";
-// file << "Leakage per bank with leak power management (mW), ";
-// file << "Refresh power as percentage of standby leakage, ";
- file << "Area (mm2), ";
- file << "Ndwl, ";
- file << "Ndbl, ";
- file << "Nspd, ";
- file << "Ndcm, ";
- file << "Ndsam_level_1, ";
- file << "Ndsam_level_2, ";
- file << "Data arrary area efficiency %, ";
- file << "Ntwl, ";
- file << "Ntbl, ";
- file << "Ntspd, ";
- file << "Ntcm, ";
- file << "Ntsam_level_1, ";
- file << "Ntsam_level_2, ";
- file << "Tag arrary area efficiency %, ";
+void output_data_csv(const uca_org_t & fin_res) {
+ //TODO: the csv output should remain
+ fstream file("out.csv", ios::in);
+ bool print_index = file.fail();
+ file.close();
+
+ file.open("out.csv", ios::out | ios::app);
+ if (file.fail() == true) {
+ cerr << "File out.csv could not be opened successfully" << endl;
+ } else {
+ if (print_index == true) {
+ file << "Tech node (nm), ";
+ file << "Capacity (bytes), ";
+ file << "Number of banks, ";
+ file << "Associativity, ";
+ file << "Output width (bits), ";
+ file << "Access time (ns), ";
+ file << "Random cycle time (ns), ";
+ file << "Dynamic search energy (nJ), ";
+ file << "Dynamic read energy (nJ), ";
+ file << "Dynamic write energy (nJ), ";
+ file << "Standby leakage per bank(mW), ";
+ file << "Area (mm2), ";
+ file << "Ndwl, ";
+ file << "Ndbl, ";
+ file << "Nspd, ";
+ file << "Ndcm, ";
+ file << "Ndsam_level_1, ";
+ file << "Ndsam_level_2, ";
+ file << "Data arrary area efficiency %, ";
+ file << "Ntwl, ";
+ file << "Ntbl, ";
+ file << "Ntspd, ";
+ file << "Ntcm, ";
+ file << "Ntsam_level_1, ";
+ file << "Ntsam_level_2, ";
+ file << "Tag arrary area efficiency %, ";
// file << "Resistance per unit micron (ohm-micron), ";
// file << "Capacitance per unit micron (fF per micron), ";
@@ -1428,15 +1343,15 @@ void output_data_csv(const uca_org_t & fin_res)
// file << "Delay opt (perc), ";
// file << "Repeater opt (perc), ";
// file << "Aspect ratio";
- file << endl;
- }
- file << g_ip->F_sz_nm << ", ";
- file << g_ip->cache_sz << ", ";
- file << g_ip->nbanks << ", ";
- file << g_ip->tag_assoc << ", ";
- file << g_ip->out_w << ", ";
- file << fin_res.access_time*1e+9 << ", ";
- file << fin_res.cycle_time*1e+9 << ", ";
+ file << endl;
+ }
+ file << g_ip->F_sz_nm << ", ";
+ file << g_ip->cache_sz << ", ";
+ file << g_ip->nbanks << ", ";
+ file << g_ip->tag_assoc << ", ";
+ file << g_ip->out_w << ", ";
+ file << fin_res.access_time*1e+9 << ", ";
+ file << fin_res.cycle_time*1e+9 << ", ";
// file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", ";
// file << fin_res.data_array2->delay_request_network*1e+9 << ", ";
// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", ";
@@ -1453,16 +1368,13 @@ void output_data_csv(const uca_org_t & fin_res)
// file << fin_res.data_array2->access_time*1e+9 << ", ";
// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", ";
// file << fin_res.data_array2->dram_array_availability << ", ";
- if (g_ip->fully_assoc || g_ip->pure_cam)
- {
- file << fin_res.power.searchOp.dynamic*1e+9 << ", ";
- }
- else
- {
- file << "N/A" << ", ";
- }
- file << fin_res.power.readOp.dynamic*1e+9 << ", ";
- file << fin_res.power.writeOp.dynamic*1e+9 << ", ";
+ if (g_ip->fully_assoc || g_ip->pure_cam) {
+ file << fin_res.power.searchOp.dynamic*1e+9 << ", ";
+ } else {
+ file << "N/A" << ", ";
+ }
+ file << fin_res.power.readOp.dynamic*1e+9 << ", ";
+ file << fin_res.power.writeOp.dynamic*1e+9 << ", ";
// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
// {
// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", ";
@@ -1484,27 +1396,24 @@ void output_data_csv(const uca_org_t & fin_res)
file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", ";
// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", ";
// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", ";
- file << fin_res.area*1e-6 << ", ";
-
- file << fin_res.data_array2->Ndwl << ", ";
- file << fin_res.data_array2->Ndbl << ", ";
- file << fin_res.data_array2->Nspd << ", ";
- file << fin_res.data_array2->deg_bl_muxing << ", ";
- file << fin_res.data_array2->Ndsam_lev_1 << ", ";
- file << fin_res.data_array2->Ndsam_lev_2 << ", ";
- file << fin_res.data_array2->area_efficiency << ", ";
- if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
- {
- file << fin_res.tag_array2->Ndwl << ", ";
- file << fin_res.tag_array2->Ndbl << ", ";
- file << fin_res.tag_array2->Nspd << ", ";
- file << fin_res.tag_array2->deg_bl_muxing << ", ";
- file << fin_res.tag_array2->Ndsam_lev_1 << ", ";
- file << fin_res.tag_array2->Ndsam_lev_2 << ", ";
- file << fin_res.tag_array2->area_efficiency << ", ";
- }
- else
- {
+ file << fin_res.area*1e-6 << ", ";
+
+ file << fin_res.data_array2->Ndwl << ", ";
+ file << fin_res.data_array2->Ndbl << ", ";
+ file << fin_res.data_array2->Nspd << ", ";
+ file << fin_res.data_array2->deg_bl_muxing << ", ";
+ file << fin_res.data_array2->Ndsam_lev_1 << ", ";
+ file << fin_res.data_array2->Ndsam_lev_2 << ", ";
+ file << fin_res.data_array2->area_efficiency << ", ";
+ if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) {
+ file << fin_res.tag_array2->Ndwl << ", ";
+ file << fin_res.tag_array2->Ndbl << ", ";
+ file << fin_res.tag_array2->Nspd << ", ";
+ file << fin_res.tag_array2->deg_bl_muxing << ", ";
+ file << fin_res.tag_array2->Ndsam_lev_1 << ", ";
+ file << fin_res.tag_array2->Ndsam_lev_2 << ", ";
+ file << fin_res.tag_array2->area_efficiency << ", ";
+ } else {
file << "N/A" << ", ";
file << "N/A"<< ", ";
file << "N/A" << ", ";
@@ -1535,803 +1444,552 @@ void output_data_csv(const uca_org_t & fin_res)
// file << fin_res.data_array.cas_latency * 1e9 << ", " ;
// file << fin_res.data_array.precharge_delay * 1e9 << ", " ;
// file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width;
- file<<endl;
- }
- file.close();
+ file<<endl;
+ }
+ file.close();
}
-void output_UCA(uca_org_t *fr)
-{
- // if (NUCA)
- if (0) {
- cout << "\n\n Detailed Bank Stats:\n";
- cout << " Bank Size (bytes): %d\n" <<
- (int) (g_ip->cache_sz);
- }
- else {
- if (g_ip->data_arr_ram_cell_tech_type == 3) {
- cout << "\n---------- CACTI version 6.5, Uniform Cache Access " <<
- "Logic Process Based DRAM Model ----------\n";
- }
- else if (g_ip->data_arr_ram_cell_tech_type == 4) {
- cout << "\n---------- CACTI version 6.5, Uniform" <<
- "Cache Access Commodity DRAM Model ----------\n";
+void output_UCA(uca_org_t *fr) {
+ // if (NUCA)
+ if (0) {
+ cout << "\n\n Detailed Bank Stats:\n";
+ cout << " Bank Size (bytes): %d\n" <<
+ (int) (g_ip->cache_sz);
+ } else {
+ if (g_ip->data_arr_ram_cell_tech_type == 3) {
+ cout << "\n---------- CACTI version 6.5, Uniform Cache Access " <<
+ "Logic Process Based DRAM Model ----------\n";
+ } else if (g_ip->data_arr_ram_cell_tech_type == 4) {
+ cout << "\n---------- CACTI version 6.5, Uniform" <<
+ "Cache Access Commodity DRAM Model ----------\n";
+ } else {
+ cout << "\n---------- CACTI version 6.5, Uniform Cache Access "
+ "SRAM Model ----------\n";
+ }
+ cout << "\nCache Parameters:\n";
+ cout << " Total cache size (bytes): " <<
+ (int) (g_ip->cache_sz) << endl;
}
+
+ cout << " Number of banks: " << (int) g_ip->nbanks << endl;
+ if (g_ip->fully_assoc || g_ip->pure_cam)
+ cout << " Associativity: fully associative\n";
else {
- cout << "\n---------- CACTI version 6.5, Uniform Cache Access "
- "SRAM Model ----------\n";
+ if (g_ip->tag_assoc == 1)
+ cout << " Associativity: direct mapped\n";
+ else
+ cout << " Associativity: " <<
+ g_ip->tag_assoc << endl;
}
- cout << "\nCache Parameters:\n";
- cout << " Total cache size (bytes): " <<
- (int) (g_ip->cache_sz) << endl;
- }
-
- cout << " Number of banks: " << (int) g_ip->nbanks << endl;
- if (g_ip->fully_assoc|| g_ip->pure_cam)
- cout << " Associativity: fully associative\n";
- else {
- if (g_ip->tag_assoc == 1)
- cout << " Associativity: direct mapped\n";
- else
- cout << " Associativity: " <<
- g_ip->tag_assoc << endl;
- }
-
-
- cout << " Block size (bytes): " << g_ip->line_sz << endl;
- cout << " Read/write Ports: " <<
- g_ip->num_rw_ports << endl;
- cout << " Read ports: " <<
- g_ip->num_rd_ports << endl;
- cout << " Write ports: " <<
- g_ip->num_wr_ports << endl;
- if (g_ip->fully_assoc|| g_ip->pure_cam)
- cout << " search ports: " <<
- g_ip->num_search_ports << endl;
- cout << " Technology size (nm): " <<
- g_ip->F_sz_nm << endl << endl;
-
- cout << " Access time (ns): " << fr->access_time*1e9 << endl;
- cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl;
- if (g_ip->data_arr_ram_cell_tech_type >= 4) {
- cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl;
- cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl;
- cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl;
- cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl;
- cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl;
- cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl;
- cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl;
- cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl;
- cout << " Refresh power (mW): " <<
- fr->data_array2->refresh_power*1e3 << endl;
- }
- else {
- if ((g_ip->fully_assoc|| g_ip->pure_cam))
- {
- cout << " Total dynamic associative search energy per access (nJ): " <<
- fr->power.searchOp.dynamic*1e9 << endl;
+
+
+ cout << " Block size (bytes): " << g_ip->line_sz << endl;
+ cout << " Read/write Ports: " <<
+ g_ip->num_rw_ports << endl;
+ cout << " Read ports: " <<
+ g_ip->num_rd_ports << endl;
+ cout << " Write ports: " <<
+ g_ip->num_wr_ports << endl;
+ if (g_ip->fully_assoc || g_ip->pure_cam)
+ cout << " search ports: " <<
+ g_ip->num_search_ports << endl;
+ cout << " Technology size (nm): " <<
+ g_ip->F_sz_nm << endl << endl;
+
+ cout << " Access time (ns): " << fr->access_time*1e9 << endl;
+ cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl;
+ if (g_ip->data_arr_ram_cell_tech_type >= 4) {
+ cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl;
+ cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl;
+ cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl;
+ cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl;
+ cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl;
+ cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl;
+ cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl;
+ cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl;
+ cout << " Refresh power (mW): " <<
+ fr->data_array2->refresh_power*1e3 << endl;
+ } else {
+ if ((g_ip->fully_assoc || g_ip->pure_cam)) {
+ cout << " Total dynamic associative search energy per access (nJ): " <<
+ fr->power.searchOp.dynamic*1e9 << endl;
// cout << " Total dynamic read energy per access (nJ): " <<
// fr->power.readOp.dynamic*1e9 << endl;
// cout << " Total dynamic write energy per access (nJ): " <<
// fr->power.writeOp.dynamic*1e9 << endl;
- }
+ }
// else
// {
- cout << " Total dynamic read energy per access (nJ): " <<
- fr->power.readOp.dynamic*1e9 << endl;
- cout << " Total dynamic write energy per access (nJ): " <<
- fr->power.writeOp.dynamic*1e9 << endl;
+ cout << " Total dynamic read energy per access (nJ): " <<
+ fr->power.readOp.dynamic*1e9 << endl;
+ cout << " Total dynamic write energy per access (nJ): " <<
+ fr->power.writeOp.dynamic*1e9 << endl;
// }
- cout << " Total leakage power of a bank"
- " (mW): " << fr->power.readOp.leakage*1e3 << endl;
- cout << " Total gate leakage power of a bank"
- " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl;
- }
-
- if (g_ip->data_arr_ram_cell_tech_type ==3 || g_ip->data_arr_ram_cell_tech_type ==4)
- {
- }
- cout << " Cache height x width (mm): " <<
- fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl;
-
-
- cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl;
- cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl;
- cout << " Best Nspd : " << fr->data_array2->Nspd << endl;
- cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl;
- cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl;
- cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl;
-
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl;
- cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl;
- cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl;
- cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl;
- cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl;
- cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl;
- }
-
- switch (fr->data_array2->wt) {
+ cout << " Total leakage power of a bank"
+ " (mW): " << fr->power.readOp.leakage*1e3 << endl;
+ cout << " Total gate leakage power of a bank"
+ " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl;
+ }
+
+ if (g_ip->data_arr_ram_cell_tech_type == 3 || g_ip->data_arr_ram_cell_tech_type == 4) {
+ }
+ cout << " Cache height x width (mm): " <<
+ fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl;
+
+
+ cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl;
+ cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl;
+ cout << " Best Nspd : " << fr->data_array2->Nspd << endl;
+ cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl;
+ cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl;
+ cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl;
+
+ if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
+ !g_ip->is_main_mem) {
+ cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl;
+ cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl;
+ cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl;
+ cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl;
+ cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl;
+ cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl;
+ }
+
+ switch (fr->data_array2->wt) {
case (0):
- cout << " Data array, H-tree wire type: Delay optimized global wires\n";
- break;
- case (1):
- cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n";
- break;
- case (2):
- cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n";
- break;
- case (3):
- cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n";
- break;
- case (4):
- cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n";
- break;
- case (5):
- cout << " Data array, wire type: Low swing wires\n";
- break;
- default:
- cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt <<endl;
- exit(0);
- }
-
- if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) {
- switch (fr->tag_array2->wt) {
- case (0):
- cout << " Tag array, H-tree wire type: Delay optimized global wires\n";
+ cout << " Data array, H-tree wire type: Delay optimized global wires\n";
break;
- case (1):
- cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n";
+ case (1):
+ cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n";
break;
- case (2):
- cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n";
+ case (2):
+ cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n";
break;
- case (3):
- cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n";
+ case (3):
+ cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n";
break;
- case (4):
- cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n";
+ case (4):
+ cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n";
break;
- case (5):
- cout << " Tag array, wire type: Low swing wires\n";
+ case (5):
+ cout << " Data array, wire type: Low swing wires\n";
break;
- default:
- cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt <<endl;
- exit(-1);
+ default:
+ cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt << endl;
+ exit(0);
}
- }
-
- if (g_ip->print_detail)
- {
- //if(g_ip->fully_assoc) return;
-
- /* Delay stats */
- /* data array stats */
- cout << endl << "Time Components:" << endl << endl;
-
- cout << " Data side (with Output driver) (ns): " <<
- fr->data_array2->access_time/1e-9 << endl;
- cout << "\tH-tree input delay (ns): " <<
- fr->data_array2->delay_route_to_bank * 1e9 +
- fr->data_array2->delay_input_htree * 1e9 << endl;
-
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- {
- cout << "\tDecoder + wordline delay (ns): " <<
- fr->data_array2->delay_row_predecode_driver_and_block * 1e9 +
- fr->data_array2->delay_row_decoder * 1e9 << endl;
- }
- else
- {
- cout << "\tCAM search delay (ns): " <<
- fr->data_array2->delay_matchlines * 1e9 << endl;
+ if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
+ switch (fr->tag_array2->wt) {
+ case (0):
+ cout << " Tag array, H-tree wire type: Delay optimized global wires\n";
+ break;
+ case (1):
+ cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n";
+ break;
+ case (2):
+ cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n";
+ break;
+ case (3):
+ cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n";
+ break;
+ case (4):
+ cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n";
+ break;
+ case (5):
+ cout << " Tag array, wire type: Low swing wires\n";
+ break;
+ default:
+ cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt << endl;
+ exit(-1);
+ }
}
- cout << "\tBitline delay (ns): " <<
- fr->data_array2->delay_bitlines/1e-9 << endl;
+ if (g_ip->print_detail) {
+ /* Delay stats */
+ /* data array stats */
+ cout << endl << "Time Components:" << endl << endl;
+
+ cout << " Data side (with Output driver) (ns): " <<
+ fr->data_array2->access_time / 1e-9 << endl;
+
+ cout << "\tH-tree input delay (ns): " <<
+ fr->data_array2->delay_route_to_bank * 1e9 +
+ fr->data_array2->delay_input_htree * 1e9 << endl;
+
+ if (!(g_ip->pure_cam || g_ip->fully_assoc)) {
+ cout << "\tDecoder + wordline delay (ns): " <<
+ fr->data_array2->delay_row_predecode_driver_and_block * 1e9 +
+ fr->data_array2->delay_row_decoder * 1e9 << endl;
+ } else {
+ cout << "\tCAM search delay (ns): " <<
+ fr->data_array2->delay_matchlines * 1e9 << endl;
+ }
+
+ cout << "\tBitline delay (ns): " <<
+ fr->data_array2->delay_bitlines / 1e-9 << endl;
- cout << "\tSense Amplifier delay (ns): " <<
- fr->data_array2->delay_sense_amp * 1e9 << endl;
+ cout << "\tSense Amplifier delay (ns): " <<
+ fr->data_array2->delay_sense_amp * 1e9 << endl;
- cout << "\tH-tree output delay (ns): " <<
- fr->data_array2->delay_subarray_output_driver * 1e9 +
- fr->data_array2->delay_dout_htree * 1e9 << endl;
+ cout << "\tH-tree output delay (ns): " <<
+ fr->data_array2->delay_subarray_output_driver * 1e9 +
+ fr->data_array2->delay_dout_htree * 1e9 << endl;
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- /* tag array stats */
- cout << endl << " Tag side (with Output driver) (ns): " <<
- fr->tag_array2->access_time/1e-9 << endl;
+ if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
+ !g_ip->is_main_mem) {
+ /* tag array stats */
+ cout << endl << " Tag side (with Output driver) (ns): " <<
+ fr->tag_array2->access_time / 1e-9 << endl;
- cout << "\tH-tree input delay (ns): " <<
- fr->tag_array2->delay_route_to_bank * 1e9 +
- fr->tag_array2->delay_input_htree * 1e9 << endl;
+ cout << "\tH-tree input delay (ns): " <<
+ fr->tag_array2->delay_route_to_bank * 1e9 +
+ fr->tag_array2->delay_input_htree * 1e9 << endl;
- cout << "\tDecoder + wordline delay (ns): " <<
- fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
- fr->tag_array2->delay_row_decoder * 1e9 << endl;
+ cout << "\tDecoder + wordline delay (ns): " <<
+ fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
+ fr->tag_array2->delay_row_decoder * 1e9 << endl;
- cout << "\tBitline delay (ns): " <<
- fr->tag_array2->delay_bitlines/1e-9 << endl;
+ cout << "\tBitline delay (ns): " <<
+ fr->tag_array2->delay_bitlines / 1e-9 << endl;
- cout << "\tSense Amplifier delay (ns): " <<
- fr->tag_array2->delay_sense_amp * 1e9 << endl;
+ cout << "\tSense Amplifier delay (ns): " <<
+ fr->tag_array2->delay_sense_amp * 1e9 << endl;
- cout << "\tComparator delay (ns): " <<
- fr->tag_array2->delay_comparator * 1e9 << endl;
+ cout << "\tComparator delay (ns): " <<
+ fr->tag_array2->delay_comparator * 1e9 << endl;
- cout << "\tH-tree output delay (ns): " <<
- fr->tag_array2->delay_subarray_output_driver * 1e9 +
- fr->tag_array2->delay_dout_htree * 1e9 << endl;
- }
+ cout << "\tH-tree output delay (ns): " <<
+ fr->tag_array2->delay_subarray_output_driver * 1e9 +
+ fr->tag_array2->delay_dout_htree * 1e9 << endl;
+ }
- /* Energy/Power stats */
- cout << endl << endl << "Power Components:" << endl << endl;
+ /* Energy/Power stats */
+ cout << endl << endl << "Power Components:" << endl << endl;
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- {
- cout << " Data array: Total dynamic read energy/access (nJ): " <<
- fr->data_array2->power.readOp.dynamic * 1e9 << endl;
- cout << "\tTotal leakage read/write power of a bank (mW): " <<
- fr->data_array2->power.readOp.leakage * 1e3 << endl;
+ if (!(g_ip->pure_cam || g_ip->fully_assoc)) {
+ cout << " Data array: Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal leakage read/write power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->data_array2->power_addr_input_htree.readOp.dynamic +
- fr->data_array2->power_data_output_htree.readOp.dynamic +
- fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
- cout << "\tTotal leakage power in H-tree (that includes both "
- "address and data network) ((mW)): " <<
+ cout << "\tTotal leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
(fr->data_array2->power_addr_input_htree.readOp.leakage +
fr->data_array2->power_data_output_htree.readOp.leakage +
- fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl;
+ fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3
+ << endl;
- cout << "\tTotal gate leakage power in H-tree (that includes both "
- "address and data network) ((mW)): " <<
+ cout << "\tTotal gate leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
(fr->data_array2->power_addr_input_htree.readOp.gate_leakage +
fr->data_array2->power_data_output_htree.readOp.gate_leakage +
- fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl;
-
- cout << "\tOutput Htree inside bank Energy (nJ): " <<
- fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- cout << "\tDecoder (nJ): " <<
- fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- cout << "\tWordline (nJ): " <<
- fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
-
- cout << "\tBitlines precharge and equalization circuit (nJ): " <<
- fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines (nJ): " <<
- fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
- }
+ fr->data_array2->power_routing_to_bank.readOp.gate_leakage) *
+ 1e3 << endl;
+
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+
+ cout << "\tBitlines precharge and equalization circuit (nJ): " <<
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+ }
- else if (g_ip->pure_cam)
- {
-
- cout << " CAM array:"<<endl;
- cout << " Total dynamic associative search energy/access (nJ): " <<
- fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "match key and data transfer) (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- fr->data_array2->power_htree_out_search.searchOp.dynamic +
- fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
- cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
- cout << "\tSearchlines (nJ): " <<
- fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
- fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
- cout << "\tMatchlines (nJ): " <<
- fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
- fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
-
-
- cout <<endl<< " Total dynamic read energy/access (nJ): " <<
- fr->data_array2->power.readOp.dynamic * 1e9 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->data_array2->power_addr_input_htree.readOp.dynamic +
- fr->data_array2->power_data_output_htree.readOp.dynamic +
- fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
- cout << "\tOutput Htree inside bank Energy (nJ): " <<
- fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- cout << "\tDecoder (nJ): " <<
- fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- cout << "\tWordline (nJ): " <<
- fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines (nJ): " <<
- fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
- fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
-
- cout << endl <<" Total leakage power of a bank (mW): " <<
- fr->data_array2->power.readOp.leakage * 1e3 << endl;
+ else if (g_ip->pure_cam) {
+
+ cout << " CAM array:" << endl;
+ cout << " Total dynamic associative search energy/access (nJ): " <<
+ fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "match key and data transfer) (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic +
+ fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tSearchlines (nJ): " <<
+ fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tMatchlines (nJ): " <<
+ fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
+
+
+ cout << endl << " Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+
+ cout << endl << " Total leakage power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
+ } else {
+ cout << " Fully associative array:" << endl;
+ cout << " Total dynamic associative search energy/access (nJ): " <<
+ fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "match key and data transfer) (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic +
+ fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tSearchlines (nJ): " <<
+ fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tMatchlines (nJ): " <<
+ fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tData portion wordline (nJ): " <<
+ fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl;
+ cout << "\tData Bitlines (nJ): " <<
+ fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
+
+
+ cout << endl << " Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+
+ cout << endl << " Total leakage power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
}
- else
- {
- cout << " Fully associative array:"<<endl;
- cout << " Total dynamic associative search energy/access (nJ): " <<
- fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "match key and data transfer) (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- fr->data_array2->power_htree_out_search.searchOp.dynamic +
- fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
- cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
- cout << "\tSearchlines (nJ): " <<
- fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
- fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
- cout << "\tMatchlines (nJ): " <<
- fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
- fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
- cout << "\tData portion wordline (nJ): " <<
- fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl;
- cout << "\tData Bitlines (nJ): " <<
- fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
- fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
-
-
- cout <<endl<< " Total dynamic read energy/access (nJ): " <<
- fr->data_array2->power.readOp.dynamic * 1e9 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->data_array2->power_addr_input_htree.readOp.dynamic +
- fr->data_array2->power_data_output_htree.readOp.dynamic +
- fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
- cout << "\tOutput Htree inside bank Energy (nJ): " <<
- fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- cout << "\tDecoder (nJ): " <<
- fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- cout << "\tWordline (nJ): " <<
- fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines (nJ): " <<
- fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
- fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
-
- cout << endl <<" Total leakage power of a bank (mW): " <<
- fr->data_array2->power.readOp.leakage * 1e3 << endl;
- }
-
-
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << endl << " Tag array: Total dynamic read energy/access (nJ): " <<
- fr->tag_array2->power.readOp.dynamic * 1e9 << endl;
- cout << "\tTotal leakage read/write power of a bank (mW): " <<
- fr->tag_array2->power.readOp.leakage * 1e3 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->tag_array2->power_addr_input_htree.readOp.dynamic +
- fr->tag_array2->power_data_output_htree.readOp.dynamic +
- fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
-
- cout << "\tTotal leakage power in H-tree (that includes both "
- "address and data network) ((mW)): " <<
+
+
+ if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
+ !g_ip->is_main_mem) {
+ cout << endl << " Tag array: Total dynamic read energy/access (nJ): " <<
+ fr->tag_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal leakage read/write power of a bank (mW): " <<
+ fr->tag_array2->power.readOp.leakage * 1e3 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->tag_array2->power_addr_input_htree.readOp.dynamic +
+ fr->tag_array2->power_data_output_htree.readOp.dynamic +
+ fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+
+ cout << "\tTotal leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
(fr->tag_array2->power_addr_input_htree.readOp.leakage +
fr->tag_array2->power_data_output_htree.readOp.leakage +
- fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl;
+ fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3
+ << endl;
- cout << "\tTotal gate leakage power in H-tree (that includes both "
- "address and data network) ((mW)): " <<
+ cout << "\tTotal gate leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
(fr->tag_array2->power_addr_input_htree.readOp.gate_leakage +
fr->tag_array2->power_data_output_htree.readOp.gate_leakage +
- fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl;
-
- cout << "\tOutput Htree inside a bank Energy (nJ): " <<
- fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- cout << "\tDecoder (nJ): " <<
- fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- cout << "\tWordline (nJ): " <<
- fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines precharge and equalization circuit (nJ): " <<
- fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines (nJ): " <<
- fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
- }
+ fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) *
+ 1e3 << endl;
+
+ cout << "\tOutput Htree inside a bank Energy (nJ): " <<
+ fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines precharge and equalization circuit (nJ): " <<
+ fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+ }
- cout << endl << endl << "Area Components:" << endl << endl;
- /* Data array area stats */
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
- else if (g_ip->pure_cam)
- cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
- else
- cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
- cout << "\tHeight (mm): " <<
- fr->data_array2->all_banks_height*1e-3 << endl;
- cout << "\tWidth (mm): " <<
- fr->data_array2->all_banks_width*1e-3 << endl;
- if (g_ip->print_detail) {
- cout << "\tArea efficiency (Memory cell area/Total area) - " <<
- fr->data_array2->area_efficiency << " %" << endl;
- cout << "\t\tMAT Height (mm): " <<
- fr->data_array2->mat_height*1e-3 << endl;
- cout << "\t\tMAT Length (mm): " <<
- fr->data_array2->mat_length*1e-3 << endl;
- cout << "\t\tSubarray Height (mm): " <<
- fr->data_array2->subarray_height*1e-3 << endl;
- cout << "\t\tSubarray Length (mm): " <<
- fr->data_array2->subarray_length*1e-3 << endl;
- }
+ cout << endl << endl << "Area Components:" << endl << endl;
+ /* Data array area stats */
+ if (!(g_ip->pure_cam || g_ip->fully_assoc))
+ cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ else if (g_ip->pure_cam)
+ cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ else
+ cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ cout << "\tHeight (mm): " <<
+ fr->data_array2->all_banks_height*1e-3 << endl;
+ cout << "\tWidth (mm): " <<
+ fr->data_array2->all_banks_width*1e-3 << endl;
+ if (g_ip->print_detail) {
+ cout << "\tArea efficiency (Memory cell area/Total area) - " <<
+ fr->data_array2->area_efficiency << " %" << endl;
+ cout << "\t\tMAT Height (mm): " <<
+ fr->data_array2->mat_height*1e-3 << endl;
+ cout << "\t\tMAT Length (mm): " <<
+ fr->data_array2->mat_length*1e-3 << endl;
+ cout << "\t\tSubarray Height (mm): " <<
+ fr->data_array2->subarray_height*1e-3 << endl;
+ cout << "\t\tSubarray Length (mm): " <<
+ fr->data_array2->subarray_length*1e-3 << endl;
+ }
- /* Tag array area stats */
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl;
- cout << "\tHeight (mm): " <<
- fr->tag_array2->all_banks_height*1e-3 << endl;
- cout << "\tWidth (mm): " <<
- fr->tag_array2->all_banks_width*1e-3 << endl;
- if (g_ip->print_detail)
- {
- cout << "\tArea efficiency (Memory cell area/Total area) - " <<
- fr->tag_array2->area_efficiency << " %" << endl;
- cout << "\t\tMAT Height (mm): " <<
- fr->tag_array2->mat_height*1e-3 << endl;
- cout << "\t\tMAT Length (mm): " <<
- fr->tag_array2->mat_length*1e-3 << endl;
- cout << "\t\tSubarray Height (mm): " <<
- fr->tag_array2->subarray_height*1e-3 << endl;
- cout << "\t\tSubarray Length (mm): " <<
- fr->tag_array2->subarray_length*1e-3 << endl;
- }
+ /* Tag array area stats */
+ if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
+ !g_ip->is_main_mem) {
+ cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl;
+ cout << "\tHeight (mm): " <<
+ fr->tag_array2->all_banks_height*1e-3 << endl;
+ cout << "\tWidth (mm): " <<
+ fr->tag_array2->all_banks_width*1e-3 << endl;
+ if (g_ip->print_detail) {
+ cout << "\tArea efficiency (Memory cell area/Total area) - " <<
+ fr->tag_array2->area_efficiency << " %" << endl;
+ cout << "\t\tMAT Height (mm): " <<
+ fr->tag_array2->mat_height*1e-3 << endl;
+ cout << "\t\tMAT Length (mm): " <<
+ fr->tag_array2->mat_length*1e-3 << endl;
+ cout << "\t\tSubarray Height (mm): " <<
+ fr->tag_array2->subarray_height*1e-3 << endl;
+ cout << "\t\tSubarray Length (mm): " <<
+ fr->tag_array2->subarray_length*1e-3 << endl;
+ }
+ }
+ Wire wpr;
+ wpr.print_wire();
}
- Wire wpr;
- wpr.print_wire();
-
- //cout << "FO4 = " << g_tp.FO4 << endl;
- }
}
//McPAT's plain interface, please keep !!!
-uca_org_t cacti_interface(InputParameter * const local_interface)
-{
-// g_ip = new InputParameter();
- //g_ip->add_ecc_b_ = true;
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip = local_interface;
-
-
-// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
-// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
-// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
-// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-//
-// g_ip->ic_proj_type = interconnect_projection_type_in;
-// g_ip->wire_is_mat_type = wire_inside_mat_type_in;
-// g_ip->wire_os_mat_type = wire_outside_mat_type_in;
-// g_ip->burst_len = BURST_LENGTH_in;
-// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
-// g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-//
-// g_ip->cache_sz = cache_size;
-// g_ip->line_sz = line_size;
-// g_ip->assoc = associativity;
-// g_ip->nbanks = banks;
-// g_ip->out_w = output_width;
-// g_ip->specific_tag = specific_tag;
-// if (tag_width == 0) {
-// g_ip->tag_w = 42;
-// }
-// else {
-// g_ip->tag_w = tag_width;
-// }
-//
-// g_ip->access_mode = access_mode;
-// g_ip->delay_wt = obj_func_delay;
-// g_ip->dynamic_power_wt = obj_func_dynamic_power;
-// g_ip->leakage_power_wt = obj_func_leakage_power;
-// g_ip->area_wt = obj_func_area;
-// g_ip->cycle_time_wt = obj_func_cycle_time;
-// g_ip->delay_dev = dev_func_delay;
-// g_ip->dynamic_power_dev = dev_func_dynamic_power;
-// g_ip->leakage_power_dev = dev_func_leakage_power;
-// g_ip->area_dev = dev_func_area;
-// g_ip->cycle_time_dev = dev_func_cycle_time;
-// g_ip->temp = temp;
-//
-// g_ip->F_sz_nm = tech_node;
-// g_ip->F_sz_um = tech_node / 1000;
-// g_ip->is_main_mem = (main_mem != 0) ? true : false;
-// g_ip->is_cache = (cache ==1) ? true : false;
-// g_ip->pure_ram = (cache ==0) ? true : false;
-// g_ip->pure_cam = (cache ==2) ? true : false;
-// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
-// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-//
-// g_ip->num_rw_ports = rw_ports;
-// g_ip->num_rd_ports = excl_read_ports;
-// g_ip->num_wr_ports = excl_write_ports;
-// g_ip->num_se_rd_ports = single_ended_read_ports;
-// g_ip->num_search_ports = search_ports;
-//
-// g_ip->print_detail = 1;
-// g_ip->nuca = 0;
-// g_ip->is_cache=true;
-//
-// if (force_wiretype == 0)
-// {
-// g_ip->wt = Global;
-// g_ip->force_wiretype = false;
-// }
-// else
-// { g_ip->force_wiretype = true;
-// if (wiretype==10) {
-// g_ip->wt = Global_10;
-// }
-// if (wiretype==20) {
-// g_ip->wt = Global_20;
-// }
-// if (wiretype==30) {
-// g_ip->wt = Global_30;
-// }
-// if (wiretype==5) {
-// g_ip->wt = Global_5;
-// }
-// if (wiretype==0) {
-// g_ip->wt = Low_swing;
-// }
-// }
-// //g_ip->wt = Global_5;
-// if (force_config == 0)
-// {
-// g_ip->force_cache_config = false;
-// }
-// else
-// {
-// g_ip->force_cache_config = true;
-// g_ip->ndbl=ndbl;
-// g_ip->ndwl=ndwl;
-// g_ip->nspd=nspd;
-// g_ip->ndcm=ndcm;
-// g_ip->ndsam1=ndsam1;
-// g_ip->ndsam2=ndsam2;
-//
-//
-// }
-//
-// if (ecc==0){
-// g_ip->add_ecc_b_=false;
-// }
-// else
-// {
-// g_ip->add_ecc_b_=true;
-// }
-
+// Runs a complete CACTI solve for the given input parameters.
+// Installs local_interface as the global g_ip, validates it through
+// error_checking(), initializes the technology parameters and wires,
+// then hands fin_res to solve() and returns it by value.
+// If error_checking() reports failure the process is terminated with a
+// non-zero exit status so that callers can detect the rejected input.
+uca_org_t cacti_interface(InputParameter * const local_interface) {
+ uca_org_t fin_res;
+ fin_res.valid = false;
- g_ip->error_checking();
-
-
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
+ g_ip = local_interface;
- solve(&fin_res);
+ if (!g_ip->error_checking()) {
+ // Invalid configuration: exit with a failure status, not 0.
+ exit(1);
+ }
-// g_ip->display_ip();
-// output_UCA(&fin_res);
-// output_data_csv(fin_res);
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
- // delete (g_ip);
+ solve(&fin_res);
- return fin_res;
+ return fin_res;
}
//McPAT's plain interface, please keep !!!
-uca_org_t init_interface(InputParameter* const local_interface)
-{
- // g_ip = new InputParameter();
- //g_ip->add_ecc_b_ = true;
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip = local_interface;
+// Initializes CACTI state for the given input parameters without solving.
+// Installs local_interface as the global g_ip, validates it through
+// error_checking(name), then initializes the technology parameters and
+// wires. solve() is not called here, so the returned fin_res still has
+// valid == false.
+// If error_checking() reports failure the process is terminated with a
+// non-zero exit status so that callers can detect the rejected input.
+uca_org_t init_interface(InputParameter* const local_interface,
+ const string &name) {
+ uca_org_t fin_res;
+ fin_res.valid = false;
+ g_ip = local_interface;
-// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
-// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
-// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
-// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-//
-// g_ip->ic_proj_type = interconnect_projection_type_in;
-// g_ip->wire_is_mat_type = wire_inside_mat_type_in;
-// g_ip->wire_os_mat_type = wire_outside_mat_type_in;
-// g_ip->burst_len = BURST_LENGTH_in;
-// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
-// g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-//
-// g_ip->cache_sz = cache_size;
-// g_ip->line_sz = line_size;
-// g_ip->assoc = associativity;
-// g_ip->nbanks = banks;
-// g_ip->out_w = output_width;
-// g_ip->specific_tag = specific_tag;
-// if (tag_width == 0) {
-// g_ip->tag_w = 42;
-// }
-// else {
-// g_ip->tag_w = tag_width;
-// }
-//
-// g_ip->access_mode = access_mode;
-// g_ip->delay_wt = obj_func_delay;
-// g_ip->dynamic_power_wt = obj_func_dynamic_power;
-// g_ip->leakage_power_wt = obj_func_leakage_power;
-// g_ip->area_wt = obj_func_area;
-// g_ip->cycle_time_wt = obj_func_cycle_time;
-// g_ip->delay_dev = dev_func_delay;
-// g_ip->dynamic_power_dev = dev_func_dynamic_power;
-// g_ip->leakage_power_dev = dev_func_leakage_power;
-// g_ip->area_dev = dev_func_area;
-// g_ip->cycle_time_dev = dev_func_cycle_time;
-// g_ip->temp = temp;
-//
-// g_ip->F_sz_nm = tech_node;
-// g_ip->F_sz_um = tech_node / 1000;
-// g_ip->is_main_mem = (main_mem != 0) ? true : false;
-// g_ip->is_cache = (cache ==1) ? true : false;
-// g_ip->pure_ram = (cache ==0) ? true : false;
-// g_ip->pure_cam = (cache ==2) ? true : false;
-// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
-// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-//
-// g_ip->num_rw_ports = rw_ports;
-// g_ip->num_rd_ports = excl_read_ports;
-// g_ip->num_wr_ports = excl_write_ports;
-// g_ip->num_se_rd_ports = single_ended_read_ports;
-// g_ip->num_search_ports = search_ports;
-//
-// g_ip->print_detail = 1;
-// g_ip->nuca = 0;
-//
-// if (force_wiretype == 0)
-// {
-// g_ip->wt = Global;
-// g_ip->force_wiretype = false;
-// }
-// else
-// { g_ip->force_wiretype = true;
-// if (wiretype==10) {
-// g_ip->wt = Global_10;
-// }
-// if (wiretype==20) {
-// g_ip->wt = Global_20;
-// }
-// if (wiretype==30) {
-// g_ip->wt = Global_30;
-// }
-// if (wiretype==5) {
-// g_ip->wt = Global_5;
-// }
-// if (wiretype==0) {
-// g_ip->wt = Low_swing;
-// }
-// }
-// //g_ip->wt = Global_5;
-// if (force_config == 0)
-// {
-// g_ip->force_cache_config = false;
-// }
-// else
-// {
-// g_ip->force_cache_config = true;
-// g_ip->ndbl=ndbl;
-// g_ip->ndwl=ndwl;
-// g_ip->nspd=nspd;
-// g_ip->ndcm=ndcm;
-// g_ip->ndsam1=ndsam1;
-// g_ip->ndsam2=ndsam2;
-//
-//
-// }
-//
-// if (ecc==0){
-// g_ip->add_ecc_b_=false;
-// }
-// else
-// {
-// g_ip->add_ecc_b_=true;
-// }
-
-
- g_ip->error_checking();
-
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
- //solve(&fin_res);
- //g_ip->display_ip();
-
- //solve(&fin_res);
- //output_UCA(&fin_res);
- //output_data_csv(fin_res);
- // delete (g_ip);
+ if (!g_ip->error_checking(name)) {
+ // Invalid configuration: exit with a failure status, not 0.
+ exit(1);
+ }
- return fin_res;
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+ return fin_res;
}
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res)
diff --git a/ext/mcpat/cacti/mat.cc b/ext/mcpat/cacti/mat.cc
index ef98107c7..447996053 100755..100644
--- a/ext/mcpat/cacti/mat.cc
+++ b/ext/mcpat/cacti/mat.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,371 +37,369 @@
#include "mat.h"
Mat::Mat(const DynamicParameter & dyn_p)
- :dp(dyn_p),
- power_subarray_out_drv(),
- delay_fa_tag(0), delay_cam(0),
- delay_before_decoder(0), delay_bitline(0),
- delay_wl_reset(0), delay_bl_restore(0),
- delay_searchline(0), delay_matchchline(0),
- delay_cam_sl_restore(0), delay_cam_ml_reset(0),
- delay_fa_ram_wl(0),delay_hit_miss_reset(0),
- delay_hit_miss(0),
- subarray(dp, dp.fully_assoc),
- power_bitline(), per_bitline_read_energy(0),
- deg_bl_muxing(dp.deg_bl_muxing),
- num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
- delay_writeback(0),
- cell(subarray.cell), cam_cell(subarray.cam_cell),
- is_dram(dyn_p.is_dram),
- pure_cam(dyn_p.pure_cam),
- num_mats(dp.num_mats),
- power_sa(), delay_sa(0),
- leak_power_sense_amps_closed_page_state(0),
- leak_power_sense_amps_open_page_state(0),
- delay_subarray_out_drv(0),
- delay_comparator(0), power_comparator(),
- num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
- num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
- num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
-{
- assert(num_subarrays_per_mat <= 4);
- assert(num_subarrays_per_row <= 2);
- is_fa = (dp.fully_assoc) ? true : false;
- camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
-
- if (is_fa || pure_cam)
- num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
-
- if (dp.use_inp_params == 1) {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
-
- }
-
- double number_sa_subarray;
-
- if (!is_fa && !pure_cam)
- {
- number_sa_subarray = subarray.num_cols / deg_bl_muxing;
- }
- else if (is_fa && !pure_cam)
- {
- number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
- }
-
- else
- {
- number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
- }
-
- int num_dec_signals = subarray.num_rows;
- double C_ld_bit_mux_dec_out = 0;
- double C_ld_sa_mux_lev_1_dec_out = 0;
- double C_ld_sa_mux_lev_2_dec_out = 0;
- double R_wire_wl_drv_out;
-
- if (!is_fa && !pure_cam)
- {
- R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+ : dp(dyn_p),
+ power_subarray_out_drv(),
+ delay_fa_tag(0), delay_cam(0),
+ delay_before_decoder(0), delay_bitline(0),
+ delay_wl_reset(0), delay_bl_restore(0),
+ delay_searchline(0), delay_matchchline(0),
+ delay_cam_sl_restore(0), delay_cam_ml_reset(0),
+ delay_fa_ram_wl(0), delay_hit_miss_reset(0),
+ delay_hit_miss(0),
+ subarray(dp, dp.fully_assoc),
+ power_bitline(), per_bitline_read_energy(0),
+ deg_bl_muxing(dp.deg_bl_muxing),
+ num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
+ delay_writeback(0),
+ cell(subarray.cell), cam_cell(subarray.cam_cell),
+ is_dram(dyn_p.is_dram),
+ pure_cam(dyn_p.pure_cam),
+ num_mats(dp.num_mats),
+ power_sa(), delay_sa(0),
+ leak_power_sense_amps_closed_page_state(0),
+ leak_power_sense_amps_open_page_state(0),
+ delay_subarray_out_drv(0),
+ delay_comparator(0), power_comparator(),
+ num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
+ num_subarrays_per_mat(dp.num_subarrays / dp.num_mats),
+ num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) {
+ assert(num_subarrays_per_mat <= 4);
+ assert(num_subarrays_per_row <= 2);
+ is_fa = (dp.fully_assoc) ? true : false;
+ camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
+
+ if (is_fa || pure_cam) {
+ num_subarrays_per_row = num_subarrays_per_mat > 2 ?
+ num_subarrays_per_mat / 2 : num_subarrays_per_mat;
}
- else if (is_fa && !pure_cam)
- {
- R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+
+ if (dp.use_inp_params == 1) {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ } else {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+
+ }
+
+ double number_sa_subarray;
+
+ if (!is_fa && !pure_cam) {
+ number_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ } else if (is_fa && !pure_cam) {
+ number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
+ }
+
+ else {
+ number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
}
- else
- {
+
+ int num_dec_signals = subarray.num_rows;
+ double C_ld_bit_mux_dec_out = 0;
+ double C_ld_sa_mux_lev_1_dec_out = 0;
+ double C_ld_sa_mux_lev_2_dec_out = 0;
+ double R_wire_wl_drv_out;
+
+ if (!is_fa && !pure_cam) {
+ R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+ } else if (is_fa && !pure_cam) {
+ R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+ } else {
R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
}
- double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
- double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
-
- if (deg_bl_muxing > 1)
- {
- C_ld_bit_mux_dec_out =
- (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
-
- if (dp.Ndsam_lev_1 > 1)
- {
- C_ld_sa_mux_lev_1_dec_out =
- (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
- if (dp.Ndsam_lev_2 > 1)
- {
- C_ld_sa_mux_lev_2_dec_out =
- (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
-
- if (num_subarrays_per_row >= 2)
- {
- // wire heads for both right and left side of a mat, so half the resistance
- R_wire_bit_mux_dec_out /= 2.0;
- R_wire_sa_mux_dec_out /= 2.0;
- }
-
-
- row_dec = new Decoder(
- num_dec_signals,
- false,
- subarray.C_wl,
- R_wire_wl_drv_out,
- false/*is_fa*/,
- is_dram,
- true,
- camFlag? cam_cell:cell);
+ double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
+ double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
+
+ if (deg_bl_muxing > 1) {
+ C_ld_bit_mux_dec_out =
+ (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) *
+ gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+
+ if (dp.Ndsam_lev_1 > 1) {
+ C_ld_sa_mux_lev_1_dec_out =
+ (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) *
+ gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+ if (dp.Ndsam_lev_2 > 1) {
+ C_ld_sa_mux_lev_2_dec_out =
+ (num_subarrays_per_mat * number_sa_subarray /
+ (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) *
+ gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+
+ if (num_subarrays_per_row >= 2) {
+ // wire heads for both right and left side of a mat, so half the resistance
+ R_wire_bit_mux_dec_out /= 2.0;
+ R_wire_sa_mux_dec_out /= 2.0;
+ }
+
+
+ row_dec = new Decoder(
+ num_dec_signals,
+ false,
+ subarray.C_wl,
+ R_wire_wl_drv_out,
+ false/*is_fa*/,
+ is_dram,
+ true,
+ camFlag ? cam_cell : cell);
// if (is_fa && (!dp.is_tag))
// {
// row_dec->exist = true;
// }
- bit_mux_dec = new Decoder(
- deg_bl_muxing,// This number is 1 for FA or CAM
- false,
- C_ld_bit_mux_dec_out,
- R_wire_bit_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
- sa_mux_lev_1_dec = new Decoder(
- dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
- dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
- C_ld_sa_mux_lev_1_dec_out,
- R_wire_sa_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
- sa_mux_lev_2_dec = new Decoder(
- dp.Ndsam_lev_2, // This number is 1 for FA or CAM
- false,
- C_ld_sa_mux_lev_2_dec_out,
- R_wire_sa_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
-
- double C_wire_predec_blk_out;
- double R_wire_predec_blk_out;
-
- if (!is_fa && !pure_cam)
- {
+ bit_mux_dec = new Decoder(
+ deg_bl_muxing,// This number is 1 for FA or CAM
+ false,
+ C_ld_bit_mux_dec_out,
+ R_wire_bit_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+ sa_mux_lev_1_dec = new Decoder(
+ dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
+ dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
+ C_ld_sa_mux_lev_1_dec_out,
+ R_wire_sa_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+ sa_mux_lev_2_dec = new Decoder(
+ dp.Ndsam_lev_2, // This number is 1 for FA or CAM
+ false,
+ C_ld_sa_mux_lev_2_dec_out,
+ R_wire_sa_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+
+ double C_wire_predec_blk_out;
+ double R_wire_predec_blk_out;
+
+ if (!is_fa && !pure_cam) {
+
+ C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
+ R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
+
+ } else { //for pre-decode block's load is same for both FA and CAM
+ C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
+ R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
+ }
- C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
- R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
- }
- else //for pre-decode block's load is same for both FA and CAM
- {
- C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
- R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
- }
-
-
- if (is_fa||pure_cam)
- num_dec_signals += _log2(num_subarrays_per_mat);
-
- PredecBlk * r_predec_blk1 = new PredecBlk(
- num_dec_signals,
- row_dec,
- C_wire_predec_blk_out,
- R_wire_predec_blk_out,
- num_subarrays_per_mat,
- is_dram,
- true);
- PredecBlk * r_predec_blk2 = new PredecBlk(
- num_dec_signals,
- row_dec,
- C_wire_predec_blk_out,
- R_wire_predec_blk_out,
- num_subarrays_per_mat,
- is_dram,
- false);
- PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
- PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
- PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
- PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
- PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
- PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
- dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
- dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
-
- PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
- PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
- PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
- PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
- PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
- PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
- PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
- PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
- way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
- dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
-
- r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
- b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
- sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
- sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
-
- subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
-
- double driver_c_gate_load;
- double driver_c_wire_load;
- double driver_r_wire_load;
-
- if (is_fa || pure_cam)
-
- { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
- driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
- cam_bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- if (!pure_cam)
- {
- //This is only used for fully asso not pure CAM
- driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
- bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
- }
- }
-
- else
- {
- driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
- bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
- }
- double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
- double w_row_decoder = area_row_decoder / subarray.area.get_h();
-
- double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
- compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
-
- double h_subarray_out_drv = subarray_out_wire->area.get_area() *
- (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
-
-
- h_subarray_out_drv *= (RWP + ERP + SCHP);
-
- double h_comparators = 0.0;
- double w_row_predecode_output_wires = 0.0;
- double h_bit_mux_dec_out_wires = 0.0;
- double h_senseamp_mux_dec_out_wires = 0.0;
-
- if ((!is_fa)&&(dp.is_tag))
- {
- //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
- h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
- h_comparators *= (RWP + ERP);
- }
+ if (is_fa || pure_cam)
+ num_dec_signals += _log2(num_subarrays_per_mat);
+
+ PredecBlk * r_predec_blk1 = new PredecBlk(
+ num_dec_signals,
+ row_dec,
+ C_wire_predec_blk_out,
+ R_wire_predec_blk_out,
+ num_subarrays_per_mat,
+ is_dram,
+ true);
+ PredecBlk * r_predec_blk2 = new PredecBlk(
+ num_dec_signals,
+ row_dec,
+ C_wire_predec_blk_out,
+ R_wire_predec_blk_out,
+ num_subarrays_per_mat,
+ is_dram,
+ false);
+ PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
+ PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
+ PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
+ dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
+ dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
+
+ PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
+ PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
+ PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
+ PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
+ PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
+ PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
+ PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
+ PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
+ way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
+ dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
+
+ r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
+ b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
+ sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
+ sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
+
+ subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
+
+ double driver_c_gate_load;
+ double driver_c_wire_load;
+ double driver_r_wire_load;
+
+ if (is_fa || pure_cam)
+
+ { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
+ driver_c_gate_load = (subarray.num_cols_fa_cam ) *
+ gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
+ is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
+ g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
+ g_tp.wire_outside_mat.R_per_um;
+ cam_bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ if (!pure_cam) {
+ //This is only used for fully asso not pure CAM
+ driver_c_gate_load = (subarray.num_cols_fa_ram ) *
+ gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
+ is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_ram * cell.w *
+ g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_ram * cell.w *
+ g_tp.wire_outside_mat.R_per_um;
+ bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+ }
+ }
+
+ else {
+ driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
+ bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+ }
+ double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
+ double w_row_decoder = area_row_decoder / subarray.area.get_h();
+
+ double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
+ compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
+
+ double h_subarray_out_drv = subarray_out_wire->area.get_area() *
+ (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
+
+
+ h_subarray_out_drv *= (RWP + ERP + SCHP);
+
+ double h_comparators = 0.0;
+ double w_row_predecode_output_wires = 0.0;
+ double h_bit_mux_dec_out_wires = 0.0;
+ double h_senseamp_mux_dec_out_wires = 0.0;
+
+ if ((!is_fa) && (dp.is_tag)) {
+ //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
+ h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
+ h_comparators *= (RWP + ERP);
+ }
int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
-
-
- double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
- (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
- h_subarray_out_drv + h_comparators);
-
- double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
-
- if (deg_bl_muxing > 1)
- {
- h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
- if (dp.Ndsam_lev_1 > 1)
- {
- h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
- if (dp.Ndsam_lev_2 > 1)
- {
- h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
-
- double h_addr_datain_wires;
- if (!g_ip->ver_htree_wires_over_array)
- {
- h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
- (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+
+ double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
+ (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
+ h_subarray_out_drv + h_comparators);
+
+ double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
+
+ if (deg_bl_muxing > 1) {
+ h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+ if (dp.Ndsam_lev_1 > 1) {
+ h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+ if (dp.Ndsam_lev_2 > 1) {
+ h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+
+ double h_addr_datain_wires;
+ if (!g_ip->ver_htree_wires_over_array) {
+ h_addr_datain_wires = (dp.number_addr_bits_mat +
+ dp.number_way_select_signals_mat +
+ (dp.num_di_b_mat + dp.num_do_b_mat) /
+ num_subarrays_per_row) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+ if (is_fa || pure_cam) {
+ h_addr_datain_wires =
+ (dp.number_addr_bits_mat +
+ dp.number_way_select_signals_mat + //TODO: revisit
+ (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
+ (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row *
+ g_tp.wire_inside_mat.pitch * SCHP;
+ }
+ //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
+ //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
+ h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
+ h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
+ h_addr_datain_wires +
+ h_bit_mux_dec_out_wires +
+ h_senseamp_mux_dec_out_wires;
- if (is_fa || pure_cam)
- {
- h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit
- (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
- (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
}
- //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
- //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
- h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
- h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
- h_addr_datain_wires +
- h_bit_mux_dec_out_wires +
- h_senseamp_mux_dec_out_wires;
-
- }
-
- // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
- double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
- b_mux_predec_blk_drv1->area.get_area() +
- sa_mux_lev_1_predec_blk_drv1->area.get_area() +
- sa_mux_lev_2_predec_blk_drv1->area.get_area() +
- way_sel_drv1->area.get_area() +
- r_predec_blk_drv2->area.get_area() +
- b_mux_predec_blk_drv2->area.get_area() +
- sa_mux_lev_1_predec_blk_drv2->area.get_area() +
- sa_mux_lev_2_predec_blk_drv2->area.get_area() +
- r_predec_blk1->area.get_area() +
- b_mux_predec_blk1->area.get_area() +
- sa_mux_lev_1_predec_blk1->area.get_area() +
- sa_mux_lev_2_predec_blk1->area.get_area() +
- r_predec_blk2->area.get_area() +
- b_mux_predec_blk2->area.get_area() +
- sa_mux_lev_1_predec_blk2->area.get_area() +
- sa_mux_lev_2_predec_blk2->area.get_area() +
- bit_mux_dec->area.get_area() +
- sa_mux_lev_1_dec->area.get_area() +
- sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
-
- double area_efficiency_mat;
+
+ // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
+ double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
+ b_mux_predec_blk_drv1->area.get_area() +
+ sa_mux_lev_1_predec_blk_drv1->area.get_area() +
+ sa_mux_lev_2_predec_blk_drv1->area.get_area() +
+ way_sel_drv1->area.get_area() +
+ r_predec_blk_drv2->area.get_area() +
+ b_mux_predec_blk_drv2->area.get_area() +
+ sa_mux_lev_1_predec_blk_drv2->area.get_area() +
+ sa_mux_lev_2_predec_blk_drv2->area.get_area() +
+ r_predec_blk1->area.get_area() +
+ b_mux_predec_blk1->area.get_area() +
+ sa_mux_lev_1_predec_blk1->area.get_area() +
+ sa_mux_lev_2_predec_blk1->area.get_area() +
+ r_predec_blk2->area.get_area() +
+ b_mux_predec_blk2->area.get_area() +
+ sa_mux_lev_1_predec_blk2->area.get_area() +
+ sa_mux_lev_2_predec_blk2->area.get_area() +
+ bit_mux_dec->area.get_area() +
+ sa_mux_lev_1_dec->area.get_area() +
+ sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
+
+ double area_efficiency_mat;
// if (!is_fa)
// {
- assert(num_subarrays_per_mat/num_subarrays_per_row>0);
- area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
+ assert(num_subarrays_per_mat / num_subarrays_per_row > 0);
+ area.h = (num_subarrays_per_mat / num_subarrays_per_row) *
+ subarray.area.h + h_non_cell_area;
area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
- area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
- area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
+ area.w = (area.h * area.w + area_mat_center_circuitry) / area.h;
+ area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat *
+ 100.0 / area.get_area();
// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
// cout<<"h_comparators"<<h_comparators<<endl;
@@ -413,8 +412,8 @@ Mat::Mat(const DynamicParameter & dyn_p)
// cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
// cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
- assert(area.h>0);
- assert(area.w>0);
+ assert(area.h > 0);
+ assert(area.w > 0);
// }
// else
// {
@@ -423,583 +422,609 @@ Mat::Mat(const DynamicParameter & dyn_p)
// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
// }
- }
-
-
-
-Mat::~Mat()
-{
- delete row_dec;
- delete bit_mux_dec;
- delete sa_mux_lev_1_dec;
- delete sa_mux_lev_2_dec;
-
- delete r_predec->blk1;
- delete r_predec->blk2;
- delete b_mux_predec->blk1;
- delete b_mux_predec->blk2;
- delete sa_mux_lev_1_predec->blk1;
- delete sa_mux_lev_1_predec->blk2;
- delete sa_mux_lev_2_predec->blk1;
- delete sa_mux_lev_2_predec->blk2;
- delete dummy_way_sel_predec_blk1;
- delete dummy_way_sel_predec_blk2;
-
- delete r_predec->drv1;
- delete r_predec->drv2;
- delete b_mux_predec->drv1;
- delete b_mux_predec->drv2;
- delete sa_mux_lev_1_predec->drv1;
- delete sa_mux_lev_1_predec->drv2;
- delete sa_mux_lev_2_predec->drv1;
- delete sa_mux_lev_2_predec->drv2;
- delete way_sel_drv1;
- delete dummy_way_sel_predec_blk_drv2;
-
- delete r_predec;
- delete b_mux_predec;
- delete sa_mux_lev_1_predec;
- delete sa_mux_lev_2_predec;
-
- delete subarray_out_wire;
- if (!pure_cam)
- delete bl_precharge_eq_drv;
-
- if (is_fa || pure_cam)
- {
- delete sl_precharge_eq_drv ;
- delete sl_data_drv ;
- delete cam_bl_precharge_eq_drv;
- delete ml_precharge_drv;
- delete ml_to_ram_wl_drv;
- }
}
-double Mat::compute_delays(double inrisetime)
-{
- int k;
- double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
- double outrisetime_search, outrisetime, row_dec_outrisetime;
- // delay calculation for tags of fully associative cache
- if (is_fa || pure_cam)
- {
- //Compute search access time
- outrisetime_search = compute_cam_delay(inrisetime);
- if (is_fa)
- {
- bl_precharge_eq_drv->compute_delay(0);
- k = ml_to_ram_wl_drv->number_gates - 1;
- rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
- C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
- drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
- C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
- tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
- delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
-
- R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
- r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
- R_bl = subarray.num_rows * r_b_metal;
- C_bl = subarray.C_bl;
- delay_bl_restore = bl_precharge_eq_drv->delay +
- log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
- (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
-
- outrisetime_search = compute_bitline_delay(outrisetime_search);
- outrisetime_search = compute_sa_delay(outrisetime_search);
- }
- outrisetime_search = compute_subarray_out_drv(outrisetime_search);
- subarray_out_wire->set_in_rise_time(outrisetime_search);
- outrisetime_search = subarray_out_wire->signal_rise_time();
- delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
-
- //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
- outrisetime = r_predec->compute_delays(inrisetime);
- row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
- outrisetime = b_mux_predec->compute_delays(inrisetime);
- bit_mux_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
- sa_mux_lev_1_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
- sa_mux_lev_2_dec->compute_delays(outrisetime);
-
- if (pure_cam)
- {
- outrisetime = compute_bitline_delay(row_dec_outrisetime);
- outrisetime = compute_sa_delay(outrisetime);
- }
- return outrisetime_search;
+Mat::~Mat() {  // Destructor: deletes the decoder, predecoder, driver and wire objects reachable from this Mat's pointers.
+ delete row_dec;
+ delete bit_mux_dec;
+ delete sa_mux_lev_1_dec;
+ delete sa_mux_lev_2_dec;
+
+ delete r_predec->blk1;  // predecoder blocks are reached through the Predec objects, so they must go before the Predec objects deleted further down
+ delete r_predec->blk2;
+ delete b_mux_predec->blk1;
+ delete b_mux_predec->blk2;
+ delete sa_mux_lev_1_predec->blk1;
+ delete sa_mux_lev_1_predec->blk2;
+ delete sa_mux_lev_2_predec->blk1;
+ delete sa_mux_lev_2_predec->blk2;
+ delete dummy_way_sel_predec_blk1;
+ delete dummy_way_sel_predec_blk2;
+
+ delete r_predec->drv1;  // same ordering constraint for the predecoder block drivers
+ delete r_predec->drv2;
+ delete b_mux_predec->drv1;
+ delete b_mux_predec->drv2;
+ delete sa_mux_lev_1_predec->drv1;
+ delete sa_mux_lev_1_predec->drv2;
+ delete sa_mux_lev_2_predec->drv1;
+ delete sa_mux_lev_2_predec->drv2;
+ delete way_sel_drv1;
+ delete dummy_way_sel_predec_blk_drv2;
+
+ delete r_predec;  // the Predec containers themselves, after their blk/drv members were freed above
+ delete b_mux_predec;
+ delete sa_mux_lev_1_predec;
+ delete sa_mux_lev_2_predec;
+
+ delete subarray_out_wire;
+ if (!pure_cam)  // the constructor only allocates bl_precharge_eq_drv when pure_cam is false
+ delete bl_precharge_eq_drv;
+
+ if (is_fa || pure_cam) {  // cam_bl_precharge_eq_drv is allocated under this same condition in the constructor; presumably the other four drivers are too - TODO confirm where they are created
+ delete sl_precharge_eq_drv ;
+ delete sl_data_drv ;
+ delete cam_bl_precharge_eq_drv;
+ delete ml_precharge_drv;
+ delete ml_to_ram_wl_drv;
+ }
+}
+
+
+
+double Mat::compute_delays(double inrisetime) {  // Runs the delay computations of the mat's sub-components starting from inrisetime; both return paths return the rise time reported by subarray_out_wire.
+ int k;
+ double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl;
+ double outrisetime_search, outrisetime, row_dec_outrisetime;
+ // delay calculation for tags of fully associative cache
+ if (is_fa || pure_cam) {  // search path; this branch returns outrisetime_search at its end
+ //Compute search access time
+ outrisetime_search = compute_cam_delay(inrisetime);
+ if (is_fa) {  // also sets delay_wl_reset and delay_bl_restore, then chains bitline and sense-amp delays onto the search rise time
+ bl_precharge_eq_drv->compute_delay(0);
+ k = ml_to_ram_wl_drv->number_gates - 1;
+ rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
+ C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 *  // NOTE(review): the PCH term uses width_n[k], while the non-CAM branch below uses a p-width (w_dec_p[k]) for its PCH term - confirm intended
+ cell.h, is_dram, false, true) +
+ drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h,
+ is_dram, false, true);
+ C_ld = ml_to_ram_wl_drv->c_gate_load +
+ ml_to_ram_wl_drv->c_wire_load;
+ tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
+ delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+
+ R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+ r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
+ R_bl = subarray.num_rows * r_b_metal;
+ C_bl = subarray.C_bl;
+ delay_bl_restore = bl_precharge_eq_drv->delay +
+ log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
+ (g_tp.sram.Vbitpre - dp.V_b_sense)) *
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+
+ outrisetime_search = compute_bitline_delay(outrisetime_search);
+ outrisetime_search = compute_sa_delay(outrisetime_search);
+ }
+ outrisetime_search = compute_subarray_out_drv(outrisetime_search);
+ subarray_out_wire->set_in_rise_time(outrisetime_search);
+ outrisetime_search = subarray_out_wire->signal_rise_time();
+ delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+
+ //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
+ outrisetime = r_predec->compute_delays(inrisetime);
+ row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+ outrisetime = b_mux_predec->compute_delays(inrisetime);
+ bit_mux_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+ sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+ sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+ if (pure_cam) {  // the value left in outrisetime here is not returned; this path returns outrisetime_search
+ outrisetime = compute_bitline_delay(row_dec_outrisetime);
+ outrisetime = compute_sa_delay(outrisetime);
+ }
+ return outrisetime_search;
+ } else {  // neither fully associative nor pure CAM: set delay_wl_reset (when a row decoder exists) and delay_bl_restore
+ bl_precharge_eq_drv->compute_delay(0);
+ if (row_dec->exist == true) {  // when it does not exist, delay_wl_reset is set from the predecoder block delays near the end of the function
+ int k = row_dec->num_gates - 1;  // shadows the function-level k; rd, C_intrinsic, C_ld and tf below are likewise re-declared locally
+ double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
+ // TODO: this 4*cell.h number must be revisited
+ double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 *
+ cell.h, is_dram, false, true) +
+ drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram,
+ false, true);
+ double C_ld = row_dec->C_ld_dec_out;
+ double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
+ delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+ }
+ double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);  // these four also shadow the function-level declarations
+ double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = subarray.num_rows * r_b_metal;
+ double C_bl = subarray.C_bl;
+
+ if (is_dram) {  // DRAM uses a fixed 2.3 factor on the RC term; SRAM uses a log of the bitline voltage ratio
+ delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+ } else {
+ delay_bl_restore = bl_precharge_eq_drv->delay +
+ log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
+ (g_tp.sram.Vbitpre - dp.V_b_sense)) *
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+ }
+ }
+
+
+
+ outrisetime = r_predec->compute_delays(inrisetime);  // each predecoder is fed inrisetime; its output rise time feeds the matching decoder
+ row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+ outrisetime = b_mux_predec->compute_delays(inrisetime);
+ bit_mux_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+ sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+ sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+ outrisetime = compute_bitline_delay(row_dec_outrisetime);  // chain: row decoder -> bitline -> sense amp -> subarray output driver -> output wire
+ outrisetime = compute_sa_delay(outrisetime);
+ outrisetime = compute_subarray_out_drv(outrisetime);
+ subarray_out_wire->set_in_rise_time(outrisetime);
+ outrisetime = subarray_out_wire->signal_rise_time();
+
+ delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+ if (dp.is_tag == true && dp.fully_assoc == false) {
+ compute_comparator_delay(0);
}
- else
- {
- bl_precharge_eq_drv->compute_delay(0);
- if (row_dec->exist == true)
- {
- int k = row_dec->num_gates - 1;
- double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
- // TODO: this 4*cell.h number must be revisited
- double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
- drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
- double C_ld = row_dec->C_ld_dec_out;
- double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
- delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
- }
- double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
- double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
- double R_bl = subarray.num_rows * r_b_metal;
- double C_bl = subarray.C_bl;
-
- if (is_dram)
- {
- delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
- }
- else
- {
- delay_bl_restore = bl_precharge_eq_drv->delay +
- log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
- (R_bl_precharge * C_bl + R_bl * C_bl / 2);
- }
- }
-
-
-
- outrisetime = r_predec->compute_delays(inrisetime);
- row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
- outrisetime = b_mux_predec->compute_delays(inrisetime);
- bit_mux_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
- sa_mux_lev_1_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
- sa_mux_lev_2_dec->compute_delays(outrisetime);
-
- outrisetime = compute_bitline_delay(row_dec_outrisetime);
- outrisetime = compute_sa_delay(outrisetime);
- outrisetime = compute_subarray_out_drv(outrisetime);
- subarray_out_wire->set_in_rise_time(outrisetime);
- outrisetime = subarray_out_wire->signal_rise_time();
-
- delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
- if (dp.is_tag == true && dp.fully_assoc == false)
- {
- compute_comparator_delay(0);
- }
-
- if (row_dec->exist == false)
- {
- delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
+
+ if (row_dec->exist == false) {  // no row decoder: use the larger of the two row predecoder block delays as the wordline reset delay
+ delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
}
- return outrisetime;
+ return outrisetime;
}
-double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
-{
-
- double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
- compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry
-
- if (deg_bl_muxing > 1)
- {
- height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
- // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
- }
-
- height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
-
- if (dp.Ndsam_lev_1 > 1)
- {
- height += compute_tr_width_after_folding(
- g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
- //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
- }
-
- if (dp.Ndsam_lev_2 > 1)
- {
- height += compute_tr_width_after_folding(
- g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
- //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
-
- // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
- height += 2 * compute_tr_width_after_folding(
- pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
- height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
- }
-
- // TODO: this should be uncommented...
- /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
- {
- //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
- double width_write_driver_write_mux = width_write_driver_or_write_mux();
- double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
- cell.w *
- // deg_bl_muxing *
- dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
- height += height_write_driver_write_mux;
- }*/
-
- return height;
+double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() {  // Returns the summed height of the bitline precharge/equalization circuitry, the optional column mux, the sense amplifier and the optional sense-amp mux levels.
+
+ double height =
+ compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge,
+ camFlag ? cam_cell.w :  // NOTE(review): '?:' binds looser than '/', so only the cell.w arm is divided by the port count; cam_cell.w is passed undivided - confirm intended
+ cell.w / (2 * (RWP + ERP + SCHP))) +
+ // precharge circuitry
+ compute_tr_width_after_folding(g_tp.w_pmos_bl_eq,
+ camFlag ? cam_cell.w :  // same precedence remark as for the precharge term above
+ cell.w / (RWP + ERP + SCHP));
+
+ if (deg_bl_muxing > 1) {  // a column (bitline) mux is only counted when bitlines are actually muxed
+ // col mux tr height
+ height +=
+ compute_tr_width_after_folding(g_tp.w_nmos_b_mux,
+ cell.w / (2 * (RWP + ERP)));
+ // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
+ }
+
+ height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
+
+ if (dp.Ndsam_lev_1 > 1) {  // first-level sense-amp mux
+ height += compute_tr_width_after_folding(
+ g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
+ //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+ }
+
+ if (dp.Ndsam_lev_2 > 1) {  // second-level sense-amp mux plus the inverter buffers between the two mux levels
+ height += compute_tr_width_after_folding(
+ g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
+ //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+
+ // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
+ height += 2 * compute_tr_width_after_folding(
+ pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+ height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+ }
+
+ // TODO: this should be uncommented...
+ /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
+ {
+ //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
+ double width_write_driver_write_mux = width_write_driver_or_write_mux();
+ double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
+ cell.w *
+ // deg_bl_muxing *
+ dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
+ height += height_write_driver_write_mux;
+ }*/
+
+ return height;  // write driver / write mux height is not included: the block that would add it is commented out above
}
-double Mat::compute_cam_delay(double inrisetime)
-{
+double Mat::compute_cam_delay(double inrisetime) {
- double out_time_ramp, this_delay;
- double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
+ double out_time_ramp, this_delay;
+ double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
- double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
+ double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
- double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
- int Htagbits;
-
- double driver_c_gate_load;
- double driver_c_wire_load;
- double driver_r_wire_load;
- //double searchline_precharge_time;
-
- double leak_power_cc_inverters_sram_cell = 0;
- double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
- double leak_power_RD_port_sram_cell = 0;
- double leak_power_SCHP_port_sram_cell = 0;
- double leak_comparator_cam_cell =0;
-
- double gate_leak_comparator_cam_cell = 0;
- double gate_leak_power_cc_inverters_sram_cell = 0;
- double gate_leak_power_RD_port_sram_cell = 0;
- double gate_leak_power_SCHP_port_sram_cell = 0;
-
- c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
- c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
- r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
- r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
-
- dynSearchEng = 0.0;
- delay_matchchline = 0.0;
- double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
- bool linear_scaling = false;
-
- if (linear_scaling)
- {
- Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
- Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
- Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
- Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
- Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
- Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
- Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- W_hit_miss_n = Wdummyn;
- W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
- //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
- }
- else
- {
- Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
- Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
- Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
- Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
- Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
- Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
- Wdummyn = g_tp.cam.cell_nmos_w;
- Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
- Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- W_hit_miss_n = Wdummyn;
- W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
- }
-
- Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
-
- /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
- search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
- From the driver(am and an) to the comparators in all the rows including the dummy row,
- Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
-
- //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
- //Searchline precharge routes horizontally
- driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
-
- sl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
- //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
- driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
- driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
- driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
- sl_data_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- sl_precharge_eq_drv->compute_delay(0);
- double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
- double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
- double R_bl = (subarray.num_rows + 1) * r_b_metal;
- double C_bl = subarray.C_bl_cam;
- delay_cam_sl_restore = sl_precharge_eq_drv->delay
- + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
- out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
-
- //matchline ops delay
- delay_matchchline += sl_data_drv->delay;
-
- /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
- //matchline delay, matchline power, matchline_reset for cycle time computation,
-
- ////matchline precharge circuitry routes vertically
- //There are two matchline precharge driver chains per subarray.
- driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
- driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
- driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
-
- ml_precharge_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- ml_precharge_drv->compute_delay(0);
-
-
- rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
- c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
- + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
-
- Cwire = c_matchline_metal * Htagbits;
- Rwire = r_matchline_metal * Htagbits;
- c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
-
- double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
- //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- double R_ml = Rwire;
- double C_ml = Cwire + c_intrinsic;
- delay_cam_ml_reset = ml_precharge_drv->delay
- + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
-
- //matchline ops delay
- tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
- this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
- delay_matchchline += this_delay;
- out_time_ramp = this_delay / VTHFA3;
-
- dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
- * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
-
- /* third stage, from the NAND2 gates to the drivers in the dummy row */
- rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
- c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
- c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
- out_time_ramp = this_delay / (1 - VTHFA4);
- delay_matchchline += this_delay;
-
- //only the dummy row has the extra inverter between NAND and NOR gates
- dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
-
- /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
- rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
- c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
- Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
- c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
- tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
- this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
- out_time_ramp = this_delay / VTHFA5;
- delay_matchchline += this_delay;
-
- dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
- /*final statge from the NOR gate to drive the wordline of the data portion */
-
- //searchline data driver There are two matchline precharge driver chains per subarray.
- driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
- driver_c_wire_load = subarray.C_wl_ram;
- driver_r_wire_load = subarray.R_wl_ram;
-
- ml_to_ram_wl_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
-
-
- rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
- c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
- out_time_ramp = this_delay / (1-0.5);
- delay_matchchline += this_delay;
-
- out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
-
- //c_gate_load energy is computed in ml_to_ram_wl_drv
- dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
-
- /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
- /*Precharge the hitting logic */
- c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_searchline_metal * subarray.num_rows;
- Rwire = r_searchline_metal * subarray.num_rows;
- c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
- rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
- //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- double R_hit_miss = Rwire;
- double C_hit_miss = Cwire + c_intrinsic;
- delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
- dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
- /*hitting logic evaluation */
- c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_searchline_metal * subarray.num_rows;
- Rwire = r_searchline_metal * subarray.num_rows;
- c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
- rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
- tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
-
- delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
-
- if (is_fa)
- delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
-
- dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
- /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
-
- power_matchline.searchOp.dynamic = dynSearchEng;
-
- //leakage in one subarray
- double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
- double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
- double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
- double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
-
- leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
- leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
- leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
- leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
- leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
-
- power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
- leak_comparator_cam_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
- leak_power_RD_port_sram_cell * ERP +
- leak_power_SCHP_port_sram_cell*SCHP;
+ double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
+ int Htagbits;
+
+ double driver_c_gate_load;
+ double driver_c_wire_load;
+ double driver_r_wire_load;
+ //double searchline_precharge_time;
+
+ double leak_power_cc_inverters_sram_cell = 0;
+ double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+ double leak_power_RD_port_sram_cell = 0;
+ double leak_power_SCHP_port_sram_cell = 0;
+ double leak_comparator_cam_cell =0;
+
+ double gate_leak_comparator_cam_cell = 0;
+ double gate_leak_power_cc_inverters_sram_cell = 0;
+ double gate_leak_power_RD_port_sram_cell = 0;
+ double gate_leak_power_SCHP_port_sram_cell = 0;
+
+ c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
+ c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
+ r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
+ r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
+
+ dynSearchEng = 0.0;
+ delay_matchchline = 0.0;
+ double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
+ bool linear_scaling = false;
+
+ if (linear_scaling) {
+ Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+ Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+ Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+ Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+ Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+ Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
+ Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ W_hit_miss_n = Wdummyn;
+ W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
+ //TODO: this number should be updated using the new layout; the path from the NAND to the output NOR should be computed using logical effort
+ } else {
+ Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+ Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+ Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+ Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+ Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+ Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
+ Wdummyn = g_tp.cam.cell_nmos_w;
+ Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
+ Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ W_hit_miss_n = Wdummyn;
+ W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
+ }
+
+ Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
+
+ /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
+ search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
+ From the driver(am and an) to the comparators in all the rows including the dummy row,
+ Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
+
+ //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
+ //Searchline precharge routes horizontally
+ driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
+
+ sl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
+ //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
+ driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
+ driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+ driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+ sl_data_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ sl_precharge_eq_drv->compute_delay(0);
+ double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
+ double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = (subarray.num_rows + 1) * r_b_metal;
+ double C_bl = subarray.C_bl_cam;
+ delay_cam_sl_restore = sl_precharge_eq_drv->delay
+ + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+ out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
+
+ //matchline ops delay
+ delay_matchchline += sl_data_drv->delay;
+
+ /* second stage, from the transistors in the comparators (both normal row and dummy row) to the NAND gates that combine both halves */
+ //matchline delay, matchline power, matchline_reset for cycle time computation,
+
+ ////matchline precharge circuitry routes vertically
+ //There are two matchline precharge driver chains per subarray.
+ driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
+ driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+ driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+
+ ml_precharge_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ ml_precharge_drv->compute_delay(0);
+
+
+ rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
+ c_intrinsic = Htagbits *
+ (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def,
+ is_dram)//TODO: the cell_h_def should be revisit
+ + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) /
+ Htagbits);//since each halve only has one precharge tx per matchline
+
+ Cwire = c_matchline_metal * Htagbits;
+ Rwire = r_matchline_metal * Htagbits;
+ c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
+
+ double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
+ //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ double R_ml = Rwire;
+ double C_ml = Cwire + c_intrinsic;
+ //TODO: latest CAM has sense amps on matchlines too
+ delay_cam_ml_reset = ml_precharge_drv->delay
+ + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2);
+
+ //matchline ops delay
+ tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+ this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
+ delay_matchchline += this_delay;
+ out_time_ramp = this_delay / VTHFA3;
+
+ dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) *
+ (subarray.num_rows + 1)) //TODO: need to be precise
+ * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *
+ 2;//each subarry has two halves
+
+ /* third stage, from the NAND2 gates to the drivers in the dummy row */
+ rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
+ c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2;
+ c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
+ out_time_ramp = this_delay / (1 - VTHFA4);
+ delay_matchchline += this_delay;
+
+ //only the dummy row has the extra inverter between NAND and NOR gates
+ dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) *
+ g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
+
+ /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
+ rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
+ c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_matchline_metal * Htagbits + c_searchline_metal *
+ (subarray.num_rows + 1) / 2;
+ Rwire = r_matchline_metal * Htagbits + r_searchline_metal *
+ (subarray.num_rows + 1) / 2;
+ c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
+ tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+ this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
+ out_time_ramp = this_delay / VTHFA5;
+ delay_matchchline += this_delay;
+
+ dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) *
+ g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+ /* final stage, from the NOR gate to drive the wordline of the data portion */
+
+ //searchline data driver There are two matchline precharge driver chains per subarray.
+ driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
+ driver_c_wire_load = subarray.C_wl_ram;
+ driver_r_wire_load = subarray.R_wl_ram;
+
+ ml_to_ram_wl_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+
+
+ rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
+ c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
+ out_time_ramp = this_delay / (1 - 0.5);
+ delay_matchchline += this_delay;
+
+ out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
+
+ //c_gate_load energy is computed in ml_to_ram_wl_drv
+ dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+
+ /* peripheral -- hitting logic, see "CMOS VLSI Design" Fig. 11.51 */
+ /*Precharge the hitting logic */
+ c_intrinsic = 2 *
+ drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_searchline_metal * subarray.num_rows;
+ Rwire = r_searchline_metal * subarray.num_rows;
+ c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
+ subarray.num_rows;
+
+ rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
+ //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ double R_hit_miss = Rwire;
+ double C_hit_miss = Cwire + c_intrinsic;
+ delay_hit_miss_reset = log(g_tp.cam.Vbitpre) *
+ (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
+ dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+ /*hitting logic evaluation */
+ c_intrinsic = 2 *
+ drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_searchline_metal * subarray.num_rows;
+ Rwire = r_searchline_metal * subarray.num_rows;
+ c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
+ subarray.num_rows;
+
+ rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
+ tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+
+ delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
+
+ if (is_fa)
+ delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
+
+ dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+ /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
+
+ power_matchline.searchOp.dynamic = dynSearchEng;
+
+ //leakage in one subarray
+ double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
+ double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
+ double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
+ 1, inv, false, true) * 2;
+ //approx XOR with Inv
+ double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv,
+ false, true) * 2;
+
+ leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
+ leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
+ leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
+ leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
+ leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
+
+ power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
+ leak_comparator_cam_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+ leak_power_RD_port_sram_cell * ERP +
+ leak_power_SCHP_port_sram_cell * SCHP;
// power_matchline.searchOp.leakage += leak_comparator_cam_cell;
- power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
- //In idle states, the hit/miss txs are closed (on) therefore no Isub
- power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
+ power_matchline.searchOp.leakage *= (subarray.num_rows + 1) *
+ subarray.num_cols_fa_cam;//TODO:dumy line precise
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+ //In idle states, the hit/miss txs are closed (on) therefore no Isub
+ power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
// + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
- //in idle state, Ig_on only possibly exist in access transistors of read only ports
- double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
- double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
- double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
-
- gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
- gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
- gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
- gate_leak_power_SCHP_port_sram_cell = 0;
-
- //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
-
- power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
- power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
- power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
- power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
- + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
-
-
- return out_time_ramp;
+ //in idle state, Ig_on only possibly exist in access transistors of read only ports
+ double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
+ double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
+ 1, inv, false, true) * 2;
+ double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv,
+ false, true) * 2;
+
+ gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd;
+ gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd;
+ gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
+ gate_leak_power_SCHP_port_sram_cell = 0;
+
+ //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
+
+ power_matchline.searchOp.gate_leakage +=
+ gate_leak_power_cc_inverters_sram_cell;
+ power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
+ power_matchline.searchOp.gate_leakage +=
+ gate_leak_power_SCHP_port_sram_cell * SCHP +
+ gate_leak_power_RD_port_sram_cell * ERP;
+ power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) *
+ subarray.num_cols_fa_cam;//TODO:dumy line precise
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += subarray.num_rows *
+ cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd +
+ + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd;
+
+
+ return out_time_ramp;
}
-double Mat::width_write_driver_or_write_mux()
-{
- // calculate resistance of SRAM cell pull-up PMOS transistor
- // cam and sram have same cell trasistor properties
- double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
- double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
- double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
- double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
+double Mat::width_write_driver_or_write_mux() { // Returns the NMOS width that R_to_w() yields for the target write driver/mux resistance.
+ // Compute the on-resistances of the SRAM cell pull-up (cell_pmos_w) and access (cell_a_w) transistors.
+ // CAM and SRAM cells are treated as having the same cell transistor properties.
+ double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true); // NOTE(review): the PMOS width is evaluated with device type NCH - confirm intended
+ double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
+ double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2; // target resistance derived from the two on-resistances above
+ double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram); // convert the target resistance back into a transistor width
- return width_write_driver_nmos;
+ return width_write_driver_nmos;
}
@@ -1007,134 +1032,164 @@ double Mat::width_write_driver_or_write_mux()
double Mat::compute_comparators_height(
int tagbits,
int number_ways_in_mat,
- double subarray_mem_cell_area_width)
-{
- double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
- double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
- return cumulative_area / subarray_mem_cell_area_width;
+ double subarray_mem_cell_area_width) { // Returns the comparator height: cumulative comparator area divided by subarray_mem_cell_area_width.
+ double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
+ double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4; // evaluated left to right in double, so dividing by 4 does not truncate tagbits
+ return cumulative_area / subarray_mem_cell_area_width; // no guard here against a zero width
}
-double Mat::compute_bitline_delay(double inrisetime)
-{
- double V_b_pre, v_th_mem_cell, V_wl;
- double tstep;
- double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
- double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
- int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
-
- double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
- double R_bl = subarray.num_rows * R_b_metal;
- double C_bl = subarray.C_bl;
-
- // TODO: no leakage for DRAMs?
- double leak_power_cc_inverters_sram_cell = 0;
- double gate_leak_power_cc_inverters_sram_cell = 0;
- double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
- double leak_power_RD_port_sram_cell = 0;
- double gate_leak_power_RD_port_sram_cell = 0;
-
- if (is_dram == true)
- {
- V_b_pre = g_tp.dram.Vbitpre;
- v_th_mem_cell = g_tp.dram_acc.Vth;
- V_wl = g_tp.vpp;
- //The access transistor is not folded. So we just need to specify a threshold value for the
- //folding width that is equal to or greater than Wmemcella.
- R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
- r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
- }
- else
- { //SRAM
- V_b_pre = g_tp.sram.Vbitpre;
- v_th_mem_cell = g_tp.sram_cell.Vth;
- V_wl = g_tp.sram_cell.Vdd;
- R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
- R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
-
- //Leakage current of an SRAM cell
- double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2?
- double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true);
- double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
-
- leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
- leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
- leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
+double Mat::compute_bitline_delay(double inrisetime) { // Sets delay_bitline and the power_bitline fields; the returned out-rise time is always 0.
+ double V_b_pre, v_th_mem_cell, V_wl;
+ double tstep;
+ double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
+ double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0;
+ int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
+
+ double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um; // NOTE(review): ?: binds looser than the multiply, so the camFlag branch yields cam_cell.h without the R_per_um factor - confirm intended
+ double R_bl = subarray.num_rows * R_b_metal;
+ double C_bl = subarray.C_bl;
+
+ // TODO: no leakage for DRAMs?
+ double leak_power_cc_inverters_sram_cell = 0;
+ double gate_leak_power_cc_inverters_sram_cell = 0;
+ double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+ double leak_power_RD_port_sram_cell = 0;
+ double gate_leak_power_RD_port_sram_cell = 0;
+
+ if (is_dram == true) {
+ V_b_pre = g_tp.dram.Vbitpre;
+ v_th_mem_cell = g_tp.dram_acc.Vth;
+ V_wl = g_tp.vpp;
+ //The access transistor is not folded. So we just need to specify a
+ // threshold value for the folding width that is equal to or greater
+ // than Wmemcella.
+ R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
+ r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
+ } else { //SRAM
+ V_b_pre = g_tp.sram.Vbitpre;
+ v_th_mem_cell = g_tp.sram_cell.Vth;
+ V_wl = g_tp.sram_cell.Vdd;
+ R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
+ R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
+
+ //Leakage current of an SRAM cell
+ //TODO: how much is the idle time? just by *2?
+ double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
+ false, true);
+ double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,
+ false, true);
+ double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w,
+ g_tp.sram.cell_pmos_w, 1, inv, false,
+ true) * 2;//two invs per cell
+
+ leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
+ leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
+ leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
+
+
+ //In the idle state, Ig_on can only exist in the access transistors of read-only ports
+ double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
+ false, true);
+ double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w,
+ g_tp.sram.cell_pmos_w, 1, inv, false,
+ true);
+
+ gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd;
+ gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
+ }
- //in idle state, Ig_on only possibly exist in access transistors of read only ports
- double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
- double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
-
- gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
- gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
- }
-
-
- double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
- double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
- double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
- double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
-
- if (is_dram)
- {
- double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
- tstep = 2.3 * fraction * r_dev *
- (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
- (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
- delay_writeback = tstep;
- dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
- dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
- per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
- }
- else
- {
- double tau;
-
- if (deg_bl_muxing > 1)
- {
- tau = (R_cell_pull_down + R_cell_acc) *
- (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
- dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
- subarray.num_cols * num_subarrays_per_mat*/;
- dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
- dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
- num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
- //Write Ops are differential for SRAM
- }
- else
- {
- tau = (R_cell_pull_down + R_cell_acc) *
- (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
- R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
- dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
- dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
- num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
+ double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, // NOTE(review): in this and the following drain_C_ calls, the scaling that follows ':' applies only to the cell.w branch of the conditional - confirm intended
+ camFlag ? cam_cell.w : cell.w /
+ (2 * (RWP + ERP + SCHP)), is_dram);
+ double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
+ double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0,
+ camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram);
+ double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
+ double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0,
+ is_dram) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
+ double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
+ camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram);
+
+ if (is_dram) {
+ double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_C /
+ (g_tp.dram_cell_C + C_bl));
+ tstep = 2.3 * fraction * r_dev *
+ (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux)) /
+ (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux));
+ delay_writeback = tstep;
+ dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) *
+ (g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+ dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) *
+ (g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ *
+ num_act_mats_hor_dir * 100;
+ per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) *
+ (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
+ } else {
+ double tau;
+
+ if (deg_bl_muxing > 1) {
+ tau = (R_cell_pull_down + R_cell_acc) *
+ (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 *
+ C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) +
+ R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux);
+ dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense *
+ g_tp.sram_cell.Vdd;
+ dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) *
+ 2 * dp.V_b_sense * g_tp.sram_cell.Vdd *
+ (1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
+ deg_bl_muxing);
+ dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ /
+ deg_bl_muxing) / deg_senseamp_muxing) *
+ num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) *
+ g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
+ //Write Ops are differential for SRAM
+ } else {
+ tau = (R_cell_pull_down + R_cell_acc) *
+ (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
+ R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
+ dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
+ 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+ dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
+ deg_bl_muxing) / deg_senseamp_muxing) *
+ num_act_mats_hor_dir * C_bl) *
+ g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
+
+ }
+ tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
+ power_bitline.readOp.leakage = // leakage fields are assigned only on this non-DRAM path
+ leak_power_cc_inverters_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+ leak_power_RD_port_sram_cell * ERP;
+ power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
+ gate_leak_power_RD_port_sram_cell * ERP;
}
- tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
- power_bitline.readOp.leakage =
- leak_power_cc_inverters_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
- leak_power_RD_port_sram_cell * ERP;
- power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
- gate_leak_power_RD_port_sram_cell * ERP;
-
- }
// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
@@ -1142,607 +1197,684 @@ double Mat::compute_bitline_delay(double inrisetime)
// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
- /* take input rise time into account */
- double m = V_wl / inrisetime;
- if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
- {
- delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
- }
- else
- {
- delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
- }
+ /* take input rise time into account */
+ double m = V_wl / inrisetime; // wordline slope; inrisetime is used as a divisor without a zero check
+ if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) {
+ delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m);
+ } else {
+ delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
+ }
- bool is_fa = (dp.fully_assoc) ? true : false;
+ bool is_fa = (dp.fully_assoc) ? true : false;
- if (dp.is_tag == false || is_fa == false)
- {
- power_bitline.readOp.dynamic = dynRdEnergy;
- power_bitline.writeOp.dynamic = dynWriteEnergy;
- }
+ if (dp.is_tag == false || is_fa == false) { // skipped only when dp.is_tag and dp.fully_assoc are both set
+ power_bitline.readOp.dynamic = dynRdEnergy;
+ power_bitline.writeOp.dynamic = dynWriteEnergy;
+ }
- double outrisetime = 0;
- return outrisetime;
+ double outrisetime = 0; // no output rise time is computed here; the function always returns 0
+ return outrisetime;
}
-double Mat::compute_sa_delay(double inrisetime)
-{
- //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
-
- //Bitline circuitry leakage.
- double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
- double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
- double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
- double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
-
- double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
- //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
- double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
- //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
- // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
- double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
- leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
- leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
-
- // sense amplifier has to drive logic in "data out driver" and sense precharge load.
- // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
- //constant as well as the magnitude of input differential voltage.
- double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double tau = C_ld / g_tp.gm_sense_amp_latch;
- delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
- power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
+double Mat::compute_sa_delay(double inrisetime) { // Sets delay_sa, power_sa.readOp and the two sense-amp page-state leakage members; inrisetime is not read and the return value is always 0.
+ //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
+
+ //Bitline circuitry leakage.
+ double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
+ double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
+ double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
+ double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
+
+ double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
+ //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
+ double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
+ //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
+ // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
+ double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
+ leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+ leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+
+ // The sense amplifier has to drive the logic in the "data out driver" and the sense precharge load.
+ // C_ld is the load seen by the sense amp. The delay model for the sense amp is sensitive to both the output time
+ // constant and the magnitude of the input differential voltage.
+ double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + // NOTE(review): in the drain_C_ terms below, the deg_bl_muxing and port-count scaling binds only to the cell.w branch of the conditional - confirm intended
+ drain_C_(g_tp.w_sense_n, NCH, 1, 0,
+ camFlag ? cam_cell.w : cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram);
+ double tau = C_ld / g_tp.gm_sense_amp_latch; // time constant: load capacitance over the latch transconductance
+ delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
+ power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
num_subarrays_per_mat * num_act_mats_hor_dir*/;
- power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
+ power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd; // uses the idle-phase leakage current only
- double outrisetime = 0;
- return outrisetime;
+ double outrisetime = 0; // no output rise time is computed here
+ return outrisetime;
}
-double Mat::compute_subarray_out_drv(double inrisetime)
-{
- double C_ld, rd, tf, this_delay;
- double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
-
- // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
- rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
- C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
- // delay of signal through inverter-buffer to second level of sense-amp mux.
- // internal delay of buffer
- rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
- C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-
- // inverter driving drain of pass transistor of second level of sense-amp mux.
- rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
- C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-
-
- // delay of signal through pass-transistor to input of subarray output driver.
- rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
- C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
- //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
- gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
-
-
- return inrisetime;
+double Mat::compute_subarray_out_drv(double inrisetime) { // Accumulates four stages into delay_subarray_out_drv and power_subarray_out_drv.readOp; returns the rise time computed after the last stage.
+ double C_ld, rd, tf, this_delay;
+ double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
+
+ // Stage 1: delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+ C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, // NOTE(review): the scaling after ':' binds only to the cell.w branch of the conditional - confirm intended
+ camFlag ? cam_cell.w : cell.w *
+ deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5); // becomes the input rise time of the next stage
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
+ // Stage 2: delay of signal through inverter-buffer to second level of sense-amp mux.
+ // internal delay of buffer
+ rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+ C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage +=
+ cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv, is_dram) * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd;
+
+ // Stage 3: inverter driving drain of pass transistor of second level of sense-amp mux.
+ rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+ C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def,
+ is_dram) +
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 /
+ (RWP + ERP + SCHP), is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage +=
+ cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd; // NOTE(review): unlike stage 2, is_dram is not passed to cmos_Isub_leakage here - confirm intended
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd;
+
+
+ // Stage 4: delay of signal through pass-transistor to input of subarray output driver.
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+ C_ld = dp.Ndsam_lev_2 *
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP),
+ is_dram) +
+ //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
+ gate_C(subarray_out_wire->repeater_size *
+ (subarray_out_wire->wire_length /
+ subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ *
+ (1 + p_to_n_sz_r), 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
+
+
+ return inrisetime;
}
-double Mat::compute_comparator_delay(double inrisetime)
-{
- int A = g_ip->tag_assoc;
-
- int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
- // a multiple of 4.
-
- /* First Inverter */
- double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
- double tf = Req*Ceq;
- double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
- double nextinputtime = st1del/VTHCOMPINV;
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
-
- //For each degree of associativity
- //there are 4 such quarter comparators
- double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
- double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
- /* Second Inverter */
- Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
- tf = Req*Ceq;
- double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
- nextinputtime = st2del/(1.0-VTHCOMPINV);
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
-
- /* Third Inverter */
- Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
- tf = Req*Ceq;
- double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
- nextinputtime = st3del/(VTHEVALINV);
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
-
- /* Final Inverter (virtual ground driver) discharging compare part */
- double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
- double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
- double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
- drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
- double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
- drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
- gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
- power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
- lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
-
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
-
- /* time to go to threshold of mux driver */
- double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
- /* take into account non-zero input rise time */
- double m = g_tp.peri_global.Vdd/nextinputtime;
- double Tcomparatorni;
-
- if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
- {
- double a = m;
- double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
- double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
- Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
- }
- else
- {
- Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
- }
- delay_comparator = Tcomparatorni+st1del+st2del+st3del;
- power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
- power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
-
- return Tcomparatorni / (1.0 - VTHMUXNAND);;
+double Mat::compute_comparator_delay(double inrisetime) { // tag comparator delay / energy model
+ int A = g_ip->tag_assoc; // tag associativity, used below as the 4 * A scaling factor
+ // inrisetime is handed to horowitz() as the input of the first inverter stage.
+ int tagbits_ = dp.tagbits / 4; // Tag bits per quarter comparator (4 quarter comparators assumed);
+ // the input tagbits is expected to already be a multiple of 4.
+
+ /* First inverter: loaded by the second inverter's gate plus its own drains; driven through PMOS p1 */
+ double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
+ double tf = Req * Ceq;
+ double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL);
+ double nextinputtime = st1del / VTHCOMPINV;
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+
+ // Each degree of associativity has 4 such quarter comparators,
+ // hence the 4 * A scaling applied to the energy and leakage terms.
+ double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1,
+ g_tp.w_comp_inv_p1, 1, inv,
+ is_dram) * 4 * A;
+ double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1,
+ g_tp.w_comp_inv_p1, 1, inv,
+ is_dram) * 4 * A;
+ /* Second inverter: loaded by the third inverter's gate plus its own drains; driven through NMOS n2 */
+ Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
+ tf = Req * Ceq;
+ double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE);
+ nextinputtime = st2del / (1.0 - VTHCOMPINV);
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
+ inv, is_dram) * 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
+ inv, is_dram) * 4 * A;
+
+ /* Third inverter: loaded by the evaluation inverter's gate plus its own drains; driven through PMOS p3 */
+ Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
+ tf = Req * Ceq;
+ double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL);
+ nextinputtime = st3del / (VTHEVALINV);
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1,
+ inv, is_dram) * 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3,
+ 1, inv, is_dram) * 4 * A;
+
+ /* Final inverter (virtual ground driver) discharging the compare part */
+ double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram);
+ double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */
+ double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
+ g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_n, NCH, 2, 1,
+ g_tp.cell_h_def, is_dram)) +
+ drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
+ g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_n, NCH, 2, 1,
+ g_tp.cell_h_def, is_dram)) +
+ drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram);
+ power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
+ inv, is_dram) * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
+ is_dram) * 4 * A; // original note: "stack factor of 0.2" (no such factor is applied in this call -- TODO confirm)
+
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
+ inv, is_dram) * 4 * A;
+ // for gate leakage the compare transistors are treated as an inverter
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
+ is_dram) * 4 * A;
+
+ /* time to reach the threshold of the mux driver: RC sum scaled by ln(1 / VTHMUXNAND) */
+ double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND);
+ /* account for the non-zero input rise time; m = Vdd / nextinputtime is the input slope */
+ double m = g_tp.peri_global.Vdd / nextinputtime;
+ double Tcomparatorni;
+ // Small tstep: Tcomparatorni is the (-b + sqrt(...)) root of a*t^2 + b*t + c = 0; otherwise a linear expression in tstep is used.
+ if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) {
+ double a = m;
+ double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) -
+ g_tp.peri_global.Vth);
+ double c = -2 * (tstep) * (g_tp.peri_global.Vdd -
+ g_tp.peri_global.Vth) + 1 / m *
+ ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) *
+ ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth);
+ Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a);
+ } else {
+ Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd +
+ g_tp.peri_global.Vth) / (2 * m) -
+ (g_tp.peri_global.Vdd * VTHEVALINV) / m;
+ }
+ delay_comparator = Tcomparatorni + st1del + st2del + st3del; // discharge time plus the three inverter stage delays
+ power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; // assigned (not accumulated), unlike readOp.dynamic above
+ power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; // assigned (not accumulated) as well
+ // Returns Tcomparatorni scaled by 1 / (1 - VTHMUXNAND); presumably the input rise time for the next stage -- TODO confirm against callers.
+ return Tcomparatorni / (1.0 - VTHMUXNAND);; // NOTE(review): the second ';' is a stray empty statement
}
-void Mat::compute_power_energy()
-{
- //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
+void Mat::compute_power_energy() {
+ //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
//when search all subarrays and all mats are fully active
- //when plain read/write only one subarray in a single mat is active.
+ //when plain read/write only one subarray in a single mat is active.
// add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
- power.readOp.dynamic += r_predec->power.readOp.dynamic +
- b_mux_predec->power.readOp.dynamic +
- sa_mux_lev_1_predec->power.readOp.dynamic +
- sa_mux_lev_2_predec->power.readOp.dynamic;
-
- // add energy consumed in decoders
- power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
- if (!(is_fa||pure_cam))
- power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
-
- // add energy consumed in bitline prechagers, SAs, and bitlines
- if (!(is_fa||pure_cam))
- {
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
- power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
-
- //Add sense amps energy
- num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
-
- // add energy consumed in bitlines
- //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
- power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
- power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
- //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
- //Add subarray output energy
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
- }
-
- else if (is_fa)
- {
- //for plain read/write only one subarray in a mat is active
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
- + cam_bl_precharge_eq_drv->power.readOp.dynamic;
- power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
-
- //Add sense amps energy
- num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
- num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
- power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
- power_sa.readOp.dynamic *= num_sa_subarray;
-
-
- // add energy consumed in bitlines
- power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
- power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
- power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
- power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
-
- //Add subarray output energy
- power_subarray_out_drv.searchOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
-
- //add energy consumed inside cam
- power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
- power_searchline_precharge = sl_precharge_eq_drv->power;
- power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
- power_searchline = sl_data_drv->power;
- power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
- power_matchline_precharge = ml_precharge_drv->power;
- power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
- power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
- power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
- power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
- //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
- }
- else
- {
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
- //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
- //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
- //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
-
- //Add sense amps energy
- num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
- power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
- power_sa.searchOp.dynamic = 0;
-
- power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
- power_bitline.searchOp.dynamic = 0;
- power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
-
- power_subarray_out_drv.searchOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
-
-
- ////add energy consumed inside cam
- power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
- power_searchline_precharge = sl_precharge_eq_drv->power;
- power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
- power_searchline = sl_data_drv->power;
- power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
- power_matchline_precharge = ml_precharge_drv->power;
- power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
- power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
- power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
- power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
- //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
- }
-
-
-
- // calculate leakage power
- if (!(is_fa || pure_cam))
- {
- int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
- //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power.readOp.leakage += power_bitline.readOp.leakage +
- power_bl_precharge_eq_drv.readOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
- //cout<<"leakage"<<power.readOp.leakage<<endl;
-
- power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
- power.readOp.leakage += power_comparator.readOp.leakage;
-
- //cout<<"leakage1"<<power.readOp.leakage<<endl;
-
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
-
- power.readOp.leakage += r_predec->power.readOp.leakage +
- b_mux_predec->power.readOp.leakage +
- sa_mux_lev_1_predec->power.readOp.leakage +
- sa_mux_lev_2_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage +
- power_bit_mux_decoders.readOp.leakage +
- power_sa_mux_lev_1_decoders.readOp.leakage +
- power_sa_mux_lev_2_decoders.readOp.leakage;
- //cout<<"leakage2"<<power.readOp.leakage<<endl;
-
- //++++Below is gate leakage
- power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
- //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
- power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
- //cout<<"leakage"<<power.readOp.leakage<<endl;
-
- power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
- power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
-
- //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
-
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
-
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- b_mux_predec->power.readOp.gate_leakage +
- sa_mux_lev_1_predec->power.readOp.gate_leakage +
- sa_mux_lev_2_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage +
- power_bit_mux_decoders.readOp.gate_leakage +
- power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- power_sa_mux_lev_2_decoders.readOp.gate_leakage;
- }
- else if (is_fa)
- {
- int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
- //cout<<"leakage3"<<power.readOp.leakage<<endl;
-
-
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.leakage += power_bitline.readOp.leakage +
- power_bl_precharge_eq_drv.readOp.leakage +
- power_bl_precharge_eq_drv.searchOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
-
- //cout<<"leakage4"<<power.readOp.leakage<<endl;
-
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
- power.readOp.leakage += r_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage;
-
- //cout<<"leakage5"<<power.readOp.leakage<<endl;
-
- //inside cam
- power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
-
- power.readOp.leakage += power_cam_all_active.searchOp.leakage;
-
-// cout<<"leakage6"<<power.readOp.leakage<<endl;
-
- //+++Below is gate leakage
- power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
- //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
-
-
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
- power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_bl_precharge_eq_drv.searchOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
-
- //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
-
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage;
+ power.readOp.dynamic += r_predec->power.readOp.dynamic +
+ b_mux_predec->power.readOp.dynamic +
+ sa_mux_lev_1_predec->power.readOp.dynamic +
+ sa_mux_lev_2_predec->power.readOp.dynamic;
+
+ // add energy consumed in decoders
+ power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
+ if (!(is_fa || pure_cam))
+ power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
+
+ // add energy consumed in bitline prechagers, SAs, and bitlines
+ if (!(is_fa || pure_cam)) {
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+ power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+
+ //Add sense amps energy
+ num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ;
+
+ // add energy consumed in bitlines
+ //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
+ power_bitline.readOp.dynamic *= num_subarrays_per_mat *
+ subarray.num_cols;
+ power_bitline.writeOp.dynamic *= num_subarrays_per_mat *
+ subarray.num_cols;
+ //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
+ //Add subarray output energy
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+ }
- //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+ else if (is_fa) {
+ //for plain read/write only one subarray in a mat is active
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
+ + cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+
+ //Add sense amps energy
+ num_sa_subarray = (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram) / deg_bl_muxing;
+ num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing;
+ power_sa.searchOp.dynamic = power_sa.readOp.dynamic *
+ num_sa_subarray_search;
+ power_sa.readOp.dynamic *= num_sa_subarray;
+
+
+ // add energy consumed in bitlines
+ power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
+ power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram);
+ power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram);
+ power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
+
+ //Add subarray output energy
+ power_subarray_out_drv.searchOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+
+ //add energy consumed inside cam
+ power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+ power_searchline_precharge = sl_precharge_eq_drv->power;
+ power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_searchline = sl_data_drv->power;
+ power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
+ subarray.num_cols_fa_cam * num_subarrays_per_mat;;
+ power_matchline_precharge = ml_precharge_drv->power;
+ power_matchline_precharge.searchOp.dynamic =
+ power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
+ power_ml_to_ram_wl_drv.searchOp.dynamic =
+ ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline_precharge.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_matchline_precharge.searchOp.dynamic;
+
+ power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+ //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ } else {
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+ //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
+
+ //Add sense amps energy
+ num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing;
+ power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
+ power_sa.searchOp.dynamic = 0;
+
+ power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
+ power_bitline.searchOp.dynamic = 0;
+ power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
+
+ power_subarray_out_drv.searchOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+
+
+ ////add energy consumed inside cam
+ power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+ power_searchline_precharge = sl_precharge_eq_drv->power;
+ power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_searchline = sl_data_drv->power;
+ power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
+ subarray.num_cols_fa_cam * num_subarrays_per_mat;;
+ power_matchline_precharge = ml_precharge_drv->power;
+ power_matchline_precharge.searchOp.dynamic =
+ power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
+ power_ml_to_ram_wl_drv.searchOp.dynamic =
+ ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power_cam_all_active.searchOp.dynamic =
+ power_matchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline_precharge.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_matchline_precharge.searchOp.dynamic;
+
+ power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+ //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
- //inside cam
- power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+ }
- power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
- }
- else
- {
- int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
- //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+ // calculate leakage power
+ if (!(is_fa || pure_cam)) {
+ int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+ power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP);
+
+ //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+ power.readOp.leakage += power_bitline.readOp.leakage +
+ power_bl_precharge_eq_drv.readOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+ //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+ power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
+ power.readOp.leakage += power_comparator.readOp.leakage;
+
+ //cout<<"leakage1"<<power.readOp.leakage<<endl;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+ power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
+ power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
+ power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
+
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ b_mux_predec->power.readOp.leakage +
+ sa_mux_lev_1_predec->power.readOp.leakage +
+ sa_mux_lev_2_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage +
+ power_bit_mux_decoders.readOp.leakage +
+ power_sa_mux_lev_1_decoders.readOp.leakage +
+ power_sa_mux_lev_2_decoders.readOp.leakage;
+ //cout<<"leakage2"<<power.readOp.leakage<<endl;
+
+ //++++Below is gate leakage
+ power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP);
+
+ //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+ power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+ //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+ power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
+ power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
+
+ //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+ power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
+ power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
+ power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
+
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ b_mux_predec->power.readOp.gate_leakage +
+ sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage +
+ power_bit_mux_decoders.readOp.gate_leakage +
+ power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ power_sa_mux_lev_2_decoders.readOp.gate_leakage;
+ } else if (is_fa) {
+ int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+ power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP + SCHP);
- power.readOp.leakage += //power_bitline.readOp.leakage +
- //power_bl_precharge_eq_drv.readOp.leakage +
- power_bl_precharge_eq_drv.searchOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
+ //cout<<"leakage3"<<power.readOp.leakage<<endl;
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
- power.readOp.leakage += r_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage;
- //inside cam
- power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
- power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+ power.readOp.leakage += power_bitline.readOp.leakage +
+ power_bl_precharge_eq_drv.readOp.leakage +
+ power_bl_precharge_eq_drv.searchOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
- //+++Below is gate leakage
- power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+ //cout<<"leakage4"<<power.readOp.leakage<<endl;
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage;
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+ //cout<<"leakage5"<<power.readOp.leakage<<endl;
- power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
- //power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_bl_precharge_eq_drv.searchOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
+ //inside cam
+ power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_precharge_eq_drv->power.readOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.leakage *=
+ num_subarrays_per_mat;
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage;
+ power.readOp.leakage += power_cam_all_active.searchOp.leakage;
- //inside cam
- power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+// cout<<"leakage6"<<power.readOp.leakage<<endl;
- power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
- }
+ //+++Below is gate leakage
+ power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
+
+
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.searchOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+
+ //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage;
+
+ //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+
+ //inside cam
+ power_cam_all_active.searchOp.gate_leakage =
+ power_matchline.searchOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_precharge_eq_drv->power.readOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.gate_leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+
+ power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+
+ } else {
+ int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+
+ //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP + SCHP);
+
+
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.leakage += //power_bitline.readOp.leakage +
+ //power_bl_precharge_eq_drv.readOp.leakage +
+ power_bl_precharge_eq_drv.searchOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage *
+ subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP);
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage;
+
+ //inside cam
+ power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_precharge_eq_drv->power.readOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+
+ power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+
+ //+++Below is gate leakage
+ power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
+ //power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.searchOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage =
+ row_dec->power.readOp.gate_leakage * subarray.num_rows *
+ num_subarrays_per_mat * (RWP + ERP + EWP);
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage;
+
+ //inside cam
+ power_cam_all_active.searchOp.gate_leakage =
+ power_matchline.searchOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_precharge_eq_drv->power.readOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.gate_leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.gate_leakage *=
+ num_subarrays_per_mat;
+
+ power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+ }
}
diff --git a/ext/mcpat/cacti/mat.h b/ext/mcpat/cacti/mat.h
index 8d038be8b..38200107c 100755
--- a/ext/mcpat/cacti/mat.h
+++ b/ext/mcpat/cacti/mat.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,9 +40,8 @@
#include "subarray.h"
#include "wire.h"
-class Mat : public Component
-{
- public:
+class Mat : public Component {
+public:
Mat(const DynamicParameter & dyn_p);
~Mat();
double compute_delays(double inrisetime); // return outrisetime
@@ -106,8 +106,8 @@ class Mat : public Component
int deg_bl_muxing;
int num_act_mats_hor_dir;
double delay_writeback;
- Area cell,cam_cell;
- bool is_dram,is_fa, pure_cam, camFlag;
+ Area cell, cam_cell;
+ bool is_dram, is_fa, pure_cam, camFlag;
int num_mats;
powerDef power_sa;
double delay_sa;
@@ -127,7 +127,7 @@ class Mat : public Component
uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat
- private:
+private:
double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
double width_write_driver_or_write_mux();
double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w);
diff --git a/ext/mcpat/cacti/nuca.cc b/ext/mcpat/cacti/nuca.cc
index 2aabe843f..e0b4dcdaf 100644
--- a/ext/mcpat/cacti/nuca.cc
+++ b/ext/mcpat/cacti/nuca.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,89 +37,86 @@
#include "Ucache.h"
#include "nuca.h"
-unsigned int MIN_BANKSIZE=65536;
+unsigned int MIN_BANKSIZE = 65536; // not const: sim_nuca repeatedly doubles it when g_ip->assoc > 2
#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */
#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */
#define CONTR_2_BANK_LAT 0 /* constant term added to each per-bank access latency in sim_nuca */
int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */];
- Nuca::Nuca(
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
- ):deviceType(dt)
-{
- init_cont();
+Nuca::Nuca(
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) // stored in deviceType; g_tp.peri_global when omitted
+): deviceType(dt) {
+ init_cont(); // fills cont_stats from contention.dat; exits the process if the file is missing
}
void
-Nuca::init_cont()
-{
- FILE *cont;
- char line[5000];
- char jk[5000];
- cont = fopen("contention.dat", "r");
- if (!cont) {
- cout << "contention.dat file is missing!\n";
- exit(0);
- }
-
- for(int i=0; i<2; i++) {
- for(int j=2; j<5; j++) {
- for(int k=0; k<ROUTER_TYPES; k++) {
- for(int l=0;l<7; l++) {
- int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
- assert(fscanf(cont, "%[^\n]\n", line) != EOF);
- sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d",jk, &temp[0], &temp[1], &temp[2], &temp[3],
- &temp[4], &temp[5], &temp[6], &temp[7]);
+Nuca::init_cont() {
+ // Fills cont_stats from the text file "contention.dat": one line of the
+ // form "<label>: v0 v1 ... v7" is consumed per
+ // [level 0..1][core index 2..4][router type][bank index 0..6] entry.
+ // Prints a message and exits if the file cannot be opened.
+ FILE *cont;
+ char line[5000];
+ char jk[5000];
+ cont = fopen("contention.dat", "r");
+ if (!cont) {
+ cout << "contention.dat file is missing!\n";
+ exit(0);
+ }
+
+ for (int i = 0; i < 2; i++) {
+ for (int j = 2; j < 5; j++) {
+ for (int k = 0; k < ROUTER_TYPES; k++) {
+ for (int l = 0; l < 7; l++) {
+ int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
+ // Read the line outside assert(): when NDEBUG is defined the
+ // assert argument is never evaluated, which would skip the
+ // read entirely. The width keeps the scan inside line[5000].
+ const int line_status = fscanf(cont, "%4999[^\n]\n", line);
+ assert(line_status != EOF);
+ (void)line_status; // unused when NDEBUG removes the assert
+ sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk,
+ &temp[0], &temp[1], &temp[2], &temp[3],
+ &temp[4], &temp[5], &temp[6], &temp[7]);
+ }
+ }
}
- }
}
- }
- fclose(cont);
+ fclose(cont);
}
- void
-Nuca::print_cont_stats()
-{
- for(int i=0; i<2; i++) {
- for(int j=2; j<5; j++) {
- for(int k=0; k<ROUTER_TYPES; k++) {
- for(int l=0;l<7; l++) {
- for(int m=0;l<7; l++) {
- cout << cont_stats[i][j][k][l][m] << " ";
- }
- cout << endl;
+void
+Nuca::print_cont_stats() {
+ // Dumps the contention table to stdout: one output line per
+ // [level][core index 2..4][router type][bank index] entry, holding the
+ // 8 values of the last cont_stats dimension separated by spaces.
+ for (int i = 0; i < 2; i++) {
+ for (int j = 2; j < 5; j++) {
+ for (int k = 0; k < ROUTER_TYPES; k++) {
+ for (int l = 0; l < 7; l++) {
+ // Walk the last dimension with m itself. Testing and
+ // incrementing l here printed only column 0 and pushed the
+ // bank loop to its end after the first row.
+ for (int m = 0; m < 8; m++) {
+ cout << cont_stats[i][j][k][l][m] << " ";
+ }
+ cout << endl;
+ }
+ }
}
- }
}
- }
- cout << endl;
+ cout << endl;
}
-Nuca::~Nuca(){
- for (int i = wt_min; i <= wt_max; i++) {
- delete wire_vertical[i];
- delete wire_horizontal[i];
- }
+Nuca::~Nuca() {
+ // Free the vertical/horizontal Wire pair held for every wire type
+ // in the inclusive range [wt_min, wt_max].
+ int wire_type = wt_min;
+ while (wire_type <= wt_max) {
+ delete wire_vertical[wire_type];
+ delete wire_horizontal[wire_type];
+ ++wire_type;
+ }
}
/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
- int
-Nuca::calc_cycles(double lat, double oper_freq)
-{
- //TODO: convert latch delay to FO4 */
- double cycle_time = (1.0/(oper_freq*1e9)); /*s*/
- cycle_time -= LATCH_DELAY;
- cycle_time -= FIXED_OVERHEAD;
-
- return (int)ceil(lat/cycle_time);
+int
+Nuca::calc_cycles(double lat, double oper_freq) {
+ // TODO: express the latch delay in FO4 units
+ // Usable time per clock: the period at oper_freq (GHz) less the latch
+ // delay and the fixed skew/jitter overhead, all in seconds.
+ const double clock_period = 1.0 / (oper_freq * 1e9);
+ const double usable_time = clock_period - LATCH_DELAY - FIXED_OVERHEAD;
+
+ // Round up to a whole number of cycles.
+ return (int)ceil(lat / usable_time);
}
nuca_org_t::~nuca_org_t() {
- // if(h_wire) delete h_wire;
- // if(v_wire) delete v_wire;
- // if(router) delete router;
+ // if(h_wire) delete h_wire; // NOTE(review): sim_nuca points h_wire/v_wire at
+ // if(v_wire) delete v_wire; // wire_horizontal[]/wire_vertical[], which ~Nuca deletes,
+ // if(router) delete router; // and router at a shared router_s[] entry -- presumably why these stay disabled; confirm
}
/*
@@ -137,476 +135,477 @@ nuca_org_t::~nuca_org_t() {
* Finally include contention statistics and find the optimal
* NUCA configuration
*/
- void
-Nuca::sim_nuca()
-{
- /* temp variables */
- int it, ro, wr;
- int num_cyc;
- unsigned int i, j, k;
- unsigned int r, c;
- int l2_c;
- int bank_count = 0;
- uca_org_t ures;
- nuca_org_t *opt_n;
- mem_array tag, data;
- list<nuca_org_t *> nuca_list;
- Router *router_s[ROUTER_TYPES];
- router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
- router_s[0]->print_router();
- router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
- router_s[1]->print_router();
- router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
- router_s[2]->print_router();
-
- int core_in; // to store no. of cores
-
- /* to search diff grid organizations */
- double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
- curr_acclat;
- double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
- avg_leakage_power;
-
- double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
- int opt_rows = 0;
- int opt_columns = 0;
- double opt_totno_hops = 0;
- double opt_avg_hop = 0;
- double opt_dyn_power = 0, opt_leakage_power = 0;
- min_values_t minval;
-
- int bank_start = 0;
-
- int flit_width = 0;
-
- /* vertical and horizontal hop latency values */
- int ver_hop_lat, hor_hop_lat; /* in cycles */
-
-
- /* no. of different bank sizes to consider */
- int iterations;
-
-
- g_ip->nuca_cache_sz = g_ip->cache_sz;
- nuca_list.push_back(new nuca_org_t());
-
- if (g_ip->cache_level == 0) l2_c = 1;
- else l2_c = 0;
-
- if (g_ip->cores <= 4) core_in = 2;
- else if (g_ip->cores <= 8) core_in = 3;
- else if (g_ip->cores <= 16) core_in = 4;
- else {cout << "Number of cores should be <= 16!\n"; exit(0);}
-
-
- // set the lower bound to an appropriate value. this depends on cache associativity
- if (g_ip->assoc > 2) {
- i = 2;
- while (i != g_ip->assoc) {
- MIN_BANKSIZE *= 2;
- i *= 2;
- }
- }
-
- iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE);
-
- if (g_ip->force_wiretype)
- {
- if (g_ip->wt == Low_swing) {
- wt_min = Low_swing;
- wt_max = Low_swing;
- }
+void
+Nuca::sim_nuca() {
+ /* temp variables */
+ int it, ro, wr;
+ int num_cyc;
+ unsigned int i, j, k;
+ unsigned int r, c;
+ int l2_c;
+ int bank_count = 0;
+ uca_org_t ures;
+ nuca_org_t *opt_n;
+ mem_array tag, data;
+ list<nuca_org_t *> nuca_list;
+ Router *router_s[ROUTER_TYPES];
+ router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
+ router_s[0]->print_router();
+ router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
+ router_s[1]->print_router();
+ router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
+ router_s[2]->print_router();
+
+ int core_in; // to store no. of cores
+
+ /* to search diff grid organizations */
+ double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
+ curr_acclat;
+ double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
+ avg_leakage_power;
+
+ double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
+ int opt_rows = 0;
+ int opt_columns = 0;
+ double opt_totno_hops = 0;
+ double opt_avg_hop = 0;
+ double opt_dyn_power = 0, opt_leakage_power = 0;
+ min_values_t minval;
+
+ int bank_start = 0;
+
+ int flit_width = 0;
+
+ /* vertical and horizontal hop latency values */
+ int ver_hop_lat, hor_hop_lat; /* in cycles */
+
+
+ /* no. of different bank sizes to consider */
+ int iterations;
+
+
+ g_ip->nuca_cache_sz = g_ip->cache_sz;
+ nuca_list.push_back(new nuca_org_t());
+
+ if (g_ip->cache_level == 0) l2_c = 1;
+ else l2_c = 0;
+
+ if (g_ip->cores <= 4) core_in = 2;
+ else if (g_ip->cores <= 8) core_in = 3;
+ else if (g_ip->cores <= 16) core_in = 4;
else {
- wt_min = Global;
- wt_max = Low_swing-1;
+ cout << "Number of cores should be <= 16!\n";
+ exit(0);
}
- }
- else {
- wt_min = Global;
- wt_max = Low_swing;
- }
- if (g_ip->nuca_bank_count != 0) { // simulate just one bank
- if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
- g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
- g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
- fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n");
- }
- bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
- iterations = bank_start+1;
- g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count;
- }
- cout << "Simulating various NUCA configurations\n";
- for (it=bank_start; it<iterations; it++) { /* different bank count values */
- ures.tag_array2 = &tag;
- ures.data_array2 = &data;
- /*
- * find the optimal bank organization
- */
- solve(&ures);
-// output_UCA(&ures);
- bank_count = g_ip->nuca_cache_sz/g_ip->cache_sz;
- cout << "====" << g_ip->cache_sz << "\n";
-
- for (wr=wt_min; wr<=wt_max; wr++) {
-
- for (ro=0; ro<ROUTER_TYPES; ro++)
- {
- flit_width = (int) router_s[ro]->flit_size; //initialize router
- nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
-
- /* calculate router and wire parameters */
-
- double vlength = ures.cache_ht; /* length of the wire (u)*/
- double hlength = ures.cache_len; // u
- /* find delay, area, and power for wires */
- wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
- wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
+ // set the lower bound to an appropriate value. this depends on cache associativity
+ if (g_ip->assoc > 2) {
+ i = 2;
+ while (i != g_ip->assoc) {
+ MIN_BANKSIZE *= 2;
+ i *= 2;
+ }
+ }
- hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
- ver_hop_lat = calc_cycles(wire_vertical[wr]->delay,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
+ iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE);
+ if (g_ip->force_wiretype) {
+ if (g_ip->wt == Low_swing) {
+ wt_min = Low_swing;
+ wt_max = Low_swing;
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing - 1;
+ }
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing;
+ }
+ if (g_ip->nuca_bank_count != 0) { // simulate just one bank
+ // Warn when the requested bank count is not one of 2, 4, 8, 16, 32, 64.
+ // Execution continues after the warning.
+ if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
+ g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
+ g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
+ // The two literals must be adjacent so they concatenate into one
+ // format string; a comma between them passes the second half as an
+ // unused argument and it is never printed.
+ fprintf(stderr, "Incorrect bank count value! Please fix the "
+ "value in cache.cfg\n");
+ }
+ bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
+ iterations = bank_start + 1;
+ g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count;
+ }
+ cout << "Simulating various NUCA configurations\n";
+ for (it = bank_start; it < iterations; it++) {
+ /* different bank count values */
+ ures.tag_array2 = &tag;
+ ures.data_array2 = &data;
/*
- * assume a grid like topology and explore for optimal network
- * configuration using different row and column count values.
+ * find the optimal bank organization
*/
- for (c=1; c<=(unsigned int)bank_count; c++) {
- while (bank_count%c != 0) c++;
- r = bank_count/c;
-
- /*
- * to find the avg access latency of a NUCA cache, uncontended
- * access time to each bank from the
- * cache controller is calculated.
- * avg latency =
- * sum of the access latencies to individual banks)/bank
- * count value.
- */
- totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
- k = 1;
- for (i=0; i<r; i++) {
- for (j=0; j<c; j++) {
- /*
- * vertical hops including the
- * first hop from the cache controller
- */
- curr_hop = i + 1;
- curr_hop += j; /* horizontal hops */
- totno_hhops += j;
- totno_vhops += (i+1);
- curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
- j * hor_hop_lat);
-
- tot_lat += curr_acclat;
- totno_hops += curr_hop;
+ solve(&ures);
+// output_UCA(&ures);
+ bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz;
+ cout << "====" << g_ip->cache_sz << "\n";
+
+ for (wr = wt_min; wr <= wt_max; wr++) {
+
+ for (ro = 0; ro < ROUTER_TYPES; ro++) {
+ flit_width = (int) router_s[ro]->flit_size; //initialize router
+ nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
+
+ /* calculate router and wire parameters */
+
+ double vlength = ures.cache_ht; /* length of the wire (u)*/
+ double hlength = ures.cache_len; // u
+
+ /* find delay, area, and power for wires */
+ wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
+ wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
+
+
+ hor_hop_lat =
+ calc_cycles(wire_horizontal[wr]->delay,
+ 1 /(nuca_list.back()->nuca_pda.cycle_time *
+ .001));
+ ver_hop_lat =
+ calc_cycles(wire_vertical[wr]->delay,
+ 1 / (nuca_list.back()->nuca_pda.cycle_time *
+ .001));
+
+ /*
+ * assume a grid like topology and explore for optimal network
+ * configuration using different row and column count values.
+ */
+ for (c = 1; c <= (unsigned int)bank_count; c++) {
+ while (bank_count % c != 0) c++;
+ r = bank_count / c;
+
+ /*
+ * to find the avg access latency of a NUCA cache, uncontended
+ * access time to each bank from the
+ * cache controller is calculated.
+ * avg latency =
+ * sum of the access latencies to individual banks)/bank
+ * count value.
+ */
+ totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
+ k = 1;
+ for (i = 0; i < r; i++) {
+ for (j = 0; j < c; j++) {
+ /*
+ * vertical hops including the
+ * first hop from the cache controller
+ */
+ curr_hop = i + 1;
+ curr_hop += j; /* horizontal hops */
+ totno_hhops += j;
+ totno_vhops += (i + 1);
+ curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
+ j * hor_hop_lat);
+
+ tot_lat += curr_acclat;
+ totno_hops += curr_hop;
+ }
+ }
+ avg_lat = tot_lat / bank_count;
+ avg_hop = totno_hops / bank_count;
+ avg_hhop = totno_hhops / bank_count;
+ avg_vhop = totno_vhops / bank_count;
+
+ /* net access latency */
+ curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay *
+ avg_hop) +
+ calc_cycles(ures.access_time,
+ 1 /
+ (nuca_list.back()->nuca_pda.cycle_time *
+ .001));
+
+ /* avg access lat of nuca */
+ avg_dyn_power =
+ avg_hop *
+ (router_s[ro]->power.readOp.dynamic) + avg_hhop *
+ (wire_horizontal[wr]->power.readOp.dynamic) *
+ (g_ip->block_sz * 8 + 64) + avg_vhop *
+ (wire_vertical[wr]->power.readOp.dynamic) *
+ (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic;
+
+ avg_leakage_power =
+ bank_count * router_s[ro]->power.readOp.leakage +
+ avg_hhop * (wire_horizontal[wr]->power.readOp.leakage *
+ wire_horizontal[wr]->delay) * flit_width +
+ avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
+ wire_horizontal[wr]->delay);
+
+ if (curr_acclat < opt_acclat) {
+ opt_acclat = curr_acclat;
+ opt_tot_lat = tot_lat;
+ opt_avg_lat = avg_lat;
+ opt_totno_hops = totno_hops;
+ opt_avg_hop = avg_hop;
+ opt_rows = r;
+ opt_columns = c;
+ opt_dyn_power = avg_dyn_power;
+ opt_leakage_power = avg_leakage_power;
+ }
+ totno_hops = 0;
+ tot_lat = 0;
+ totno_hhops = 0;
+ totno_vhops = 0;
+ }
+ nuca_list.back()->wire_pda.power.readOp.dynamic =
+ opt_avg_hop * flit_width *
+ (wire_horizontal[wr]->power.readOp.dynamic +
+ wire_vertical[wr]->power.readOp.dynamic);
+ nuca_list.back()->avg_hops = opt_avg_hop;
+ /* network delay/power */
+ nuca_list.back()->h_wire = wire_horizontal[wr];
+ nuca_list.back()->v_wire = wire_vertical[wr];
+ nuca_list.back()->router = router_s[ro];
+ /* bank delay/power */
+
+ nuca_list.back()->bank_pda.delay = ures.access_time;
+ nuca_list.back()->bank_pda.power = ures.power;
+ nuca_list.back()->bank_pda.area.h = ures.cache_ht;
+ nuca_list.back()->bank_pda.area.w = ures.cache_len;
+ nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
+
+ num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
+ 1 /
+ (nuca_list.back()->nuca_pda.cycle_time *
+ .001/*GHz*/));
+ if (num_cyc % 2 != 0) num_cyc++;
+ if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
+
+ if (it < 7) {
+ nuca_list.back()->nuca_pda.delay = opt_acclat +
+ cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
+ nuca_list.back()->contention =
+ cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
+ } else {
+ nuca_list.back()->nuca_pda.delay = opt_acclat +
+ cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
+ nuca_list.back()->contention =
+ cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
+ }
+ nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
+ nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
+
+ /* array organization */
+ nuca_list.back()->bank_count = bank_count;
+ nuca_list.back()->rows = opt_rows;
+ nuca_list.back()->columns = opt_columns;
+ calculate_nuca_area (nuca_list.back());
+
+ minval.update_min_values(nuca_list.back());
+ nuca_list.push_back(new nuca_org_t());
+ opt_acclat = BIGNUM;
+
}
- }
- avg_lat = tot_lat/bank_count;
- avg_hop = totno_hops/bank_count;
- avg_hhop = totno_hhops/bank_count;
- avg_vhop = totno_vhops/bank_count;
-
- /* net access latency */
- curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) +
- calc_cycles(ures.access_time,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
-
- /* avg access lat of nuca */
- avg_dyn_power =
- avg_hop *
- (router_s[ro]->power.readOp.dynamic) + avg_hhop *
- (wire_horizontal[wr]->power.readOp.dynamic) *
- (g_ip->block_sz*8 + 64) + avg_vhop *
- (wire_vertical[wr]->power.readOp.dynamic) *
- (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic;
-
- avg_leakage_power =
- bank_count * router_s[ro]->power.readOp.leakage +
- avg_hhop * (wire_horizontal[wr]->power.readOp.leakage*
- wire_horizontal[wr]->delay) * flit_width +
- avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
- wire_horizontal[wr]->delay);
-
- if (curr_acclat < opt_acclat) {
- opt_acclat = curr_acclat;
- opt_tot_lat = tot_lat;
- opt_avg_lat = avg_lat;
- opt_totno_hops = totno_hops;
- opt_avg_hop = avg_hop;
- opt_rows = r;
- opt_columns = c;
- opt_dyn_power = avg_dyn_power;
- opt_leakage_power = avg_leakage_power;
- }
- totno_hops = 0;
- tot_lat = 0;
- totno_hhops = 0;
- totno_vhops = 0;
}
- nuca_list.back()->wire_pda.power.readOp.dynamic =
- opt_avg_hop * flit_width *
- (wire_horizontal[wr]->power.readOp.dynamic +
- wire_vertical[wr]->power.readOp.dynamic);
- nuca_list.back()->avg_hops = opt_avg_hop;
- /* network delay/power */
- nuca_list.back()->h_wire = wire_horizontal[wr];
- nuca_list.back()->v_wire = wire_vertical[wr];
- nuca_list.back()->router = router_s[ro];
- /* bank delay/power */
-
- nuca_list.back()->bank_pda.delay = ures.access_time;
- nuca_list.back()->bank_pda.power = ures.power;
- nuca_list.back()->bank_pda.area.h = ures.cache_ht;
- nuca_list.back()->bank_pda.area.w = ures.cache_len;
- nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
-
- num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/));
- if(num_cyc%2 != 0) num_cyc++;
- if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
-
- if (it < 7) {
- nuca_list.back()->nuca_pda.delay = opt_acclat +
- cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
- nuca_list.back()->contention =
- cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
- }
- else {
- nuca_list.back()->nuca_pda.delay = opt_acclat +
- cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
- nuca_list.back()->contention =
- cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
- }
- nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
- nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
+ g_ip->cache_sz /= 2;
+ }
- /* array organization */
- nuca_list.back()->bank_count = bank_count;
- nuca_list.back()->rows = opt_rows;
- nuca_list.back()->columns = opt_columns;
- calculate_nuca_area (nuca_list.back());
+ delete(nuca_list.back());
+ nuca_list.pop_back();
+ opt_n = find_optimal_nuca(&nuca_list, &minval);
+ print_nuca(opt_n);
+ g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count;
- minval.update_min_values(nuca_list.back());
- nuca_list.push_back(new nuca_org_t());
- opt_acclat = BIGNUM;
+ list<nuca_org_t *>::iterator niter;
+ for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) {
+ delete *niter;
+ }
+ nuca_list.clear();
- }
+ for (int i = 0; i < ROUTER_TYPES; i++) {
+ delete router_s[i];
}
- g_ip->cache_sz /= 2;
- }
-
- delete(nuca_list.back());
- nuca_list.pop_back();
- opt_n = find_optimal_nuca(&nuca_list, &minval);
- print_nuca(opt_n);
- g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count;
-
- list<nuca_org_t *>::iterator niter;
- for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter)
- {
- delete *niter;
- }
- nuca_list.clear();
-
- for(int i=0; i < ROUTER_TYPES; i++)
- {
- delete router_s[i];
- }
- g_ip->display_ip();
- // g_ip->force_cache_config = true;
- // g_ip->ndwl = 8;
- // g_ip->ndbl = 16;
- // g_ip->nspd = 4;
- // g_ip->ndcm = 1;
- // g_ip->ndsam1 = 8;
- // g_ip->ndsam2 = 32;
+ g_ip->display_ip();
+ // g_ip->force_cache_config = true;
+ // g_ip->ndwl = 8;
+ // g_ip->ndbl = 16;
+ // g_ip->nspd = 4;
+ // g_ip->ndcm = 1;
+ // g_ip->ndsam1 = 8;
+ // g_ip->ndsam2 = 32;
}
- void
-Nuca::print_nuca (nuca_org_t *fr)
-{
- printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
- "----------\n\n");
- printf("Optimal number of banks - %d\n", fr->bank_count);
- printf("Grid organization rows x columns - %d x %d\n",
- fr->rows, fr->columns);
- printf("Network frequency - %g GHz\n",
- (1/fr->nuca_pda.cycle_time)*1e3);
- printf("Cache dimension (mm x mm) - %g x %g\n",
- fr->nuca_pda.area.h,
- fr->nuca_pda.area.w);
-
- fr->router->print_router();
-
- printf("\n\nWire stats:\n");
- if (fr->h_wire->wt == Global) {
- printf("\tWire type - Full swing global wires with least "
- "possible delay\n");
- }
- else if (fr->h_wire->wt == Global_5) {
- printf("\tWire type - Full swing global wires with "
- "5%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_10) {
- printf("\tWire type - Full swing global wires with "
- "10%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_20) {
- printf("\tWire type - Full swing global wires with "
- "20%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_30) {
- printf("\tWire type - Full swing global wires with "
- "30%% delay penalty\n");
- }
- else if(fr->h_wire->wt == Low_swing) {
- printf("\tWire type - Low swing wires\n");
- }
-
- printf("\tHorizontal link delay - %g (ns)\n",
- fr->h_wire->delay*1e9);
- printf("\tVertical link delay - %g (ns)\n",
- fr->v_wire->delay*1e9);
- printf("\tDelay/length - %g (ns/mm)\n",
- fr->h_wire->delay*1e9/fr->bank_pda.area.w);
- printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
- "\t -leakage %g (nW)\n\n",
- fr->h_wire->power.readOp.dynamic*1e9,
- fr->h_wire->power.readOp.leakage*1e9);
- printf("\tVertical link energy -dynamic/access %g (nJ)\n"
- "\t -leakage %g (nW)\n\n",
- fr->v_wire->power.readOp.dynamic*1e9,
- fr->v_wire->power.readOp.leakage*1e9);
- printf("\n\n");
- fr->v_wire->print_wire();
- printf("\n\nBank stats:\n");
+void
+Nuca::print_nuca (nuca_org_t *fr) {
+ printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
+ "----------\n\n");
+ printf("Optimal number of banks - %d\n", fr->bank_count);
+ printf("Grid organization rows x columns - %d x %d\n",
+ fr->rows, fr->columns);
+ printf("Network frequency - %g GHz\n",
+ (1 / fr->nuca_pda.cycle_time)*1e3);
+ printf("Cache dimension (mm x mm) - %g x %g\n",
+ fr->nuca_pda.area.h,
+ fr->nuca_pda.area.w);
+
+ fr->router->print_router();
+
+ printf("\n\nWire stats:\n");
+ if (fr->h_wire->wt == Global) {
+ printf("\tWire type - Full swing global wires with least "
+ "possible delay\n");
+ } else if (fr->h_wire->wt == Global_5) {
+ printf("\tWire type - Full swing global wires with "
+ "5%% delay penalty\n");
+ } else if (fr->h_wire->wt == Global_10) {
+ printf("\tWire type - Full swing global wires with "
+ "10%% delay penalty\n");
+ } else if (fr->h_wire->wt == Global_20) {
+ printf("\tWire type - Full swing global wires with "
+ "20%% delay penalty\n");
+ } else if (fr->h_wire->wt == Global_30) {
+ printf("\tWire type - Full swing global wires with "
+ "30%% delay penalty\n");
+ } else if (fr->h_wire->wt == Low_swing) {
+ printf("\tWire type - Low swing wires\n");
+ }
+
+ printf("\tHorizontal link delay - %g (ns)\n",
+ fr->h_wire->delay*1e9);
+ printf("\tVertical link delay - %g (ns)\n",
+ fr->v_wire->delay*1e9);
+ printf("\tDelay/length - %g (ns/mm)\n",
+ fr->h_wire->delay*1e9 / fr->bank_pda.area.w);
+ printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
+ "\t -leakage %g (nW)\n\n",
+ fr->h_wire->power.readOp.dynamic*1e9,
+ fr->h_wire->power.readOp.leakage*1e9);
+ printf("\tVertical link energy -dynamic/access %g (nJ)\n"
+ "\t -leakage %g (nW)\n\n",
+ fr->v_wire->power.readOp.dynamic*1e9,
+ fr->v_wire->power.readOp.leakage*1e9);
+ printf("\n\n");
+ fr->v_wire->print_wire();
+ printf("\n\nBank stats:\n");
}
- nuca_org_t *
-Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
-{
- double cost = 0;
- double min_cost = BIGNUM;
- nuca_org_t *res = NULL;
- float d, a, dp, lp, c;
- int v;
- dp = g_ip->dynamic_power_wt_nuca;
- lp = g_ip->leakage_power_wt_nuca;
- a = g_ip->area_wt_nuca;
- d = g_ip->delay_wt_nuca;
- c = g_ip->cycle_time_wt_nuca;
-
- list<nuca_org_t *>::iterator niter;
-
-
- for (niter = n->begin(); niter != n->end(); niter++) {
- fprintf(stderr, "\n-----------------------------"
- "---------------\n");
-
-
- printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
- "bank_dpower = %g \tleak = %g \tcycle = %g\n",
- (*niter)->bank_count,
- (*niter)->nuca_pda.delay,
- (*niter)->nuca_pda.power.readOp.dynamic,
- (*niter)->h_wire->wt,
- (*niter)->bank_pda.power.readOp.dynamic,
- (*niter)->nuca_pda.power.readOp.leakage,
- (*niter)->nuca_pda.cycle_time);
-
-
- if (g_ip->ed == 1) {
- cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
- }
- }
- else if (g_ip->ed == 2) {
- cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
- }
- }
- else {
- /*
- * check whether the current organization
- * meets the input deviation constraints
- */
- v = check_nuca_org((*niter), minval);
- if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
-
- if (v) {
- cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) +
- c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) +
- dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) +
- lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) +
- a * ((*niter)->nuca_pda.area.get_area()/minval->min_area));
- fprintf(stderr, "cost = %g\n", cost);
-
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
+nuca_org_t *
+Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) {
+ double cost = 0;
+ double min_cost = BIGNUM;
+ nuca_org_t *res = NULL;
+ float d, a, dp, lp, c;
+ int v;
+ dp = g_ip->dynamic_power_wt_nuca;
+ lp = g_ip->leakage_power_wt_nuca;
+ a = g_ip->area_wt_nuca;
+ d = g_ip->delay_wt_nuca;
+ c = g_ip->cycle_time_wt_nuca;
+
+ list<nuca_org_t *>::iterator niter;
+
+
+ for (niter = n->begin(); niter != n->end(); niter++) {
+ fprintf(stderr, "\n-----------------------------"
+ "---------------\n");
+
+
+ printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
+ "bank_dpower = %g \tleak = %g \tcycle = %g\n",
+ (*niter)->bank_count,
+ (*niter)->nuca_pda.delay,
+ (*niter)->nuca_pda.power.readOp.dynamic,
+ (*niter)->h_wire->wt,
+ (*niter)->bank_pda.power.readOp.dynamic,
+ (*niter)->nuca_pda.power.readOp.leakage,
+ (*niter)->nuca_pda.cycle_time);
+
+
+ if (g_ip->ed == 1) {
+ cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
+ ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ } else if (g_ip->ed == 2) {
+ cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
+ ((*niter)->nuca_pda.delay / minval->min_delay) *
+ ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ } else {
+ /*
+ * check whether the current organization
+ * meets the input deviation constraints
+ */
+ v = check_nuca_org((*niter), minval);
+ if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
+
+ if (v) {
+ cost = (d * ((*niter)->nuca_pda.delay / minval->min_delay) +
+ c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) +
+ dp * ((*niter)->nuca_pda.power.readOp.dynamic /
+ minval->min_dyn) +
+ lp * ((*niter)->nuca_pda.power.readOp.leakage /
+ minval->min_leakage) +
+ a * ((*niter)->nuca_pda.area.get_area() /
+ minval->min_area));
+ fprintf(stderr, "cost = %g\n", cost);
+
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ } else {
+ niter = n->erase(niter);
+ if (niter != n->begin())
+ niter --;
+ }
}
- }
- else {
- niter = n->erase(niter);
- if (niter !=n->begin())
- niter --;
- }
}
- }
- return res;
+ return res;
}
- int
-Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
-{
- if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev_nuca) {
- return 0;
- }
- return 1;
+int
+Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) {
+ if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) >
+ g_ip->delay_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) /
+ minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) /
+ minval->min_leakage)*100 >
+ g_ip->leakage_power_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
+ g_ip->cycle_time_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) *
+ 100 >
+ g_ip->area_dev_nuca) {
+ return 0;
+ }
+ return 1;
}
- void
-Nuca::calculate_nuca_area (nuca_org_t *nuca)
-{
- nuca->nuca_pda.area.h=
- nuca->rows * ((nuca->h_wire->wire_width +
- nuca->h_wire->wire_spacing)
- * nuca->router->flit_size +
- nuca->bank_pda.area.h);
-
- nuca->nuca_pda.area.w =
- nuca->columns * ((nuca->v_wire->wire_width +
- nuca->v_wire->wire_spacing)
- * nuca->router->flit_size +
- nuca->bank_pda.area.w);
+void
+Nuca::calculate_nuca_area (nuca_org_t *nuca) {
+ nuca->nuca_pda.area.h =
+ nuca->rows * ((nuca->h_wire->wire_width +
+ nuca->h_wire->wire_spacing)
+ * nuca->router->flit_size +
+ nuca->bank_pda.area.h);
+
+ nuca->nuca_pda.area.w =
+ nuca->columns * ((nuca->v_wire->wire_width +
+ nuca->v_wire->wire_spacing)
+ * nuca->router->flit_size +
+ nuca->bank_pda.area.w);
}
diff --git a/ext/mcpat/cacti/nuca.h b/ext/mcpat/cacti/nuca.h
index adfe32564..38cca6f70 100644
--- a/ext/mcpat/cacti/nuca.h
+++ b/ext/mcpat/cacti/nuca.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -46,8 +47,8 @@
#include "wire.h"
class nuca_org_t {
- public:
- ~nuca_org_t();
+public:
+ ~nuca_org_t();
// int size;
/* area, power, access time, and cycle time stats */
Component nuca_pda;
@@ -71,9 +72,8 @@ class nuca_org_t {
-class Nuca : public Component
-{
- public:
+class Nuca : public Component {
+public:
Nuca(
TechnologyParameter::DeviceType *dt);
void print_router();
@@ -87,12 +87,12 @@ class Nuca : public Component
void print_nuca(nuca_org_t *n);
void print_cont_stats();
- private:
+private:
TechnologyParameter::DeviceType *deviceType;
int wt_min, wt_max;
Wire *wire_vertical[WIRE_TYPES],
- *wire_horizontal[WIRE_TYPES];
+ *wire_horizontal[WIRE_TYPES];
};
diff --git a/ext/mcpat/cacti/parameter.cc b/ext/mcpat/cacti/parameter.cc
index b71640c19..f7184d8a9 100644
--- a/ext/mcpat/cacti/parameter.cc
+++ b/ext/mcpat/cacti/parameter.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -46,147 +47,141 @@ TechnologyParameter g_tp;
-void TechnologyParameter::DeviceType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
- cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl;
- cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
- cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl;
- cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl;
- cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl;
- cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl;
- cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl;
- cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl;
- cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl;
- cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl;
- cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl;
- cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl;
- cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl;
- cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl;
- cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl;
- cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
+void TechnologyParameter::DeviceType::display(uint32_t indent) {
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
+ cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl;
+ cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
+ cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl;
+ cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl;
+ cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl;
+ cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl;
+ cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl;
+ cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl;
+ cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl;
+ cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl;
+ cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl;
+ cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl;
+ cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl;
+ cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl;
+ cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl;
+ cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
}
-void TechnologyParameter::InterconnectType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
+void TechnologyParameter::InterconnectType::display(uint32_t indent) {
+ string indent_str(indent, ' ');
- cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
- cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
- cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
+ cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
+ cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
+ cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
}
-void TechnologyParameter::ScalingFactor::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
+void TechnologyParameter::ScalingFactor::display(uint32_t indent) {
+ string indent_str(indent, ' ');
- cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
- cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
+ cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
+ cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
}
-void TechnologyParameter::MemoryType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
+void TechnologyParameter::MemoryType::display(uint32_t indent) {
+ string indent_str(indent, ' ');
- cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
- cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl;
- cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl;
- cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
- cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
- cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl;
+ cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
+ cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl;
+ cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl;
+ cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
+ cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
+ cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl;
}
-void TechnologyParameter::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
- cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl;
- cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl;
- cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl;
- cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl;
- cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl;
- cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl;
- cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl;
- cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl;
- cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl;
- cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl;
- cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl;
- cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl;
- cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl;
- cout << endl;
- cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl;
- cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl;
- cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl;
- cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl;
- cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl;
- cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl;
- cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl;
- cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl;
- cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl;
- cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl;
- cout << endl;
- cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl;
- cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl;
- cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
- cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl;
- cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl;
- cout << endl;
- cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl;
- cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl;
- cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl;
- cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl;
- cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
- cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl;
- cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl;
-
- cout << endl;
- cout << indent_str << "SRAM cell transistor: " << endl;
- sram_cell.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM access transistor: " << endl;
- dram_acc.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM wordline transistor: " << endl;
- dram_wl.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "peripheral global transistor: " << endl;
- peri_global.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire local" << endl;
- wire_local.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire inside mat" << endl;
- wire_inside_mat.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire outside mat" << endl;
- wire_outside_mat.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "SRAM" << endl;
- sram.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM" << endl;
- dram.display(indent + 2);
+void TechnologyParameter::display(uint32_t indent) {
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
+ cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl;
+ cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl;
+ cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl;
+ cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl;
+ cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl;
+ cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl;
+ cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl;
+ cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl;
+ cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl;
+ cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl;
+ cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl;
+ cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl;
+ cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl;
+ cout << endl;
+ cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl;
+ cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl;
+ cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl;
+ cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl;
+ cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl;
+ cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl;
+ cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl;
+ cout << endl;
+ cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl;
+ cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl;
+ cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
+ cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl;
+ cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl;
+ cout << endl;
+ cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl;
+ cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl;
+ cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl;
+ cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl;
+ cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
+ cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl;
+ cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl;
+
+ cout << endl;
+ cout << indent_str << "SRAM cell transistor: " << endl;
+ sram_cell.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM access transistor: " << endl;
+ dram_acc.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM wordline transistor: " << endl;
+ dram_wl.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "peripheral global transistor: " << endl;
+ peri_global.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire local" << endl;
+ wire_local.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire inside mat" << endl;
+ wire_inside_mat.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire outside mat" << endl;
+ wire_outside_mat.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "SRAM" << endl;
+ sram.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM" << endl;
+ dram.display(indent + 2);
}
DynamicParameter::DynamicParameter():
- use_inp_params(0), cell(), is_valid(true)
-{
+ use_inp_params(0), cell(), is_valid(true) {
}
@@ -202,512 +197,433 @@ DynamicParameter::DynamicParameter(
unsigned int Ndsam_lev_1_,
unsigned int Ndsam_lev_2_,
bool is_main_mem_):
- is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_),
- Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
- number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
- is_main_mem(is_main_mem_), cell(), is_valid(false)
-{
- ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
- is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
-
- unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer
- const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
- fully_assoc = (g_ip->fully_assoc) ? true : false;
-
- if (fully_assoc || pure_cam)
- { // fully-assocative cache -- ref: CACTi 2.0 report
- if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
- Ndcm != 1 || //Ndcm is fixed to 1 for FA
- Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
- Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
- Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
- Ndbl < 2)
- {
- return;
- }
- }
-
- if ((is_dram) && (!is_tag) && (Ndcm > 1))
- {
- return; // For a DRAM array, each bitline has its own sense-amp
- }
-
- // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
- // at least two because an array is assumed to have at least one mat. And a mat
- // is formed out of two horizontal subarrays and two vertical subarrays
- if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1))
- {
- return;
- }
-
- //***********compute row, col of an subarray
- if (!(fully_assoc || pure_cam))//Not fully_asso nor cam
- {
- // if data array, let tagbits = 0
- if (is_tag)
- {
- if (g_ip->specific_tag)
- {
- tagbits = g_ip->tag_w;
- }
- else
- {
- tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
- _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks);
-
- }
- tagbits = (((tagbits + 3) >> 2) << 2);
-
- num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
- g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
- num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
- //burst_length = 1;
- }
- else
- {
- num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
- g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
- num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
- // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
- }
-
- if (num_r_subarray < MINSUBARRAYROWS) return;
- if (num_r_subarray == 0) return;
- if (num_r_subarray > MAXSUBARRAYROWS) return;
- if (num_c_subarray < MINSUBARRAYCOLS) return;
- if (num_c_subarray > MAXSUBARRAYCOLS) return;
-
- }
-
- else
- {//either fully-asso or cam
- if (pure_cam)
- {
- if (g_ip->specific_tag)
- {
- tagbits = int(ceil(g_ip->tag_w/8.0)*8);
- }
- else
- {
- tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8);
+ is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0),
+ Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_),
+ Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
+ number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
+ is_main_mem(is_main_mem_), cell(), is_valid(false) {
+ ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
+
+ unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer
+ const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
+ fully_assoc = (g_ip->fully_assoc) ? true : false;
+
+ // fully-associative cache -- ref: CACTI 2.0 report
+ if (fully_assoc || pure_cam) {
+ if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
+ Ndcm != 1 || //Ndcm is fixed to 1 for FA
+ Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
+ Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
+ Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
+ Ndbl < 2) {
+ return;
+ }
+ }
+
+ if ((is_dram) && (!is_tag) && (Ndcm > 1)) {
+ return; // For a DRAM array, each bitline has its own sense-amp
+ }
+
+ // If it's not an FA tag/data array, an array is assumed to have at least one mat, and a
+ // mat is nominally formed out of two horizontal and two vertical subarrays. Note that the
+ // check below only rejects Ndwl < 1 or Ndbl < 1, i.e. it enforces a minimum of one, not two.
+ if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) {
+ return;
+ }
+
+ //***********compute row, col of a subarray
+ if (!(fully_assoc || pure_cam)) {
+ //Not fully_asso nor cam
+ // if data array, let tagbits = 0
+ if (is_tag) {
+ if (g_ip->specific_tag) {
+ tagbits = g_ip->tag_w;
+ } else {
+ tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
+ _log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks);
+
+ }
+ tagbits = (((tagbits + 3) >> 2) << 2);
+
+ num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
+ g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
+ num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
+ //burst_length = 1;
+ } else {
+ num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
+ g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
+ num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
+ // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
+ }
+
+ if (num_r_subarray < MINSUBARRAYROWS) return;
+ if (num_r_subarray == 0) return;
+ if (num_r_subarray > MAXSUBARRAYROWS) return;
+ if (num_c_subarray < MINSUBARRAYCOLS) return;
+ if (num_c_subarray > MAXSUBARRAYCOLS) return;
+
+ }
+
+ else {//either fully-asso or cam
+ if (pure_cam) {
+ if (g_ip->specific_tag) {
+ tagbits = int(ceil(g_ip->tag_w / 8.0) * 8);
+ } else {
+ tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8);
// cout<<"Pure CAM needs tag width to be specified"<<endl;
// exit(0);
- }
- //tagbits = (((tagbits + 3) >> 2) << 2);
-
- tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries.
- //tag_num_c_subarray = (int)(tagbits + EPSILON);
- tag_num_c_subarray = tagbits;
- if (tag_num_r_subarray == 0) return;
- if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
- if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
- if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
- num_r_subarray = tag_num_r_subarray;
- }
- else //fully associative
- {
- if (g_ip->specific_tag)
- {
- tagbits = g_ip->tag_w;
- }
- else
- {
- tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
- }
- tagbits = (((tagbits + 3) >> 2) << 2);
-
- tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl));
- tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
- if (tag_num_r_subarray == 0) return;
- if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
- if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
- if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
-
- data_num_r_subarray = tag_num_r_subarray;
- data_num_c_subarray = 8 * g_ip->block_sz;
- if (data_num_r_subarray == 0) return;
- if (data_num_r_subarray > MAXSUBARRAYROWS) return;
- if (data_num_c_subarray < MINSUBARRAYCOLS) return;
- if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
- num_r_subarray = tag_num_r_subarray;
- }
- }
-
- num_subarrays = Ndwl * Ndbl;
- //****************end of computation of row, col of an subarray
-
- // calculate wire parameters
- if (fully_assoc || pure_cam)
- {
- cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
- cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
-
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)
- + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
- }
- else
- {
- if(is_tag)
- {
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
- g_ip->num_wr_ports);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
- (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
- wire_local.pitch * g_ip->num_se_rd_ports;
- }
- else
- {
- if (is_dram)
- {
- cell.h = g_tp.dram.b_h;
- cell.w = g_tp.dram.b_w;
- }
- else
- {
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
- g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
- (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
- g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
- }
- }
- }
-
- double c_b_metal = cell.h * wire_local.C_per_um;
- double C_bl;
-
- if (!(fully_assoc || pure_cam))
- {
- if (is_dram)
- {
- deg_bl_muxing = 1;
- if (ram_cell_tech_type == comm_dram)
- {
- C_bl = num_r_subarray * c_b_metal;
- V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl);
- if (V_b_sense < VBITSENSEMIN)
- {
- return;
- }
- V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
- dram_refresh_period = 64e-3;
- }
- else
- {
- double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl);
-
- if (V_b_sense < VBITSENSEMIN)
- {
- return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
- }
- V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
- //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
- //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
- dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
- }
- }
- else
- { //SRAM
- V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
- deg_bl_muxing = Ndcm;
- // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
- // contacts in a physical layout
- double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- dram_refresh_period = 0;
- }
- }
- else
- {
- c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
- V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
- deg_bl_muxing = 1;//FA fix as 1
- // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
- // contacts in a physical layout
- double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- dram_refresh_period = 0;
- }
-
-
- // do/di: data in/out, for fully associative they are the data width for normal read and write
- // so/si: search data in/out, for fully associative they are the data width for the search ops
- // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
- // so/si needs broadcase while do/di do not
-
- if (fully_assoc || pure_cam)
- {
- switch (Ndbl) {
- case (0):
- cout << " Invalid Ndbl \n"<<endl;
- exit(0);
- break;
- case (1):
- num_mats_h_dir = 1;//one subarray per mat
- num_mats_v_dir = 1;
- break;
- case (2):
- num_mats_h_dir = 1;//two subarrays per mat
- num_mats_v_dir = 1;
- break;
- default:
- num_mats_h_dir = int(floor(sqrt(Ndbl/4.0)));//4 subbarrys per mat
- num_mats_v_dir = int(Ndbl/4.0 / num_mats_h_dir);
}
- num_mats = num_mats_h_dir * num_mats_v_dir;
-
- if (fully_assoc)
- {
- num_so_b_mat = data_num_c_subarray;
- num_do_b_mat = data_num_c_subarray + tagbits;
+ //tagbits = (((tagbits + 3) >> 2) << 2);
+
+ //TODO: error check input of tagbits and blocksize
+ //TODO: for pure CAM, g_ip->block should be number of entries.
+ tag_num_r_subarray = (int)ceil(capacity_per_die /
+ (g_ip->nbanks * tagbits / 8.0 * Ndbl));
+ //tag_num_c_subarray = (int)(tagbits + EPSILON);
+ tag_num_c_subarray = tagbits;
+ if (tag_num_r_subarray == 0) return;
+ if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
+ num_r_subarray = tag_num_r_subarray;
+ } else { //fully associative
+ if (g_ip->specific_tag) {
+ tagbits = g_ip->tag_w;
+ } else {
+ tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
+ }
+ tagbits = (((tagbits + 3) >> 2) << 2);
+
+ tag_num_r_subarray = (int)(capacity_per_die /
+ (g_ip->nbanks * g_ip->block_sz * Ndbl));
+ tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
+ if (tag_num_r_subarray == 0) return;
+ if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
+
+ data_num_r_subarray = tag_num_r_subarray;
+ data_num_c_subarray = 8 * g_ip->block_sz;
+ if (data_num_r_subarray == 0) return;
+ if (data_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (data_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
+ num_r_subarray = tag_num_r_subarray;
+ }
+ }
+
+ num_subarrays = Ndwl * Ndbl;
+ //****************end of computation of row, col of a subarray
+
+ // calculate wire parameters
+ if (fully_assoc || pure_cam) {
+ cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch *
+ (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
+ + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
+ wire_local.pitch * g_ip->num_se_rd_ports;
+ cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch *
+ (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
+ + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
+ wire_local.pitch * g_ip->num_se_rd_ports;
+
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch *
+ (g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports)
+ + 2 * wire_local.pitch * (g_ip->num_search_ports - 1);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch *
+ (g_ip->num_rw_ports - 1 + (g_ip->num_rd_ports -
+ g_ip->num_se_rd_ports)
+ + g_ip->num_wr_ports) + g_tp.wire_local.pitch *
+ g_ip->num_se_rd_ports + 2 * wire_local.pitch *
+ (g_ip->num_search_ports - 1);
+ } else {
+ if (is_tag) {
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
+ g_ip->num_wr_ports);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
+ (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
+ wire_local.pitch * g_ip->num_se_rd_ports;
+ } else {
+ if (is_dram) {
+ cell.h = g_tp.dram.b_h;
+ cell.w = g_tp.dram.b_w;
+ } else {
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
+ g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
+ (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
+ g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
}
- else
- {
- num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
- num_do_b_mat = tagbits;
+ }
+ }
+
+ double c_b_metal = cell.h * wire_local.C_per_um;
+ double C_bl;
+
+ if (!(fully_assoc || pure_cam)) {
+ if (is_dram) {
+ deg_bl_muxing = 1;
+ if (ram_cell_tech_type == comm_dram) {
+ C_bl = num_r_subarray * c_b_metal;
+ V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
+ (g_tp.dram_cell_C + C_bl);
+ if (V_b_sense < VBITSENSEMIN) {
+ return;
+ }
+ V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
+ dram_refresh_period = 64e-3;
+ } else {
+ double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
+ (g_tp.dram_cell_C + C_bl);
+
+ if (V_b_sense < VBITSENSEMIN) {
+ return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
+ }
+ V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
+ //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
+ //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
+ dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
}
- }
- else
- {
- num_mats_h_dir = MAX(Ndwl / 2, 1);
- num_mats_v_dir = MAX(Ndbl / 2, 1);
- num_mats = num_mats_h_dir * num_mats_v_dir;
- num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
- }
-
- if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats)))
- {
- return;
- }
-
-
- int deg_sa_mux_l1_non_assoc;
- //TODO:the i/o for subbank is not necessary and should be removed.
- if (!(fully_assoc || pure_cam))
- {
- if (!is_tag)
- {
- if (is_main_mem == true)
- {
- num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- }
- else
- {
- if (g_ip->fast_access == true)
- {
- num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- }
- else
- {
-
- num_do_b_subbank = g_ip->out_w;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
- if (deg_sa_mux_l1_non_assoc < 1)
- {
- return;
- }
-
- }
- }
- }
- else
- {
- num_do_b_subbank = tagbits * g_ip->tag_assoc;
- if (num_do_b_mat < tagbits)
- {
- return;
- }
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
- num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
- }
- else
- {
- num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
- num_do_b_subbank = tag_num_c_subarray;
- }
-
- deg_sa_mux_l1_non_assoc = 1;
- }
-
- deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
-
- if (fully_assoc || pure_cam)
- {
- num_act_mats_hor_dir = 1;
- num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
- }
- else
- {
- num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
- if (num_act_mats_hor_dir == 0)
- {
- return;
- }
- }
-
- //compute num_do_mat for tag
- if (is_tag)
- {
- if (!(fully_assoc || pure_cam))
- {
- num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
- num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
- }
- }
-
- if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram))
- {
- if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits)
- {
- return;
- }
- }
+ } else { //SRAM
+ V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
+ deg_bl_muxing = Ndcm;
+ // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
+ // contacts in a physical layout
+ double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ dram_refresh_period = 0;
+ }
+ } else {
+ c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
+ V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
+ deg_bl_muxing = 1;//FA fix as 1
+ // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
+ // contacts in a physical layout
+ double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ dram_refresh_period = 0;
+ }
+
+
+ // do/di: data in/out, for fully associative they are the data width for normal read and write
+ // so/si: search data in/out, for fully associative they are the data width for the search ops
+ // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
+ // so/si need to be broadcast while do/di do not
+
+ if (fully_assoc || pure_cam) {
+ switch (Ndbl) {
+ case (0):
+ cout << " Invalid Ndbl \n" << endl;
+ exit(0);
+ break;
+ case (1):
+ num_mats_h_dir = 1;//one subarray per mat
+ num_mats_v_dir = 1;
+ break;
+ case (2):
+ num_mats_h_dir = 1;//two subarrays per mat
+ num_mats_v_dir = 1;
+ break;
+ default:
+ num_mats_h_dir = int(floor(sqrt(Ndbl / 4.0)));//4 subbarrys per mat
+ num_mats_v_dir = int(Ndbl / 4.0 / num_mats_h_dir);
+ }
+ num_mats = num_mats_h_dir * num_mats_v_dir;
+
+ if (fully_assoc) {
+ num_so_b_mat = data_num_c_subarray;
+ num_do_b_mat = data_num_c_subarray + tagbits;
+ } else {
+ num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
+ num_do_b_mat = tagbits;
+ }
+ } else {
+ num_mats_h_dir = MAX(Ndwl / 2, 1);
+ num_mats_v_dir = MAX(Ndbl / 2, 1);
+ num_mats = num_mats_h_dir * num_mats_v_dir;
+ num_do_b_mat = MAX((num_subarrays / num_mats) * num_c_subarray /
+ (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
+ }
+
+ if (!(fully_assoc || pure_cam) && (num_do_b_mat <
+ (num_subarrays / num_mats))) {
+ return;
+ }
+
+
+ int deg_sa_mux_l1_non_assoc;
+ //TODO:the i/o for subbank is not necessary and should be removed.
+ if (!(fully_assoc || pure_cam)) {
+ if (!is_tag) {
+ if (is_main_mem == true) {
+ num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ } else {
+ if (g_ip->fast_access == true) {
+ num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ } else {
+
+ num_do_b_subbank = g_ip->out_w;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
+ if (deg_sa_mux_l1_non_assoc < 1) {
+ return;
+ }
+
+ }
+ }
+ } else {
+ num_do_b_subbank = tagbits * g_ip->tag_assoc;
+ if (num_do_b_mat < tagbits) {
+ return;
+ }
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
+ }
+ } else {
+ if (fully_assoc) {
+ num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
+ num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
+ } else {
+ num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
+ num_do_b_subbank = tag_num_c_subarray;
+ }
+
+ deg_sa_mux_l1_non_assoc = 1;
+ }
+
+ deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
+
+ if (fully_assoc || pure_cam) {
+ num_act_mats_hor_dir = 1;
+ num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
+ } else {
+ num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
+ if (num_act_mats_hor_dir == 0) {
+ return;
+ }
+ }
+
+ //compute num_do_mat for tag
+ if (is_tag) {
+ if (!(fully_assoc || pure_cam)) {
+ num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
+ num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
+ }
+ }
+
+ if ((g_ip->is_cache == false && is_main_mem == true) ||
+ (PAGE_MODE == 1 && is_dram)) {
+ if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 !=
+ (int)g_ip->page_sz_bits) {
+ return;
+ }
+ }
// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
- if (is_tag == false && g_ip->is_main_mem == true &&
- num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc))
- {
- return;
- }
-
- if (num_act_mats_hor_dir > num_mats_h_dir)
- {
- return;
- }
-
-
- //compute di for mat subbank and bank
- if (!(fully_assoc ||pure_cam))
- {
- if(!is_tag)
- {
- if(g_ip->fast_access == true)
- {
- num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
- }
- else
- {
- num_di_b_mat = num_do_b_mat;
- }
- }
- else
- {
- num_di_b_mat = tagbits;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_di_b_mat = num_do_b_mat;
- //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
- //but inside the mat wire tracks need to be reserved for search data bus
- num_si_b_mat = tagbits;
- }
- else
- {
- num_di_b_mat = tagbits;
- num_si_b_mat = tagbits;//*num_subarrays/num_mats;
- }
-
- }
-
- num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
- num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
-
- int num_addr_b_row_dec = _log2(num_r_subarray);
- if ((fully_assoc ||pure_cam))
- num_addr_b_row_dec +=_log2(num_subarrays/num_mats);
- int number_subbanks = num_mats / num_act_mats_hor_dir;
- number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
-
- num_rw_ports = g_ip->num_rw_ports;
- num_rd_ports = g_ip->num_rd_ports;
- num_wr_ports = g_ip->num_wr_ports;
- num_se_rd_ports = g_ip->num_se_rd_ports;
- num_search_ports = g_ip->num_search_ports;
-
- if (is_dram && is_main_mem)
- {
- number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
- _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
- }
- else
- {
- number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
- _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
- }
-
- if (!(fully_assoc ||pure_cam))
- {
- if (is_tag)
- {
- num_di_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = g_ip->data_assoc;
- }
- else
- {
- num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
- num_do_b_bank_per_port = g_ip->out_w;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
- num_si_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = g_ip->out_w + tagbits;
- num_so_b_bank_per_port = g_ip->out_w;
- }
- else
- {
- num_di_b_bank_per_port = tagbits;
- num_si_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = tagbits;
- num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
- }
- }
-
- if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access))
- {
- number_way_select_signals_mat = g_ip->data_assoc;
- }
-
- // add ECC adjustment to all data signals that traverse on H-trees.
- if (g_ip->add_ecc_b_ == true)
- {
- num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
- num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
- num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
- num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
- num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
- num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
-
- num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
- num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
- num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
- num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
- num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
- num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
- }
-
- is_valid = true;
+ if (is_tag == false && g_ip->is_main_mem == true &&
+ num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 <
+ ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) {
+ return;
+ }
+
+ if (num_act_mats_hor_dir > num_mats_h_dir) {
+ return;
+ }
+
+
+ //compute di for mat subbank and bank
+ if (!(fully_assoc || pure_cam)) {
+ if (!is_tag) {
+ if (g_ip->fast_access == true) {
+ num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
+ } else {
+ num_di_b_mat = num_do_b_mat;
+ }
+ } else {
+ num_di_b_mat = tagbits;
+ }
+ } else {
+ if (fully_assoc) {
+ num_di_b_mat = num_do_b_mat;
+ //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
+ //but inside the mat wire tracks need to be reserved for search data bus
+ num_si_b_mat = tagbits;
+ } else {
+ num_di_b_mat = tagbits;
+ num_si_b_mat = tagbits;//*num_subarrays/num_mats;
+ }
+
+ }
+
+ num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
+ num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
+
+ int num_addr_b_row_dec = _log2(num_r_subarray);
+ if ((fully_assoc || pure_cam))
+ num_addr_b_row_dec += _log2(num_subarrays / num_mats);
+ int number_subbanks = num_mats / num_act_mats_hor_dir;
+ number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
+
+ num_rw_ports = g_ip->num_rw_ports;
+ num_rd_ports = g_ip->num_rd_ports;
+ num_wr_ports = g_ip->num_wr_ports;
+ num_se_rd_ports = g_ip->num_se_rd_ports;
+ num_search_ports = g_ip->num_search_ports;
+
+ if (is_dram && is_main_mem) {
+ number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
+ _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
+ } else {
+ number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
+ _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
+ }
+
+ if (!(fully_assoc || pure_cam)) {
+ if (is_tag) {
+ num_di_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = g_ip->data_assoc;
+ } else {
+ num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
+ num_do_b_bank_per_port = g_ip->out_w;
+ }
+ } else {
+ if (fully_assoc) {
+ num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
+ num_si_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = g_ip->out_w + tagbits;
+ num_so_b_bank_per_port = g_ip->out_w;
+ } else {
+ num_di_b_bank_per_port = tagbits;
+ num_si_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = tagbits;
+ num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
+ }
+ }
+
+ if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) {
+ number_way_select_signals_mat = g_ip->data_assoc;
+ }
+
+ // add ECC adjustment to all data signals that traverse on H-trees.
+ if (g_ip->add_ecc_b_ == true) {
+ num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
+ num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
+ num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
+ num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
+ num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
+ num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
+
+ num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
+ num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
+ num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
+ num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
+ num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
+ num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
+ }
+
+ is_valid = true;
}
diff --git a/ext/mcpat/cacti/parameter.h b/ext/mcpat/cacti/parameter.h
index 9c827bbc8..573b726a6 100644
--- a/ext/mcpat/cacti/parameter.h
+++ b/ext/mcpat/cacti/parameter.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -40,251 +41,242 @@
#include "io.h"
// parameters which are functions of certain device technology
-class TechnologyParameter
-{
- public:
- class DeviceType
- {
- public:
- double C_g_ideal;
- double C_fringe;
- double C_overlap;
- double C_junc; // C_junc_area
- double C_junc_sidewall;
- double l_phy;
- double l_elec;
- double R_nch_on;
- double R_pch_on;
- double Vdd;
- double Vth;
- double I_on_n;
- double I_on_p;
- double I_off_n;
- double I_off_p;
- double I_g_on_n;
- double I_g_on_p;
- double C_ox;
- double t_ox;
- double n_to_p_eff_curr_drv_ratio;
- double long_channel_leakage_reduction;
-
- DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
- C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
- Vdd(0), Vth(0),
- I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0),
- C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0) { };
- void reset()
- {
- C_g_ideal = 0;
- C_fringe = 0;
- C_overlap = 0;
- C_junc = 0;
- l_phy = 0;
- l_elec = 0;
- R_nch_on = 0;
- R_pch_on = 0;
- Vdd = 0;
- Vth = 0;
- I_on_n = 0;
- I_on_p = 0;
- I_off_n = 0;
- I_off_p = 0;
- I_g_on_n = 0;
- I_g_on_p = 0;
- C_ox = 0;
- t_ox = 0;
- n_to_p_eff_curr_drv_ratio = 0;
- long_channel_leakage_reduction = 0;
- }
+class TechnologyParameter {
+public:
+ // Parameter set for a single device type. Every member is a double that
+ // the constructor starts at 0 and that reset() returns to 0.
+ class DeviceType {
+ public:
+ double C_g_ideal;
+ double C_fringe;
+ double C_overlap;
+ double C_junc; // C_junc_area
+ double C_junc_sidewall;
+ double l_phy;
+ double l_elec;
+ double R_nch_on;
+ double R_pch_on;
+ double Vdd;
+ double Vth;
+ double I_on_n;
+ double I_on_p;
+ double I_off_n;
+ double I_off_p;
+ double I_g_on_n;
+ double I_g_on_p;
+ double C_ox;
+ double t_ox;
+ double n_to_p_eff_curr_drv_ratio;
+ double long_channel_leakage_reduction;
+
+ // Zero-initialize every member.
+ DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
+ C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
+ Vdd(0), Vth(0),
+ I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0), I_g_on_n(0),
+ I_g_on_p(0),
+ C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0),
+ long_channel_leakage_reduction(0) { };
+
+ // Return the object to its freshly constructed, all-zero state.
+ // C_junc_sidewall is cleared along with the rest; it used to be the one
+ // member this function skipped, so a reused object kept its old value.
+ void reset() {
+ C_g_ideal = 0;
+ C_fringe = 0;
+ C_overlap = 0;
+ C_junc = 0;
+ C_junc_sidewall = 0;
+ l_phy = 0;
+ l_elec = 0;
+ R_nch_on = 0;
+ R_pch_on = 0;
+ Vdd = 0;
+ Vth = 0;
+ I_on_n = 0;
+ I_on_p = 0;
+ I_off_n = 0;
+ I_off_p = 0;
+ I_g_on_n = 0;
+ I_g_on_p = 0;
+ C_ox = 0;
+ t_ox = 0;
+ n_to_p_eff_curr_drv_ratio = 0;
+ long_channel_leakage_reduction = 0;
+ }
+
+ // Defined outside this header.
+ void display(uint32_t indent = 0);
+ };
+ // Parameter set for one class of wire. All members are doubles; both the
+ // constructor and reset() leave every one of them at 0.
+ class InterconnectType {
+ public:
+ double pitch;
+ double R_per_um;
+ double C_per_um;
+ double horiz_dielectric_constant;
+ double vert_dielectric_constant;
+ double aspect_ratio;
+ double miller_value;
+ double ild_thickness;
+
+ // Zero-initialize every member. The last five used to be left out of
+ // the initializer list and therefore held indeterminate values until
+ // reset() was called.
+ InterconnectType(): pitch(0), R_per_um(0), C_per_um(0),
+ horiz_dielectric_constant(0), vert_dielectric_constant(0),
+ aspect_ratio(0), miller_value(0), ild_thickness(0) { };
+
+ // Set every member back to 0.
+ void reset() {
+ pitch = 0;
+ R_per_um = 0;
+ C_per_um = 0;
+ horiz_dielectric_constant = 0;
+ vert_dielectric_constant = 0;
+ aspect_ratio = 0;
+ miller_value = 0;
+ ild_thickness = 0;
+ }
+
+ // Defined outside this header.
+ void display(uint32_t indent = 0);
+ };
+ // Six double-valued memory-cell parameters. There is no constructor, so
+ // the members hold no defined value until they are assigned or reset().
+ class MemoryType {
+ public:
+ double b_w;
+ double b_h;
+ double cell_a_w;
+ double cell_pmos_w;
+ double cell_nmos_w;
+ double Vbitpre;
+
+ // Clear all six members to 0.
+ void reset() {
+ Vbitpre = 0;
+ cell_nmos_w = cell_pmos_w = 0;
+ cell_a_w = 0;
+ b_h = b_w = 0;
+ }
+
+ // Defined outside this header.
+ void display(uint32_t indent = 0);
+ };
+
+ // Three double-valued scaling parameters, all 0 after construction and
+ // after reset().
+ class ScalingFactor {
+ public:
+ double logic_scaling_co_eff;
+ double core_tx_density;
+ double long_channel_leakage_reduction;
+
+ // Start all three members at 0.
+ ScalingFactor()
+ : logic_scaling_co_eff(0),
+ core_tx_density(0),
+ long_channel_leakage_reduction(0) { };
+
+ // Set all three members back to 0.
+ void reset() {
+ long_channel_leakage_reduction = 0;
+ core_tx_density = 0;
+ logic_scaling_co_eff = 0;
+ }
+
+ // Defined outside this header.
+ void display(uint32_t indent = 0);
+ };
+
+ double ram_wl_stitching_overhead_; // scalar members follow; reset() zeroes only the ones marked below
+ double min_w_nmos_;
+ double max_w_nmos_;
+ double max_w_nmos_dec;
+ double unit_len_wire_del;
+ double FO4;
+ double kinv;
+ double vpp; // zeroed by reset()
+ double w_sense_en;
+ double w_sense_n;
+ double w_sense_p;
+ double sense_delay; // zeroed by reset()
+ double sense_dy_power; // zeroed by reset()
+ double w_iso;
+ double w_poly_contact;
+ double spacing_poly_to_poly;
+ double spacing_poly_to_contact;
+
+ double w_comp_inv_p1;
+ double w_comp_inv_p2;
+ double w_comp_inv_p3;
+ double w_comp_inv_n1;
+ double w_comp_inv_n2;
+ double w_comp_inv_n3;
+ double w_eval_inv_p;
+ double w_eval_inv_n;
+ double w_comp_n;
+ double w_comp_p;
+
+ double dram_cell_I_on; // zeroed by reset()
+ double dram_cell_Vdd; // zeroed by reset()
+ double dram_cell_I_off_worst_case_len_temp; // zeroed by reset()
+ double dram_cell_C; // zeroed by reset()
+ double gm_sense_amp_latch;
+
+ double w_nmos_b_mux;
+ double w_nmos_sa_mux;
+ double w_pmos_bl_precharge;
+ double w_pmos_bl_eq;
+ double MIN_GAP_BET_P_AND_N_DIFFS;
+ double MIN_GAP_BET_SAME_TYPE_DIFFS;
+ double HPOWERRAIL;
+ double cell_h_def;
+
+ double chip_layout_overhead; // zeroed by reset()
+ double macro_layout_overhead; // zeroed by reset()
+ double sckt_co_eff; // zeroed by reset()
+
+ double fringe_cap; // zeroed by reset()
+
+ uint64_t h_dec;
+
+ DeviceType sram_cell; // SRAM cell transistor
+ DeviceType dram_acc; // DRAM access transistor
+ DeviceType dram_wl; // DRAM wordline transistor
+ DeviceType peri_global; // peripheral global
+ DeviceType cam_cell; // presumably the CAM cell transistor; the earlier "SRAM" note looks copy-pasted from sram_cell
+
+ InterconnectType wire_local;
+ InterconnectType wire_inside_mat;
+ InterconnectType wire_outside_mat;
+
+ ScalingFactor scaling_factor;
+
+ MemoryType sram;
+ MemoryType dram;
+ MemoryType cam;
void display(uint32_t indent = 0);
- };
- class InterconnectType
- {
- public:
- double pitch;
- double R_per_um;
- double C_per_um;
- double horiz_dielectric_constant;
- double vert_dielectric_constant;
- double aspect_ratio;
- double miller_value;
- double ild_thickness;
-
- InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
-
- void reset()
- {
- pitch = 0;
- R_per_um = 0;
- C_per_um = 0;
- horiz_dielectric_constant = 0;
- vert_dielectric_constant = 0;
- aspect_ratio = 0;
- miller_value = 0;
- ild_thickness = 0;
- }
- void display(uint32_t indent = 0);
- };
- class MemoryType
- {
- public:
- double b_w;
- double b_h;
- double cell_a_w;
- double cell_pmos_w;
- double cell_nmos_w;
- double Vbitpre;
-
- void reset()
- {
- b_w = 0;
- b_h = 0;
- cell_a_w = 0;
- cell_pmos_w = 0;
- cell_nmos_w = 0;
- Vbitpre = 0;
- }
+ void reset() {
+ dram_cell_Vdd = 0;
+ dram_cell_I_on = 0;
+ dram_cell_C = 0;
+ vpp = 0;
- void display(uint32_t indent = 0);
- };
-
- class ScalingFactor
- {
- public:
- double logic_scaling_co_eff;
- double core_tx_density;
- double long_channel_leakage_reduction;
-
- ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
- long_channel_leakage_reduction(0) { };
-
- void reset()
- {
- logic_scaling_co_eff= 0;
- core_tx_density = 0;
- long_channel_leakage_reduction= 0;
- }
-
- void display(uint32_t indent = 0);
- };
-
- double ram_wl_stitching_overhead_;
- double min_w_nmos_;
- double max_w_nmos_;
- double max_w_nmos_dec;
- double unit_len_wire_del;
- double FO4;
- double kinv;
- double vpp;
- double w_sense_en;
- double w_sense_n;
- double w_sense_p;
- double sense_delay;
- double sense_dy_power;
- double w_iso;
- double w_poly_contact;
- double spacing_poly_to_poly;
- double spacing_poly_to_contact;
-
- double w_comp_inv_p1;
- double w_comp_inv_p2;
- double w_comp_inv_p3;
- double w_comp_inv_n1;
- double w_comp_inv_n2;
- double w_comp_inv_n3;
- double w_eval_inv_p;
- double w_eval_inv_n;
- double w_comp_n;
- double w_comp_p;
-
- double dram_cell_I_on;
- double dram_cell_Vdd;
- double dram_cell_I_off_worst_case_len_temp;
- double dram_cell_C;
- double gm_sense_amp_latch;
-
- double w_nmos_b_mux;
- double w_nmos_sa_mux;
- double w_pmos_bl_precharge;
- double w_pmos_bl_eq;
- double MIN_GAP_BET_P_AND_N_DIFFS;
- double MIN_GAP_BET_SAME_TYPE_DIFFS;
- double HPOWERRAIL;
- double cell_h_def;
-
- double chip_layout_overhead;
- double macro_layout_overhead;
- double sckt_co_eff;
-
- double fringe_cap;
-
- uint64_t h_dec;
-
- DeviceType sram_cell; // SRAM cell transistor
- DeviceType dram_acc; // DRAM access transistor
- DeviceType dram_wl; // DRAM wordline transistor
- DeviceType peri_global; // peripheral global
- DeviceType cam_cell; // SRAM cell transistor
-
- InterconnectType wire_local;
- InterconnectType wire_inside_mat;
- InterconnectType wire_outside_mat;
-
- ScalingFactor scaling_factor;
-
- MemoryType sram;
- MemoryType dram;
- MemoryType cam;
-
- void display(uint32_t indent = 0);
-
- void reset()
- {
- dram_cell_Vdd = 0;
- dram_cell_I_on = 0;
- dram_cell_C = 0;
- vpp = 0;
-
- sense_delay = 0;
- sense_dy_power = 0;
- fringe_cap = 0;
+ sense_delay = 0;
+ sense_dy_power = 0;
+ fringe_cap = 0;
// horiz_dielectric_constant = 0;
// vert_dielectric_constant = 0;
// aspect_ratio = 0;
// miller_value = 0;
// ild_thickness = 0;
- dram_cell_I_off_worst_case_len_temp = 0;
+ dram_cell_I_off_worst_case_len_temp = 0;
- sram_cell.reset();
- dram_acc.reset();
- dram_wl.reset();
- peri_global.reset();
- cam_cell.reset();
+ sram_cell.reset();
+ dram_acc.reset();
+ dram_wl.reset();
+ peri_global.reset();
+ cam_cell.reset();
- scaling_factor.reset();
+ scaling_factor.reset();
- wire_local.reset();
- wire_inside_mat.reset();
- wire_outside_mat.reset();
+ wire_local.reset();
+ wire_inside_mat.reset();
+ wire_outside_mat.reset();
- sram.reset();
- dram.reset();
- cam.reset();
+ sram.reset();
+ dram.reset();
+ cam.reset();
- chip_layout_overhead = 0;
- macro_layout_overhead = 0;
- sckt_co_eff = 0;
- }
+ chip_layout_overhead = 0;
+ macro_layout_overhead = 0;
+ sckt_co_eff = 0;
+ }
};
-class DynamicParameter
-{
- public:
+class DynamicParameter {
+public:
bool is_tag;
bool pure_ram;
bool pure_cam;
@@ -313,8 +305,8 @@ class DynamicParameter
int num_so_b_mat;
int num_si_b_subbank;
int num_so_b_subbank;
- int num_si_b_bank_per_port;
- int num_so_b_bank_per_port;
+ int num_si_b_bank_per_port;
+ int num_so_b_bank_per_port;
int number_way_select_signals_mat;
int num_act_mats_hor_dir;
diff --git a/ext/mcpat/cacti/router.cc b/ext/mcpat/cacti/router.cc
index 06f170691..d3368d946 100644
--- a/ext/mcpat/cacti/router.cc
+++ b/ext/mcpat/cacti/router.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -41,57 +42,56 @@ Router::Router(
double I_,
double O_,
double M_
- ):flit_size(flit_size_),
- deviceType(dt),
- I(I_),
- O(O_),
- M(M_)
-{
- vc_buffer_size = vc_buf;
- vc_count = vc_c;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- double technology = g_ip->F_sz_um;
-
- Vdd = dt->Vdd;
-
- /*Crossbar parameters. Transmisson gate is employed for connector*/
- NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/
- PTtr = 20*technology*1e-6/2; /* pmos tr. length*/
- wt = 15*technology*1e-6/2; /*track width*/
- ht = 15*technology*1e-6/2; /*track height*/
+ ): flit_size(flit_size_),
+ deviceType(dt),
+ I(I_),
+ O(O_),
+ M(M_) {
+ vc_buffer_size = vc_buf;
+ vc_count = vc_c;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ double technology = g_ip->F_sz_um;
+
+ Vdd = dt->Vdd;
+
+ /*Crossbar parameters. Transmisson gate is employed for connector*/
+ NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/
+ PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. length*/
+ wt = 15 * technology * 1e-6 / 2; /*track width*/
+ ht = 15 * technology * 1e-6 / 2; /*track height*/
// I = 5; /*Number of crossbar input ports*/
// O = 5; /*Number of crossbar output ports*/
- NTi = 12.5*technology*1e-6/2;
- PTi = 25*technology*1e-6/2;
+ NTi = 12.5 * technology * 1e-6 / 2;
+ PTi = 25 * technology * 1e-6 / 2;
- NTid = 60*technology*1e-6/2; //m
- PTid = 120*technology*1e-6/2; // m
- NTod = 60*technology*1e-6/2; // m
- PTod = 120*technology*1e-6/2; // m
+ NTid = 60 * technology * 1e-6 / 2; //m
+ PTid = 120 * technology * 1e-6 / 2; // m
+ NTod = 60 * technology * 1e-6 / 2; // m
+ PTod = 120 * technology * 1e-6 / 2; // m
- calc_router_parameters();
+ calc_router_parameters();
}
-Router::~Router(){}
+Router::~Router() {} // empty body: no explicit teardown is performed here
double //wire cap with triple spacing
Router::Cw3(double length) {
- Wire wc(g_ip->wt, length, 1, 3, 3);
- return (wc.wire_cap(length));
+ // Build a temporary Wire for the given length and report the
+ // capacitance it computes over that same length.
+ Wire spaced_wire(g_ip->wt, length, 1, 3, 3);
+ const double cap = spaced_wire.wire_cap(length);
+ return cap;
}
/*Function to calculate the gate capacitance*/
double
Router::gate_cap(double w) {
- return (double) gate_C (w*1e6 /*u*/, 0);
+ // gate_C() is handed the width scaled by 1e6 (the "u" of the original
+ // inline note) and a second argument of 0.
+ const double scaled_w = w * 1e6; /*u*/
+ return (double) gate_C(scaled_w, 0);
}
/*Function to calculate the diffusion capacitance*/
double
Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
- double s /*number of stacking transistors*/) {
- return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
+ double s /*number of stacking transistors*/) {
+ // Width is scaled by 1e6 and the stack count truncated to int before
+ // both are passed on to drain_C_().
+ const double scaled_w = w * 1e6; /*u*/
+ const int stack = (int) s;
+ return (double) drain_C_(scaled_w, type, stack, 1, g_tp.cell_h_def);
}
@@ -100,212 +100,216 @@ Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
// Model for simple transmission gate
double
Router::transmission_buf_inpcap() {
- return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
+ // Sum of the type-0 (NTtr) and type-1 (PTtr) diffusion capacitances,
+ // each with a stack of 1.
+ const double n_part = diff_cap(NTtr, 0, 1);
+ const double p_part = diff_cap(PTtr, 1, 1);
+ return n_part + p_part;
}
double
Router::transmission_buf_outcap() {
- return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
+ // Same expression as the input side: type-0 (NTtr) plus type-1 (PTtr)
+ // diffusion capacitance, each with a stack of 1.
+ const double n_part = diff_cap(NTtr, 0, 1);
+ const double p_part = diff_cap(PTtr, 1, 1);
+ return n_part + p_part;
}
double
Router::transmission_buf_ctrcap() {
- return gate_cap(NTtr)+gate_cap(PTtr);
+ // Gate capacitance of the NTtr and PTtr widths added together.
+ const double n_gate = gate_cap(NTtr);
+ const double p_gate = gate_cap(PTtr);
+ return n_gate + p_gate;
}
double
Router::crossbar_inpline() {
- return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
- gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
+ // Each contribution gets a name; they are then added in the same
+ // left-to-right order as before.
+ const double wire_c = Cw3(O*flit_size*wt);
+ const double tgate_c = O*transmission_buf_inpcap();
+ const double n_gate_c = gate_cap(NTid);
+ const double p_gate_c = gate_cap(PTid);
+ const double n_diff_c = diff_cap(NTid, 0, 1);
+ const double p_diff_c = diff_cap(PTid, 1, 1);
+ return (wire_c + tgate_c + n_gate_c + p_gate_c + n_diff_c + p_diff_c);
}
double
Router::crossbar_outline() {
- return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
- gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
+ // Each contribution gets a name; they are then added in the same
+ // left-to-right order as before.
+ const double wire_c = Cw3(I*flit_size*ht);
+ const double tgate_c = I*transmission_buf_outcap();
+ const double n_gate_c = gate_cap(NTod);
+ const double p_gate_c = gate_cap(PTod);
+ const double n_diff_c = diff_cap(NTod, 0, 1);
+ const double p_diff_c = diff_cap(PTod, 1, 1);
+ return (wire_c + tgate_c + n_gate_c + p_gate_c + n_diff_c + p_diff_c);
}
double
Router::crossbar_ctrline() {
- return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
- diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
- gate_cap(NTi) + gate_cap(PTi));
+ // Each contribution gets a name; they are then added in the same
+ // left-to-right order as before.
+ const double wire_c = Cw3(0.5*O*flit_size*wt);
+ const double tgate_c = flit_size*transmission_buf_ctrcap();
+ const double n_diff_c = diff_cap(NTi, 0, 1);
+ const double p_diff_c = diff_cap(PTi, 1, 1);
+ const double n_gate_c = gate_cap(NTi);
+ const double p_gate_c = gate_cap(PTi);
+ return (wire_c + tgate_c + n_diff_c + p_diff_c + n_gate_c + p_gate_c);
}
double
Router::tr_crossbar_power() {
- return (crossbar_inpline()*Vdd*Vdd*flit_size/2 +
- crossbar_outline()*Vdd*Vdd*flit_size/2)*2;
+ // Input-line and output-line terms, each C * Vdd * Vdd * flit_size / 2;
+ // their sum is doubled on return.
+ const double in_term = crossbar_inpline()*Vdd*Vdd*flit_size / 2;
+ const double out_term = crossbar_outline()*Vdd*Vdd*flit_size / 2;
+ return (in_term + out_term) * 2;
}
-void Router::buffer_stats()
-{
- DynamicParameter dyn_p;
- dyn_p.is_tag = false;
- dyn_p.pure_cam = false;
- dyn_p.fully_assoc = false;
- dyn_p.pure_ram = true;
- dyn_p.is_dram = false;
- dyn_p.is_main_mem = false;
- dyn_p.num_subarrays = 1;
- dyn_p.num_mats = 1;
- dyn_p.Ndbl = 1;
- dyn_p.Ndwl = 1;
- dyn_p.Nspd = 1;
- dyn_p.deg_bl_muxing = 1;
- dyn_p.deg_senseamp_muxing_non_associativity = 1;
- dyn_p.Ndsam_lev_1 = 1;
- dyn_p.Ndsam_lev_2 = 1;
- dyn_p.Ndcm = 1;
- dyn_p.number_addr_bits_mat = 8;
- dyn_p.number_way_select_signals_mat = 1;
- dyn_p.number_subbanks_decode = 0;
- dyn_p.num_act_mats_hor_dir = 1;
- dyn_p.V_b_sense = Vdd; // FIXME check power calc.
- dyn_p.ram_cell_tech_type = 0;
- dyn_p.num_r_subarray = (int) vc_buffer_size;
- dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
- dyn_p.num_mats_h_dir = 1;
- dyn_p.num_mats_v_dir = 1;
- dyn_p.num_do_b_subbank = (int)flit_size;
- dyn_p.num_di_b_subbank = (int)flit_size;
- dyn_p.num_do_b_mat = (int) flit_size;
- dyn_p.num_di_b_mat = (int) flit_size;
- dyn_p.num_do_b_mat = (int) flit_size;
- dyn_p.num_di_b_mat = (int) flit_size;
- dyn_p.num_do_b_bank_per_port = (int) flit_size;
- dyn_p.num_di_b_bank_per_port = (int) flit_size;
- dyn_p.out_w = (int) flit_size;
-
- dyn_p.use_inp_params = 1;
- dyn_p.num_wr_ports = (unsigned int) vc_count;
- dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
- dyn_p.num_rw_ports = 0;
- dyn_p.num_se_rd_ports =0;
- dyn_p.num_search_ports =0;
-
-
-
- dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
- dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
- dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
- (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
- dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
-
- Mat buff(dyn_p);
- buff.compute_delays(0);
- buff.compute_power_energy();
- buffer.power.readOp = buff.power.readOp;
- buffer.power.writeOp = buffer.power.readOp; //FIXME
- buffer.area = buff.area;
+void Router::buffer_stats() {
+ // Fill in a DynamicParameter by hand, build a Mat from it, and copy the
+ // Mat's read power and area into `buffer`.
+ const int flit_bits = (int) flit_size;
+
+ DynamicParameter dyn_p;
+
+ // Array kind and technology selection.
+ dyn_p.pure_ram = true;
+ dyn_p.pure_cam = false;
+ dyn_p.fully_assoc = false;
+ dyn_p.is_tag = false;
+ dyn_p.is_dram = false;
+ dyn_p.is_main_mem = false;
+ dyn_p.ram_cell_tech_type = 0;
+ dyn_p.use_inp_params = 1;
+ dyn_p.V_b_sense = Vdd; // FIXME check power calc.
+
+ // Organization: every partitioning / muxing degree is 1.
+ dyn_p.num_subarrays = 1;
+ dyn_p.num_mats = 1;
+ dyn_p.num_mats_h_dir = 1;
+ dyn_p.num_mats_v_dir = 1;
+ dyn_p.num_act_mats_hor_dir = 1;
+ dyn_p.Ndwl = 1;
+ dyn_p.Ndbl = 1;
+ dyn_p.Nspd = 1;
+ dyn_p.Ndcm = 1;
+ dyn_p.Ndsam_lev_1 = 1;
+ dyn_p.Ndsam_lev_2 = 1;
+ dyn_p.deg_bl_muxing = 1;
+ dyn_p.deg_senseamp_muxing_non_associativity = 1;
+ dyn_p.number_addr_bits_mat = 8;
+ dyn_p.number_way_select_signals_mat = 1;
+ dyn_p.number_subbanks_decode = 0;
+
+ // Subarray dimensions.
+ dyn_p.num_r_subarray = (int) vc_buffer_size;
+ dyn_p.num_c_subarray = flit_bits * (int) vc_count;
+
+ // All data widths equal the flit size.
+ dyn_p.out_w = flit_bits;
+ dyn_p.num_di_b_mat = flit_bits;
+ dyn_p.num_do_b_mat = flit_bits;
+ dyn_p.num_di_b_subbank = flit_bits;
+ dyn_p.num_do_b_subbank = flit_bits;
+ dyn_p.num_di_b_bank_per_port = flit_bits;
+ dyn_p.num_do_b_bank_per_port = flit_bits;
+
+ // Ports.
+ dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
+ dyn_p.num_wr_ports = (unsigned int) vc_count;
+ dyn_p.num_rw_ports = 0;
+ dyn_p.num_se_rd_ports = 0;
+ dyn_p.num_search_ports = 0;
+
+ // Cell size: base SRAM cell plus wire pitch per port (expressions kept
+ // exactly as written so the arithmetic is unchanged).
+ dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
+ dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
+ dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
+ (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
+ dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
+
+ Mat buff(dyn_p);
+ buff.compute_delays(0);
+ buff.compute_power_energy();
+
+ buffer.area = buff.area;
+ buffer.power.readOp = buff.power.readOp;
+ // The write figure is a copy of the read figure, not taken from the Mat.
+ buffer.power.writeOp = buffer.power.readOp; //FIXME
}
- void
-Router::cb_stats ()
-{
- if (1) {
- Crossbar c_b(I, O, flit_size);
- c_b.compute_power();
- crossbar.delay = c_b.delay;
- crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
- crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
- crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
- crossbar.area = c_b.area;
+void
+Router::cb_stats () {
+ if (1) { // always taken, so the else branch below is currently dead code
+ Crossbar c_b(I, O, flit_size);
+ c_b.compute_power();
+ crossbar.delay = c_b.delay; // copy delay, three readOp power terms and area out of the local Crossbar
+ crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
+ crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
+ crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
+ crossbar.area = c_b.area;
// c_b.print_crossbar();
- }
- else {
- crossbar.power.readOp.dynamic = tr_crossbar_power();
- crossbar.power.readOp.leakage = flit_size * I * O *
- cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
- crossbar.power.readOp.gate_leakage = flit_size * I * O *
- cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
- }
+ } else {
+ crossbar.power.readOp.dynamic = tr_crossbar_power(); // unreachable alternative; note it never sets crossbar.delay or crossbar.area
+ crossbar.power.readOp.leakage = flit_size * I * O *
+ cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
+ crossbar.power.readOp.gate_leakage = flit_size * I * O *
+ cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
+ }
}
void
-Router::get_router_power()
-{
- /* calculate buffer stats */
- buffer_stats();
-
- /* calculate cross-bar stats */
- cb_stats();
-
- /* calculate arbiter stats */
- Arbiter vcarb(vc_count, flit_size, buffer.area.w);
- Arbiter cbarb(I, flit_size, crossbar.area.w);
- vcarb.compute_power();
- cbarb.compute_power();
- arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
- cbarb.power.readOp.dynamic * O;
- arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
- cbarb.power.readOp.leakage * O;
- arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
- cbarb.power.readOp.gate_leakage * O;
+Router::get_router_power() {
+ /* calculate buffer stats (fills buffer.power and buffer.area) */
+ buffer_stats();
+
+ /* calculate cross-bar stats (fills crossbar.power, crossbar.delay and crossbar.area) */
+ cb_stats();
+
+ /* calculate arbiter stats: the two arbiters take the buffer and crossbar widths computed above */
+ Arbiter vcarb(vc_count, flit_size, buffer.area.w);
+ Arbiter cbarb(I, flit_size, crossbar.area.w);
+ vcarb.compute_power();
+ cbarb.compute_power();
+ arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I + // vcarb terms are scaled by I, cbarb terms by O
+ cbarb.power.readOp.dynamic * O;
+ arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
+ cbarb.power.readOp.leakage * O;
+ arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
+ cbarb.power.readOp.gate_leakage * O;
// arb_stats();
- power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
- crossbar.power.readOp.dynamic +
- arbiter.power.readOp.dynamic)*MIN(I, O)*M;
- double pppm_t[4] = {1,I,I,1};
- power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
+ power.readOp.dynamic = ((buffer.power.readOp.dynamic +
+ buffer.power.writeOp.dynamic) +
+ crossbar.power.readOp.dynamic +
+ arbiter.power.readOp.dynamic) * MIN(I, O) * M; // buffer read+write, crossbar and arbiter dynamic terms, scaled by MIN(I, O) * M
+ double pppm_t[4] = {1, I, I, 1}; // multiplier applied to buffer.power only
+ power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) *
+ pppm_lkg; // pppm_lkg is defined elsewhere; presumably it keeps only the leakage components — TODO confirm
}
- void
-Router::get_router_delay ()
-{
- FREQUENCY=5; // move this to config file --TODO
- cycle_time = (1/(double)FREQUENCY)*1e3; //ps
- delay = 4;
- max_cyc = 17 * g_tp.FO4; //s
- max_cyc *= 1e12; //ps
- if (cycle_time < max_cyc) {
- FREQUENCY = (1/max_cyc)*1e3; //GHz
- }
+void
+Router::get_router_delay () {
+ // Reported by print_router() as the number of pipeline stages.
+ delay = 4;
+
+ FREQUENCY = 5; // move this to config file --TODO
+ cycle_time = 1e3 * (1 / (double)FREQUENCY); //ps
+
+ // Lower bound on the cycle: 17 * FO4, converted in a second step.
+ max_cyc = 17 * g_tp.FO4; //s
+ max_cyc *= 1e12; //ps
+
+ // If the default cycle is shorter than that bound, replace the
+ // frequency with the one derived from max_cyc.
+ const bool cycle_too_short = cycle_time < max_cyc;
+ if (cycle_too_short) {
+ FREQUENCY = 1e3 * (1 / max_cyc); //GHz
+ }
}
- void
-Router::get_router_area()
-{
- area.h = I*buffer.area.h;
- area.w = buffer.area.w+crossbar.area.w;
+void
+Router::get_router_area() {
+ // Width: buffer and crossbar side by side. Height: I buffer heights.
+ area.w = crossbar.area.w + buffer.area.w;
+ area.h = buffer.area.h * I;
}
- void
-Router::calc_router_parameters()
-{
- /* calculate router frequency and pipeline cycles */
- get_router_delay();
+void
+Router::calc_router_parameters() {
+ /* calculate router frequency and pipeline cycles (sets FREQUENCY, cycle_time, delay, max_cyc) */
+ get_router_delay();
- /* router power stats */
- get_router_power();
+ /* router power stats; this also fills buffer.area and crossbar.area, which get_router_area() reads */
+ get_router_power();
- /* area stats */
- get_router_area();
+ /* area stats; must run after get_router_power() for the reason above */
+ get_router_area();
}
- void
-Router::print_router()
-{
- cout << "\n\nRouter stats:\n";
- cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n";
- cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n";
- cout << "\tNo. of Virtual channels - " << vc_count << "\n";
- cout << "\tNo. of pipeline stages - " << delay << endl;
- cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
- cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n";
- cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n";
- cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n";
- cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n";
- cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n";
- cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<<arbiter.power.readOp.dynamic * 1e9 <<" (nJ)\n";
- cout << "\tArbiter leakage (VC arb + Crossbar arb) - "<<arbiter.power.readOp.leakage * 1e3 <<" (mW)\n";
+void
+Router::print_router() {
+ // Multipliers applied to areas, energies and powers at print time.
+ const double area_scale = 1e-6;
+ const double energy_scale = 1e9;
+ const double power_scale = 1e3;
+
+ // Router-wide figures (the endl after the pipeline line is kept so the
+ // stream is flushed at the same point as before).
+ cout << "\n\nRouter stats:\n"
+ << "\tRouter Area - " << area.get_area() * area_scale << "(mm^2)\n"
+ << "\tMaximum possible network frequency - " << (1 / max_cyc)*1e3
+ << "GHz\n"
+ << "\tNetwork frequency - " << FREQUENCY << " GHz\n"
+ << "\tNo. of Virtual channels - " << vc_count << "\n"
+ << "\tNo. of pipeline stages - " << delay << endl;
+ cout << "\tLink bandwidth - " << flit_size << " (bits)\n"
+ << "\tNo. of buffer entries per virtual channel - "
+ << vc_buffer_size << "\n";
+
+ // Buffer figures.
+ cout << "\tSimple buffer Area - "
+ << buffer.area.get_area() * area_scale << "(mm^2)\n"
+ << "\tSimple buffer access (Read) - "
+ << buffer.power.readOp.dynamic * energy_scale << " (nJ)\n"
+ << "\tSimple buffer leakage - "
+ << buffer.power.readOp.leakage * power_scale << " (mW)\n";
+
+ // Crossbar figures.
+ cout << "\tCrossbar Area - "
+ << crossbar.area.get_area() * area_scale << "(mm^2)\n"
+ << "\tCross bar access energy - "
+ << crossbar.power.readOp.dynamic * energy_scale << " (nJ)\n"
+ << "\tCross bar leakage power - "
+ << crossbar.power.readOp.leakage * power_scale << " (mW)\n";
+
+ // Arbiter figures.
+ cout << "\tArbiter access energy (VC arb + Crossbar arb) - "
+ << arbiter.power.readOp.dynamic * energy_scale << " (nJ)\n"
+ << "\tArbiter leakage (VC arb + Crossbar arb) - "
+ << arbiter.power.readOp.leakage * power_scale << " (mW)\n";
}
diff --git a/ext/mcpat/cacti/router.h b/ext/mcpat/cacti/router.h
index 72ef44939..b7c4b7e69 100644
--- a/ext/mcpat/cacti/router.h
+++ b/ext/mcpat/cacti/router.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -47,9 +48,8 @@
#include "parameter.h"
#include "wire.h"
-class Router : public Component
-{
- public:
+class Router : public Component {
+public:
Router(
double flit_size_,
double vc_buf, /* vc size = vc_buffer_size * flit_size */
@@ -70,9 +70,9 @@ class Router : public Component
double vc_count;
double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
- private:
- TechnologyParameter::DeviceType *deviceType;
- double FREQUENCY; // move this to config file --TODO
+private:
+ TechnologyParameter::DeviceType *deviceType;
+ double FREQUENCY; // move this to config file --TODO
double Cw3(double len);
double gate_cap(double w);
double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
diff --git a/ext/mcpat/cacti/subarray.cc b/ext/mcpat/cacti/subarray.cc
index 7cbf7d990..f4b7de79b 100755
--- a/ext/mcpat/cacti/subarray.cc
+++ b/ext/mcpat/cacti/subarray.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,158 +40,152 @@
#include "subarray.h"
Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
- dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
- num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
- cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_)
-{
- //num_cols=7;
- //cout<<"num_cols ="<< num_cols <<endl;
- if (!(is_fa || dp.pure_cam))
- {
- num_cols +=(g_ip->add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead
- uint32_t ram_num_cells_wl_stitching =
- (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
- (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
-
- area.h = cell.h * num_rows;
-
- area.w = cell.w * num_cols +
- ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
- }
- else //cam fa
- {
-
- //should not add dummy row here since the dummy row do not need decoder
- if (is_fa)// fully associative cache
- {
- num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
- num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
- num_cols = num_cols_fa_cam + num_cols_fa_ram;
- }
- else
- {
- num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
- num_cols_fa_ram = 0;
- num_cols = num_cols_fa_cam;
- }
-
- area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
- area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
- + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_
- + 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves
- + 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM
- }
-
- assert(area.h>0);
- assert(area.w>0);
- compute_C();
+ dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
+ num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
+ cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) {
+ //num_cols=7;
+ //cout<<"num_cols ="<< num_cols <<endl;
+ if (!(is_fa || dp.pure_cam)) {
+ // ECC overhead. NOTE(review): if num_cols and num_bits_per_ecc_b_ are both integral, the quotient below is truncated before ceil() sees it — confirm the constant's type
+ num_cols += (g_ip->add_ecc_b_ ? (int)ceil(num_cols /
+ num_bits_per_ecc_b_) : 0);
+ uint32_t ram_num_cells_wl_stitching =
+ (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
+ (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
+
+ area.h = cell.h * num_rows;
+
+ area.w = cell.w * num_cols +
+ ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead. NOTE(review): the divisor is a uint32_t, so if num_cols is integral this is an integer division and ceil() has no effect — confirm
+ } else { //cam fa
+
+ //should not add dummy row here since the dummy row do not need decoder
+ if (is_fa) { // fully associative cache
+ num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
+ num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
+ num_cols = num_cols_fa_cam + num_cols_fa_ram;
+ } else {
+ num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
+ num_cols_fa_ram = 0;
+ num_cols = num_cols_fa_cam;
+ }
+
+ area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
+ area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
+ + ceil((num_cols_fa_cam + num_cols_fa_ram) /
+ sram_num_cells_wl_stitching_) *
+ g_tp.ram_wl_stitching_overhead_
+ //the overhead for the NAND gate to connect the two halves
+ + 16 * g_tp.wire_local.pitch
+ //the overhead for the drivers from matchline to wordline of RAM
+ + 128 * g_tp.wire_local.pitch;
+ }
+
+ assert(area.h > 0); // both dimensions must be positive before capacitances are derived from them
+ assert(area.w > 0);
+ compute_C();
}
-Subarray::~Subarray()
-{
+Subarray::~Subarray() { // empty body: no explicit teardown is performed here
}
-double Subarray::get_total_cell_area()
-{
+double Subarray::get_total_cell_area() {
// return (is_fa==false? cell.get_area() * num_rows * num_cols
// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
if (!(is_fa || dp.pure_cam))
- return (cell.get_area() * num_rows * num_cols);
- else if (is_fa)
- { //for FA, this area includes the dummy cells in SRAM arrays.
- //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
- //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
- return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
+ return (cell.get_area() * num_rows * num_cols); // neither fully associative nor pure CAM: cell area times rows times columns
+ else if (is_fa) {
+ //for FA, this area includes the dummy cells in SRAM arrays.
+ //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
+ //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
+ return (cam_cell.h * (num_rows + 1) * // CAM cell height over num_rows + 1 rows, times the combined width of the CAM and RAM columns
+ (cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
+ } else {
+ return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam ); // reached only when dp.pure_cam is set: CAM columns only, again with num_rows + 1 rows
 }
- else
- return (cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam );
}
-void Subarray::compute_C()
-{
- double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
- double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
- double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
- double C_b_row_drain_C;
-
- if (dp.is_dram)
- {
- C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
-
- if (dp.ram_cell_tech_type == comm_dram)
- {
- C_bl = num_rows * C_b_metal;
- }
- else
- {
- C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
- C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
+void Subarray::compute_C() { // Sets C_wl and C_bl for every array type; the CAM/FA branch also sets R_wl and the per-portion *_cam / *_ram values.
+ double c_w_metal = cell.w * g_tp.wire_local.C_per_um; // local-wire capacitance over one cell width
+ double r_w_metal = cell.w * g_tp.wire_local.R_per_um; // this initial value is never read; it is reassigned before its first use in the CAM/FA branch
+ double C_b_metal = cell.h * g_tp.wire_local.C_per_um; // local-wire capacitance over one cell height
+ double C_b_row_drain_C; // per-row drain capacitance term; assigned only in the branches that use it
+
+ if (dp.is_dram) {
+ C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
+
+ if (dp.ram_cell_tech_type == comm_dram) {
+ C_bl = num_rows * C_b_metal; // comm_dram: only the metal term is counted on the bitline
+ } else {
+ C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
+ C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
+ }
+ } else {
+ if (!(is_fa || dp.pure_cam)) { // neither FA nor pure CAM: uses the g_tp.sram parameters
+ C_wl = (gate_C_pass(g_tp.sram.cell_a_w,
+ (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0,
+ false, true) * 2 +
+ c_w_metal) * num_cols;
+ C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
+ C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
+ } else {
+ //Following is wordline not matchline
+ //CAM portion
+ c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
+ r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w,
+ (g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) /
+ 2.0, false, true) * 2 +
+ c_w_metal) * num_cols_fa_cam;
+ R_wl_cam = (r_w_metal) * num_cols_fa_cam;
+
+ if (!dp.pure_cam) {
+ //RAM portion
+ c_w_metal = cell.w * g_tp.wire_local.C_per_um;
+ r_w_metal = cell.w * g_tp.wire_local.R_per_um;
+ C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w,
+ (g_tp.sram.b_w - 2 *
+ g_tp.sram.cell_a_w) / 2.0, false,
+ true) * 2 +
+ c_w_metal) * num_cols_fa_ram;
+ R_wl_ram = (r_w_metal) * num_cols_fa_ram;
+ } else {
+ C_wl_ram = R_wl_ram = 0; // pure CAM: there is no RAM portion
+ }
+ C_wl = C_wl_cam + C_wl_ram;
+ C_wl += (16 + 128) * g_tp.wire_local.pitch *
+ g_tp.wire_local.C_per_um; // 16 + 128 wire pitches of extra wordline length; the constructor adds the same 16 and 128 pitches to area.w
+
+ R_wl = R_wl_cam + R_wl_ram;
+ R_wl += (16 + 128) * g_tp.wire_local.pitch *
+ g_tp.wire_local.R_per_um; // same extra wordline length, applied to resistance
+
+ //there are two ways to write to a FA,
+ //1) Write to CAM array then force a match on match line to activate the corresponding wordline in RAM;
+ //2) using separate wordline for read/write and search in RAM.
+ //We are using the second approach.
+
+ //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
+ C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
+ C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
+ C_bl_cam = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
+ //The height of the subarray is decided by the CAM array; blank space in the SRAM array is filled with dummy cells.
+ C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
+ C_bl = (num_rows + 1) * (C_b_row_drain_C + C_b_metal); // C_b_metal is still the cam_cell.h-based value assigned above
+
+ }
}
- }
- else
- {
- if (!(is_fa ||dp.pure_cam))
- {
- C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
- c_w_metal) * num_cols;
- C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
- C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
- }
- else
- {
- //Following is wordline not matchline
- //CAM portion
- c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
- r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 +
- c_w_metal) * num_cols_fa_cam;
- R_wl_cam = (r_w_metal) * num_cols_fa_cam;
-
- if (!dp.pure_cam)
- {
- //RAM portion
- c_w_metal = cell.w * g_tp.wire_local.C_per_um;
- r_w_metal = cell.w * g_tp.wire_local.R_per_um;
- C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
- c_w_metal) * num_cols_fa_ram;
- R_wl_ram = (r_w_metal) * num_cols_fa_ram;
- }
- else
- {
- C_wl_ram = R_wl_ram =0;
- }
- C_wl = C_wl_cam + C_wl_ram;
- C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um;
-
- R_wl = R_wl_cam + R_wl_ram;
- R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um;
-
- //there are two ways to write to a FA,
- //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
- //2) using separate wordline for read/write and search in RAM.
- //We are using the second approach.
-
- //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
- C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
- C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
- C_bl_cam = (num_rows+1) * (C_b_row_drain_C + C_b_metal);
- //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
- C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
- C_bl = (num_rows +1) * (C_b_row_drain_C + C_b_metal);
-
- }
- }
}
diff --git a/ext/mcpat/cacti/subarray.h b/ext/mcpat/cacti/subarray.h
index 5fb062420..50560a101 100755
--- a/ext/mcpat/cacti/subarray.h
+++ b/ext/mcpat/cacti/subarray.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -41,9 +42,8 @@
using namespace std;
-class Subarray : public Component
-{
- public:
+class Subarray : public Component {
+public:
Subarray(const DynamicParameter & dp, bool is_fa_);
~Subarray();
@@ -59,7 +59,7 @@ class Subarray : public Component
double C_wl, C_wl_cam, C_wl_ram;
double R_wl, R_wl_cam, R_wl_ram;
double C_bl, C_bl_cam;
- private:
+private:
void compute_C(); // compute bitline and wordline capacitance
};
diff --git a/ext/mcpat/cacti/technology.cc b/ext/mcpat/cacti/technology.cc
index a40c6eb44..f2e0ef196 100644
--- a/ext/mcpat/cacti/technology.cc
+++ b/ext/mcpat/cacti/technology.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -34,1445 +35,1381 @@
#include "parameter.h"
-double wire_resistance(double resistivity, double wire_width, double wire_thickness,
- double barrier_thickness, double dishing_thickness, double alpha_scatter)
-{
- double resistance;
- resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness));
- return(resistance);
+double wire_resistance(double resistivity, double wire_width,
+ double wire_thickness,
+ double barrier_thickness, double dishing_thickness,
+ double alpha_scatter) { // Returns alpha_scatter * resistivity divided by the reduced wire cross-section computed below.
+ double resistance; // NOTE(review): presumably resistance per unit wire length; units follow the caller's inputs - TODO confirm
+ resistance = alpha_scatter * resistivity /
+ ((wire_thickness - barrier_thickness - dishing_thickness) * // thickness less the barrier and dishing thicknesses
+ (wire_width - 2 * barrier_thickness)); // width less two barrier thicknesses; no guard against a zero or negative product
+ return(resistance);
}
-double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing,
- double ild_thickness, double miller_value, double horiz_dielectric_constant,
- double vert_dielectric_constant, double fringe_cap)
-{
- double vertical_cap, sidewall_cap, total_cap;
- vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness;
- sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing;
- total_cap = vertical_cap + sidewall_cap + fringe_cap;
- return(total_cap);
+double wire_capacitance(double wire_width, double wire_thickness,
+ double wire_spacing,
+ double ild_thickness, double miller_value,
+ double horiz_dielectric_constant,
+ double vert_dielectric_constant, double fringe_cap) { // Returns the sum of a vertical term, a sidewall term, and the caller-supplied fringe_cap.
+ double vertical_cap, sidewall_cap, total_cap;
+ vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness; // NOTE(review): the factor 2 presumably counts the layers above and below - TODO confirm
+ sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing; // NOTE(review): the factor 2 presumably counts both neighbouring wires, scaled by miller_value - TODO confirm
+ total_cap = vertical_cap + sidewall_cap + fringe_cap; // fringe_cap is added unscaled
+ return(total_cap);
}
-void init_tech_params(double technology, bool is_tag)
-{
- int iter, tech, tech_lo, tech_hi;
- double curr_alpha, curr_vpp;
- double wire_width, wire_thickness, wire_spacing,
- fringe_cap, pmos_to_nmos_sizing_r;
+void init_tech_params(double technology, bool is_tag) {
+ int iter, tech, tech_lo, tech_hi;
+ double curr_alpha, curr_vpp;
+ double wire_width, wire_thickness, wire_spacing,
+ fringe_cap, pmos_to_nmos_sizing_r;
// double aspect_ratio,ild_thickness, miller_value = 1.5, horiz_dielectric_constant, vert_dielectric_constant;
- double barrier_thickness, dishing_thickness, alpha_scatter;
- double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell;
-
- uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
- uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type;
-
- technology = technology * 1000.0; // in the unit of nm
-
- // initialize parameters
- g_tp.reset();
- double gmp_to_gmn_multiplier_periph_global = 0;
-
- double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram,
- curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram,
- curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram,
- curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp;
- double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data
- curr_asp_ratio_cell_cam;
- double SENSE_AMP_D, SENSE_AMP_P; // J
- double area_cell_dram = 0;
- double asp_ratio_cell_dram = 0;
- double area_cell_sram = 0;
- double asp_ratio_cell_sram = 0;
- double area_cell_cam = 0;
- double asp_ratio_cell_cam = 0;
- double mobility_eff_periph_global = 0;
- double Vdsat_periph_global = 0;
- double nmos_effective_resistance_multiplier;
- double width_dram_access_transistor;
-
- double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date
- double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn
- double curr_chip_layout_overhead = 0;
- double curr_macro_layout_overhead = 0;
- double curr_sckt_co_eff = 0;
-
- if (technology < 181 && technology > 179)
- {
+ double barrier_thickness, dishing_thickness, alpha_scatter;
+ double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell;
+
+ uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
+ uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type;
+
+ technology = technology * 1000.0; // in the unit of nm
+
+ // initialize parameters
+ g_tp.reset();
+ double gmp_to_gmn_multiplier_periph_global = 0;
+
+ double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram,
+ curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram,
+ curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram,
+ curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp;
+ double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data
+ curr_asp_ratio_cell_cam;
+ double SENSE_AMP_D, SENSE_AMP_P; // J
+ double area_cell_dram = 0;
+ double asp_ratio_cell_dram = 0;
+ double area_cell_sram = 0;
+ double asp_ratio_cell_sram = 0;
+ double area_cell_cam = 0;
+ double asp_ratio_cell_cam = 0;
+ double mobility_eff_periph_global = 0;
+ double Vdsat_periph_global = 0;
+ double nmos_effective_resistance_multiplier;
+ double width_dram_access_transistor;
+
+ double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date
+ double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn
+ double curr_chip_layout_overhead = 0;
+ double curr_macro_layout_overhead = 0;
+ double curr_sckt_co_eff = 0;
+
+ if (technology < 181 && technology > 179) {
tech_lo = 180;
tech_hi = 180;
- }
- else if (technology < 91 && technology > 89)
- {
- tech_lo = 90;
- tech_hi = 90;
- }
- else if (technology < 66 && technology > 64)
- {
- tech_lo = 65;
- tech_hi = 65;
- }
- else if (technology < 46 && technology > 44)
- {
- tech_lo = 45;
- tech_hi = 45;
- }
- else if (technology < 33 && technology > 31)
- {
- tech_lo = 32;
- tech_hi = 32;
- }
- else if (technology < 23 && technology > 21)
- {
- tech_lo = 22;
- tech_hi = 22;
- if (ram_cell_tech_type == 3 )
- {
- cout<<"current version does not support eDRAM technologies at 22nm"<<endl;
- exit(0);
- }
- }
-// else if (technology < 17 && technology > 15)
-// {
-// tech_lo = 16;
-// tech_hi = 16;
-// }
- else if (technology < 180 && technology > 90)
- {
- tech_lo = 180;
- tech_hi = 90;
- }
- else if (technology < 90 && technology > 65)
- {
- tech_lo = 90;
- tech_hi = 65;
- }
- else if (technology < 65 && technology > 45)
- {
- tech_lo = 65;
- tech_hi = 45;
- }
- else if (technology < 45 && technology > 32)
- {
- tech_lo = 45;
- tech_hi = 32;
- }
- else if (technology < 32 && technology > 22)
- {
- tech_lo = 32;
- tech_hi = 22;
+ } else if (technology < 91 && technology > 89) {
+ tech_lo = 90;
+ tech_hi = 90;
+ } else if (technology < 66 && technology > 64) {
+ tech_lo = 65;
+ tech_hi = 65;
+ } else if (technology < 46 && technology > 44) {
+ tech_lo = 45;
+ tech_hi = 45;
+ } else if (technology < 33 && technology > 31) {
+ tech_lo = 32;
+ tech_hi = 32;
+ } else if (technology < 23 && technology > 21) {
+ tech_lo = 22;
+ tech_hi = 22;
+ if (ram_cell_tech_type == 3 ) {
+ cout << "current version does not support eDRAM technologies at "
+ << "22nm" << endl;
+ exit(0);
+ }
+ } else if (technology < 180 && technology > 90) {
+ tech_lo = 180;
+ tech_hi = 90;
+ } else if (technology < 90 && technology > 65) {
+ tech_lo = 90;
+ tech_hi = 65;
+ } else if (technology < 65 && technology > 45) {
+ tech_lo = 65;
+ tech_hi = 45;
+ } else if (technology < 45 && technology > 32) {
+ tech_lo = 45;
+ tech_hi = 32;
+ } else if (technology < 32 && technology > 22) {
+ tech_lo = 32;
+ tech_hi = 22;
}
// else if (technology < 22 && technology > 16)
// {
// tech_lo = 22;
// tech_hi = 16;
// }
- else
- {
- cout<<"Invalid technology nodes"<<endl;
- exit(0);
+ else {
+ cout << "Invalid technology nodes" << endl;
+ exit(0);
}
- double vdd[NUMBER_TECH_FLAVORS];
- double Lphy[NUMBER_TECH_FLAVORS];
- double Lelec[NUMBER_TECH_FLAVORS];
- double t_ox[NUMBER_TECH_FLAVORS];
- double v_th[NUMBER_TECH_FLAVORS];
- double c_ox[NUMBER_TECH_FLAVORS];
- double mobility_eff[NUMBER_TECH_FLAVORS];
- double Vdsat[NUMBER_TECH_FLAVORS];
- double c_g_ideal[NUMBER_TECH_FLAVORS];
- double c_fringe[NUMBER_TECH_FLAVORS];
- double c_junc[NUMBER_TECH_FLAVORS];
- double I_on_n[NUMBER_TECH_FLAVORS];
- double I_on_p[NUMBER_TECH_FLAVORS];
- double Rnchannelon[NUMBER_TECH_FLAVORS];
- double Rpchannelon[NUMBER_TECH_FLAVORS];
- double n_to_p_eff_curr_drv_ratio[NUMBER_TECH_FLAVORS];
- double I_off_n[NUMBER_TECH_FLAVORS][101];
- double I_g_on_n[NUMBER_TECH_FLAVORS][101];
- //double I_off_p[NUMBER_TECH_FLAVORS][101];
- double gmp_to_gmn_multiplier[NUMBER_TECH_FLAVORS];
- //double curr_sckt_co_eff[NUMBER_TECH_FLAVORS];
- double long_channel_leakage_reduction[NUMBER_TECH_FLAVORS];
-
- for (iter = 0; iter <= 1; ++iter)
- {
- // linear interpolation
- if (iter == 0)
- {
- tech = tech_lo;
- if (tech_lo == tech_hi)
- {
- curr_alpha = 1;
- }
- else
- {
- curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
- }
- }
- else
- {
- tech = tech_hi;
- if (tech_lo == tech_hi)
- {
- break;
- }
- else
- {
- curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
- }
- }
+ double vdd[NUMBER_TECH_FLAVORS];
+ double Lphy[NUMBER_TECH_FLAVORS];
+ double Lelec[NUMBER_TECH_FLAVORS];
+ double t_ox[NUMBER_TECH_FLAVORS];
+ double v_th[NUMBER_TECH_FLAVORS];
+ double c_ox[NUMBER_TECH_FLAVORS];
+ double mobility_eff[NUMBER_TECH_FLAVORS];
+ double Vdsat[NUMBER_TECH_FLAVORS];
+ double c_g_ideal[NUMBER_TECH_FLAVORS];
+ double c_fringe[NUMBER_TECH_FLAVORS];
+ double c_junc[NUMBER_TECH_FLAVORS];
+ double I_on_n[NUMBER_TECH_FLAVORS];
+ double I_on_p[NUMBER_TECH_FLAVORS];
+ double Rnchannelon[NUMBER_TECH_FLAVORS];
+ double Rpchannelon[NUMBER_TECH_FLAVORS];
+ double n_to_p_eff_curr_drv_ratio[NUMBER_TECH_FLAVORS];
+ double I_off_n[NUMBER_TECH_FLAVORS][101];
+ double I_g_on_n[NUMBER_TECH_FLAVORS][101];
+ double gmp_to_gmn_multiplier[NUMBER_TECH_FLAVORS];
+ double long_channel_leakage_reduction[NUMBER_TECH_FLAVORS];
+
+ for (iter = 0; iter <= 1; ++iter) {
+ // linear interpolation
+ if (iter == 0) {
+ tech = tech_lo;
+ if (tech_lo == tech_hi) {
+ curr_alpha = 1;
+ } else {
+ curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi);
+ }
+ } else {
+ tech = tech_hi;
+ if (tech_lo == tech_hi) {
+ break;
+ } else {
+ curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi);
+ }
+ }
- if (tech == 180)
- {
- //180nm technology-node. Corresponds to year 1999 in ITRS
- //Only HP transistor was of interest that 180nm since leakage power was not a big issue. Performance was the king
- //MASTAR does not contain data for 0.18um process. The following parameters are projected based on ITRS 2000 update and IBM 0.18 Cu Spice input
- bool Aggre_proj = false;
- SENSE_AMP_D = .28e-9; // s
- SENSE_AMP_P = 14.7e-15; // J
- vdd[0] = 1.5;
- Lphy[0] = 0.12;//Lphy is the physical gate-length. micron
- Lelec[0] = 0.10;//Lelec is the electrical gate-length. micron
- t_ox[0] = 1.2e-3*(Aggre_proj? 1.9/1.2:2);//micron
- v_th[0] = Aggre_proj? 0.36 : 0.4407;//V
- c_ox[0] = 1.79e-14*(Aggre_proj? 1.9/1.2:2);//F/micron2
- mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 0.128*2; //V
- c_g_ideal[0] = (Aggre_proj? 1.9/1.2:2)*6.64e-16;//F/micron
- c_fringe[0] = (Aggre_proj? 1.9/1.2:2)*0.08e-15;//F/micron
- c_junc[0] = (Aggre_proj? 1.9/1.2:2)*1e-15;//F/micron2
- I_on_n[0] = 750e-6;//A/micron
- I_on_p[0] = 350e-6;//A/micron
- //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
- nmos_effective_resistance_multiplier = 1.54;
- n_to_p_eff_curr_drv_ratio[0] = 2.45;
- gmp_to_gmn_multiplier[0] = 1.22;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1;
- I_off_n[0][0] = 7e-10;//A/micron
- I_off_n[0][10] = 8.26e-10;
- I_off_n[0][20] = 9.74e-10;
- I_off_n[0][30] = 1.15e-9;
- I_off_n[0][40] = 1.35e-9;
- I_off_n[0][50] = 1.60e-9;
- I_off_n[0][60] = 1.88e-9;
- I_off_n[0][70] = 2.29e-9;
- I_off_n[0][80] = 2.70e-9;
- I_off_n[0][90] = 3.19e-9;
- I_off_n[0][100] = 3.76e-9;
-
- I_g_on_n[0][0] = 1.65e-10;//A/micron
- I_g_on_n[0][10] = 1.65e-10;
- I_g_on_n[0][20] = 1.65e-10;
- I_g_on_n[0][30] = 1.65e-10;
- I_g_on_n[0][40] = 1.65e-10;
- I_g_on_n[0][50] = 1.65e-10;
- I_g_on_n[0][60] = 1.65e-10;
- I_g_on_n[0][70] = 1.65e-10;
- I_g_on_n[0][80] = 1.65e-10;
- I_g_on_n[0][90] = 1.65e-10;
- I_g_on_n[0][100] = 1.65e-10;
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
- curr_asp_ratio_cell_cam = 2.92;//2.5
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm
- curr_core_tx_density = 1.25*0.7*0.7*0.4;
- curr_sckt_co_eff = 1.11;
- curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb
+ if (tech == 180) {
+ //180nm technology-node. Corresponds to year 1999 in ITRS
+ //Only HP transistor was of interest that 180nm since leakage power was not a big issue. Performance was the king
+ //MASTAR does not contain data for 0.18um process. The following parameters are projected based on ITRS 2000 update and IBM 0.18 Cu Spice input
+ bool Aggre_proj = false;
+ SENSE_AMP_D = .28e-9; // s
+ SENSE_AMP_P = 14.7e-15; // J
+ vdd[0] = 1.5;
+ Lphy[0] = 0.12;//Lphy is the physical gate-length. micron
+ Lelec[0] = 0.10;//Lelec is the electrical gate-length. micron
+ t_ox[0] = 1.2e-3 * (Aggre_proj ? 1.9 / 1.2 : 2);//micron
+ v_th[0] = Aggre_proj ? 0.36 : 0.4407;//V
+ c_ox[0] = 1.79e-14 * (Aggre_proj ? 1.9 / 1.2 : 2);//F/micron2
+ mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 0.128 * 2; //V
+ c_g_ideal[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 6.64e-16;//F/micron
+ c_fringe[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 0.08e-15;//F/micron
+ c_junc[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 1e-15;//F/micron2
+ I_on_n[0] = 750e-6;//A/micron
+ I_on_p[0] = 350e-6;//A/micron
+ //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
+ nmos_effective_resistance_multiplier = 1.54;
+ n_to_p_eff_curr_drv_ratio[0] = 2.45;
+ gmp_to_gmn_multiplier[0] = 1.22;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1;
+ I_off_n[0][0] = 7e-10;//A/micron
+ I_off_n[0][10] = 8.26e-10;
+ I_off_n[0][20] = 9.74e-10;
+ I_off_n[0][30] = 1.15e-9;
+ I_off_n[0][40] = 1.35e-9;
+ I_off_n[0][50] = 1.60e-9;
+ I_off_n[0][60] = 1.88e-9;
+ I_off_n[0][70] = 2.29e-9;
+ I_off_n[0][80] = 2.70e-9;
+ I_off_n[0][90] = 3.19e-9;
+ I_off_n[0][100] = 3.76e-9;
+
+ I_g_on_n[0][0] = 1.65e-10;//A/micron
+ I_g_on_n[0][10] = 1.65e-10;
+ I_g_on_n[0][20] = 1.65e-10;
+ I_g_on_n[0][30] = 1.65e-10;
+ I_g_on_n[0][40] = 1.65e-10;
+ I_g_on_n[0][50] = 1.65e-10;
+ I_g_on_n[0][60] = 1.65e-10;
+ I_g_on_n[0][70] = 1.65e-10;
+ I_g_on_n[0][80] = 1.65e-10;
+ I_g_on_n[0][90] = 1.65e-10;
+ I_g_on_n[0][100] = 1.65e-10;
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
+ curr_asp_ratio_cell_cam = 2.92;//2.5
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm
+ curr_core_tx_density = 1.25 * 0.7 * 0.7 * 0.4;
+ curr_sckt_co_eff = 1.11;
+ curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb
- }
+ }
- if (tech == 90)
- {
- SENSE_AMP_D = .28e-9; // s
- SENSE_AMP_P = 14.7e-15; // J
- //90nm technology-node. Corresponds to year 2004 in ITRS
- //ITRS HP device type
- vdd[0] = 1.2;
- Lphy[0] = 0.037;//Lphy is the physical gate-length. micron
- Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron
- t_ox[0] = 1.2e-3;//micron
- v_th[0] = 0.23707;//V
- c_ox[0] = 1.79e-14;//F/micron2
- mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 0.128; //V
- c_g_ideal[0] = 6.64e-16;//F/micron
- c_fringe[0] = 0.08e-15;//F/micron
- c_junc[0] = 1e-15;//F/micron2
- I_on_n[0] = 1076.9e-6;//A/micron
- I_on_p[0] = 712.6e-6;//A/micron
- //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
- nmos_effective_resistance_multiplier = 1.54;
- n_to_p_eff_curr_drv_ratio[0] = 2.45;
- gmp_to_gmn_multiplier[0] = 1.22;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1;
- I_off_n[0][0] = 3.24e-8;//A/micron
- I_off_n[0][10] = 4.01e-8;
- I_off_n[0][20] = 4.90e-8;
- I_off_n[0][30] = 5.92e-8;
- I_off_n[0][40] = 7.08e-8;
- I_off_n[0][50] = 8.38e-8;
- I_off_n[0][60] = 9.82e-8;
- I_off_n[0][70] = 1.14e-7;
- I_off_n[0][80] = 1.29e-7;
- I_off_n[0][90] = 1.43e-7;
- I_off_n[0][100] = 1.54e-7;
-
- I_g_on_n[0][0] = 1.65e-8;//A/micron
- I_g_on_n[0][10] = 1.65e-8;
- I_g_on_n[0][20] = 1.65e-8;
- I_g_on_n[0][30] = 1.65e-8;
- I_g_on_n[0][40] = 1.65e-8;
- I_g_on_n[0][50] = 1.65e-8;
- I_g_on_n[0][60] = 1.65e-8;
- I_g_on_n[0][70] = 1.65e-8;
- I_g_on_n[0][80] = 1.65e-8;
- I_g_on_n[0][90] = 1.65e-8;
- I_g_on_n[0][100] = 1.65e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.3;
- Lphy[1] = 0.075;
- Lelec[1] = 0.0486;
- t_ox[1] = 2.2e-3;
- v_th[1] = 0.48203;
- c_ox[1] = 1.22e-14;
- mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 0.373;
- c_g_ideal[1] = 9.15e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 503.6e-6;
- I_on_p[1] = 235.1e-6;
- nmos_effective_resistance_multiplier = 1.92;
- n_to_p_eff_curr_drv_ratio[1] = 2.44;
- gmp_to_gmn_multiplier[1] =0.88;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1;
- I_off_n[1][0] = 2.81e-12;
- I_off_n[1][10] = 4.76e-12;
- I_off_n[1][20] = 7.82e-12;
- I_off_n[1][30] = 1.25e-11;
- I_off_n[1][40] = 1.94e-11;
- I_off_n[1][50] = 2.94e-11;
- I_off_n[1][60] = 4.36e-11;
- I_off_n[1][70] = 6.32e-11;
- I_off_n[1][80] = 8.95e-11;
- I_off_n[1][90] = 1.25e-10;
- I_off_n[1][100] = 1.7e-10;
-
- I_g_on_n[1][0] = 3.87e-11;//A/micron
- I_g_on_n[1][10] = 3.87e-11;
- I_g_on_n[1][20] = 3.87e-11;
- I_g_on_n[1][30] = 3.87e-11;
- I_g_on_n[1][40] = 3.87e-11;
- I_g_on_n[1][50] = 3.87e-11;
- I_g_on_n[1][60] = 3.87e-11;
- I_g_on_n[1][70] = 3.87e-11;
- I_g_on_n[1][80] = 3.87e-11;
- I_g_on_n[1][90] = 3.87e-11;
- I_g_on_n[1][100] = 3.87e-11;
-
- //ITRS LOP device type
- vdd[2] = 0.9;
- Lphy[2] = 0.053;
- Lelec[2] = 0.0354;
- t_ox[2] = 1.5e-3;
- v_th[2] = 0.30764;
- c_ox[2] = 1.59e-14;
- mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 0.113;
- c_g_ideal[2] = 8.45e-16;
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 386.6e-6;
- I_on_p[2] = 209.7e-6;
- nmos_effective_resistance_multiplier = 1.77;
- n_to_p_eff_curr_drv_ratio[2] = 2.54;
- gmp_to_gmn_multiplier[2] = 0.98;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1;
- I_off_n[2][0] = 2.14e-9;
- I_off_n[2][10] = 2.9e-9;
- I_off_n[2][20] = 3.87e-9;
- I_off_n[2][30] = 5.07e-9;
- I_off_n[2][40] = 6.54e-9;
- I_off_n[2][50] = 8.27e-8;
- I_off_n[2][60] = 1.02e-7;
- I_off_n[2][70] = 1.20e-7;
- I_off_n[2][80] = 1.36e-8;
- I_off_n[2][90] = 1.52e-8;
- I_off_n[2][100] = 1.73e-8;
-
- I_g_on_n[2][0] = 4.31e-8;//A/micron
- I_g_on_n[2][10] = 4.31e-8;
- I_g_on_n[2][20] = 4.31e-8;
- I_g_on_n[2][30] = 4.31e-8;
- I_g_on_n[2][40] = 4.31e-8;
- I_g_on_n[2][50] = 4.31e-8;
- I_g_on_n[2][60] = 4.31e-8;
- I_g_on_n[2][70] = 4.31e-8;
- I_g_on_n[2][80] = 4.31e-8;
- I_g_on_n[2][90] = 4.31e-8;
- I_g_on_n[2][100] = 4.31e-8;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.2;
- Lphy[3] = 0.12;
- Lelec[3] = 0.0756;
- curr_v_th_dram_access_transistor = 0.4545;
- width_dram_access_transistor = 0.14;
- curr_I_on_dram_cell = 45e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 0.168;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.6;
- t_ox[3] = 2.2e-3;
- v_th[3] = 0.4545;
- c_ox[3] = 1.22e-14;
- mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.3;
- c_g_ideal[3] = 1.47e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 321.6e-6;
- I_on_p[3] = 203.3e-6;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.42e-11;
- I_off_n[3][10] = 2.25e-11;
- I_off_n[3][20] = 3.46e-11;
- I_off_n[3][30] = 5.18e-11;
- I_off_n[3][40] = 7.58e-11;
- I_off_n[3][50] = 1.08e-10;
- I_off_n[3][60] = 1.51e-10;
- I_off_n[3][70] = 2.02e-10;
- I_off_n[3][80] = 2.57e-10;
- I_off_n[3][90] = 3.14e-10;
- I_off_n[3][100] = 3.85e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.6;
- Lphy[3] = 0.09;
- Lelec[3] = 0.0576;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.09;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.09*0.09;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 3.7;
- t_ox[3] = 5.5e-3;
- v_th[3] = 1.0;
- c_ox[3] = 5.65e-15;
- mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.32;
- c_g_ideal[3] = 5.08e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1094.3e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.62;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 5.80e-15;
- I_off_n[3][10] = 1.21e-14;
- I_off_n[3][20] = 2.42e-14;
- I_off_n[3][30] = 4.65e-14;
- I_off_n[3][40] = 8.60e-14;
- I_off_n[3][50] = 1.54e-13;
- I_off_n[3][60] = 2.66e-13;
- I_off_n[3][70] = 4.45e-13;
- I_off_n[3][80] = 7.17e-13;
- I_off_n[3][90] = 1.11e-12;
- I_off_n[3][100] = 1.67e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
- curr_asp_ratio_cell_cam = 2.92;//2.5
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 1;
- curr_core_tx_density = 1.25*0.7*0.7;
- curr_sckt_co_eff = 1.1539;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ // 90nm technology node; corresponds to year 2004 in ITRS.
+ // First array index: 0 = ITRS HP, 1 = ITRS LSTP, 2 = ITRS LOP,
+ // 3 = DRAM access/wordline transistor of the selected DRAM cell type.
+ // The second index of I_off_n / I_g_on_n runs 0..100 in steps of 10
+ // (presumably a temperature offset -- TODO confirm against the code that
+ // reads these tables).
+ if (tech == 90) {
+     SENSE_AMP_D = .28e-9; // s
+     SENSE_AMP_P = 14.7e-15; // J
+
+     //ITRS HP device type
+     vdd[0] = 1.2;
+     Lphy[0] = 0.037;//Lphy is the physical gate-length. micron
+     Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron
+     t_ox[0] = 1.2e-3;//micron
+     v_th[0] = 0.23707;//V
+     c_ox[0] = 1.79e-14;//F/micron2
+     mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+     Vdsat[0] = 0.128; //V
+     c_g_ideal[0] = 6.64e-16;//F/micron
+     c_fringe[0] = 0.08e-15;//F/micron
+     c_junc[0] = 1e-15;//F/micron2
+     I_on_n[0] = 1076.9e-6;//A/micron
+     I_on_p[0] = 712.6e-6;//A/micron
+     //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
+     //nmos_effective_resistance_multiplier is a shared scalar: it is overwritten
+     //per device type and must be set before the Rnchannelon line that uses it.
+     nmos_effective_resistance_multiplier = 1.54;
+     n_to_p_eff_curr_drv_ratio[0] = 2.45;
+     gmp_to_gmn_multiplier[0] = 1.22;
+     Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+     Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+     long_channel_leakage_reduction[0] = 1;
+     I_off_n[0][0] = 3.24e-8;//A/micron
+     I_off_n[0][10] = 4.01e-8;
+     I_off_n[0][20] = 4.90e-8;
+     I_off_n[0][30] = 5.92e-8;
+     I_off_n[0][40] = 7.08e-8;
+     I_off_n[0][50] = 8.38e-8;
+     I_off_n[0][60] = 9.82e-8;
+     I_off_n[0][70] = 1.14e-7;
+     I_off_n[0][80] = 1.29e-7;
+     I_off_n[0][90] = 1.43e-7;
+     I_off_n[0][100] = 1.54e-7;
+
+     //Gate leakage is the same value at every tabulated index
+     for (int idx = 0; idx <= 100; idx += 10) {
+         I_g_on_n[0][idx] = 1.65e-8;//A/micron
+     }
+
+     //ITRS LSTP device type
+     vdd[1] = 1.3;
+     Lphy[1] = 0.075;
+     Lelec[1] = 0.0486;
+     t_ox[1] = 2.2e-3;
+     v_th[1] = 0.48203;
+     c_ox[1] = 1.22e-14;
+     mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
+     Vdsat[1] = 0.373;
+     c_g_ideal[1] = 9.15e-16;
+     c_fringe[1] = 0.08e-15;
+     c_junc[1] = 1e-15;
+     I_on_n[1] = 503.6e-6;
+     I_on_p[1] = 235.1e-6;
+     nmos_effective_resistance_multiplier = 1.92;
+     n_to_p_eff_curr_drv_ratio[1] = 2.44;
+     gmp_to_gmn_multiplier[1] = 0.88;
+     Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+     Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+     long_channel_leakage_reduction[1] = 1;
+     I_off_n[1][0] = 2.81e-12;
+     I_off_n[1][10] = 4.76e-12;
+     I_off_n[1][20] = 7.82e-12;
+     I_off_n[1][30] = 1.25e-11;
+     I_off_n[1][40] = 1.94e-11;
+     I_off_n[1][50] = 2.94e-11;
+     I_off_n[1][60] = 4.36e-11;
+     I_off_n[1][70] = 6.32e-11;
+     I_off_n[1][80] = 8.95e-11;
+     I_off_n[1][90] = 1.25e-10;
+     I_off_n[1][100] = 1.7e-10;
+
+     for (int idx = 0; idx <= 100; idx += 10) {
+         I_g_on_n[1][idx] = 3.87e-11;//A/micron
+     }
+
+     //ITRS LOP device type
+     vdd[2] = 0.9;
+     Lphy[2] = 0.053;
+     Lelec[2] = 0.0354;
+     t_ox[2] = 1.5e-3;
+     v_th[2] = 0.30764;
+     c_ox[2] = 1.59e-14;
+     mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6);
+     Vdsat[2] = 0.113;
+     c_g_ideal[2] = 8.45e-16;
+     c_fringe[2] = 0.08e-15;
+     c_junc[2] = 1e-15;
+     I_on_n[2] = 386.6e-6;
+     I_on_p[2] = 209.7e-6;
+     nmos_effective_resistance_multiplier = 1.77;
+     n_to_p_eff_curr_drv_ratio[2] = 2.54;
+     gmp_to_gmn_multiplier[2] = 0.98;
+     Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+     Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+     long_channel_leakage_reduction[2] = 1;
+     I_off_n[2][0] = 2.14e-9;
+     I_off_n[2][10] = 2.9e-9;
+     I_off_n[2][20] = 3.87e-9;
+     I_off_n[2][30] = 5.07e-9;
+     I_off_n[2][40] = 6.54e-9;
+     //Entries [50]..[70] previously read 8.27e-8, 1.02e-7, 1.20e-7, i.e. a
+     //decade above their neighbours ([40] = 6.54e-9, [80] = 1.36e-8), which
+     //made the table jump up 10x and then fall back. The exponents are
+     //corrected so the series rises smoothly like every other I_off_n table.
+     I_off_n[2][50] = 8.27e-9;
+     I_off_n[2][60] = 1.02e-8;
+     I_off_n[2][70] = 1.20e-8;
+     I_off_n[2][80] = 1.36e-8;
+     I_off_n[2][90] = 1.52e-8;
+     I_off_n[2][100] = 1.73e-8;
+
+     for (int idx = 0; idx <= 100; idx += 10) {
+         I_g_on_n[2][idx] = 4.31e-8;//A/micron
+     }
+
+     if (ram_cell_tech_type == lp_dram) {
+         //LP-DRAM cell access transistor technology parameters
+         curr_vdd_dram_cell = 1.2;
+         Lphy[3] = 0.12;
+         Lelec[3] = 0.0756;
+         curr_v_th_dram_access_transistor = 0.4545;
+         width_dram_access_transistor = 0.14;
+         curr_I_on_dram_cell = 45e-6;
+         curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12;
+         curr_Wmemcella_dram = width_dram_access_transistor;
+         curr_Wmemcellpmos_dram = 0;
+         curr_Wmemcellnmos_dram = 0;
+         curr_area_cell_dram = 0.168;
+         curr_asp_ratio_cell_dram = 1.46;
+         curr_c_dram_cell = 20e-15;
+
+         //LP-DRAM wordline transistor parameters
+         curr_vpp = 1.6;
+         t_ox[3] = 2.2e-3;
+         v_th[3] = 0.4545;
+         c_ox[3] = 1.22e-14;
+         mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6);
+         Vdsat[3] = 0.3;
+         c_g_ideal[3] = 1.47e-15;
+         c_fringe[3] = 0.08e-15;
+         c_junc[3] = 1e-15;
+         I_on_n[3] = 321.6e-6;
+         I_on_p[3] = 203.3e-6;
+         nmos_effective_resistance_multiplier = 1.65;
+         n_to_p_eff_curr_drv_ratio[3] = 1.95;
+         gmp_to_gmn_multiplier[3] = 0.90;
+         //The wordline device's on-resistance is derived from curr_vpp, not vdd
+         Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+         Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+         long_channel_leakage_reduction[3] = 1;
+         I_off_n[3][0] = 1.42e-11;
+         I_off_n[3][10] = 2.25e-11;
+         I_off_n[3][20] = 3.46e-11;
+         I_off_n[3][30] = 5.18e-11;
+         I_off_n[3][40] = 7.58e-11;
+         I_off_n[3][50] = 1.08e-10;
+         I_off_n[3][60] = 1.51e-10;
+         I_off_n[3][70] = 2.02e-10;
+         I_off_n[3][80] = 2.57e-10;
+         I_off_n[3][90] = 3.14e-10;
+         I_off_n[3][100] = 3.85e-10;
+     } else if (ram_cell_tech_type == comm_dram) {
+         //COMM-DRAM cell access transistor technology parameters
+         curr_vdd_dram_cell = 1.6;
+         Lphy[3] = 0.09;
+         Lelec[3] = 0.0576;
+         curr_v_th_dram_access_transistor = 1;
+         width_dram_access_transistor = 0.09;
+         curr_I_on_dram_cell = 20e-6;
+         curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+         curr_Wmemcella_dram = width_dram_access_transistor;
+         curr_Wmemcellpmos_dram = 0;
+         curr_Wmemcellnmos_dram = 0;
+         curr_area_cell_dram = 6 * 0.09 * 0.09;
+         curr_asp_ratio_cell_dram = 1.5;
+         curr_c_dram_cell = 30e-15;
+
+         //COMM-DRAM wordline transistor parameters
+         curr_vpp = 3.7;
+         t_ox[3] = 5.5e-3;
+         v_th[3] = 1.0;
+         c_ox[3] = 5.65e-15;
+         mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6);
+         Vdsat[3] = 0.32;
+         c_g_ideal[3] = 5.08e-16;
+         c_fringe[3] = 0.08e-15;
+         c_junc[3] = 1e-15;
+         I_on_n[3] = 1094.3e-6;
+         I_on_p[3] = I_on_n[3] / 2;
+         nmos_effective_resistance_multiplier = 1.62;
+         n_to_p_eff_curr_drv_ratio[3] = 2.05;
+         gmp_to_gmn_multiplier[3] = 0.90;
+         Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+         Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+         long_channel_leakage_reduction[3] = 1;
+         I_off_n[3][0] = 5.80e-15;
+         I_off_n[3][10] = 1.21e-14;
+         I_off_n[3][20] = 2.42e-14;
+         I_off_n[3][30] = 4.65e-14;
+         I_off_n[3][40] = 8.60e-14;
+         I_off_n[3][50] = 1.54e-13;
+         I_off_n[3][60] = 2.66e-13;
+         I_off_n[3][70] = 4.45e-13;
+         I_off_n[3][80] = 7.17e-13;
+         I_off_n[3][90] = 1.11e-12;
+         I_off_n[3][100] = 1.67e-12;
+     }
+
+     //SRAM cell properties
+     curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+     curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+     curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+     curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+     curr_asp_ratio_cell_sram = 1.46;
+     //CAM cell properties //TODO: data need to be revisited
+     curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+     curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+     curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+     curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
+     curr_asp_ratio_cell_cam = 2.92;//2.5
+     //Empirical undifferentiated core/FU coefficient
+     curr_logic_scaling_co_eff = 1;
+     curr_core_tx_density = 1.25 * 0.7 * 0.7;
+     curr_sckt_co_eff = 1.1539;
+     curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+     curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ }
- if (tech == 65)
- { //65nm technology-node. Corresponds to year 2007 in ITRS
- //ITRS HP device type
- SENSE_AMP_D = .2e-9; // s
- SENSE_AMP_P = 5.7e-15; // J
- vdd[0] = 1.1;
- Lphy[0] = 0.025;
- Lelec[0] = 0.019;
- t_ox[0] = 1.1e-3;
- v_th[0] = .19491;
- c_ox[0] = 1.88e-14;
- mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 7.71e-2;
- c_g_ideal[0] = 4.69e-16;
- c_fringe[0] = 0.077e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 1197.2e-6;
- I_on_p[0] = 870.8e-6;
- nmos_effective_resistance_multiplier = 1.50;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
- long_channel_leakage_reduction[0] = 1/3.74;
- //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first
- //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74.
- I_off_n[0][0] = 1.96e-7;
- I_off_n[0][10] = 2.29e-7;
- I_off_n[0][20] = 2.66e-7;
- I_off_n[0][30] = 3.05e-7;
- I_off_n[0][40] = 3.49e-7;
- I_off_n[0][50] = 3.95e-7;
- I_off_n[0][60] = 4.45e-7;
- I_off_n[0][70] = 4.97e-7;
- I_off_n[0][80] = 5.48e-7;
- I_off_n[0][90] = 5.94e-7;
- I_off_n[0][100] = 6.3e-7;
- I_g_on_n[0][0] = 4.09e-8;//A/micron
- I_g_on_n[0][10] = 4.09e-8;
- I_g_on_n[0][20] = 4.09e-8;
- I_g_on_n[0][30] = 4.09e-8;
- I_g_on_n[0][40] = 4.09e-8;
- I_g_on_n[0][50] = 4.09e-8;
- I_g_on_n[0][60] = 4.09e-8;
- I_g_on_n[0][70] = 4.09e-8;
- I_g_on_n[0][80] = 4.09e-8;
- I_g_on_n[0][90] = 4.09e-8;
- I_g_on_n[0][100] = 4.09e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.2;
- Lphy[1] = 0.045;
- Lelec[1] = 0.0298;
- t_ox[1] = 1.9e-3;
- v_th[1] = 0.52354;
- c_ox[1] = 1.36e-14;
- mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 0.128;
- c_g_ideal[1] = 6.14e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 519.2e-6;
- I_on_p[1] = 266e-6;
- nmos_effective_resistance_multiplier = 1.96;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/2.82;
- I_off_n[1][0] = 9.12e-12;
- I_off_n[1][10] = 1.49e-11;
- I_off_n[1][20] = 2.36e-11;
- I_off_n[1][30] = 3.64e-11;
- I_off_n[1][40] = 5.48e-11;
- I_off_n[1][50] = 8.05e-11;
- I_off_n[1][60] = 1.15e-10;
- I_off_n[1][70] = 1.59e-10;
- I_off_n[1][80] = 2.1e-10;
- I_off_n[1][90] = 2.62e-10;
- I_off_n[1][100] = 3.21e-10;
-
- I_g_on_n[1][0] = 1.09e-10;//A/micron
- I_g_on_n[1][10] = 1.09e-10;
- I_g_on_n[1][20] = 1.09e-10;
- I_g_on_n[1][30] = 1.09e-10;
- I_g_on_n[1][40] = 1.09e-10;
- I_g_on_n[1][50] = 1.09e-10;
- I_g_on_n[1][60] = 1.09e-10;
- I_g_on_n[1][70] = 1.09e-10;
- I_g_on_n[1][80] = 1.09e-10;
- I_g_on_n[1][90] = 1.09e-10;
- I_g_on_n[1][100] = 1.09e-10;
-
- //ITRS LOP device type
- vdd[2] = 0.8;
- Lphy[2] = 0.032;
- Lelec[2] = 0.0216;
- t_ox[2] = 1.2e-3;
- v_th[2] = 0.28512;
- c_ox[2] = 1.87e-14;
- mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 0.292;
- c_g_ideal[2] = 6e-16;
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 573.1e-6;
- I_on_p[2] = 340.6e-6;
- nmos_effective_resistance_multiplier = 1.82;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/2.05;
- I_off_n[2][0] = 4.9e-9;
- I_off_n[2][10] = 6.49e-9;
- I_off_n[2][20] = 8.45e-9;
- I_off_n[2][30] = 1.08e-8;
- I_off_n[2][40] = 1.37e-8;
- I_off_n[2][50] = 1.71e-8;
- I_off_n[2][60] = 2.09e-8;
- I_off_n[2][70] = 2.48e-8;
- I_off_n[2][80] = 2.84e-8;
- I_off_n[2][90] = 3.13e-8;
- I_off_n[2][100] = 3.42e-8;
-
- I_g_on_n[2][0] = 9.61e-9;//A/micron
- I_g_on_n[2][10] = 9.61e-9;
- I_g_on_n[2][20] = 9.61e-9;
- I_g_on_n[2][30] = 9.61e-9;
- I_g_on_n[2][40] = 9.61e-9;
- I_g_on_n[2][50] = 9.61e-9;
- I_g_on_n[2][60] = 9.61e-9;
- I_g_on_n[2][70] = 9.61e-9;
- I_g_on_n[2][80] = 9.61e-9;
- I_g_on_n[2][90] = 9.61e-9;
- I_g_on_n[2][100] = 9.61e-9;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.2;
- Lphy[3] = 0.12;
- Lelec[3] = 0.0756;
- curr_v_th_dram_access_transistor = 0.43806;
- width_dram_access_transistor = 0.09;
- curr_I_on_dram_cell = 36e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 0.11;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.6;
- t_ox[3] = 2.2e-3;
- v_th[3] = 0.43806;
- c_ox[3] = 1.22e-14;
- mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.43806;
- c_g_ideal[3] = 1.46e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15 ;
- I_on_n[3] = 399.8e-6;
- I_on_p[3] = 243.4e-6;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 2.23e-11;
- I_off_n[3][10] = 3.46e-11;
- I_off_n[3][20] = 5.24e-11;
- I_off_n[3][30] = 7.75e-11;
- I_off_n[3][40] = 1.12e-10;
- I_off_n[3][50] = 1.58e-10;
- I_off_n[3][60] = 2.18e-10;
- I_off_n[3][70] = 2.88e-10;
- I_off_n[3][80] = 3.63e-10;
- I_off_n[3][90] = 4.41e-10;
- I_off_n[3][100] = 5.36e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.3;
- Lphy[3] = 0.065;
- Lelec[3] = 0.0426;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.065;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.065*0.065;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 3.3;
- t_ox[3] = 5e-3;
- v_th[3] = 1.0;
- c_ox[3] = 6.16e-15;
- mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.385;
- c_g_ideal[3] = 4e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15 ;
- I_on_n[3] = 1031e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 2.39;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.80e-14;
- I_off_n[3][10] = 3.64e-14;
- I_off_n[3][20] = 7.03e-14;
- I_off_n[3][30] = 1.31e-13;
- I_off_n[3][40] = 2.35e-13;
- I_off_n[3][50] = 4.09e-13;
- I_off_n[3][60] = 6.89e-13;
- I_off_n[3][70] = 1.13e-12;
- I_off_n[3][80] = 1.78e-12;
- I_off_n[3][90] = 2.71e-12;
- I_off_n[3][100] = 3.99e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor
- curr_core_tx_density = 1.25*0.7;
- curr_sckt_co_eff = 1.1359;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ // 65nm technology-node; corresponds to year 2007 in ITRS.
+ // Array slot 0/1/2 = ITRS HP/LSTP/LOP device type; slot 3 = DRAM
+ // access/wordline transistor of whichever DRAM cell type is selected.
+ if (tech == 65) {
+     // Common factor multiplied into every effective-mobility figure below.
+     const double mobility_scale = 1e-2 * 1e6 * 1e-2 * 1e6;
+
+     // Sense amplifier constants
+     SENSE_AMP_D = 0.2e-9; // s
+     SENSE_AMP_P = 5.7e-15; // J
+
+     // ---- ITRS HP device type ----
+     vdd[0] = 1.1;
+     Lphy[0] = 0.025;
+     Lelec[0] = 0.019;
+     t_ox[0] = 1.1e-3;
+     v_th[0] = 0.19491;
+     c_ox[0] = 1.88e-14;
+     mobility_eff[0] = 436.24 * mobility_scale;
+     Vdsat[0] = 7.71e-2;
+     c_g_ideal[0] = 4.69e-16;
+     c_fringe[0] = 0.077e-15;
+     c_junc[0] = 1e-15;
+     I_on_n[0] = 1197.2e-6;
+     I_on_p[0] = 870.8e-6;
+     // Shared scalar: rewritten for each device type right before it is used.
+     nmos_effective_resistance_multiplier = 1.5;
+     n_to_p_eff_curr_drv_ratio[0] = 2.41;
+     gmp_to_gmn_multiplier[0] = 1.38;
+     Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
+     Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
+     // Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate
+     // increase by 10%, whichever comes first.
+     // Ioff(Lgate normal)/Ioff(Lgate long)= 3.74.
+     long_channel_leakage_reduction[0] = 1.0 / 3.74;
+     I_off_n[0][0] = 1.96e-7;
+     I_off_n[0][10] = 2.29e-7;
+     I_off_n[0][20] = 2.66e-7;
+     I_off_n[0][30] = 3.05e-7;
+     I_off_n[0][40] = 3.49e-7;
+     I_off_n[0][50] = 3.95e-7;
+     I_off_n[0][60] = 4.45e-7;
+     I_off_n[0][70] = 4.97e-7;
+     I_off_n[0][80] = 5.48e-7;
+     I_off_n[0][90] = 5.94e-7;
+     I_off_n[0][100] = 6.3e-7;
+     // Same gate-leakage value at every tabulated index (0, 10, ..., 100).
+     for (int slot = 0; slot <= 100; slot += 10) {
+         I_g_on_n[0][slot] = 4.09e-8; // A/micron
+     }
+
+     // ---- ITRS LSTP device type ----
+     vdd[1] = 1.2;
+     Lphy[1] = 0.045;
+     Lelec[1] = 0.0298;
+     t_ox[1] = 1.9e-3;
+     v_th[1] = 0.52354;
+     c_ox[1] = 1.36e-14;
+     mobility_eff[1] = 341.21 * mobility_scale;
+     Vdsat[1] = 0.128;
+     c_g_ideal[1] = 6.14e-16;
+     c_fringe[1] = 0.08e-15;
+     c_junc[1] = 1e-15;
+     I_on_n[1] = 519.2e-6;
+     I_on_p[1] = 266e-6;
+     nmos_effective_resistance_multiplier = 1.96;
+     n_to_p_eff_curr_drv_ratio[1] = 2.23;
+     gmp_to_gmn_multiplier[1] = 0.99;
+     Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+     Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+     long_channel_leakage_reduction[1] = 1.0 / 2.82;
+     I_off_n[1][0] = 9.12e-12;
+     I_off_n[1][10] = 1.49e-11;
+     I_off_n[1][20] = 2.36e-11;
+     I_off_n[1][30] = 3.64e-11;
+     I_off_n[1][40] = 5.48e-11;
+     I_off_n[1][50] = 8.05e-11;
+     I_off_n[1][60] = 1.15e-10;
+     I_off_n[1][70] = 1.59e-10;
+     I_off_n[1][80] = 2.1e-10;
+     I_off_n[1][90] = 2.62e-10;
+     I_off_n[1][100] = 3.21e-10;
+     for (int slot = 0; slot <= 100; slot += 10) {
+         I_g_on_n[1][slot] = 1.09e-10; // A/micron
+     }
+
+     // ---- ITRS LOP device type ----
+     vdd[2] = 0.8;
+     Lphy[2] = 0.032;
+     Lelec[2] = 0.0216;
+     t_ox[2] = 1.2e-3;
+     v_th[2] = 0.28512;
+     c_ox[2] = 1.87e-14;
+     mobility_eff[2] = 495.19 * mobility_scale;
+     Vdsat[2] = 0.292;
+     c_g_ideal[2] = 6e-16;
+     c_fringe[2] = 0.08e-15;
+     c_junc[2] = 1e-15;
+     I_on_n[2] = 573.1e-6;
+     I_on_p[2] = 340.6e-6;
+     nmos_effective_resistance_multiplier = 1.82;
+     n_to_p_eff_curr_drv_ratio[2] = 2.28;
+     gmp_to_gmn_multiplier[2] = 1.11;
+     Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+     Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+     long_channel_leakage_reduction[2] = 1.0 / 2.05;
+     I_off_n[2][0] = 4.9e-9;
+     I_off_n[2][10] = 6.49e-9;
+     I_off_n[2][20] = 8.45e-9;
+     I_off_n[2][30] = 1.08e-8;
+     I_off_n[2][40] = 1.37e-8;
+     I_off_n[2][50] = 1.71e-8;
+     I_off_n[2][60] = 2.09e-8;
+     I_off_n[2][70] = 2.48e-8;
+     I_off_n[2][80] = 2.84e-8;
+     I_off_n[2][90] = 3.13e-8;
+     I_off_n[2][100] = 3.42e-8;
+     for (int slot = 0; slot <= 100; slot += 10) {
+         I_g_on_n[2][slot] = 9.61e-9; // A/micron
+     }
+
+     if (ram_cell_tech_type == lp_dram) {
+         // LP-DRAM cell access transistor technology parameters
+         curr_vdd_dram_cell = 1.2;
+         Lphy[3] = 0.12;
+         Lelec[3] = 0.0756;
+         curr_v_th_dram_access_transistor = 0.43806;
+         width_dram_access_transistor = 0.09;
+         curr_I_on_dram_cell = 36e-6;
+         curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12;
+         curr_Wmemcella_dram = width_dram_access_transistor;
+         curr_Wmemcellpmos_dram = 0;
+         curr_Wmemcellnmos_dram = 0;
+         curr_area_cell_dram = 0.11;
+         curr_asp_ratio_cell_dram = 1.46;
+         curr_c_dram_cell = 20e-15;
+
+         // LP-DRAM wordline transistor parameters
+         curr_vpp = 1.6;
+         t_ox[3] = 2.2e-3;
+         v_th[3] = 0.43806;
+         c_ox[3] = 1.22e-14;
+         mobility_eff[3] = 328.32 * mobility_scale;
+         Vdsat[3] = 0.43806;
+         c_g_ideal[3] = 1.46e-15;
+         c_fringe[3] = 0.08e-15;
+         c_junc[3] = 1e-15;
+         I_on_n[3] = 399.8e-6;
+         I_on_p[3] = 243.4e-6;
+         nmos_effective_resistance_multiplier = 1.65;
+         n_to_p_eff_curr_drv_ratio[3] = 2.05;
+         gmp_to_gmn_multiplier[3] = 0.9;
+         // On-resistance here is computed from curr_vpp rather than vdd.
+         Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+         Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+         long_channel_leakage_reduction[3] = 1;
+         I_off_n[3][0] = 2.23e-11;
+         I_off_n[3][10] = 3.46e-11;
+         I_off_n[3][20] = 5.24e-11;
+         I_off_n[3][30] = 7.75e-11;
+         I_off_n[3][40] = 1.12e-10;
+         I_off_n[3][50] = 1.58e-10;
+         I_off_n[3][60] = 2.18e-10;
+         I_off_n[3][70] = 2.88e-10;
+         I_off_n[3][80] = 3.63e-10;
+         I_off_n[3][90] = 4.41e-10;
+         I_off_n[3][100] = 5.36e-10;
+     } else if (ram_cell_tech_type == comm_dram) {
+         // COMM-DRAM cell access transistor technology parameters
+         curr_vdd_dram_cell = 1.3;
+         Lphy[3] = 0.065;
+         Lelec[3] = 0.0426;
+         curr_v_th_dram_access_transistor = 1;
+         width_dram_access_transistor = 0.065;
+         curr_I_on_dram_cell = 20e-6;
+         curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+         curr_Wmemcella_dram = width_dram_access_transistor;
+         curr_Wmemcellpmos_dram = 0;
+         curr_Wmemcellnmos_dram = 0;
+         curr_area_cell_dram = 6 * 0.065 * 0.065;
+         curr_asp_ratio_cell_dram = 1.5;
+         curr_c_dram_cell = 30e-15;
+
+         // COMM-DRAM wordline transistor parameters
+         curr_vpp = 3.3;
+         t_ox[3] = 5e-3;
+         v_th[3] = 1.0;
+         c_ox[3] = 6.16e-15;
+         mobility_eff[3] = 303.44 * mobility_scale;
+         Vdsat[3] = 0.385;
+         c_g_ideal[3] = 4e-16;
+         c_fringe[3] = 0.08e-15;
+         c_junc[3] = 1e-15;
+         I_on_n[3] = 1031e-6;
+         I_on_p[3] = I_on_n[3] / 2;
+         nmos_effective_resistance_multiplier = 1.69;
+         n_to_p_eff_curr_drv_ratio[3] = 2.39;
+         gmp_to_gmn_multiplier[3] = 0.9;
+         Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+         Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+         long_channel_leakage_reduction[3] = 1;
+         I_off_n[3][0] = 1.8e-14;
+         I_off_n[3][10] = 3.64e-14;
+         I_off_n[3][20] = 7.03e-14;
+         I_off_n[3][30] = 1.31e-13;
+         I_off_n[3][40] = 2.35e-13;
+         I_off_n[3][50] = 4.09e-13;
+         I_off_n[3][60] = 6.89e-13;
+         I_off_n[3][70] = 1.13e-12;
+         I_off_n[3][80] = 1.78e-12;
+         I_off_n[3][90] = 2.71e-12;
+         I_off_n[3][100] = 3.99e-12;
+     }
+
+     // SRAM cell properties (widths and area expressed via g_ip->F_sz_um)
+     curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+     curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+     curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+     curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+     curr_asp_ratio_cell_sram = 1.46;
+
+     // CAM cell properties //TODO: data need to be revisited
+     curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+     curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+     curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+     curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+     curr_asp_ratio_cell_cam = 2.92;
+
+     // Empirical undifferentiated core/FU coefficients.
+     // Rather than scale proportionally to square of feature size, only scale
+     // linearly according to IBM cell processor.
+     curr_logic_scaling_co_eff = 0.7;
+     curr_core_tx_density = 1.25 * 0.7;
+     curr_sckt_co_eff = 1.1359;
+     curr_chip_layout_overhead = 1.2; // die measurement results based on Niagara 1 and 2
+     curr_macro_layout_overhead = 1.1; // EDA placement and routing tool rule of thumb
+ }
- if (tech == 45)
- { //45nm technology-node. Corresponds to year 2010 in ITRS
- //ITRS HP device type
- SENSE_AMP_D = .04e-9; // s
- SENSE_AMP_P = 2.7e-15; // J
- vdd[0] = 1.0;
- Lphy[0] = 0.018;
- Lelec[0] = 0.01345;
- t_ox[0] = 0.65e-3;
- v_th[0] = .18035;
- c_ox[0] = 3.77e-14;
- mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 9.38E-2;
- c_g_ideal[0] = 6.78e-16;
- c_fringe[0] = 0.05e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 2046.6e-6;
- //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of
- //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm
- I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI
- nmos_effective_resistance_multiplier = 1.51;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
- long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74
- I_off_n[0][0] = 2.8e-7;
- I_off_n[0][10] = 3.28e-7;
- I_off_n[0][20] = 3.81e-7;
- I_off_n[0][30] = 4.39e-7;
- I_off_n[0][40] = 5.02e-7;
- I_off_n[0][50] = 5.69e-7;
- I_off_n[0][60] = 6.42e-7;
- I_off_n[0][70] = 7.2e-7;
- I_off_n[0][80] = 8.03e-7;
- I_off_n[0][90] = 8.91e-7;
- I_off_n[0][100] = 9.84e-7;
-
- I_g_on_n[0][0] = 3.59e-8;//A/micron
- I_g_on_n[0][10] = 3.59e-8;
- I_g_on_n[0][20] = 3.59e-8;
- I_g_on_n[0][30] = 3.59e-8;
- I_g_on_n[0][40] = 3.59e-8;
- I_g_on_n[0][50] = 3.59e-8;
- I_g_on_n[0][60] = 3.59e-8;
- I_g_on_n[0][70] = 3.59e-8;
- I_g_on_n[0][80] = 3.59e-8;
- I_g_on_n[0][90] = 3.59e-8;
- I_g_on_n[0][100] = 3.59e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.1;
- Lphy[1] = 0.028;
- Lelec[1] = 0.0212;
- t_ox[1] = 1.4e-3;
- v_th[1] = 0.50245;
- c_ox[1] = 2.01e-14;
- mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 9.12e-2;
- c_g_ideal[1] = 5.18e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 666.2e-6;
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/2.08;
- I_off_n[1][0] = 1.01e-11;
- I_off_n[1][10] = 1.65e-11;
- I_off_n[1][20] = 2.62e-11;
- I_off_n[1][30] = 4.06e-11;
- I_off_n[1][40] = 6.12e-11;
- I_off_n[1][50] = 9.02e-11;
- I_off_n[1][60] = 1.3e-10;
- I_off_n[1][70] = 1.83e-10;
- I_off_n[1][80] = 2.51e-10;
- I_off_n[1][90] = 3.29e-10;
- I_off_n[1][100] = 4.1e-10;
-
- I_g_on_n[1][0] = 9.47e-12;//A/micron
- I_g_on_n[1][10] = 9.47e-12;
- I_g_on_n[1][20] = 9.47e-12;
- I_g_on_n[1][30] = 9.47e-12;
- I_g_on_n[1][40] = 9.47e-12;
- I_g_on_n[1][50] = 9.47e-12;
- I_g_on_n[1][60] = 9.47e-12;
- I_g_on_n[1][70] = 9.47e-12;
- I_g_on_n[1][80] = 9.47e-12;
- I_g_on_n[1][90] = 9.47e-12;
- I_g_on_n[1][100] = 9.47e-12;
-
- //ITRS LOP device type
- vdd[2] = 0.7;
- Lphy[2] = 0.022;
- Lelec[2] = 0.016;
- t_ox[2] = 0.9e-3;
- v_th[2] = 0.22599;
- c_ox[2] = 2.82e-14;//F/micron2
- mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 5.71e-2;
- c_g_ideal[2] = 6.2e-16;
- c_fringe[2] = 0.073e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 748.9e-6;
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.76;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/1.92;
- I_off_n[2][0] = 4.03e-9;
- I_off_n[2][10] = 5.02e-9;
- I_off_n[2][20] = 6.18e-9;
- I_off_n[2][30] = 7.51e-9;
- I_off_n[2][40] = 9.04e-9;
- I_off_n[2][50] = 1.08e-8;
- I_off_n[2][60] = 1.27e-8;
- I_off_n[2][70] = 1.47e-8;
- I_off_n[2][80] = 1.66e-8;
- I_off_n[2][90] = 1.84e-8;
- I_off_n[2][100] = 2.03e-8;
-
- I_g_on_n[2][0] = 3.24e-8;//A/micron
- I_g_on_n[2][10] = 4.01e-8;
- I_g_on_n[2][20] = 4.90e-8;
- I_g_on_n[2][30] = 5.92e-8;
- I_g_on_n[2][40] = 7.08e-8;
- I_g_on_n[2][50] = 8.38e-8;
- I_g_on_n[2][60] = 9.82e-8;
- I_g_on_n[2][70] = 1.14e-7;
- I_g_on_n[2][80] = 1.29e-7;
- I_g_on_n[2][90] = 1.43e-7;
- I_g_on_n[2][100] = 1.54e-7;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.1;
- Lphy[3] = 0.078;
- Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 0.44559;
- width_dram_access_transistor = 0.079;
- curr_I_on_dram_cell = 36e-6;//A
- curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.5;
- t_ox[3] = 2.1e-3;
- v_th[3] = 0.44559;
- c_ox[3] = 1.41e-14;
- mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.181;
- c_g_ideal[3] = 1.10e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 456e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 2.54e-11;
- I_off_n[3][10] = 3.94e-11;
- I_off_n[3][20] = 5.95e-11;
- I_off_n[3][30] = 8.79e-11;
- I_off_n[3][40] = 1.27e-10;
- I_off_n[3][50] = 1.79e-10;
- I_off_n[3][60] = 2.47e-10;
- I_off_n[3][70] = 3.31e-10;
- I_off_n[3][80] = 4.26e-10;
- I_off_n[3][90] = 5.27e-10;
- I_off_n[3][100] = 6.46e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.1;
- Lphy[3] = 0.045;
- Lelec[3] = 0.0298;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.045;
- curr_I_on_dram_cell = 20e-6;//A
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.045*0.045;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 2.7;
- t_ox[3] = 4e-3;
- v_th[3] = 1.0;
- c_ox[3] = 7.98e-15;
- mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.147;
- c_g_ideal[3] = 3.59e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 999.4e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.31e-14;
- I_off_n[3][10] = 2.68e-14;
- I_off_n[3][20] = 5.25e-14;
- I_off_n[3][30] = 9.88e-14;
- I_off_n[3][40] = 1.79e-13;
- I_off_n[3][50] = 3.15e-13;
- I_off_n[3][60] = 5.36e-13;
- I_off_n[3][70] = 8.86e-13;
- I_off_n[3][80] = 1.42e-12;
- I_off_n[3][90] = 2.20e-12;
- I_off_n[3][100] = 3.29e-12;
- }
-
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7;
- curr_core_tx_density = 1.25;
- curr_sckt_co_eff = 1.1387;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ if (tech == 45) {
+ //45nm technology-node. Corresponds to year 2010 in ITRS
+ //ITRS HP device type
+ SENSE_AMP_D = .04e-9; // s
+ SENSE_AMP_P = 2.7e-15; // J
+ vdd[0] = 1.0;
+ Lphy[0] = 0.018;
+ Lelec[0] = 0.01345;
+ t_ox[0] = 0.65e-3;
+ v_th[0] = .18035;
+ c_ox[0] = 3.77e-14;
+ mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[0] = 9.38E-2;
+ c_g_ideal[0] = 6.78e-16;
+ c_fringe[0] = 0.05e-15;
+ c_junc[0] = 1e-15;
+ I_on_n[0] = 2046.6e-6;
+ //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of
+ //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm
+ I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI
+ nmos_effective_resistance_multiplier = 1.51;
+ n_to_p_eff_curr_drv_ratio[0] = 2.41;
+ gmp_to_gmn_multiplier[0] = 1.38;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
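+ //Using MASTAR, @380K, increase Lgate until Ion reduces to 90%;
+ //Ioff(Lgate normal)/Ioff(Lgate long) = 3.74.
+ //NOTE(review): the divisor actually used on the next line is 3.546, not 3.74 -- confirm which value is intended.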
+ long_channel_leakage_reduction[0] = 1 / 3.546;
+ I_off_n[0][0] = 2.8e-7;
+ I_off_n[0][10] = 3.28e-7;
+ I_off_n[0][20] = 3.81e-7;
+ I_off_n[0][30] = 4.39e-7;
+ I_off_n[0][40] = 5.02e-7;
+ I_off_n[0][50] = 5.69e-7;
+ I_off_n[0][60] = 6.42e-7;
+ I_off_n[0][70] = 7.2e-7;
+ I_off_n[0][80] = 8.03e-7;
+ I_off_n[0][90] = 8.91e-7;
+ I_off_n[0][100] = 9.84e-7;
+
+ I_g_on_n[0][0] = 3.59e-8;//A/micron
+ I_g_on_n[0][10] = 3.59e-8;
+ I_g_on_n[0][20] = 3.59e-8;
+ I_g_on_n[0][30] = 3.59e-8;
+ I_g_on_n[0][40] = 3.59e-8;
+ I_g_on_n[0][50] = 3.59e-8;
+ I_g_on_n[0][60] = 3.59e-8;
+ I_g_on_n[0][70] = 3.59e-8;
+ I_g_on_n[0][80] = 3.59e-8;
+ I_g_on_n[0][90] = 3.59e-8;
+ I_g_on_n[0][100] = 3.59e-8;
+
+ //ITRS LSTP device type
+ vdd[1] = 1.1;
+ Lphy[1] = 0.028;
+ Lelec[1] = 0.0212;
+ t_ox[1] = 1.4e-3;
+ v_th[1] = 0.50245;
+ c_ox[1] = 2.01e-14;
+ mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 9.12e-2;
+ c_g_ideal[1] = 5.18e-16;
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 666.2e-6;
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2.23;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1 / 2.08;
+ I_off_n[1][0] = 1.01e-11;
+ I_off_n[1][10] = 1.65e-11;
+ I_off_n[1][20] = 2.62e-11;
+ I_off_n[1][30] = 4.06e-11;
+ I_off_n[1][40] = 6.12e-11;
+ I_off_n[1][50] = 9.02e-11;
+ I_off_n[1][60] = 1.3e-10;
+ I_off_n[1][70] = 1.83e-10;
+ I_off_n[1][80] = 2.51e-10;
+ I_off_n[1][90] = 3.29e-10;
+ I_off_n[1][100] = 4.1e-10;
+
+ I_g_on_n[1][0] = 9.47e-12;//A/micron
+ I_g_on_n[1][10] = 9.47e-12;
+ I_g_on_n[1][20] = 9.47e-12;
+ I_g_on_n[1][30] = 9.47e-12;
+ I_g_on_n[1][40] = 9.47e-12;
+ I_g_on_n[1][50] = 9.47e-12;
+ I_g_on_n[1][60] = 9.47e-12;
+ I_g_on_n[1][70] = 9.47e-12;
+ I_g_on_n[1][80] = 9.47e-12;
+ I_g_on_n[1][90] = 9.47e-12;
+ I_g_on_n[1][100] = 9.47e-12;
+
+ //ITRS LOP device type
+ vdd[2] = 0.7;
+ Lphy[2] = 0.022;
+ Lelec[2] = 0.016;
+ t_ox[2] = 0.9e-3;
+ v_th[2] = 0.22599;
+ c_ox[2] = 2.82e-14;//F/micron2
+ mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 5.71e-2;
+ c_g_ideal[2] = 6.2e-16;
+ c_fringe[2] = 0.073e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 748.9e-6;
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.76;
+ n_to_p_eff_curr_drv_ratio[2] = 2.28;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1 / 1.92;
+ I_off_n[2][0] = 4.03e-9;
+ I_off_n[2][10] = 5.02e-9;
+ I_off_n[2][20] = 6.18e-9;
+ I_off_n[2][30] = 7.51e-9;
+ I_off_n[2][40] = 9.04e-9;
+ I_off_n[2][50] = 1.08e-8;
+ I_off_n[2][60] = 1.27e-8;
+ I_off_n[2][70] = 1.47e-8;
+ I_off_n[2][80] = 1.66e-8;
+ I_off_n[2][90] = 1.84e-8;
+ I_off_n[2][100] = 2.03e-8;
+
+ I_g_on_n[2][0] = 3.24e-8;//A/micron
+ I_g_on_n[2][10] = 4.01e-8;
+ I_g_on_n[2][20] = 4.90e-8;
+ I_g_on_n[2][30] = 5.92e-8;
+ I_g_on_n[2][40] = 7.08e-8;
+ I_g_on_n[2][50] = 8.38e-8;
+ I_g_on_n[2][60] = 9.82e-8;
+ I_g_on_n[2][70] = 1.14e-7;
+ I_g_on_n[2][80] = 1.29e-7;
+ I_g_on_n[2][90] = 1.43e-7;
+ I_g_on_n[2][100] = 1.54e-7;
+
+ if (ram_cell_tech_type == lp_dram) {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.1;
+ Lphy[3] = 0.078;
+ Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 0.44559;
+ width_dram_access_transistor = 0.079;
+ curr_I_on_dram_cell = 36e-6;//A
+ curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.5;
+ t_ox[3] = 2.1e-3;
+ v_th[3] = 0.44559;
+ c_ox[3] = 1.41e-14;
+ mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.181;
+ c_g_ideal[3] = 1.10e-15;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 456e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 2.54e-11;
+ I_off_n[3][10] = 3.94e-11;
+ I_off_n[3][20] = 5.95e-11;
+ I_off_n[3][30] = 8.79e-11;
+ I_off_n[3][40] = 1.27e-10;
+ I_off_n[3][50] = 1.79e-10;
+ I_off_n[3][60] = 2.47e-10;
+ I_off_n[3][70] = 3.31e-10;
+ I_off_n[3][80] = 4.26e-10;
+ I_off_n[3][90] = 5.27e-10;
+ I_off_n[3][100] = 6.46e-10;
+ } else if (ram_cell_tech_type == comm_dram) {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.1;
+ Lphy[3] = 0.045;
+ Lelec[3] = 0.0298;
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.045;
+ curr_I_on_dram_cell = 20e-6;//A
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6 * 0.045 * 0.045;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 2.7;
+ t_ox[3] = 4e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 7.98e-15;
+ mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.147;
+ c_g_ideal[3] = 3.59e-16;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 999.4e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.69;
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.31e-14;
+ I_off_n[3][10] = 2.68e-14;
+ I_off_n[3][20] = 5.25e-14;
+ I_off_n[3][30] = 9.88e-14;
+ I_off_n[3][40] = 1.79e-13;
+ I_off_n[3][50] = 3.15e-13;
+ I_off_n[3][60] = 5.36e-13;
+ I_off_n[3][70] = 8.86e-13;
+ I_off_n[3][80] = 1.42e-12;
+ I_off_n[3][90] = 2.20e-12;
+ I_off_n[3][100] = 3.29e-12;
+ }
+
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferentiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7 * 0.7;
+ curr_core_tx_density = 1.25;
+ curr_sckt_co_eff = 1.1387;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
- if (tech == 32)
- {
- SENSE_AMP_D = .03e-9; // s
- SENSE_AMP_P = 2.16e-15; // J
- //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm
- //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for
- //HP and LSTP.
- vdd[0] = 0.9;
- Lphy[0] = 0.013;
- Lelec[0] = 0.01013;
- t_ox[0] = 0.5e-3;
- v_th[0] = 0.21835;
- c_ox[0] = 4.11e-14;
- mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 5.09E-2;
- c_g_ideal[0] = 5.34e-16;
- c_fringe[0] = 0.04e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 2211.7e-6;
- I_on_p[0] = I_on_n[0] / 2;
- nmos_effective_resistance_multiplier = 1.49;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/3.706;
- //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%),
- //whichever comes first
- I_off_n[0][0] = 1.52e-7;
- I_off_n[0][10] = 1.55e-7;
- I_off_n[0][20] = 1.59e-7;
- I_off_n[0][30] = 1.68e-7;
- I_off_n[0][40] = 1.90e-7;
- I_off_n[0][50] = 2.69e-7;
- I_off_n[0][60] = 5.32e-7;
- I_off_n[0][70] = 1.02e-6;
- I_off_n[0][80] = 1.62e-6;
- I_off_n[0][90] = 2.73e-6;
- I_off_n[0][100] = 6.1e-6;
-
- I_g_on_n[0][0] = 6.55e-8;//A/micron
- I_g_on_n[0][10] = 6.55e-8;
- I_g_on_n[0][20] = 6.55e-8;
- I_g_on_n[0][30] = 6.55e-8;
- I_g_on_n[0][40] = 6.55e-8;
- I_g_on_n[0][50] = 6.55e-8;
- I_g_on_n[0][60] = 6.55e-8;
- I_g_on_n[0][70] = 6.55e-8;
- I_g_on_n[0][80] = 6.55e-8;
- I_g_on_n[0][90] = 6.55e-8;
- I_g_on_n[0][100] = 6.55e-8;
-
-// 32 DG
-// I_g_on_n[0][0] = 2.71e-9;//A/micron
-// I_g_on_n[0][10] = 2.71e-9;
-// I_g_on_n[0][20] = 2.71e-9;
-// I_g_on_n[0][30] = 2.71e-9;
-// I_g_on_n[0][40] = 2.71e-9;
-// I_g_on_n[0][50] = 2.71e-9;
-// I_g_on_n[0][60] = 2.71e-9;
-// I_g_on_n[0][70] = 2.71e-9;
-// I_g_on_n[0][80] = 2.71e-9;
-// I_g_on_n[0][90] = 2.71e-9;
-// I_g_on_n[0][100] = 2.71e-9;
-
- //LSTP device type
- vdd[1] = 1;
- Lphy[1] = 0.020;
- Lelec[1] = 0.0173;
- t_ox[1] = 1.2e-3;
- v_th[1] = 0.513;
- c_ox[1] = 2.29e-14;
- mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 8.64e-2;
- c_g_ideal[1] = 4.58e-16;
- c_fringe[1] = 0.053e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 683.6e-6;
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/1.93;
- I_off_n[1][0] = 2.06e-11;
- I_off_n[1][10] = 3.30e-11;
- I_off_n[1][20] = 5.15e-11;
- I_off_n[1][30] = 7.83e-11;
- I_off_n[1][40] = 1.16e-10;
- I_off_n[1][50] = 1.69e-10;
- I_off_n[1][60] = 2.40e-10;
- I_off_n[1][70] = 3.34e-10;
- I_off_n[1][80] = 4.54e-10;
- I_off_n[1][90] = 5.96e-10;
- I_off_n[1][100] = 7.44e-10;
-
- I_g_on_n[1][0] = 3.73e-11;//A/micron
- I_g_on_n[1][10] = 3.73e-11;
- I_g_on_n[1][20] = 3.73e-11;
- I_g_on_n[1][30] = 3.73e-11;
- I_g_on_n[1][40] = 3.73e-11;
- I_g_on_n[1][50] = 3.73e-11;
- I_g_on_n[1][60] = 3.73e-11;
- I_g_on_n[1][70] = 3.73e-11;
- I_g_on_n[1][80] = 3.73e-11;
- I_g_on_n[1][90] = 3.73e-11;
- I_g_on_n[1][100] = 3.73e-11;
-
-
- //LOP device type
- vdd[2] = 0.6;
- Lphy[2] = 0.016;
- Lelec[2] = 0.01232;
- t_ox[2] = 0.9e-3;
- v_th[2] = 0.24227;
- c_ox[2] = 2.84e-14;
- mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 4.64e-2;
- c_g_ideal[2] = 4.54e-16;
- c_fringe[2] = 0.057e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 827.8e-6;
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.73;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/1.89;
- I_off_n[2][0] = 5.94e-8;
- I_off_n[2][10] = 7.23e-8;
- I_off_n[2][20] = 8.7e-8;
- I_off_n[2][30] = 1.04e-7;
- I_off_n[2][40] = 1.22e-7;
- I_off_n[2][50] = 1.43e-7;
- I_off_n[2][60] = 1.65e-7;
- I_off_n[2][70] = 1.90e-7;
- I_off_n[2][80] = 2.15e-7;
- I_off_n[2][90] = 2.39e-7;
- I_off_n[2][100] = 2.63e-7;
-
- I_g_on_n[2][0] = 2.93e-9;//A/micron
- I_g_on_n[2][10] = 2.93e-9;
- I_g_on_n[2][20] = 2.93e-9;
- I_g_on_n[2][30] = 2.93e-9;
- I_g_on_n[2][40] = 2.93e-9;
- I_g_on_n[2][50] = 2.93e-9;
- I_g_on_n[2][60] = 2.93e-9;
- I_g_on_n[2][70] = 2.93e-9;
- I_g_on_n[2][80] = 2.93e-9;
- I_g_on_n[2][90] = 2.93e-9;
- I_g_on_n[2][100] = 2.93e-9;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.0;
- Lphy[3] = 0.056;
- Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 0.44129;
- width_dram_access_transistor = 0.056;
- curr_I_on_dram_cell = 36e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.5;
- t_ox[3] = 2e-3;
- v_th[3] = 0.44467;
- c_ox[3] = 1.48e-14;
- mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.174;
- c_g_ideal[3] = 7.45e-16;
- c_fringe[3] = 0.053e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1055.4e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 3.57e-11;
- I_off_n[3][10] = 5.51e-11;
- I_off_n[3][20] = 8.27e-11;
- I_off_n[3][30] = 1.21e-10;
- I_off_n[3][40] = 1.74e-10;
- I_off_n[3][50] = 2.45e-10;
- I_off_n[3][60] = 3.38e-10;
- I_off_n[3][70] = 4.53e-10;
- I_off_n[3][80] = 5.87e-10;
- I_off_n[3][90] = 7.29e-10;
- I_off_n[3][100] = 8.87e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.0;
- Lphy[3] = 0.032;
- Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.032;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.032*0.032;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 2.6;
- t_ox[3] = 4e-3;
- v_th[3] = 1.0;
- c_ox[3] = 7.99e-15;
- mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.129;
- c_g_ideal[3] = 2.56e-16;
- c_fringe[3] = 0.053e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1024.5e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 3.63e-14;
- I_off_n[3][10] = 7.18e-14;
- I_off_n[3][20] = 1.36e-13;
- I_off_n[3][30] = 2.49e-13;
- I_off_n[3][40] = 4.41e-13;
- I_off_n[3][50] = 7.55e-13;
- I_off_n[3][60] = 1.26e-12;
- I_off_n[3][70] = 2.03e-12;
- I_off_n[3][80] = 3.19e-12;
- I_off_n[3][90] = 4.87e-12;
- I_off_n[3][100] = 7.16e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7;
- curr_sckt_co_eff = 1.1111;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ if (tech == 32) {
+ SENSE_AMP_D = .03e-9; // s
+ SENSE_AMP_P = 2.16e-15; // J
+ //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm
+ //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for
+ //HP and LSTP.
+ vdd[0] = 0.9;
+ Lphy[0] = 0.013;
+ Lelec[0] = 0.01013;
+ t_ox[0] = 0.5e-3;
+ v_th[0] = 0.21835;
+ c_ox[0] = 4.11e-14;
+ mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[0] = 5.09E-2;
+ c_g_ideal[0] = 5.34e-16;
+ c_fringe[0] = 0.04e-15;
+ c_junc[0] = 1e-15;
+ I_on_n[0] = 2211.7e-6;
+ I_on_p[0] = I_on_n[0] / 2;
+ nmos_effective_resistance_multiplier = 1.49;
+ n_to_p_eff_curr_drv_ratio[0] = 2.41;
+ gmp_to_gmn_multiplier[0] = 1.38;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1 / 3.706;
+ //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increases by 5% (DG device can only increase by 5%),
+ //whichever comes first
+ I_off_n[0][0] = 1.52e-7;
+ I_off_n[0][10] = 1.55e-7;
+ I_off_n[0][20] = 1.59e-7;
+ I_off_n[0][30] = 1.68e-7;
+ I_off_n[0][40] = 1.90e-7;
+ I_off_n[0][50] = 2.69e-7;
+ I_off_n[0][60] = 5.32e-7;
+ I_off_n[0][70] = 1.02e-6;
+ I_off_n[0][80] = 1.62e-6;
+ I_off_n[0][90] = 2.73e-6;
+ I_off_n[0][100] = 6.1e-6;
+
+ I_g_on_n[0][0] = 6.55e-8;//A/micron
+ I_g_on_n[0][10] = 6.55e-8;
+ I_g_on_n[0][20] = 6.55e-8;
+ I_g_on_n[0][30] = 6.55e-8;
+ I_g_on_n[0][40] = 6.55e-8;
+ I_g_on_n[0][50] = 6.55e-8;
+ I_g_on_n[0][60] = 6.55e-8;
+ I_g_on_n[0][70] = 6.55e-8;
+ I_g_on_n[0][80] = 6.55e-8;
+ I_g_on_n[0][90] = 6.55e-8;
+ I_g_on_n[0][100] = 6.55e-8;
+
+ //LSTP device type
+ vdd[1] = 1;
+ Lphy[1] = 0.020;
+ Lelec[1] = 0.0173;
+ t_ox[1] = 1.2e-3;
+ v_th[1] = 0.513;
+ c_ox[1] = 2.29e-14;
+ mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 8.64e-2;
+ c_g_ideal[1] = 4.58e-16;
+ c_fringe[1] = 0.053e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 683.6e-6;
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2.23;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1 / 1.93;
+ I_off_n[1][0] = 2.06e-11;
+ I_off_n[1][10] = 3.30e-11;
+ I_off_n[1][20] = 5.15e-11;
+ I_off_n[1][30] = 7.83e-11;
+ I_off_n[1][40] = 1.16e-10;
+ I_off_n[1][50] = 1.69e-10;
+ I_off_n[1][60] = 2.40e-10;
+ I_off_n[1][70] = 3.34e-10;
+ I_off_n[1][80] = 4.54e-10;
+ I_off_n[1][90] = 5.96e-10;
+ I_off_n[1][100] = 7.44e-10;
+
+ I_g_on_n[1][0] = 3.73e-11;//A/micron
+ I_g_on_n[1][10] = 3.73e-11;
+ I_g_on_n[1][20] = 3.73e-11;
+ I_g_on_n[1][30] = 3.73e-11;
+ I_g_on_n[1][40] = 3.73e-11;
+ I_g_on_n[1][50] = 3.73e-11;
+ I_g_on_n[1][60] = 3.73e-11;
+ I_g_on_n[1][70] = 3.73e-11;
+ I_g_on_n[1][80] = 3.73e-11;
+ I_g_on_n[1][90] = 3.73e-11;
+ I_g_on_n[1][100] = 3.73e-11;
+
+ //LOP device type
+ vdd[2] = 0.6;
+ Lphy[2] = 0.016;
+ Lelec[2] = 0.01232;
+ t_ox[2] = 0.9e-3;
+ v_th[2] = 0.24227;
+ c_ox[2] = 2.84e-14;
+ mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 4.64e-2;
+ c_g_ideal[2] = 4.54e-16;
+ c_fringe[2] = 0.057e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 827.8e-6;
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.73;
+ n_to_p_eff_curr_drv_ratio[2] = 2.28;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1 / 1.89;
+ I_off_n[2][0] = 5.94e-8;
+ I_off_n[2][10] = 7.23e-8;
+ I_off_n[2][20] = 8.7e-8;
+ I_off_n[2][30] = 1.04e-7;
+ I_off_n[2][40] = 1.22e-7;
+ I_off_n[2][50] = 1.43e-7;
+ I_off_n[2][60] = 1.65e-7;
+ I_off_n[2][70] = 1.90e-7;
+ I_off_n[2][80] = 2.15e-7;
+ I_off_n[2][90] = 2.39e-7;
+ I_off_n[2][100] = 2.63e-7;
+
+ I_g_on_n[2][0] = 2.93e-9;//A/micron
+ I_g_on_n[2][10] = 2.93e-9;
+ I_g_on_n[2][20] = 2.93e-9;
+ I_g_on_n[2][30] = 2.93e-9;
+ I_g_on_n[2][40] = 2.93e-9;
+ I_g_on_n[2][50] = 2.93e-9;
+ I_g_on_n[2][60] = 2.93e-9;
+ I_g_on_n[2][70] = 2.93e-9;
+ I_g_on_n[2][80] = 2.93e-9;
+ I_g_on_n[2][90] = 2.93e-9;
+ I_g_on_n[2][100] = 2.93e-9;
+
+ if (ram_cell_tech_type == lp_dram) {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.0;
+ Lphy[3] = 0.056;
+ Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 0.44129;
+ width_dram_access_transistor = 0.056;
+ curr_I_on_dram_cell = 36e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.5;
+ t_ox[3] = 2e-3;
+ v_th[3] = 0.44467;
+ c_ox[3] = 1.48e-14;
+ mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.174;
+ c_g_ideal[3] = 7.45e-16;
+ c_fringe[3] = 0.053e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 1055.4e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 3.57e-11;
+ I_off_n[3][10] = 5.51e-11;
+ I_off_n[3][20] = 8.27e-11;
+ I_off_n[3][30] = 1.21e-10;
+ I_off_n[3][40] = 1.74e-10;
+ I_off_n[3][50] = 2.45e-10;
+ I_off_n[3][60] = 3.38e-10;
+ I_off_n[3][70] = 4.53e-10;
+ I_off_n[3][80] = 5.87e-10;
+ I_off_n[3][90] = 7.29e-10;
+ I_off_n[3][100] = 8.87e-10;
+ } else if (ram_cell_tech_type == comm_dram) {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.0;
+ Lphy[3] = 0.032;
+ Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.032;
+ curr_I_on_dram_cell = 20e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6 * 0.032 * 0.032;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 2.6;
+ t_ox[3] = 4e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 7.99e-15;
+ mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.129;
+ c_g_ideal[3] = 2.56e-16;
+ c_fringe[3] = 0.053e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 1024.5e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.69;
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 3.63e-14;
+ I_off_n[3][10] = 7.18e-14;
+ I_off_n[3][20] = 1.36e-13;
+ I_off_n[3][30] = 2.49e-13;
+ I_off_n[3][40] = 4.41e-13;
+ I_off_n[3][50] = 7.55e-13;
+ I_off_n[3][60] = 1.26e-12;
+ I_off_n[3][70] = 2.03e-12;
+ I_off_n[3][80] = 3.19e-12;
+ I_off_n[3][90] = 4.87e-12;
+ I_off_n[3][100] = 7.16e-12;
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferentiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7;
+ curr_core_tx_density = 1.25 / 0.7;
+ curr_sckt_co_eff = 1.1111;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
- if(tech == 22){
- SENSE_AMP_D = .03e-9; // s
- SENSE_AMP_P = 2.16e-15; // J
- //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm
- //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP.
- //22 nm HP
- vdd[0] = 0.8;
- Lphy[0] = 0.009;//Lphy is the physical gate-length.
- Lelec[0] = 0.00468;//Lelec is the electrical gate-length.
- t_ox[0] = 0.55e-3;//micron
- v_th[0] = 0.1395;//V
- c_ox[0] = 3.63e-14;//F/micron2
- mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 2.33e-2; //V/micron
- c_g_ideal[0] = 3.27e-16;//F/micron
- c_fringe[0] = 0.06e-15;//F/micron
- c_junc[0] = 0;//F/micron2
- I_on_n[0] = 2626.4e-6;//A/micron
- I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.45;
- n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
- //"Dynamic" tab of Device workspace.
- gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/3.274;
- I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there.
- I_off_n[0][10] = 1.55e-7/1.5*1.2;
- I_off_n[0][20] = 1.59e-7/1.5*1.2;
- I_off_n[0][30] = 1.68e-7/1.5*1.2;
- I_off_n[0][40] = 1.90e-7/1.5*1.2;
- I_off_n[0][50] = 2.69e-7/1.5*1.2;
- I_off_n[0][60] = 5.32e-7/1.5*1.2;
- I_off_n[0][70] = 1.02e-6/1.5*1.2;
- I_off_n[0][80] = 1.62e-6/1.5*1.2;
- I_off_n[0][90] = 2.73e-6/1.5*1.2;
- I_off_n[0][100] = 6.1e-6/1.5*1.2;
- //for 22nm DG HP
- I_g_on_n[0][0] = 1.81e-9;//A/micron
- I_g_on_n[0][10] = 1.81e-9;
- I_g_on_n[0][20] = 1.81e-9;
- I_g_on_n[0][30] = 1.81e-9;
- I_g_on_n[0][40] = 1.81e-9;
- I_g_on_n[0][50] = 1.81e-9;
- I_g_on_n[0][60] = 1.81e-9;
- I_g_on_n[0][70] = 1.81e-9;
- I_g_on_n[0][80] = 1.81e-9;
- I_g_on_n[0][90] = 1.81e-9;
- I_g_on_n[0][100] = 1.81e-9;
-
- //22 nm LSTP DG
- vdd[1] = 0.8;
- Lphy[1] = 0.014;
- Lelec[1] = 0.008;//Lelec is the electrical gate-length.
- t_ox[1] = 1.1e-3;//micron
- v_th[1] = 0.40126;//V
- c_ox[1] = 2.30e-14;//F/micron2
- mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[1] = 6.64e-2; //V/micron
- c_g_ideal[1] = 3.22e-16;//F/micron
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 0;//F/micron2
- I_on_n[1] = 727.6e-6;//A/micron
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
- long_channel_leakage_reduction[1] = 1/1.89;
- I_off_n[1][0] = 2.43e-11;
- I_off_n[1][10] = 4.85e-11;
- I_off_n[1][20] = 9.68e-11;
- I_off_n[1][30] = 1.94e-10;
- I_off_n[1][40] = 3.87e-10;
- I_off_n[1][50] = 7.73e-10;
- I_off_n[1][60] = 3.55e-10;
- I_off_n[1][70] = 3.09e-9;
- I_off_n[1][80] = 6.19e-9;
- I_off_n[1][90] = 1.24e-8;
- I_off_n[1][100]= 2.48e-8;
-
- I_g_on_n[1][0] = 4.51e-10;//A/micron
- I_g_on_n[1][10] = 4.51e-10;
- I_g_on_n[1][20] = 4.51e-10;
- I_g_on_n[1][30] = 4.51e-10;
- I_g_on_n[1][40] = 4.51e-10;
- I_g_on_n[1][50] = 4.51e-10;
- I_g_on_n[1][60] = 4.51e-10;
- I_g_on_n[1][70] = 4.51e-10;
- I_g_on_n[1][80] = 4.51e-10;
- I_g_on_n[1][90] = 4.51e-10;
- I_g_on_n[1][100] = 4.51e-10;
-
- //22 nm LOP
- vdd[2] = 0.6;
- Lphy[2] = 0.011;
- Lelec[2] = 0.00604;//Lelec is the electrical gate-length.
- t_ox[2] = 0.8e-3;//micron
- v_th[2] = 0.2315;//V
- c_ox[2] = 2.87e-14;//F/micron2
- mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[2] = 1.81e-2; //V/micron
- c_g_ideal[2] = 3.16e-16;//F/micron
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab
- I_on_n[2] = 916.1e-6;//A/micron
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.73;
- n_to_p_eff_curr_drv_ratio[2] = 2;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron
- long_channel_leakage_reduction[2] = 1/2.38;
-
- I_off_n[2][0] = 1.31e-8;
- I_off_n[2][10] = 2.60e-8;
- I_off_n[2][20] = 5.14e-8;
- I_off_n[2][30] = 1.02e-7;
- I_off_n[2][40] = 2.02e-7;
- I_off_n[2][50] = 3.99e-7;
- I_off_n[2][60] = 7.91e-7;
- I_off_n[2][70] = 1.09e-6;
- I_off_n[2][80] = 2.09e-6;
- I_off_n[2][90] = 4.04e-6;
- I_off_n[2][100]= 4.48e-6;
-
- I_g_on_n[2][0] = 2.74e-9;//A/micron
- I_g_on_n[2][10] = 2.74e-9;
- I_g_on_n[2][20] = 2.74e-9;
- I_g_on_n[2][30] = 2.74e-9;
- I_g_on_n[2][40] = 2.74e-9;
- I_g_on_n[2][50] = 2.74e-9;
- I_g_on_n[2][60] = 2.74e-9;
- I_g_on_n[2][70] = 2.74e-9;
- I_g_on_n[2][80] = 2.74e-9;
- I_g_on_n[2][90] = 2.74e-9;
- I_g_on_n[2][100] = 2.74e-9;
-
-
-
- if (ram_cell_tech_type == 3)
- {}
- else if (ram_cell_tech_type == 4)
- {
- //22 nm commodity DRAM cell access transistor technology parameters.
+ if (tech == 22) {
+ SENSE_AMP_D = .03e-9; // s
+ SENSE_AMP_P = 2.16e-15; // J
+ //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm
+ //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP.
+ //22 nm HP
+ vdd[0] = 0.8;
+ Lphy[0] = 0.009;//Lphy is the physical gate-length.
+ Lelec[0] = 0.00468;//Lelec is the electrical gate-length.
+ t_ox[0] = 0.55e-3;//micron
+ v_th[0] = 0.1395;//V
+ c_ox[0] = 3.63e-14;//F/micron2
+ mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 2.33e-2; //V/micron
+ c_g_ideal[0] = 3.27e-16;//F/micron
+ c_fringe[0] = 0.06e-15;//F/micron
+ c_junc[0] = 0;//F/micron2
+ I_on_n[0] = 2626.4e-6;//A/micron
+ I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
+ nmos_effective_resistance_multiplier = 1.45;
+ n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
+ //"Dynamic" tab of Device workspace.
+ gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1 / 3.274;
+ //From 22nm, leakage current are directly from ITRS report rather
+ //than MASTAR, since MASTAR has serious bugs there.
+ I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2;
+ I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2;
+ I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2;
+ I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2;
+ I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2;
+ I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2;
+ I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2;
+ I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2;
+ I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2;
+ I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2;
+ I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2;
+ //for 22nm DG HP
+ I_g_on_n[0][0] = 1.81e-9;//A/micron
+ I_g_on_n[0][10] = 1.81e-9;
+ I_g_on_n[0][20] = 1.81e-9;
+ I_g_on_n[0][30] = 1.81e-9;
+ I_g_on_n[0][40] = 1.81e-9;
+ I_g_on_n[0][50] = 1.81e-9;
+ I_g_on_n[0][60] = 1.81e-9;
+ I_g_on_n[0][70] = 1.81e-9;
+ I_g_on_n[0][80] = 1.81e-9;
+ I_g_on_n[0][90] = 1.81e-9;
+ I_g_on_n[0][100] = 1.81e-9;
+
+ //22 nm LSTP DG
+ vdd[1] = 0.8;
+ Lphy[1] = 0.014;
+ Lelec[1] = 0.008;//Lelec is the electrical gate-length.
+ t_ox[1] = 1.1e-3;//micron
+ v_th[1] = 0.40126;//V
+ c_ox[1] = 2.30e-14;//F/micron2
+ mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[1] = 6.64e-2; //V/micron
+ c_g_ideal[1] = 3.22e-16;//F/micron
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 0;//F/micron2
+ I_on_n[1] = 727.6e-6;//A/micron
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
+ long_channel_leakage_reduction[1] = 1 / 1.89;
+ I_off_n[1][0] = 2.43e-11;
+ I_off_n[1][10] = 4.85e-11;
+ I_off_n[1][20] = 9.68e-11;
+ I_off_n[1][30] = 1.94e-10;
+ I_off_n[1][40] = 3.87e-10;
+ I_off_n[1][50] = 7.73e-10;
+ I_off_n[1][60] = 3.55e-10;
+ I_off_n[1][70] = 3.09e-9;
+ I_off_n[1][80] = 6.19e-9;
+ I_off_n[1][90] = 1.24e-8;
+ I_off_n[1][100] = 2.48e-8;
+
+ I_g_on_n[1][0] = 4.51e-10;//A/micron
+ I_g_on_n[1][10] = 4.51e-10;
+ I_g_on_n[1][20] = 4.51e-10;
+ I_g_on_n[1][30] = 4.51e-10;
+ I_g_on_n[1][40] = 4.51e-10;
+ I_g_on_n[1][50] = 4.51e-10;
+ I_g_on_n[1][60] = 4.51e-10;
+ I_g_on_n[1][70] = 4.51e-10;
+ I_g_on_n[1][80] = 4.51e-10;
+ I_g_on_n[1][90] = 4.51e-10;
+ I_g_on_n[1][100] = 4.51e-10;
+
+ //22 nm LOP
+ vdd[2] = 0.6;
+ Lphy[2] = 0.011;
+ Lelec[2] = 0.00604;//Lelec is the electrical gate-length.
+ t_ox[2] = 0.8e-3;//micron
+ v_th[2] = 0.2315;//V
+ c_ox[2] = 2.87e-14;//F/micron2
+ mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[2] = 1.81e-2; //V/micron
+ c_g_ideal[2] = 3.16e-16;//F/micron
+ c_fringe[2] = 0.08e-15;
+ c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab
+ I_on_n[2] = 916.1e-6;//A/micron
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.73;
+ n_to_p_eff_curr_drv_ratio[2] = 2;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron
+ long_channel_leakage_reduction[2] = 1 / 2.38;
+
+ I_off_n[2][0] = 1.31e-8;
+ I_off_n[2][10] = 2.60e-8;
+ I_off_n[2][20] = 5.14e-8;
+ I_off_n[2][30] = 1.02e-7;
+ I_off_n[2][40] = 2.02e-7;
+ I_off_n[2][50] = 3.99e-7;
+ I_off_n[2][60] = 7.91e-7;
+ I_off_n[2][70] = 1.09e-6;
+ I_off_n[2][80] = 2.09e-6;
+ I_off_n[2][90] = 4.04e-6;
+ I_off_n[2][100] = 4.48e-6;
+
+ I_g_on_n[2][0] = 2.74e-9;//A/micron
+ I_g_on_n[2][10] = 2.74e-9;
+ I_g_on_n[2][20] = 2.74e-9;
+ I_g_on_n[2][30] = 2.74e-9;
+ I_g_on_n[2][40] = 2.74e-9;
+ I_g_on_n[2][50] = 2.74e-9;
+ I_g_on_n[2][60] = 2.74e-9;
+ I_g_on_n[2][70] = 2.74e-9;
+ I_g_on_n[2][80] = 2.74e-9;
+ I_g_on_n[2][90] = 2.74e-9;
+ I_g_on_n[2][100] = 2.74e-9;
+
+
+
+ if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) {
+ //22 nm commodity DRAM cell access transistor technology parameters.
//parameters
curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
//2005 ITRS, the value was about twice the value in 2007 ITRS
@@ -1486,12 +1423,12 @@ void init_tech_params(double technology, bool is_tag)
curr_Wmemcella_dram = width_dram_access_transistor;
curr_Wmemcellpmos_dram = 0;
curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.022*0.022;//micron2.
+ curr_area_cell_dram = 6 * 0.022 * 0.022;//micron2.
curr_asp_ratio_cell_dram = 0.667;
curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
//kept constant.
- //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
+ //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
curr_vpp = 2.3;//vpp. V
t_ox[3] = 3.5e-3;//micron
v_th[3] = 1.0;//V
@@ -1522,130 +1459,80 @@ void init_tech_params(double technology, bool is_tag)
I_off_n[3][90] = 1.18e-11;
I_off_n[3][100] = 1.72e-11;
- }
- else
- {
- //some error handler
+ } else {
+ //some error handler
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferentiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7;
+ curr_core_tx_density = 1.25 / 0.7 / 0.7;
+ curr_sckt_co_eff = 1.1296;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
}
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7/0.7;
- curr_sckt_co_eff = 1.1296;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if(tech == 16){
- //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm
- //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP.
- //16 nm HP
- vdd[0] = 0.7;
- Lphy[0] = 0.006;//Lphy is the physical gate-length.
- Lelec[0] = 0.00315;//Lelec is the electrical gate-length.
- t_ox[0] = 0.5e-3;//micron
- v_th[0] = 0.1489;//V
- c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR
- mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet
- c_g_ideal[0] = 2.30e-16;//F/micron
- c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3
- c_junc[0] = 0;//F/micron2 MASTAR result dynamic
- I_on_n[0] = 2768.4e-6;//A/micron
- I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current.
- n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
- //"Dynamic" tab of Device workspace.
- gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/2.655;
- I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07;
- I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07;
- I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07;
- I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07;
- I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07;
- I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07;
- I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07;
- I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07;
- I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07;
- I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07;
- I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07;
- //for 16nm DG HP
- I_g_on_n[0][0] = 1.07e-9;//A/micron
- I_g_on_n[0][10] = 1.07e-9;
- I_g_on_n[0][20] = 1.07e-9;
- I_g_on_n[0][30] = 1.07e-9;
- I_g_on_n[0][40] = 1.07e-9;
- I_g_on_n[0][50] = 1.07e-9;
- I_g_on_n[0][60] = 1.07e-9;
- I_g_on_n[0][70] = 1.07e-9;
- I_g_on_n[0][80] = 1.07e-9;
- I_g_on_n[0][90] = 1.07e-9;
- I_g_on_n[0][100] = 1.07e-9;
-
-// //16 nm LSTP DG
-// vdd[1] = 0.8;
-// Lphy[1] = 0.014;
-// Lelec[1] = 0.008;//Lelec is the electrical gate-length.
-// t_ox[1] = 1.1e-3;//micron
-// v_th[1] = 0.40126;//V
-// c_ox[1] = 2.30e-14;//F/micron2
-// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
-// Vdsat[1] = 6.64e-2; //V/micron
-// c_g_ideal[1] = 3.22e-16;//F/micron
-// c_fringe[1] = 0.008e-15;
-// c_junc[1] = 0;//F/micron2
-// I_on_n[1] = 727.6e-6;//A/micron
-// I_on_p[1] = I_on_n[1] / 2;
-// nmos_effective_resistance_multiplier = 1.99;
-// n_to_p_eff_curr_drv_ratio[1] = 2;
-// gmp_to_gmn_multiplier[1] = 0.99;
-// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
-// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
-// I_off_n[1][0] = 2.43e-11;
-// I_off_n[1][10] = 4.85e-11;
-// I_off_n[1][20] = 9.68e-11;
-// I_off_n[1][30] = 1.94e-10;
-// I_off_n[1][40] = 3.87e-10;
-// I_off_n[1][50] = 7.73e-10;
-// I_off_n[1][60] = 3.55e-10;
-// I_off_n[1][70] = 3.09e-9;
-// I_off_n[1][80] = 6.19e-9;
-// I_off_n[1][90] = 1.24e-8;
-// I_off_n[1][100]= 2.48e-8;
-//
-// // for 22nm LSTP HP
-// I_g_on_n[1][0] = 4.51e-10;//A/micron
-// I_g_on_n[1][10] = 4.51e-10;
-// I_g_on_n[1][20] = 4.51e-10;
-// I_g_on_n[1][30] = 4.51e-10;
-// I_g_on_n[1][40] = 4.51e-10;
-// I_g_on_n[1][50] = 4.51e-10;
-// I_g_on_n[1][60] = 4.51e-10;
-// I_g_on_n[1][70] = 4.51e-10;
-// I_g_on_n[1][80] = 4.51e-10;
-// I_g_on_n[1][90] = 4.51e-10;
-// I_g_on_n[1][100] = 4.51e-10;
-
-
- if (ram_cell_tech_type == 3)
- {}
- else if (ram_cell_tech_type == 4)
- {
- //22 nm commodity DRAM cell access transistor technology parameters.
+ if (tech == 16) {
+ //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm
+ //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP.
+ //16 nm HP
+ vdd[0] = 0.7;
+ Lphy[0] = 0.006;//Lphy is the physical gate-length.
+ Lelec[0] = 0.00315;//Lelec is the electrical gate-length.
+ t_ox[0] = 0.5e-3;//micron
+ v_th[0] = 0.1489;//V
+ c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR
+ mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet
+ c_g_ideal[0] = 2.30e-16;//F/micron
+ c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3
+ c_junc[0] = 0;//F/micron2 MASTAR result dynamic
+ I_on_n[0] = 2768.4e-6;//A/micron
+ I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
+ nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current.
+ n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
+ //"Dynamic" tab of Device workspace.
+ gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1 / 2.655;
+ I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2 * 1.07;
+ //for 16nm DG HP
+ I_g_on_n[0][0] = 1.07e-9;//A/micron
+ I_g_on_n[0][10] = 1.07e-9;
+ I_g_on_n[0][20] = 1.07e-9;
+ I_g_on_n[0][30] = 1.07e-9;
+ I_g_on_n[0][40] = 1.07e-9;
+ I_g_on_n[0][50] = 1.07e-9;
+ I_g_on_n[0][60] = 1.07e-9;
+ I_g_on_n[0][70] = 1.07e-9;
+ I_g_on_n[0][80] = 1.07e-9;
+ I_g_on_n[0][90] = 1.07e-9;
+ I_g_on_n[0][100] = 1.07e-9;
+
+ if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) {
+ //22 nm commodity DRAM cell access transistor technology parameters.
//parameters
curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
//2005 ITRS, the value was about twice the value in 2007 ITRS
@@ -1659,12 +1546,12 @@ void init_tech_params(double technology, bool is_tag)
curr_Wmemcella_dram = width_dram_access_transistor;
curr_Wmemcellpmos_dram = 0;
curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.022*0.022;//micron2.
+ curr_area_cell_dram = 6 * 0.022 * 0.022;//micron2.
curr_asp_ratio_cell_dram = 0.667;
curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
//kept constant.
- //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
+ //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
curr_vpp = 2.3;//vpp. V
t_ox[3] = 3.5e-3;//micron
v_th[3] = 1.0;//V
@@ -1695,930 +1582,766 @@ void init_tech_params(double technology, bool is_tag)
I_off_n[3][90] = 1.18e-11;
I_off_n[3][100] = 1.72e-11;
- }
- else
- {
- //some error handler
+ } else {
+ //some error handler
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferentiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7 * 0.7;
+ curr_core_tx_density = 1.25 / 0.7 / 0.7 / 0.7;
+ curr_sckt_co_eff = 1.1296;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
}
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7/0.7/0.7;
- curr_sckt_co_eff = 1.1296;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type];
+ g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type];
+ g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type];
+ g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type];
+ g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type];
+ g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type];
+ g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type];
+ g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type];
+ g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type];
+ g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type];
+ g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type];
+ g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type];
+ g_tp.peri_global.n_to_p_eff_curr_drv_ratio
+ += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type];
+ g_tp.peri_global.long_channel_leakage_reduction
+ += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type];
+ g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
+ gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type];
+
+ g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
+ g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
+ g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
+ g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
+ g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
+ g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
+ g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
+ g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
+ g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
+ g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
+ g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
+ g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
+ g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
+ g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+
+ g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell;
+ g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor;
+ g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
+ g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell;
+ g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp;
+ g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
+ g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell;
+ g_tp.vpp += curr_alpha * curr_vpp;
+ g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
+ g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
+ g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor];
+ g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor];
+ g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor];
+ g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor];
+ g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
+ g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
+
+ g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
+ g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
+ g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
+ g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
+ g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
+ g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
+ g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
+ g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
+ g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
+ g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
+ g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
+ g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
+ g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
+ g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+
+ g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram;
+ g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram;
+ g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram;
+ area_cell_dram += curr_alpha * curr_area_cell_dram;
+ asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram;
+
+ g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram;
+ g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram;
+ g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram;
+ area_cell_sram += curr_alpha * curr_area_cell_sram;
+ asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram;
+
+ g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng
+ g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam;
+ g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam;
+ area_cell_cam += curr_alpha * curr_area_cell_cam;
+ asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam;
+
+ //Sense amplifier latch Gm calculation
+ mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type];
+ Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type];
- g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type];
- g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type];
- g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type];
- g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type];
- g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type];
- g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type];
- g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type];
- g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type];
- g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type];
- g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type];
- g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type];
- g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type];
- g_tp.peri_global.n_to_p_eff_curr_drv_ratio
- += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type];
- g_tp.peri_global.long_channel_leakage_reduction
- += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type];
- g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
- gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type];
-
- g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
- g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
- g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
- g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
- g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
- g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
- g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
- g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
- g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
- g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
- g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
- g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
- g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
- g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
-
- g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell;
- g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor;
- g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
- g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
- g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
- g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
- g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
- g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell;
- g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp;
- g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
- g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell;
- g_tp.vpp += curr_alpha * curr_vpp;
- g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
- g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
- g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
- g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
- g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
- g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
- g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor];
- g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor];
- g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor];
- g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor];
- g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
- g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
-
- g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
- g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
- g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
- g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
- g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
- g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
- g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
- g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
- g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
- g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
- g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
- g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
- g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
- g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
-
- g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram;
- g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram;
- g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram;
- area_cell_dram += curr_alpha * curr_area_cell_dram;
- asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram;
-
- g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram;
- g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram;
- g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram;
- area_cell_sram += curr_alpha * curr_area_cell_sram;
- asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram;
-
- g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng
- g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam;
- g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam;
- area_cell_cam += curr_alpha * curr_area_cell_cam;
- asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam;
-
- //Sense amplifier latch Gm calculation
- mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type];
- Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type];
-
- //Empirical undifferetiated core/FU coefficient
- g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff;
- g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density;
- g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead;
- g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead;
- g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff;
- }
-
-
- //Currently we are not modeling the resistance/capacitance of poly anywhere.
- //Continuous function (or date have been processed) does not need linear interpolation
- g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process
- g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process
- g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um;
- g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um;
- g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um;
- g_tp.cell_h_def = 50 * g_ip->F_sz_um;
- g_tp.w_poly_contact = g_ip->F_sz_um;
- g_tp.spacing_poly_to_contact = g_ip->F_sz_um;
- g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um;
- g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um;
-
- g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2;
- g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um;
- g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process
- g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process
- g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process
- g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process
- g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_;
- g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_;
-
- if (ram_cell_tech_type == comm_dram)
- {
- g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um;
- g_tp.h_dec = 8; // in the unit of memory cell height
- }
- else
- {
- g_tp.max_w_nmos_dec = g_tp.max_w_nmos_;
- g_tp.h_dec = 4; // in the unit of memory cell height
- }
-
- g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal;
- g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal;
- g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal;
-
- g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal;
- g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n;
- //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p;
-
- g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal;
-
- double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global;
- double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch;
- g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch;
-
- g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram));
- g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w;
- g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram));
- g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w;
- g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng
- g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w;
-
- g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd;
- g_tp.sram.Vbitpre = vdd[ram_cell_tech_type];
- g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng
- pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
- g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
-
-
- double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES];
-
- for (iter=0; iter<=1; ++iter)
- {
- // linear interpolation
- if (iter == 0)
- {
- tech = tech_lo;
- if (tech_lo == tech_hi)
- {
- curr_alpha = 1;
- }
- else
- {
- curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
- }
- }
- else
- {
- tech = tech_hi;
- if (tech_lo == tech_hi)
- {
- break;
- }
- else
- {
- curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
- }
+ //Empirical undifferentiated core/FU coefficient
+ g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff;
+ g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density;
+ g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead;
+ g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead;
+ g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff;
}
- if (tech == 180)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
- aspect_ratio[0][0] = 2.0;
- wire_width = wire_pitch[0][0] / 2; //micron
- wire_thickness = aspect_ratio[0][0] * wire_width;//micron
- wire_spacing = wire_pitch[0][0] - wire_width;//micron
- barrier_thickness = 0.017;//micron
- dishing_thickness = 0;//micron
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
- ild_thickness[0][0] = 0.75;//micron
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.709;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
- vert_dielectric_constant[0][0],
- fringe_cap);//F/micron.
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.4;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.75;//micron
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.709;
- vert_dielectric_constant[0][1] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.2;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 1.5;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.709;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0]= 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.017;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.75;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 3.038;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
- vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.75;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 3.038;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
- vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 1.98;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 3.038;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.18;
- wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18);
- wire_r_per_micron[1][3] = 12 / 0.18;
- }
- else if (tech == 90)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
- aspect_ratio[0][0] = 2.4;
- wire_width = wire_pitch[0][0] / 2; //micron
- wire_thickness = aspect_ratio[0][0] * wire_width;//micron
- wire_spacing = wire_pitch[0][0] - wire_width;//micron
- barrier_thickness = 0.01;//micron
- dishing_thickness = 0;//micron
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
- ild_thickness[0][0] = 0.48;//micron
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.709;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
- vert_dielectric_constant[0][0],
- fringe_cap);//F/micron.
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.4;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.48;//micron
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.709;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.7;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.96;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.709;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.008;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.48;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 3.038;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
- vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.48;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 3.038;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
- vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 1.1;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 3.038;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.09;
- wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09);
- wire_r_per_micron[1][3] = 12 / 0.09;
- }
- else if (tech == 65)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 2.7;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.405;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.303;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.7;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.405;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.303;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.8;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.81;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.303;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.006;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.405;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.734;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.405;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.734;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.77;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.734;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.065;
- wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065);
- wire_r_per_micron[1][3] = 12 / 0.065;
- }
- else if (tech == 45)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.315;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.958;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.315;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.958;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.63;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.958;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.004;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.315;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.46;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.315;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.46;
- vert_dielectric_constant[1][1] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.55;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.46;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.045;
- wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045);
- wire_r_per_micron[1][3] = 12 / 0.045;
- }
- else if (tech == 32)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.21;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.664;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.21;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.664;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.42;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.664;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.003;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.21;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.214;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- aspect_ratio[1][1] = 2.0;
- wire_width = wire_pitch[1][1] / 2;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.21;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.214;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.385;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.214;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.032;//micron
- wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron
- wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron
+
+ //Currently we are not modeling the resistance/capacitance of poly anywhere.
+ //Continuous function (or data that has already been processed) does not need linear interpolation
+ g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process
+ g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process
+ g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um;
+ g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um;
+ g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um;
+ g_tp.cell_h_def = 50 * g_ip->F_sz_um;
+ g_tp.w_poly_contact = g_ip->F_sz_um;
+ g_tp.spacing_poly_to_contact = g_ip->F_sz_um;
+ g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um;
+ g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um;
+
+ g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2;
+ g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um;
+ //was 10 micron for the 0.8 micron process
+ g_tp.w_iso = 12.5 * g_ip->F_sz_um;
+ // sense amplifier N-trans; was 3 micron for the 0.8 micron process
+ g_tp.w_sense_n = 3.75 * g_ip->F_sz_um;
+ // sense amplifier P-trans; was 6 micron for the 0.8 micron process
+ g_tp.w_sense_p = 7.5 * g_ip->F_sz_um;
+ // Sense enable transistor of the sense amplifier; was 4 micron for the
+ //0.8 micron process
+ g_tp.w_sense_en = 5 * g_ip->F_sz_um;
+ g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_;
+ g_tp.w_nmos_sa_mux= 6 * g_tp.min_w_nmos_;
+
+ if (ram_cell_tech_type == comm_dram) {
+ g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um;
+ g_tp.h_dec = 8; // in the unit of memory cell height
+ } else {
+ g_tp.max_w_nmos_dec = g_tp.max_w_nmos_;
+ g_tp.h_dec = 4; // in the unit of memory cell height
}
- else if (tech == 22)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.15;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.414;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.15;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.414;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.3;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.414;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
-// //*************************
-// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][4] - wire_width;
-// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][5] - wire_width;
-// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][6] - wire_width;
-// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- //*************************
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.003;
- dishing_thickness = 0;
- alpha_scatter = 1.05;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.15;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.104;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.15;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.104;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
+
+ g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal;
+ g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal;
+ g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal;
+
+ g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal;
+ g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n;
+ //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p;
+
+ g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal;
+
+ double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global;
+ double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch;
+ g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch;
+
+ g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram));
+ g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w;
+ g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram));
+ g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w;
+ g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng
+ g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w;
+
+ g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd;
+ g_tp.sram.Vbitpre = vdd[ram_cell_tech_type];
+ g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng
+ pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+ g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
+ g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
+
+
+ double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES];
+
+ for (iter = 0; iter <= 1; ++iter) {
+ // linear interpolation
+ if (iter == 0) {
+ tech = tech_lo;
+ if (tech_lo == tech_hi) {
+ curr_alpha = 1;
+ } else {
+ curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi);
+ }
+ } else {
+ tech = tech_hi;
+ if (tech_lo == tech_hi) {
+ break;
+ } else {
+ curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi);
+ }
+ }
+
+ if (tech == 180) {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
+ aspect_ratio[0][0] = 2.0;
+ wire_width = wire_pitch[0][0] / 2; //micron
+ wire_thickness = aspect_ratio[0][0] * wire_width;//micron
+ wire_spacing = wire_pitch[0][0] - wire_width;//micron
+ barrier_thickness = 0.017;//micron
+ dishing_thickness = 0;//micron
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
+ ild_thickness[0][0] = 0.75;//micron
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.709;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
+ vert_dielectric_constant[0][0],
+ fringe_cap);//F/micron.
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.4;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.75;//micron
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.709;
+ vert_dielectric_constant[0][1] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.2;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 1.5;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.709;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.017;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.75;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 3.038;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
+ vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.75;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 3.038;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
+ vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 1.98;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 3.038;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.18;
+ wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18);
+ wire_r_per_micron[1][3] = 12 / 0.18;
+ } else if (tech == 90) {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
+ aspect_ratio[0][0] = 2.4;
+ wire_width = wire_pitch[0][0] / 2; //micron
+ wire_thickness = aspect_ratio[0][0] * wire_width;//micron
+ wire_spacing = wire_pitch[0][0] - wire_width;//micron
+ barrier_thickness = 0.01;//micron
+ dishing_thickness = 0;//micron
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
+ ild_thickness[0][0] = 0.48;//micron
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.709;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
+ vert_dielectric_constant[0][0],
+ fringe_cap);//F/micron.
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.4;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.48;//micron
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.709;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.7;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.96;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.709;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.008;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.48;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 3.038;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
+ vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.48;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 3.038;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
+ vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 1.1;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 3.038;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.09;
+ wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09);
+ wire_r_per_micron[1][3] = 12 / 0.09;
+ } else if (tech == 65) {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 2.7;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.405;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.303;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.7;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.405;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.303;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.8;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.81;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.303;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.006;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.405;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.734;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.405;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.734;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.77;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.734;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.065;
+ wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065);
+ wire_r_per_micron[1][3] = 12 / 0.065;
+ } else if (tech == 45) {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.315;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.958;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.315;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.958;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.63;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.958;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.004;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.315;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.46;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.315;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.46;
+ vert_dielectric_constant[1][1] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.55;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.46;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.045;
+ wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045);
+ wire_r_per_micron[1][3] = 12 / 0.045;
+ } else if (tech == 32) {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.21;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.664;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.21;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.664;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.42;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.664;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.003;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.21;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.214;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ aspect_ratio[1][1] = 2.0;
+ wire_width = wire_pitch[1][1] / 2;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.21;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.214;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
wire_pitch[1][2] = 8 * g_ip->F_sz_um;
aspect_ratio[1][2] = 2.2;
@@ -2627,184 +2350,210 @@ void init_tech_params(double technology, bool is_tag)
wire_spacing = wire_pitch[1][2] - wire_width;
dishing_thickness = 0.1 * wire_thickness;
wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.385;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.214;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.032;//micron
+ wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron
+ wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron
+ } else if (tech == 22) {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.15;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.414;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.15;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.414;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.3;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.414;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.003;
+ dishing_thickness = 0;
+ alpha_scatter = 1.05;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.15;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.104;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.15;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.104;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
ild_thickness[1][2] = 0.275;
miller_value[1][2] = 1.5;
horiz_dielectric_constant[1][2] = 2.104;
vert_dielectric_constant[1][2] = 3.9;
wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
//Nominal projections for commodity DRAM wordline/bitline
wire_pitch[1][3] = 2 * 0.022;//micron
wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron
wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron
-
- //******************
-// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][4] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][5] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][6] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
}
- else if (tech == 16)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.108;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.202;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
- aspect_ratio[0][1] = 3.0;
- wire_width = wire_pitch[0][1] / 2;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.108;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.202;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.216;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.202;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
-// //*************************
-// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][4] - wire_width;
-// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][5] - wire_width;
-// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][6] - wire_width;
-// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- //*************************
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.002;
- dishing_thickness = 0;
- alpha_scatter = 1.05;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.108;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 1.998;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.108;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 1.998;
- vert_dielectric_constant[1][1] = 3.9;
+ else if (tech == 16) {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.108;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.202;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
+ aspect_ratio[0][1] = 3.0;
+ wire_width = wire_pitch[0][1] / 2;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.108;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.202;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.216;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.202;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.002;
+ dishing_thickness = 0;
+ alpha_scatter = 1.05;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.108;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 1.998;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.108;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 1.998;
+ vert_dielectric_constant[1][1] = 3.9;
wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
wire_pitch[1][2] = 8 * g_ip->F_sz_um;
aspect_ratio[1][2] = 2.2;
@@ -2813,109 +2562,101 @@ void init_tech_params(double technology, bool is_tag)
wire_spacing = wire_pitch[1][2] - wire_width;
dishing_thickness = 0.1 * wire_thickness;
wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
ild_thickness[1][2] = 0.198;
miller_value[1][2] = 1.5;
horiz_dielectric_constant[1][2] = 1.998;
vert_dielectric_constant[1][2] = 3.9;
wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
//Nominal projections for commodity DRAM wordline/bitline
wire_pitch[1][3] = 2 * 0.016;//micron
wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron
wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron
-
- //******************
-// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][4] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][5] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][6] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
}
- g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
-
- g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type];
-
- g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type];
-
- g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2;
-
- g_tp.sense_delay += curr_alpha *SENSE_AMP_D;
- g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P;
-// g_tp.horiz_dielectric_constant += horiz_dielectric_constant;
-// g_tp.vert_dielectric_constant += vert_dielectric_constant;
-// g_tp.aspect_ratio += aspect_ratio;
-// g_tp.miller_value += miller_value;
-// g_tp.ild_thickness += ild_thickness;
-
- }
- g_tp.fringe_cap = fringe_cap;
-
- double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- double p_to_n_sizing_r = pmos_to_nmos_sz_ratio();
- double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0);
- double tf = rd * c_load;
- g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE);
- double KLOAD = 1;
- c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0));
- tf = rd * c_load;
- g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE);
+ g_tp.wire_local.pitch += curr_alpha *
+ wire_pitch[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.R_per_um += curr_alpha *
+ wire_r_per_micron[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.C_per_um += curr_alpha *
+ wire_c_per_micron[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.aspect_ratio += curr_alpha *
+ aspect_ratio[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.ild_thickness += curr_alpha *
+ ild_thickness[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.miller_value += curr_alpha *
+ miller_value[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.horiz_dielectric_constant += curr_alpha *
+ horiz_dielectric_constant[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.vert_dielectric_constant += curr_alpha *
+ vert_dielectric_constant[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+
+ g_tp.wire_inside_mat.pitch += curr_alpha *
+ wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.R_per_um += curr_alpha *
+ wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.C_per_um += curr_alpha *
+ wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.aspect_ratio += curr_alpha *
+ aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.ild_thickness += curr_alpha *
+ ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.miller_value += curr_alpha *
+ miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha *
+ horiz_dielectric_constant[g_ip->ic_proj_type]
+ [g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha *
+ vert_dielectric_constant [g_ip->ic_proj_type]
+ [g_ip->wire_is_mat_type];
+
+ g_tp.wire_outside_mat.pitch += curr_alpha *
+ wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.R_per_um += curr_alpha *
+ wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.C_per_um += curr_alpha *
+ wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.aspect_ratio += curr_alpha *
+ aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.ild_thickness += curr_alpha *
+ ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.miller_value += curr_alpha *
+ miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha *
+ horiz_dielectric_constant[g_ip->ic_proj_type]
+ [g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha *
+ vert_dielectric_constant [g_ip->ic_proj_type]
+ [g_ip->wire_os_mat_type];
+
+ g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um *
+ g_tp.wire_inside_mat.C_per_um / 2;
+
+ g_tp.sense_delay += curr_alpha * SENSE_AMP_D;
+ g_tp.sense_dy_power += curr_alpha * SENSE_AMP_P;
+
+ }
+ g_tp.fringe_cap = fringe_cap;
+
+ double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ double p_to_n_sizing_r = pmos_to_nmos_sz_ratio();
+ double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0);
+ double tf = rd * c_load;
+ g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE);
+ double KLOAD = 1;
+ c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0));
+ tf = rd * c_load;
+ g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE);
}
diff --git a/ext/mcpat/cacti/uca.cc b/ext/mcpat/cacti/uca.cc
index 568cd9e44..703ad470f 100755
--- a/ext/mcpat/cacti/uca.cc
+++ b/ext/mcpat/cacti/uca.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -37,390 +38,390 @@
#include "uca.h"
UCA::UCA(const DynamicParameter & dyn_p)
- :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0)
-{
- int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2));
- int num_banks_hor_dir = nbanks/num_banks_ver_dir;
-
- if (dp.use_inp_params)
- {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else
- {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
- }
-
- num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
- num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
- num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
- num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
- num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
-
- if (!dp.fully_assoc && !dp.pure_cam)
- {
-
- if (g_ip->fast_access && dp.is_tag == false)
- {
- num_do_b_bank *= g_ip->data_assoc;
- }
-
- htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
- htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- }
-
- else
- {
-
- htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
- htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- }
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
-
- area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
+ : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) {
+ int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)
+ / 2 : (_log2(nbanks) - _log2(nbanks) / 2));
+ int num_banks_hor_dir = nbanks / num_banks_ver_dir;
+
+ if (dp.use_inp_params) {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ } else {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+ }
+
+ num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) *
+ (RWP + ERP + EWP);
+ num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
+ num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
+ num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
+ num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
+
+ if (!dp.fully_assoc && !dp.pure_cam) {
+
+ if (g_ip->fast_access && dp.is_tag == false) {
+ num_do_b_bank *= g_ip->data_assoc;
+ }
+
+ htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, 0,
+ num_do_b_bank, 0, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Add_htree, true);
+ htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, 0,
+ num_do_b_bank, 0, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_in_htree, true);
+ htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, 0,
+ num_do_b_bank, 0, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_out_htree, true);
+ }
+
+ else {
+
+ htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank, num_so_b_bank,
+ num_banks_ver_dir * 2, num_banks_hor_dir * 2,
+ Add_htree, true);
+ htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank, num_so_b_bank,
+ num_banks_ver_dir * 2, num_banks_hor_dir * 2,
+ Data_in_htree, true);
+ htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank,
+ num_so_b_bank, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_out_htree, true);
+ htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank,
+ num_so_b_bank, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_in_htree, true);
+ htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank,
+ num_so_b_bank, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_out_htree,
+ true);
+ }
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+
+ area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
// cout<<"area cell"<<area_all_dataramcells<<endl;
// cout<<area.get_area()<<endl;
- // delay calculation
- double inrisetime = 0.0;
- compute_delays(inrisetime);
- compute_power_energy();
+ // delay calculation
+ double inrisetime = 0.0;
+ compute_delays(inrisetime);
+ compute_power_energy();
}
-UCA::~UCA()
-{
- delete htree_in_add;
- delete htree_in_data;
- delete htree_out_data;
+// Releases the H-tree networks that the UCA constructor allocated with new.
+UCA::~UCA() {
+    delete htree_in_add;
+    delete htree_in_data;
+    delete htree_out_data;
+    // The search H-trees are created only on the fully-associative / CAM
+    // path of the constructor, so they are released under that same
+    // condition; on the other path the pointers are never assigned here.
+    if (dp.fully_assoc || dp.pure_cam) {
+        delete htree_in_search;
+        delete htree_out_search;
+    }
}
-double UCA::compute_delays(double inrisetime)
-{
- double outrisetime = bank.compute_delays(inrisetime);
-
- double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
- double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
- delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
- bank.mat.sa_mux_lev_1_predec->delay +
- bank.mat.sa_mux_lev_1_dec->delay;
- delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
- bank.mat.sa_mux_lev_2_predec->delay +
- bank.mat.sa_mux_lev_2_dec->delay;
- double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
-
- delay_before_subarray_output_driver =
- MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
- delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
- MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
- delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
- delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
- bank.htree_out_data->delay + htree_out_data->delay;
- access_time = bank.mat.delay_comparator;
-
- double ram_delay_inside_mat;
- if (dp.fully_assoc)
- {
- //delay of FA contains both CAM tag and RAM data
- { //delay of CAM
- ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
- access_time = htree_in_add->delay + bank.htree_in_add->delay;
- //delay of fully-associative data array
- access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
+double UCA::compute_delays(double inrisetime) {
+ double outrisetime = bank.compute_delays(inrisetime);
+
+ double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
+ double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
+ delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
+ bank.mat.sa_mux_lev_1_predec->delay +
+ bank.mat.sa_mux_lev_1_dec->delay;
+ delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
+ bank.mat.sa_mux_lev_2_predec->delay +
+ bank.mat.sa_mux_lev_2_dec->delay;
+ double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
+
+ delay_before_subarray_output_driver =
+ MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
+ delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
+ MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
+ delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
+ delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
+ bank.htree_out_data->delay + htree_out_data->delay;
+ access_time = bank.mat.delay_comparator;
+
+ double ram_delay_inside_mat;
+ if (dp.fully_assoc) {
+ //delay of FA contains both CAM tag and RAM data
+ { //delay of CAM
+ ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
+ access_time = htree_in_add->delay + bank.htree_in_add->delay;
+ //delay of fully-associative data array
+ access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
+ }
+ } else {
+ access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
}
- }
- else
- {
- access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
- }
-
- if (dp.is_main_mem)
- {
- double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
- double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
- delay_from_subarray_out_drv_to_out;
- access_time = t_rcd + cas_latency;
- }
-
- double temp;
-
- if (!dp.fully_assoc)
- {
- temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
- if (dp.is_dram)
- {
- temp += bank.mat.delay_writeback; // temp stores random cycle time
+
+ if (dp.is_main_mem) {
+ double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
+ double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
+ delay_from_subarray_out_drv_to_out;
+ access_time = t_rcd + cas_latency;
+ }
+
+ double temp;
+
+ if (!dp.fully_assoc) {
+ temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
+ if (dp.is_dram) {
+ temp += bank.mat.delay_writeback; // temp stores random cycle time
+ }
+
+
+ temp = MAX(temp, bank.mat.r_predec->delay);
+ temp = MAX(temp, bank.mat.b_mux_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
+ } else {
+ ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
+ temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
+ + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
+
+ temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
+ temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
+ }
+
+ // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
+ if (g_ip->rpters_in_htree == false) {
+ temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
+ }
+ cycle_time = temp;
+
+ double delay_req_network = max_delay_before_row_decoder;
+ double delay_rep_network = delay_from_subarray_out_drv_to_out;
+ multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
+
+ if (dp.is_main_mem) {
+ multisubbank_interleave_cycle_time = htree_in_add->delay;
+ precharge_delay = htree_in_add->delay +
+ bank.htree_in_add->delay + bank.mat.delay_writeback +
+ bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
+ cycle_time = access_time + precharge_delay;
+ } else {
+ precharge_delay = 0;
}
+ double dram_array_availability = 0;
+ if (dp.is_dram) {
+ dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
+ }
- temp = MAX(temp, bank.mat.r_predec->delay);
- temp = MAX(temp, bank.mat.b_mux_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
- }
- else
- {
- ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
- temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
- + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
-
- temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
- temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
- }
-
- // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
- if (g_ip->rpters_in_htree == false)
- {
- temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
- }
- cycle_time = temp;
-
- double delay_req_network = max_delay_before_row_decoder;
- double delay_rep_network = delay_from_subarray_out_drv_to_out;
- multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
-
- if (dp.is_main_mem)
- {
- multisubbank_interleave_cycle_time = htree_in_add->delay;
- precharge_delay = htree_in_add->delay +
- bank.htree_in_add->delay + bank.mat.delay_writeback +
- bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
- cycle_time = access_time + precharge_delay;
- }
- else
- {
- precharge_delay = 0;
- }
-
- double dram_array_availability = 0;
- if (dp.is_dram)
- {
- dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
- }
-
- return outrisetime;
+ return outrisetime;
}
// note: currently, power numbers are for a bank of an array
-void UCA::compute_power_energy()
-{
- bank.compute_power_energy();
- power = bank.power;
-
- power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
- power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
- if (dp.fully_assoc || dp.pure_cam)
- power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
-
- power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
- htree_in_data->power.readOp.leakage +
- htree_out_data->power.readOp.leakage;
-
- power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
- htree_in_data->power.readOp.gate_leakage +
- htree_out_data->power.readOp.gate_leakage;
- if (dp.fully_assoc || dp.pure_cam)
- {
+void UCA::compute_power_energy() {
+ bank.compute_power_energy();
+ power = bank.power;
+
+ power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
+ power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
+ if (dp.fully_assoc || dp.pure_cam)
+ power_routing_to_bank.searchOp.dynamic =
+ htree_in_search->power.searchOp.dynamic +
+ htree_out_search->power.searchOp.dynamic;
+
+ power_routing_to_bank.readOp.leakage +=
+ htree_in_add->power.readOp.leakage +
+ htree_in_data->power.readOp.leakage +
+ htree_out_data->power.readOp.leakage;
+
+ power_routing_to_bank.readOp.gate_leakage +=
+ htree_in_add->power.readOp.gate_leakage +
+ htree_in_data->power.readOp.gate_leakage +
+ htree_out_data->power.readOp.gate_leakage;
+ if (dp.fully_assoc || dp.pure_cam) {
power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
- }
-
- power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
- power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
- power.readOp.leakage += power_routing_to_bank.readOp.leakage;
- power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
-
- // calculate total write energy per access
- power.writeOp.dynamic = power.readOp.dynamic
- - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
- + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
- - power_routing_to_bank.readOp.dynamic
- + power_routing_to_bank.writeOp.dynamic
- + bank.htree_in_data->power.readOp.dynamic
- - bank.htree_out_data->power.readOp.dynamic;
-
- if (dp.is_dram == false)
- {
- power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
- }
-
- dyn_read_energy_from_closed_page = power.readOp.dynamic;
- dyn_read_energy_from_open_page = power.readOp.dynamic -
- (bank.mat.r_predec->power.readOp.dynamic +
- bank.mat.power_row_decoders.readOp.dynamic +
- bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
- bank.mat.power_sa.readOp.dynamic +
- bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
-
- dyn_read_energy_remaining_words_in_burst =
- (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
- ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
- bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
- bank.htree_out_data->power.readOp.dynamic +
- power_routing_to_bank.readOp.dynamic);
- dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
- dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
-
- activate_energy = htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
- (bank.mat.r_predec->power.readOp.dynamic +
- bank.mat.power_row_decoders.readOp.dynamic +
- bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
- read_energy = (htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
- (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
- bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
- bank.htree_out_data->power.readOp.dynamic +
- htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
- write_energy = (htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
- htree_in_data->power.readOp.dynamic +
- bank.htree_in_data->power.readOp.dynamic +
- (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
- precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
- bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_closed_page =
- (bank.mat.r_predec->power.readOp.leakage +
- bank.mat.b_mux_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
- bank.mat.power_row_decoders.readOp.leakage +
- bank.mat.power_bit_mux_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
- bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_closed_page +=
- (bank.mat.r_predec->power.readOp.gate_leakage +
- bank.mat.b_mux_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
- bank.mat.power_row_decoders.readOp.gate_leakage +
- bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
- //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_open_page =
- (bank.mat.r_predec->power.readOp.leakage +
- bank.mat.b_mux_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
- bank.mat.power_row_decoders.readOp.leakage +
- bank.mat.power_bit_mux_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
- bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_open_page +=
- (bank.mat.r_predec->power.readOp.gate_leakage +
- bank.mat.b_mux_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
- bank.mat.power_row_decoders.readOp.gate_leakage +
- bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
- //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_request_and_reply_networks =
- power_routing_to_bank.readOp.leakage +
- bank.htree_in_add->power.readOp.leakage +
- bank.htree_in_data->power.readOp.leakage +
- bank.htree_out_data->power.readOp.leakage;
-
- leak_power_request_and_reply_networks +=
- power_routing_to_bank.readOp.gate_leakage +
- bank.htree_in_add->power.readOp.gate_leakage +
- bank.htree_in_data->power.readOp.gate_leakage +
- bank.htree_out_data->power.readOp.gate_leakage;
-
- if (dp.fully_assoc || dp.pure_cam)
- {
+ }
+
+ power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
+ power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
+ power.readOp.leakage += power_routing_to_bank.readOp.leakage;
+ power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
+
+ // calculate total write energy per access
+ power.writeOp.dynamic = power.readOp.dynamic
+ - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
+ - power_routing_to_bank.readOp.dynamic
+ + power_routing_to_bank.writeOp.dynamic
+ + bank.htree_in_data->power.readOp.dynamic
+ - bank.htree_out_data->power.readOp.dynamic;
+
+ if (dp.is_dram == false) {
+ power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ }
+
+ dyn_read_energy_from_closed_page = power.readOp.dynamic;
+ dyn_read_energy_from_open_page = power.readOp.dynamic -
+ (bank.mat.r_predec->power.readOp.dynamic +
+ bank.mat.power_row_decoders.readOp.dynamic +
+ bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
+ bank.mat.power_sa.readOp.dynamic +
+ bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
+
+ dyn_read_energy_remaining_words_in_burst =
+ (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
+ ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
+ bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
+ bank.htree_out_data->power.readOp.dynamic +
+ power_routing_to_bank.readOp.dynamic);
+ dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
+ dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
+
+ activate_energy = htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
+ (bank.mat.r_predec->power.readOp.dynamic +
+ bank.mat.power_row_decoders.readOp.dynamic +
+ bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
+ read_energy = (htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
+ (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
+ bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
+ bank.htree_out_data->power.readOp.dynamic +
+ htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
+ write_energy = (htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
+ htree_in_data->power.readOp.dynamic +
+ bank.htree_in_data->power.readOp.dynamic +
+ (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
+ precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
+ bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_closed_page =
+ (bank.mat.r_predec->power.readOp.leakage +
+ bank.mat.b_mux_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
+ bank.mat.power_row_decoders.readOp.leakage +
+ bank.mat.power_bit_mux_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
+ bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_closed_page +=
+ (bank.mat.r_predec->power.readOp.gate_leakage +
+ bank.mat.b_mux_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ bank.mat.power_row_decoders.readOp.gate_leakage +
+ bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
+ //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_open_page =
+ (bank.mat.r_predec->power.readOp.leakage +
+ bank.mat.b_mux_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
+ bank.mat.power_row_decoders.readOp.leakage +
+ bank.mat.power_bit_mux_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
+ bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_open_page +=
+ (bank.mat.r_predec->power.readOp.gate_leakage +
+ bank.mat.b_mux_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ bank.mat.power_row_decoders.readOp.gate_leakage +
+ bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
+ //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_request_and_reply_networks =
+ power_routing_to_bank.readOp.leakage +
+ bank.htree_in_add->power.readOp.leakage +
+ bank.htree_in_data->power.readOp.leakage +
+ bank.htree_out_data->power.readOp.leakage;
+
+ leak_power_request_and_reply_networks +=
+ power_routing_to_bank.readOp.gate_leakage +
+ bank.htree_in_add->power.readOp.gate_leakage +
+ bank.htree_in_data->power.readOp.gate_leakage +
+ bank.htree_out_data->power.readOp.gate_leakage;
+
+ if (dp.fully_assoc || dp.pure_cam) {
leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
- }
-
-
- if (dp.is_dram)
- { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
- refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
- bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
- refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
- refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
- refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
- refresh_power /= dp.dram_refresh_period;
- }
-
-
- if (dp.is_tag == false)
- {
- power.readOp.dynamic = dyn_read_energy_from_closed_page;
- power.writeOp.dynamic = dyn_read_energy_from_closed_page
- - dyn_read_energy_remaining_words_in_burst
- - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
- + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
- + (power_routing_to_bank.writeOp.dynamic -
- power_routing_to_bank.readOp.dynamic -
- bank.htree_out_data->power.readOp.dynamic +
- bank.htree_in_data->power.readOp.dynamic) *
- (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
+ }
- if (dp.is_dram == false)
- {
- power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+
+ // if DRAM, add contribution of power spent in row predecoder drivers,
+ // blocks and decoders to refresh power
+ if (dp.is_dram) {
+ refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
+ bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
+ refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
+ refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
+ refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ refresh_power /= dp.dram_refresh_period;
}
- }
-
- // if DRAM, add refresh power to total leakage
- if (dp.is_dram)
- {
- power.readOp.leakage += refresh_power;
- }
-
- // TODO: below should be avoided.
- /*if (dp.is_main_mem)
- {
- power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
- }*/
-
- assert(power.readOp.dynamic > 0);
- assert(power.writeOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
+
+
+ if (dp.is_tag == false) {
+ power.readOp.dynamic = dyn_read_energy_from_closed_page;
+ power.writeOp.dynamic = dyn_read_energy_from_closed_page
+ - dyn_read_energy_remaining_words_in_burst
+ - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
+ + (power_routing_to_bank.writeOp.dynamic -
+ power_routing_to_bank.readOp.dynamic -
+ bank.htree_out_data->power.readOp.dynamic +
+ bank.htree_in_data->power.readOp.dynamic) *
+ (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
+
+ if (dp.is_dram == false) {
+ power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ }
+ }
+
+ // if DRAM, add refresh power to total leakage
+ if (dp.is_dram) {
+ power.readOp.leakage += refresh_power;
+ }
+
+ // TODO: below should be avoided.
+ /*if (dp.is_main_mem)
+ {
+ power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
+ }*/
+
+ assert(power.readOp.dynamic > 0);
+ assert(power.writeOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
}
diff --git a/ext/mcpat/cacti/uca.h b/ext/mcpat/cacti/uca.h
index fdab14fc7..402035f9a 100755
--- a/ext/mcpat/cacti/uca.h
+++ b/ext/mcpat/cacti/uca.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -40,9 +41,8 @@
#include "htree2.h"
#include "parameter.h"
-class UCA : public Component
-{
- public:
+class UCA : public Component {
+public:
UCA(const DynamicParameter & dyn_p);
~UCA();
double compute_delays(double inrisetime); // returns outrisetime
@@ -66,7 +66,10 @@ class UCA : public Component
int num_do_b_bank;
int num_si_b_bank;
int num_so_b_bank;
- int RWP, ERP, EWP,SCHP;
+ int RWP;
+ int ERP;
+ int EWP;
+ int SCHP;
double area_all_dataramcells;
double dyn_read_energy_from_closed_page;
diff --git a/ext/mcpat/cacti/wire.cc b/ext/mcpat/cacti/wire.cc
index 742000c85..b7d9e34ce 100644
--- a/ext/mcpat/cacti/wire.cc
+++ b/ext/mcpat/cacti/wire.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -41,173 +42,173 @@ Wire::Wire(
enum Wire_placement wp,
double resistivity,
TechnologyParameter::DeviceType *dt
- ):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s),
- resistivity(resistivity), deviceType(dt)
-{
- wire_placement = wp;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- in_rise_time = 0;
- out_rise_time = 0;
- if (initialized != 1) {
- cout << "Wire not initialized. Initializing it with default values\n";
- Wire winit;
- }
- calculate_wire_stats();
- // change everything back to seconds, microns, and Joules
- repeater_spacing *= 1e6;
- wire_length *= 1e6;
- wire_width *= 1e6;
- wire_spacing *= 1e6;
- assert(wire_length > 0);
- assert(power.readOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
- assert(power.readOp.gate_leakage > 0);
+ ): wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s),
+ s_scale(s_s),
+ resistivity(resistivity), deviceType(dt) {
+ wire_placement = wp;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ in_rise_time = 0;
+ out_rise_time = 0;
+ if (initialized != 1) {
+ cout << "Wire not initialized. Initializing it with default values\n";
+ Wire winit;
+ }
+ calculate_wire_stats();
+ // change everything back to seconds, microns, and Joules
+ repeater_spacing *= 1e6;
+ wire_length *= 1e6;
+ wire_width *= 1e6;
+ wire_spacing *= 1e6;
+ assert(wire_length > 0);
+ assert(power.readOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
+ assert(power.readOp.gate_leakage > 0);
}
- // the following values are for peripheral global technology
- // specified in the input config file
- Component Wire::global;
- Component Wire::global_5;
- Component Wire::global_10;
- Component Wire::global_20;
- Component Wire::global_30;
- Component Wire::low_swing;
-
- int Wire::initialized;
- double Wire::wire_width_init;
- double Wire::wire_spacing_init;
-
-
-Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, TechnologyParameter::DeviceType *dt)
-{
- w_scale = w_s;
- s_scale = s_s;
- deviceType = dt;
- wire_placement = wp;
- resistivity = resis;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
- in_rise_time = 0;
- out_rise_time = 0;
-
- switch (wire_placement)
- {
- case outside_mat: wire_width = g_tp.wire_outside_mat.pitch; break;
- case inside_mat : wire_width = g_tp.wire_inside_mat.pitch; break;
- default: wire_width = g_tp.wire_local.pitch; break;
- }
-
- wire_spacing = wire_width;
-
- wire_width *= (w_scale * 1e-6/2) /* (m) */;
- wire_spacing *= (s_scale * 1e-6/2) /* (m) */;
-
- initialized = 1;
- init_wire();
- wire_width_init = wire_width;
- wire_spacing_init = wire_spacing;
-
- assert(power.readOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
- assert(power.readOp.gate_leakage > 0);
+// the following values are for peripheral global technology
+// specified in the input config file
+Component Wire::global;
+Component Wire::global_5;
+Component Wire::global_10;
+Component Wire::global_20;
+Component Wire::global_30;
+Component Wire::low_swing;
+
+int Wire::initialized;
+double Wire::wire_width_init;
+double Wire::wire_spacing_init;
+
+
+Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis,
+ TechnologyParameter::DeviceType *dt) {
+ w_scale = w_s;
+ s_scale = s_s;
+ deviceType = dt;
+ wire_placement = wp;
+ resistivity = resis;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ in_rise_time = 0;
+ out_rise_time = 0;
+
+ switch (wire_placement) {
+ case outside_mat:
+ wire_width = g_tp.wire_outside_mat.pitch;
+ break;
+ case inside_mat :
+ wire_width = g_tp.wire_inside_mat.pitch;
+ break;
+ default:
+ wire_width = g_tp.wire_local.pitch;
+ break;
+ }
+
+ wire_spacing = wire_width;
+
+ wire_width *= (w_scale * 1e-6 / 2) /* (m) */;
+ wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */;
+
+ initialized = 1;
+ init_wire();
+ wire_width_init = wire_width;
+ wire_spacing_init = wire_spacing;
+
+ assert(power.readOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
+ assert(power.readOp.gate_leakage > 0);
}
-Wire::~Wire()
-{
+Wire::~Wire() {
}
void
-Wire::calculate_wire_stats()
-{
-
- if (wire_placement == outside_mat) {
- wire_width = g_tp.wire_outside_mat.pitch;
- }
- else if (wire_placement == inside_mat) {
- wire_width = g_tp.wire_inside_mat.pitch;
- }
- else {
- wire_width = g_tp.wire_local.pitch;
- }
-
- wire_spacing = wire_width;
-
- wire_width *= (w_scale * 1e-6/2) /* (m) */;
- wire_spacing *= (s_scale * 1e-6/2) /* (m) */;
-
-
- if (wt != Low_swing) {
-
- // delay_optimal_wire();
-
- if (wt == Global) {
- delay = global.delay * wire_length;
- power.readOp.dynamic = global.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global.area.w;
- repeater_size = global.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_5) {
- delay = global_5.delay * wire_length;
- power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_5.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_5.area.w;
- repeater_size = global_5.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_10) {
- delay = global_10.delay * wire_length;
- power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_10.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_10.area.w;
- repeater_size = global_10.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_20) {
- delay = global_20.delay * wire_length;
- power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_20.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_20.area.w;
- repeater_size = global_20.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_30) {
- delay = global_30.delay * wire_length;
- power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_30.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_30.area.w;
- repeater_size = global_30.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- out_rise_time = delay*repeater_spacing/deviceType->Vth;
- }
- else if (wt == Low_swing) {
- low_swing_model ();
- repeater_spacing = wire_length;
- repeater_size = 1;
- }
- else {
- assert(0);
- }
+Wire::calculate_wire_stats() {
+
+ if (wire_placement == outside_mat) {
+ wire_width = g_tp.wire_outside_mat.pitch;
+ } else if (wire_placement == inside_mat) {
+ wire_width = g_tp.wire_inside_mat.pitch;
+ } else {
+ wire_width = g_tp.wire_local.pitch;
+ }
+
+ wire_spacing = wire_width;
+
+ wire_width *= (w_scale * 1e-6 / 2) /* (m) */;
+ wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */;
+
+
+ if (wt != Low_swing) {
+
+ // delay_optimal_wire();
+
+ if (wt == Global) {
+ delay = global.delay * wire_length;
+ power.readOp.dynamic = global.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global.area.w;
+ repeater_size = global.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ } else if (wt == Global_5) {
+ delay = global_5.delay * wire_length;
+ power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_5.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_5.area.w;
+ repeater_size = global_5.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ } else if (wt == Global_10) {
+ delay = global_10.delay * wire_length;
+ power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_10.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_10.area.w;
+ repeater_size = global_10.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ } else if (wt == Global_20) {
+ delay = global_20.delay * wire_length;
+ power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_20.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_20.area.w;
+ repeater_size = global_20.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ } else if (wt == Global_30) {
+ delay = global_30.delay * wire_length;
+ power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_30.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_30.area.w;
+ repeater_size = global_30.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ }
+ out_rise_time = delay * repeater_spacing / deviceType->Vth;
+ } else if (wt == Low_swing) {
+ low_swing_model ();
+ repeater_spacing = wire_length;
+ repeater_size = 1;
+ } else {
+ assert(0);
+ }
}
@@ -218,51 +219,55 @@ Wire::calculate_wire_stats()
* inverters connected in series (refer: CACTI 1 Technical report,
* section 6.1.3)
*/
- double
-Wire::signal_fall_time ()
-{
-
- /* rise time of inverter 1's output */
- double rt;
- /* fall time of inverter 2's output */
- double ft;
- double timeconst;
-
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(min_w_pmos, PCH, 1);
- rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
- return ft;
+double
+Wire::signal_fall_time () {
+
+ /* rise time of inverter 1's output */
+ double rt;
+ /* fall time of inverter 2's output */
+ double ft;
+ double timeconst;
+
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(min_w_pmos, PCH, 1);
+ rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, FALL) /
+ (deviceType->Vdd - deviceType->Vth);
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth;
+ return ft;
}
-double Wire::signal_rise_time ()
-{
-
- /* rise time of inverter 1's output */
- double ft;
- /* fall time of inverter 2's output */
- double rt;
- double timeconst;
-
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(min_w_pmos, PCH, 1);
- ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
- return ft; //sec
+double Wire::signal_rise_time () {
+
+ /* rise time of inverter 1's output */
+ double ft;
+ /* fall time of inverter 2's output */
+ double rt;
+ double timeconst;
+
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth;
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(min_w_pmos, PCH, 1);
+ ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, FALL) /
+ (deviceType->Vdd - deviceType->Vth);
+ return ft; //sec
}
@@ -281,111 +286,110 @@ double Wire::signal_rise_time ()
*
*/
-double Wire::wire_cap (double len /* in m */, bool call_from_outside)
-{
- //TODO: this should be consistent with the wire_res in technology file
- double sidewall, adj, tot_cap;
- double wire_height;
- double epsilon0 = 8.8542e-12;
- double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness;
+double Wire::wire_cap (double len /* in m */, bool call_from_outside) {
+ //TODO: this should be consistent with the wire_res in technology file
+ double sidewall, adj, tot_cap;
+ double wire_height;
+ double epsilon0 = 8.8542e-12;
+ double aspect_ratio;
+ double horiz_dielectric_constant;
+ double vert_dielectric_constant;
+ double miller_value;
+ double ild_thickness;
+
+ switch (wire_placement) {
+ case outside_mat: {
+ aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant;
+ miller_value = g_tp.wire_outside_mat.miller_value;
+ ild_thickness = g_tp.wire_outside_mat.ild_thickness;
+ break;
+ }
+ case inside_mat : {
+ aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant;
+ miller_value = g_tp.wire_inside_mat.miller_value;
+ ild_thickness = g_tp.wire_inside_mat.ild_thickness;
+ break;
+ }
+ default: {
+ aspect_ratio = g_tp.wire_local.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant;
+ miller_value = g_tp.wire_local.miller_value;
+ ild_thickness = g_tp.wire_local.ild_thickness;
+ break;
+ }
+ }
- switch (wire_placement)
- {
- case outside_mat:
- {
- aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant;
- miller_value = g_tp.wire_outside_mat.miller_value;
- ild_thickness = g_tp.wire_outside_mat.ild_thickness;
- break;
- }
- case inside_mat :
- {
- aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant;
- miller_value = g_tp.wire_inside_mat.miller_value;
- ild_thickness = g_tp.wire_inside_mat.ild_thickness;
- break;
- }
- default:
- {
- aspect_ratio = g_tp.wire_local.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant;
- miller_value = g_tp.wire_local.miller_value;
- ild_thickness = g_tp.wire_local.ild_thickness;
- break;
- }
- }
-
- if (call_from_outside)
- {
- wire_width *= 1e-6;
- wire_spacing *= 1e-6;
- }
- wire_height = wire_width/w_scale*aspect_ratio;
- /*
- * assuming height does not change. wire_width = width_original*w_scale
- * So wire_height does not change as wire width increases
- */
+ if (call_from_outside) {
+ wire_width *= 1e-6;
+ wire_spacing *= 1e-6;
+ }
+ wire_height = wire_width / w_scale * aspect_ratio;
+ /*
+ * assuming height does not change. wire_width = width_original*w_scale
+ * So wire_height does not change as wire width increases
+ */
// capacitance between wires in the same level
// sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
// * epsilon0;
- sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
- * epsilon0;
+ sidewall = miller_value * horiz_dielectric_constant *
+ (wire_height / wire_spacing)
+ * epsilon0;
- // capacitance between wires in adjacent levels
- //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0;
- //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
+ // capacitance between wires in adjacent levels
+ //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0;
+ //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
- adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
- //Change ild_thickness from micron to M
+ adj = miller_value * vert_dielectric_constant * wire_width /
+ (ild_thickness * 1e-6) * epsilon0;
+ //Change ild_thickness from micron to M
- //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m
- tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m
+ //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m
+ tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m
- if (call_from_outside)
- {
- wire_width *= 1e6;
- wire_spacing *= 1e6;
- }
- return (tot_cap*len); // (F)
+ if (call_from_outside) {
+ wire_width *= 1e6;
+ wire_spacing *= 1e6;
+ }
+ return (tot_cap*len); // (F)
}
- double
-Wire::wire_res (double len /*(in m)*/)
-{
-
- double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0;
- //TODO: this should be consistent with the wire_res in technology file
- //The whole computation should be consistent with the wire_res in technology.cc too!
-
- switch (wire_placement)
- {
- case outside_mat:
- {
- aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
- break;
- }
- case inside_mat :
- {
- aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
- break;
- }
- default:
- {
- aspect_ratio = g_tp.wire_local.aspect_ratio;
- break;
- }
- }
- return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)*
- (wire_width-2*barrier_thickness)));
+double
+Wire::wire_res (double len /*(in m)*/) {
+
+ double aspect_ratio;
+ double alpha_scatter = 1.05;
+ double dishing_thickness = 0;
+ double barrier_thickness = 0;
+ //TODO: this should be consistent with the wire_res in technology file
+ //The whole computation should be consistent with the wire_res in technology.cc too!
+
+ switch (wire_placement) {
+ case outside_mat: {
+ aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
+ break;
+ }
+ case inside_mat : {
+ aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
+ break;
+ }
+ default: {
+ aspect_ratio = g_tp.wire_local.aspect_ratio;
+ break;
+ }
+ }
+ return (alpha_scatter * resistivity * 1e-6 * len /
+ ((aspect_ratio*wire_width / w_scale - dishing_thickness -
+ barrier_thickness)*
+ (wire_width - 2*barrier_thickness)));
}
/*
@@ -395,438 +399,456 @@ Wire::wire_res (double len /*(in m)*/)
* low swing nmos delay, and the wire delay
* (ref: Technical report 6)
*/
- void
-Wire::low_swing_model()
-{
- double len = wire_length;
- double beta = pmos_to_nmos_sz_ratio();
-
-
- double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time;
-
- /* Final nmos low swing driver size calculation:
- * Try to size the driver such that the delay
- * is less than 8FO4.
- * If the driver size is greater than
- * the max allowable size, assume max size for the driver.
- * In either case, recalculate the delay using
- * the final driver size assuming slow input with
- * finite rise time instead of ideal step input
- *
- * (ref: Technical report 6)
- */
- double cwire = wire_cap(len); /* load capacitance */
- double rwire = wire_res(len);
+void
+Wire::low_swing_model() {
+ double len = wire_length;
+ double beta = pmos_to_nmos_sz_ratio();
+
+
+ double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time;
+
+ /* Final nmos low swing driver size calculation:
+ * Try to size the driver such that the delay
+ * is less than 8FO4.
+ * If the driver size is greater than
+ * the max allowable size, assume max size for the driver.
+ * In either case, recalculate the delay using
+ * the final driver size assuming slow input with
+ * finite rise time instead of ideal step input
+ *
+ * (ref: Technical report 6)
+ */
+ double cwire = wire_cap(len); /* load capacitance */
+ double rwire = wire_res(len);
#define RES_ADJ (8.6) // Increase in resistance due to low driving vol.
- double driver_res = (-8*g_tp.FO4/(log(0.5) * cwire))/RES_ADJ;
- double nsize = R_to_w(driver_res, NCH);
-
- nsize = MIN(nsize, g_tp.max_w_nmos_);
- nsize = MAX(nsize, g_tp.min_w_nmos_);
-
- if(rwire*cwire > 8*g_tp.FO4)
- {
- nsize = g_tp.max_w_nmos_;
- }
-
- // size the inverter appropriately to minimize the transmitter delay
- // Note - In order to minimize leakage, we are not adding a set of inverters to
- // bring down delay. Instead, we are sizing the single gate
- // based on the logical effort.
- double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0)
- + gate_C(2*min_w_pmos, 0)));
- double req_cin = ((2+beta/1+beta)*gate_C(nsize, 0))/st_eff;
- double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0));
- inv_size = MAX(inv_size, 1);
-
- /* nand gate delay */
- double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1));
- double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(inv_size*g_tp.min_w_nmos_, 0) +
- gate_C(inv_size*min_w_pmos, 0);
-
- double timeconst = res_eq * cap_eq;
-
- delay = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- deviceType->Vth/deviceType->Vdd, RISE);
- double temp_power = cap_eq*deviceType->Vdd*deviceType->Vdd;
-
- inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */
-
- /* Inverter delay:
- * The load capacitance of this inv depends on
- * the gate capacitance of the final stage nmos
- * transistor which in turn depends on nsize
- */
- res_eq = tr_R_on(inv_size*min_w_pmos, PCH, 1);
- cap_eq = drain_C_(inv_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(inv_size*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(nsize, 0);
- timeconst = res_eq * cap_eq;
-
- delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- deviceType->Vth/deviceType->Vdd, FALL);
- temp_power += cap_eq*deviceType->Vdd*deviceType->Vdd;
-
-
- transmitter.delay = delay;
- transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. model*/
- transmitter.power.readOp.leakage = deviceType->Vdd *
- (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
- 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
-
- transmitter.power.readOp.gate_leakage = deviceType->Vdd *
- (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
- 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
-
- inputrise = delay / deviceType->Vth;
-
- /* nmos delay + wire delay */
- cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2 +
- nsense * sense_amp_input_cap(); //+receiver cap
- /*
- * NOTE: nmos is used as both pull up and pull down transistor
- * in the transmitter. This is because for low voltage swing, drive
- * resistance of nmos is less than pmos
- * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency)
- */
- timeconst = (tr_R_on(nsize, NCH, 1)*RES_ADJ) * (cwire +
- drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2) +
- rwire*cwire/2 +
- (tr_R_on(nsize, NCH, 1)*RES_ADJ + rwire) *
- nsense * sense_amp_input_cap();
-
- /*
- * since we are pre-equalizing and overdriving the low
- * swing wires, the net time constant is less
- * than the actual value
- */
- delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0);
+ double driver_res = (-8 * g_tp.FO4 / (log(0.5) * cwire)) / RES_ADJ;
+ double nsize = R_to_w(driver_res, NCH);
+
+ nsize = MIN(nsize, g_tp.max_w_nmos_);
+ nsize = MAX(nsize, g_tp.min_w_nmos_);
+
+ if (rwire*cwire > 8*g_tp.FO4) {
+ nsize = g_tp.max_w_nmos_;
+ }
+
+ // size the inverter appropriately to minimize the transmitter delay
+ // Note - In order to minimize leakage, we are not adding a set of inverters to
+ // bring down delay. Instead, we are sizing the single gate
+ // based on the logical effort.
+ double st_eff = sqrt((2 + beta / 1 + beta) * gate_C(nsize, 0) /
+ (gate_C(2 * g_tp.min_w_nmos_, 0)
+ + gate_C(2 * min_w_pmos, 0)));
+ double req_cin = ((2 + beta / 1 + beta) * gate_C(nsize, 0)) / st_eff;
+ double inv_size = req_cin / (gate_C(min_w_pmos, 0) +
+ gate_C(g_tp.min_w_nmos_, 0));
+ inv_size = MAX(inv_size, 1);
+
+ /* nand gate delay */
+ double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1));
+ double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(2 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(inv_size * g_tp.min_w_nmos_, 0) +
+ gate_C(inv_size * min_w_pmos, 0);
+
+ double timeconst = res_eq * cap_eq;
+
+ delay = horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, RISE);
+ double temp_power = cap_eq * deviceType->Vdd * deviceType->Vdd;
+
+ inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */
+
+ /* Inverter delay:
+ * The load capacitance of this inv depends on
+ * the gate capacitance of the final stage nmos
+ * transistor which in turn depends on nsize
+ */
+ res_eq = tr_R_on(inv_size * min_w_pmos, PCH, 1);
+ cap_eq = drain_C_(inv_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(inv_size * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(nsize, 0);
+ timeconst = res_eq * cap_eq;
+
+ delay += horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, FALL);
+ temp_power += cap_eq * deviceType->Vdd * deviceType->Vdd;
+
+
+ transmitter.delay = delay;
+ /* since it is a diff. model*/
+ transmitter.power.readOp.dynamic = temp_power * 2;
+ transmitter.power.readOp.leakage = deviceType->Vdd *
+ (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
+ 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
+
+ transmitter.power.readOp.gate_leakage = deviceType->Vdd *
+ (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
+ 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
+
+ inputrise = delay / deviceType->Vth;
+
+ /* nmos delay + wire delay */
+ cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2 +
+ nsense * sense_amp_input_cap(); //+receiver cap
+ /*
+ * NOTE: nmos is used as both pull up and pull down transistor
+ * in the transmitter. This is because for low voltage swing, drive
+ * resistance of nmos is less than pmos
+ * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency)
+ */
+ timeconst = (tr_R_on(nsize, NCH, 1) * RES_ADJ) * (cwire +
+ drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2) +
+ rwire * cwire / 2 +
+ (tr_R_on(nsize, NCH, 1) * RES_ADJ + rwire) *
+ nsense * sense_amp_input_cap();
+
+ /*
+ * since we are pre-equalizing and overdriving the low
+ * swing wires, the net time constant is less
+ * than the actual value
+ */
+ delay += horowitz(inputrise, timeconst, deviceType->Vth /
+ deviceType->Vdd, .25, 0);
#define VOL_SWING .1
- temp_power += cap_eq*VOL_SWING*.400; /* .4v is the over drive voltage */
- temp_power *= 2; /* differential wire */
-
- l_wire.delay = delay - transmitter.delay;
- l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic;
- l_wire.power.readOp.leakage = deviceType->Vdd*
- (4* cmos_Isub_leakage(nsize, 0, 1, nmos));
-
- l_wire.power.readOp.gate_leakage = deviceType->Vdd*
- (4* cmos_Ig_leakage(nsize, 0, 1, nmos));
-
- //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth;
-
- delay += g_tp.sense_delay;
-
- sense_amp.delay = g_tp.sense_delay;
- out_rise_time = g_tp.sense_delay/(deviceType->Vth);
- sense_amp.power.readOp.dynamic = g_tp.sense_dy_power;
- sense_amp.power.readOp.leakage = 0; //FIXME
- sense_amp.power.readOp.gate_leakage = 0;
-
- power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic;
- power.readOp.leakage = transmitter.power.readOp.leakage +
- l_wire.power.readOp.leakage +
- sense_amp.power.readOp.leakage;
- power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage +
- l_wire.power.readOp.gate_leakage +
- sense_amp.power.readOp.gate_leakage;
+ temp_power += cap_eq * VOL_SWING * .400; /* .4v is the over drive voltage */
+ temp_power *= 2; /* differential wire */
+
+ l_wire.delay = delay - transmitter.delay;
+ l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic;
+ l_wire.power.readOp.leakage = deviceType->Vdd *
+ (4 * cmos_Isub_leakage(nsize, 0, 1, nmos));
+
+ l_wire.power.readOp.gate_leakage = deviceType->Vdd *
+ (4 * cmos_Ig_leakage(nsize, 0, 1, nmos));
+
+ //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
+ // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth;
+
+ delay += g_tp.sense_delay;
+
+ sense_amp.delay = g_tp.sense_delay;
+ out_rise_time = g_tp.sense_delay / (deviceType->Vth);
+ sense_amp.power.readOp.dynamic = g_tp.sense_dy_power;
+ sense_amp.power.readOp.leakage = 0; //FIXME
+ sense_amp.power.readOp.gate_leakage = 0;
+
+ power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic;
+ power.readOp.leakage = transmitter.power.readOp.leakage +
+ l_wire.power.readOp.leakage +
+ sense_amp.power.readOp.leakage;
+ power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage +
+ l_wire.power.readOp.gate_leakage +
+ sense_amp.power.readOp.gate_leakage;
}
- double
-Wire::sense_amp_input_cap()
-{
- return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def);
+double
+Wire::sense_amp_input_cap() {
+ return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def);
}
-void Wire::delay_optimal_wire ()
-{
- double len = wire_length;
- //double min_wire_width = wire_width; //m
- double beta = pmos_to_nmos_sz_ratio();
- double switching = 0; // switching energy
- double short_ckt = 0; // short-circuit energy
- double tc = 0; // time constant
- // input cap of min sized driver
- double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0);
+void Wire::delay_optimal_wire () {
+ double len = wire_length;
+ //double min_wire_width = wire_width; //m
+ double beta = pmos_to_nmos_sz_ratio();
+ double switching = 0; // switching energy
+ double short_ckt = 0; // short-circuit energy
+ double tc = 0; // time constant
+ // input cap of min sized driver
+ double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0);
- // output parasitic capacitance of
- // the min. sized driver
- double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
- // drive resistance
- double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
- tr_R_on(min_w_pmos, PCH, 1))/2;
- double wr = wire_res(len); //ohm
+ // output parasitic capacitance of
+ // the min. sized driver
+ double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
+ // drive resistance
+ double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
+ tr_R_on(min_w_pmos, PCH, 1)) / 2;
+ double wr = wire_res(len); //ohm
- // wire cap /m
- double wc = wire_cap(len);
+ // wire cap /m
+ double wc = wire_cap(len);
- // size the repeater such that the delay of the wire is minimum
- double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel
+ // size the repeater such that the delay of the wire is minimum
+ // len will cancel
+ double repeater_scaling = sqrt(out_res * wc / (wr * input_cap));
- // calc the optimum spacing between the repeaters (m)
+ // calc the optimum spacing between the repeaters (m)
- repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap)/
- ((wr/len)*(wc/len)));
- repeater_size = repeater_scaling;
+ repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap) /
+ ((wr / len) * (wc / len)));
+ repeater_size = repeater_scaling;
- switching = (repeater_scaling * (input_cap + out_cap) +
- repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
+ switching = (repeater_scaling * (input_cap + out_cap) +
+ repeater_spacing * (wc / len)) * deviceType->Vdd *
+ deviceType->Vdd;
- tc = out_res * (input_cap + out_cap) +
- out_res * wc/len * repeater_spacing/repeater_scaling +
- wr/len * repeater_spacing * input_cap * repeater_scaling +
- 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing;
+ tc = out_res * (input_cap + out_cap) +
+ out_res * wc / len * repeater_spacing / repeater_scaling +
+ wr / len * repeater_spacing * input_cap * repeater_scaling +
+ 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing;
- delay = 0.693 * tc * len/repeater_spacing;
+ delay = 0.693 * tc * len / repeater_spacing;
#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */
- short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
- repeater_scaling * tc;
-
- area.set_area((len/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_scaling,
- g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def));
- power.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt));
- power.readOp.leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
- power.readOp.gate_leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
+ short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
+ repeater_scaling * tc;
+
+ area.set_area((len / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_scaling,
+ g_tp.min_w_nmos_ * repeater_scaling,
+ g_tp.cell_h_def));
+ power.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt));
+ power.readOp.leakage = ((len / repeater_spacing) *
+ deviceType->Vdd *
+ cmos_Isub_leakage(g_tp.min_w_nmos_ *
+ repeater_scaling, beta *
+ g_tp.min_w_nmos_ *
+ repeater_scaling, 1, inv));
+ power.readOp.gate_leakage = ((len / repeater_spacing) *
+ deviceType->Vdd *
+ cmos_Ig_leakage(g_tp.min_w_nmos_ *
+ repeater_scaling, beta *
+ g_tp.min_w_nmos_ *
+ repeater_scaling, 1, inv));
}
// calculate power/delay values for wires with suboptimal repeater sizing/spacing
void
-Wire::init_wire(){
- wire_length = 1;
- delay_optimal_wire();
+Wire::init_wire() {
+ wire_length = 1;
+ delay_optimal_wire();
double sp, si;
- powerDef pow;
- si = repeater_size;
- sp = repeater_spacing;
- sp *= 1e6; // in microns
-
- double i, j, del;
- repeated_wire.push_back(Component());
- for (j=sp; j < 4*sp; j+=100) {
- for (i = si; i > 1; i--) {
- pow = wire_model(j*1e-6, i, &del);
- if (j == sp && i == si) {
- global.delay = del;
- global.power = pow;
- global.area.h = si;
- global.area.w = sp*1e-6; // m
- }
+ powerDef pow;
+ si = repeater_size;
+ sp = repeater_spacing;
+ sp *= 1e6; // in microns
+
+ double i, j, del;
+ repeated_wire.push_back(Component());
+ for (j = sp; j < 4*sp; j += 100) {
+ for (i = si; i > 1; i--) {
+ pow = wire_model(j * 1e-6, i, &del);
+ if (j == sp && i == si) {
+ global.delay = del;
+ global.power = pow;
+ global.area.h = si;
+ global.area.w = sp * 1e-6; // m
+ }
// cout << "Repeater size - "<< i <<
// " Repeater spacing - " << j <<
// " Delay - " << del <<
// " PowerD - " << pow.readOp.dynamic <<
// " PowerL - " << pow.readOp.leakage <<endl;
- repeated_wire.back().delay = del;
- repeated_wire.back().power.readOp = pow.readOp;
- repeated_wire.back().area.w = j*1e-6; //m
- repeated_wire.back().area.h = i;
- repeated_wire.push_back(Component());
+ repeated_wire.back().delay = del;
+ repeated_wire.back().power.readOp = pow.readOp;
+ repeated_wire.back().area.w = j * 1e-6; //m
+ repeated_wire.back().area.h = i;
+ repeated_wire.push_back(Component());
+ }
}
- }
- repeated_wire.pop_back();
- update_fullswing();
- Wire *l_wire = new Wire(Low_swing, 0.001/* 1 mm*/, 1);
- low_swing.delay = l_wire->delay;
- low_swing.power = l_wire->power;
- delete l_wire;
+ repeated_wire.pop_back();
+ update_fullswing();
+ Wire *l_wire = new Wire(Low_swing, 0.001/* 1 mm*/, 1);
+ low_swing.delay = l_wire->delay;
+ low_swing.power = l_wire->power;
+ delete l_wire;
}
-void Wire::update_fullswing()
-{
-
- list<Component>::iterator citer;
- double del[4];
- del[3] = this->global.delay + this->global.delay*.3;
- del[2] = global.delay + global.delay*.2;
- del[1] = global.delay + global.delay*.1;
- del[0] = global.delay + global.delay*.05;
- double threshold;
- double ncost;
- double cost;
- int i = 4;
- while (i>0) {
- threshold = del[i-1];
- cost = BIGNUM;
- for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++)
- {
- if (citer->delay > threshold) {
- citer = repeated_wire.erase(citer);
- citer --;
- }
- else {
- ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic +
- citer->power.readOp.leakage/global.power.readOp.leakage;
- if(ncost < cost)
- {
- cost = ncost;
- if (i == 4) {
- global_30.delay = citer->delay;
- global_30.power = citer->power;
- global_30.area = citer->area;
- }
- else if (i==3) {
- global_20.delay = citer->delay;
- global_20.power = citer->power;
- global_20.area = citer->area;
- }
- else if(i==2) {
- global_10.delay = citer->delay;
- global_10.power = citer->power;
- global_10.area = citer->area;
- }
- else if(i==1) {
- global_5.delay = citer->delay;
- global_5.power = citer->power;
- global_5.area = citer->area;
- }
+void Wire::update_fullswing() {
+
+ list<Component>::iterator citer;
+ double del[4];
+ del[3] = this->global.delay + this->global.delay * .3;
+ del[2] = global.delay + global.delay * .2;
+ del[1] = global.delay + global.delay * .1;
+ del[0] = global.delay + global.delay * .05;
+ double threshold;
+ double ncost;
+ double cost;
+ int i = 4;
+ while (i > 0) {
+ threshold = del[i-1];
+ cost = BIGNUM;
+ for (citer = repeated_wire.begin(); citer != repeated_wire.end();
+ citer++) {
+ if (citer->delay > threshold) {
+ citer = repeated_wire.erase(citer);
+ citer --;
+ } else {
+ ncost = citer->power.readOp.dynamic /
+ global.power.readOp.dynamic +
+ citer->power.readOp.leakage / global.power.readOp.leakage;
+ if (ncost < cost) {
+ cost = ncost;
+ if (i == 4) {
+ global_30.delay = citer->delay;
+ global_30.power = citer->power;
+ global_30.area = citer->area;
+ } else if (i == 3) {
+ global_20.delay = citer->delay;
+ global_20.power = citer->power;
+ global_20.area = citer->area;
+ } else if (i == 2) {
+ global_10.delay = citer->delay;
+ global_10.power = citer->power;
+ global_10.area = citer->area;
+ } else if (i == 1) {
+ global_5.delay = citer->delay;
+ global_5.power = citer->power;
+ global_5.area = citer->area;
+ }
+ }
+ }
}
- }
+ i--;
}
- i--;
- }
}
-powerDef Wire::wire_model (double space, double size, double *delay)
-{
- powerDef ptemp;
- double len = 1;
- //double min_wire_width = wire_width; //m
- double beta = pmos_to_nmos_sz_ratio();
- // switching energy
- double switching = 0;
- // short-circuit energy
- double short_ckt = 0;
- // time constant
- double tc = 0;
- // input cap of min sized driver
- double input_cap = gate_C (g_tp.min_w_nmos_ +
- min_w_pmos, 0);
-
- // output parasitic capacitance of
- // the min. sized driver
- double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
- // drive resistance
- double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
- tr_R_on(min_w_pmos, PCH, 1))/2;
- double wr = wire_res(len); //ohm
-
- // wire cap /m
- double wc = wire_cap(len);
-
- repeater_spacing = space;
- repeater_size = size;
-
- switching = (repeater_size * (input_cap + out_cap) +
- repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
-
- tc = out_res * (input_cap + out_cap) +
- out_res * wc/len * repeater_spacing/repeater_size +
- wr/len * repeater_spacing * out_cap * repeater_size +
- 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing;
-
- *delay = 0.693 * tc * len/repeater_spacing;
+powerDef Wire::wire_model (double space, double size, double *delay) {
+ powerDef ptemp;
+ double len = 1;
+ //double min_wire_width = wire_width; //m
+ double beta = pmos_to_nmos_sz_ratio();
+ // switching energy
+ double switching = 0;
+ // short-circuit energy
+ double short_ckt = 0;
+ // time constant
+ double tc = 0;
+ // input cap of min sized driver
+ double input_cap = gate_C (g_tp.min_w_nmos_ +
+ min_w_pmos, 0);
+
+ // output parasitic capacitance of
+ // the min. sized driver
+ double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
+ // drive resistance
+ double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
+ tr_R_on(min_w_pmos, PCH, 1)) / 2;
+ double wr = wire_res(len); //ohm
+
+ // wire cap /m
+ double wc = wire_cap(len);
+
+ repeater_spacing = space;
+ repeater_size = size;
+
+ switching = (repeater_size * (input_cap + out_cap) +
+ repeater_spacing * (wc / len)) * deviceType->Vdd *
+ deviceType->Vdd;
+
+ tc = out_res * (input_cap + out_cap) +
+ out_res * wc / len * repeater_spacing / repeater_size +
+ wr / len * repeater_spacing * out_cap * repeater_size +
+ 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing;
+
+ *delay = 0.693 * tc * len / repeater_spacing;
#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */
- short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
- repeater_size * tc;
-
- ptemp.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt));
- ptemp.readOp.leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
-
- ptemp.readOp.gate_leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
-
- return ptemp;
+ short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
+ repeater_size * tc;
+
+ ptemp.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt));
+ ptemp.readOp.leakage = ((len / repeater_spacing) *
+ deviceType->Vdd *
+ cmos_Isub_leakage(g_tp.min_w_nmos_ *
+ repeater_size, beta *
+ g_tp.min_w_nmos_ *
+ repeater_size, 1, inv));
+
+ ptemp.readOp.gate_leakage = ((len / repeater_spacing) *
+ deviceType->Vdd *
+ cmos_Ig_leakage(g_tp.min_w_nmos_ *
+ repeater_size, beta *
+ g_tp.min_w_nmos_ *
+ repeater_size, 1, inv));
+
+ return ptemp;
}
void
-Wire::print_wire()
-{
-
- cout << "\nWire Properties:\n\n";
- cout << " Delay Optimal\n\tRepeater size - "<< global.area.h <<
- " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global.delay*1e6 << " (ns/mm)"
- " \n\tPowerD - " << global.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
-
- cout << " 5% Overhead\n\tRepeater size - "<< global_5.area.h <<
- " \n\tRepeater spacing - " << global_5.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global_5.delay *1e6<< " (ns/mm)"
- " \n\tPowerD - " << global_5.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global_5.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global_5.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
- cout << " 10% Overhead\n\tRepeater size - "<< global_10.area.h <<
- " \n\tRepeater spacing - " << global_10.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global_10.delay *1e6<< " (ns/mm)"
- " \n\tPowerD - " << global_10.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global_10.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global_10.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
- cout << " 20% Overhead\n\tRepeater size - "<< global_20.area.h <<
- " \n\tRepeater spacing - " << global_20.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global_20.delay *1e6<< " (ns/mm)"
- " \n\tPowerD - " << global_20.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global_20.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global_20.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
- cout << " 30% Overhead\n\tRepeater size - "<< global_30.area.h <<
- " \n\tRepeater spacing - " << global_30.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global_30.delay *1e6<< " (ns/mm)"
- " \n\tPowerD - " << global_30.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global_30.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global_30.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
- cout << " Low-swing wire (1 mm) - Note: Unlike repeated wires, \n\tdelay and power "
- "values of low-swing wires do not\n\thave a linear relationship with length." <<
- " \n\tdelay - " << low_swing.delay *1e9<< " (ns)"
- " \n\tpowerD - " << low_swing.power.readOp.dynamic *1e9<< " (nJ)"
- " \n\tPowerL - " << low_swing.power.readOp.leakage << " (mW)"
- " \n\tPowerLgate - " << low_swing.power.readOp.gate_leakage << " (mW)\n";
- cout << "\tWire width - " <<wire_width_init * 2 /* differential */<< " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init * 2 /* differential */<< " microns\n";
- cout <<endl;
- cout <<endl;
+Wire::print_wire() {
+
+ cout << "\nWire Properties:\n\n";
+ cout << " Delay Optimal\n\tRepeater size - " << global.area.h <<
+ " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global.delay*1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+
+ cout << " 5% Overhead\n\tRepeater size - " << global_5.area.h <<
+ " \n\tRepeater spacing - " << global_5.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_5.delay *1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global_5.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global_5.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_5.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+ cout << " 10% Overhead\n\tRepeater size - " << global_10.area.h <<
+ " \n\tRepeater spacing - " << global_10.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_10.delay *1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global_10.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global_10.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_10.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+ cout << " 20% Overhead\n\tRepeater size - " << global_20.area.h <<
+ " \n\tRepeater spacing - " << global_20.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_20.delay *1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global_20.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global_20.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_20.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+ cout << " 30% Overhead\n\tRepeater size - " << global_30.area.h <<
+ " \n\tRepeater spacing - " << global_30.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_30.delay *1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global_30.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global_30.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_30.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+ cout << " Low-swing wire (1 mm) - Note: Unlike repeated wires, \n\t" <<
+ "delay and power values of low-swing wires do not\n\t" <<
+ "have a linear relationship with length." <<
+ " \n\tdelay - " << low_swing.delay *1e9 << " (ns)"
+ " \n\tpowerD - " << low_swing.power.readOp.dynamic *1e9 << " (nJ)"
+ " \n\tPowerL - " << low_swing.power.readOp.leakage << " (mW)"
+ " \n\tPowerLgate - " << low_swing.power.readOp.gate_leakage <<
+ " (mW)\n";
+ cout << "\tWire width - " << wire_width_init * 2 /* differential */ <<
+ " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init * 2 /* differential */ <<
+ " microns\n";
+ cout << endl;
+ cout << endl;
}
diff --git a/ext/mcpat/cacti/wire.h b/ext/mcpat/cacti/wire.h
index 51d55afff..906030dde 100644
--- a/ext/mcpat/cacti/wire.h
+++ b/ext/mcpat/cacti/wire.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -43,9 +44,8 @@
#include "component.h"
#include "parameter.h"
-class Wire : public Component
-{
- public:
+class Wire : public Component {
+public:
Wire(enum Wire_type wire_model, double len /* in u*/,
int nsense = 1/* no. of sense amps connected to the low-swing wire */,
double width_scaling = 1,
@@ -56,16 +56,16 @@ class Wire : public Component
~Wire();
Wire( double width_scaling = 1,
- double spacing_scaling = 1,
- enum Wire_placement wire_placement = outside_mat,
- double resistivity = CU_RESISTIVITY,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
- ); // should be used only once for initializing static members
+ double spacing_scaling = 1,
+ enum Wire_placement wire_placement = outside_mat,
+ double resistivity = CU_RESISTIVITY,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
+ ); // should be used only once for initializing static members
void init_wire();
void calculate_wire_stats();
void delay_optimal_wire();
- double wire_cap(double len, bool call_from_outside=false);
+ double wire_cap(double len, bool call_from_outside = false);
double wire_res(double len);
void low_swing_model();
double signal_fall_time();
@@ -81,9 +81,8 @@ class Wire : public Component
double wire_length;
double in_rise_time, out_rise_time;
- void set_in_rise_time(double rt)
- {
- in_rise_time = rt;
+ void set_in_rise_time(double rt) {
+ in_rise_time = rt;
}
static Component global;
static Component global_5;
@@ -95,10 +94,10 @@ class Wire : public Component
static double wire_spacing_init;
void print_wire();
- private:
+private:
int nsense; // no. of sense amps connected to a low-swing wire if it
- // is broadcasting data to multiple destinations
+ // is broadcasting data to multiple destinations
// width and spacing scaling factor can be used
// to model low level wires or special
// fat wires