summaryrefslogtreecommitdiff
path: root/ext/mcpat/cacti
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mcpat/cacti')
-rw-r--r--ext/mcpat/cacti/README94
-rw-r--r--ext/mcpat/cacti/Ucache.cc916
-rw-r--r--ext/mcpat/cacti/Ucache.h115
-rw-r--r--ext/mcpat/cacti/arbiter.cc130
-rw-r--r--ext/mcpat/cacti/arbiter.h79
-rw-r--r--ext/mcpat/cacti/area.cc47
-rw-r--r--ext/mcpat/cacti/area.h71
-rwxr-xr-xext/mcpat/cacti/bank.cc198
-rwxr-xr-xext/mcpat/cacti/bank.h69
-rw-r--r--ext/mcpat/cacti/basic_circuit.cc829
-rw-r--r--ext/mcpat/cacti/basic_circuit.h248
-rwxr-xr-xext/mcpat/cacti/batch_tests41
-rwxr-xr-xext/mcpat/cacti/cache.cfg175
-rw-r--r--ext/mcpat/cacti/cacti.i8
-rw-r--r--ext/mcpat/cacti/cacti.mk51
-rw-r--r--ext/mcpat/cacti/cacti_interface.cc173
-rw-r--r--ext/mcpat/cacti/cacti_interface.h633
-rw-r--r--ext/mcpat/cacti/component.cc236
-rw-r--r--ext/mcpat/cacti/component.h84
-rw-r--r--ext/mcpat/cacti/const.h270
-rwxr-xr-xext/mcpat/cacti/contention.dat126
-rw-r--r--ext/mcpat/cacti/crossbar.cc161
-rw-r--r--ext/mcpat/cacti/crossbar.h85
-rw-r--r--ext/mcpat/cacti/decoder.cc1577
-rw-r--r--ext/mcpat/cacti/decoder.h247
-rw-r--r--ext/mcpat/cacti/htree2.cc641
-rw-r--r--ext/mcpat/cacti/htree2.h97
-rw-r--r--ext/mcpat/cacti/io.cc2350
-rw-r--r--ext/mcpat/cacti/io.h44
-rw-r--r--ext/mcpat/cacti/main.cc191
-rw-r--r--ext/mcpat/cacti/makefile28
-rwxr-xr-xext/mcpat/cacti/mat.cc1748
-rwxr-xr-xext/mcpat/cacti/mat.h148
-rw-r--r--ext/mcpat/cacti/nuca.cc612
-rw-r--r--ext/mcpat/cacti/nuca.h100
-rw-r--r--ext/mcpat/cacti/parameter.cc713
-rw-r--r--ext/mcpat/cacti/parameter.h367
-rw-r--r--ext/mcpat/cacti/router.cc311
-rw-r--r--ext/mcpat/cacti/router.h115
-rwxr-xr-xext/mcpat/cacti/subarray.cc196
-rwxr-xr-xext/mcpat/cacti/subarray.h70
-rw-r--r--ext/mcpat/cacti/technology.cc2921
-rwxr-xr-xext/mcpat/cacti/uca.cc426
-rwxr-xr-xext/mcpat/cacti/uca.h95
-rw-r--r--ext/mcpat/cacti/wire.cc832
-rw-r--r--ext/mcpat/cacti/wire.h124
46 files changed, 18792 insertions, 0 deletions
diff --git a/ext/mcpat/cacti/README b/ext/mcpat/cacti/README
new file mode 100644
index 000000000..de429d2bb
--- /dev/null
+++ b/ext/mcpat/cacti/README
@@ -0,0 +1,94 @@
+-----------------------------------------------------------
+ ____ _ ____ _____ ___ __ ____
+ / ___| / \ / ___|_ _|_ _| / /_ | ___|
+ | | / _ \| | | | | | | '_ \ |___ \
+ | |___ / ___ \ |___ | | | | | (_) | ___) |
+ \____/_/ \_\____| |_| |___| \___(_)____/
+
+
+ A Tool to Model Caches/Memories
+-----------------------------------------------------------
+
+CACTI is an analytical tool that takes a set of cache/memory para-
+meters as input and calculates its access time, power, cycle
+time, and area.
+CACTI was originally developed by Dr. Jouppi and Dr. Wilton
+in 1993 and since then it has undergone five major
+revisions.
+
+List of features (version 1-6.5):
+===============================
+The following is the list of features supported by the tool.
+
+* Power, delay, area, and cycle time model for
+ direct mapped caches
+ set-associative caches
+ fully associative caches
+ Embedded DRAM memories
+ Commodity DRAM memories
+
+* Support for modeling multi-ported uniform cache access (UCA)
+ and multi-banked, multi-ported non-uniform cache access (NUCA).
+
+* Leakage power calculation that also considers the operating
+ temperature of the cache.
+
+* Router power model.
+
+* Interconnect model with different delay, power, and area
+ properties including low-swing wire model.
+
+* An interface to perform trade-off analysis involving power, delay,
+ area, and bandwidth.
+
+* All process specific values used by the tool are obtained
+ from ITRS and currently, the tool supports 90nm, 65nm, 45nm,
+ and 32nm technology nodes.
+
+Version 6.5 has a new c++ code base and includes numerous bug fixes.
+CACTI 5.3 and 6.0 activate an entire row of mats to read/write a single
+block of data. This technique improves reliability at the cost of
+power. CACTI 6.5 activates minimum number of mats just enough to retrieve
+a block to minimize power.
+
+How to use the tool?
+====================
+Prior versions of CACTI take input parameters such as cache
+size and technology node as a set of command line arguments.
+To avoid a long list of command line arguments,
+CACTI 6.5 lets users specify their cache model in a more
+detailed manner by using a config file (cache.cfg).
+
+-> define the cache model using cache.cfg
+-> run the "cacti" binary <./cacti -infile cache.cfg>
+
+CACTI6.5 also provides a command line interface similar to earlier versions
+of CACTI. The command line interface can be used as
+
+./cacti cache_size line_size associativity rw_ports excl_read_ports excl_write_ports
+ single_ended_read_ports search_ports banks tech_node output_width specific_tag tag_width
+ access_mode cache main_mem obj_func_delay obj_func_dynamic_power obj_func_leakage_power
+ obj_func_cycle_time obj_func_area dev_func_delay dev_func_dynamic_power dev_func_leakage_power
+ dev_func_area dev_func_cycle_time ed_ed2_none temp wt data_arr_ram_cell_tech_flavor_in
+ data_arr_peri_global_tech_flavor_in tag_arr_ram_cell_tech_flavor_in tag_arr_peri_global_tech_flavor_in
+ interconnect_projection_type_in wire_inside_mat_type_in wire_outside_mat_type_in
+ REPEATERS_IN_HTREE_SEGMENTS_in VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in
+ BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in PAGE_SIZE_BITS_in BURST_LENGTH_in
+ INTERNAL_PREFETCH_WIDTH_in force_wiretype wiretype force_config ndwl ndbl nspd ndcm
+ ndsam1 ndsam2 ecc
+
+For complete documentation of the tool, please refer CACTI-5.3 and 6.0
+technical reports and the following paper,
+"Optimizing NUCA Organizations and Wiring Alternatives for
+Large Caches With CACTI 6.0", that appears in MICRO 2007.
+
+We are still improving the tool and refining the code. If you
+have any comments, questions, or suggestions please write to
+us.
+
+Naveen Muralimanohar Jung Ho Ahn Sheng Li
+naveen.muralimanohar@hp.com gajh@snu.ac.kr sheng.li@hp.com
+
+
+
+
diff --git a/ext/mcpat/cacti/Ucache.cc b/ext/mcpat/cacti/Ucache.cc
new file mode 100644
index 000000000..f3e1227df
--- /dev/null
+++ b/ext/mcpat/cacti/Ucache.cc
@@ -0,0 +1,916 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#include <pthread.h>
+
+#include <algorithm>
+#include <cmath>
+#include <ctime>
+#include <iostream>
+#include <list>
+
+#include "Ucache.h"
+#include "area.h"
+#include "bank.h"
+#include "basic_circuit.h"
+#include "component.h"
+#include "const.h"
+#include "decoder.h"
+#include "parameter.h"
+#include "subarray.h"
+#include "uca.h"
+
+using namespace std;
+
+const uint32_t nthreads = NTHREADS;
+
+
+void min_values_t::update_min_values(const min_values_t * val)
+{
+ min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
+ min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
+ min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
+ min_area = (min_area > val->min_area) ? val->min_area : min_area;
+ min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
+}
+
+
+
+void min_values_t::update_min_values(const uca_org_t & res)
+{
+ min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
+ min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
+ min_area = (min_area > res.area) ? res.area : min_area;
+ min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
+}
+
+void min_values_t::update_min_values(const nuca_org_t * res)
+{
+ min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
+ min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
+ min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
+ min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
+}
+
+void min_values_t::update_min_values(const mem_array * res)
+{
+ min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
+ min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
+ min_area = (min_area > res->area) ? res->area : min_area;
+ min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
+}
+
+
+
+void * calc_time_mt_wrapper(void * void_obj)
+{
+ calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
+ uint32_t tid = calc_obj->tid;
+ list<mem_array *> & data_arr = calc_obj->data_arr;
+ list<mem_array *> & tag_arr = calc_obj->tag_arr;
+ bool is_tag = calc_obj->is_tag;
+ bool pure_ram = calc_obj->pure_ram;
+ bool pure_cam = calc_obj->pure_cam;
+ bool is_main_mem = calc_obj->is_main_mem;
+ double Nspd_min = calc_obj->Nspd_min;
+ min_values_t * data_res = calc_obj->data_res;
+ min_values_t * tag_res = calc_obj->tag_res;
+
+ data_arr.clear();
+ data_arr.push_back(new mem_array);
+ tag_arr.clear();
+ tag_arr.push_back(new mem_array);
+
+ uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
+ uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
+
+
+ bool is_valid_partition;
+ int wt_min, wt_max;
+
+ if (g_ip->force_wiretype) {
+ if (g_ip->wt == 0) {
+ wt_min = Low_swing;
+ wt_max = Low_swing;
+ }
+ else {
+ wt_min = Global;
+ wt_max = Low_swing-1;
+ }
+ }
+ else {
+ wt_min = Global;
+ wt_max = Low_swing;
+ }
+
+ for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
+ {
+ for (int wr = wt_min; wr <= wt_max; wr++)
+ {
+ for (uint32_t iter = tid; iter < niter; iter += nthreads)
+ {
+ // reconstruct Ndwl, Ndbl, Ndcm
+ unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
+ unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter);
+ unsigned int Ndcm = 1 << (iter % Ndcm_niter);
+ for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
+ {
+ for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
+ {
+ //for debuging
+ if (g_ip->force_cache_config && is_tag == false)
+ {
+ wr = g_ip->wt;
+ Ndwl = g_ip->ndwl;
+ Ndbl = g_ip->ndbl;
+ Ndcm = g_ip->ndcm;
+ if(g_ip->nspd != 0) {
+ Nspd = g_ip->nspd;
+ }
+ if(g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = g_ip->ndsam1;
+ Ndsam_lev_2 = g_ip->ndsam2;
+ }
+ }
+
+ if (is_tag == true)
+ {
+ is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ tag_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+ // If it's a fully-associative cache, the data array partition parameters are identical to that of
+ // the tag array, so compute data array partition properties also here.
+ if (is_tag == false || g_ip->fully_assoc)
+ {
+ is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ data_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+
+ if (is_valid_partition)
+ {
+ if (is_tag == true)
+ {
+ tag_arr.back()->wt = (enum Wire_type) wr;
+ tag_res->update_min_values(tag_arr.back());
+ tag_arr.push_back(new mem_array);
+ }
+ if (is_tag == false || g_ip->fully_assoc)
+ {
+ data_arr.back()->wt = (enum Wire_type) wr;
+ data_res->update_min_values(data_arr.back());
+ data_arr.push_back(new mem_array);
+ }
+ }
+
+ if (g_ip->force_cache_config && is_tag == false)
+ {
+ wr = wt_max;
+ iter = niter;
+ if(g_ip->nspd != 0) {
+ Nspd = MAXDATASPD;
+ }
+ if (g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = MAX_COL_MUX+1;
+ Ndsam_lev_2 = MAX_COL_MUX+1;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ delete data_arr.back();
+ delete tag_arr.back();
+ data_arr.pop_back();
+ tag_arr.pop_back();
+
+ pthread_exit(NULL);
+}
+
+
+
+bool calculate_time(
+ bool is_tag,
+ int pure_ram,
+ bool pure_cam,
+ double Nspd,
+ unsigned int Ndwl,
+ unsigned int Ndbl,
+ unsigned int Ndcm,
+ unsigned int Ndsam_lev_1,
+ unsigned int Ndsam_lev_2,
+ mem_array *ptr_array,
+ int flag_results_populate,
+ results_mem_array *ptr_results,
+ uca_org_t *ptr_fin_res,
+ bool is_main_mem)
+{
+ DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
+
+ if (dyn_p.is_valid == false)
+ {
+ return false;
+ }
+
+ UCA * uca = new UCA(dyn_p);
+
+
+ if (flag_results_populate)
+ { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables
+ }
+ else
+ {
+ int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
+ int num_mats = uca->bank.dp.num_mats;
+ bool is_fa = uca->bank.dp.fully_assoc;
+ bool pure_cam = uca->bank.dp.pure_cam;
+ ptr_array->Ndwl = Ndwl;
+ ptr_array->Ndbl = Ndbl;
+ ptr_array->Nspd = Nspd;
+ ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
+ ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
+ ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
+ ptr_array->access_time = uca->access_time;
+ ptr_array->cycle_time = uca->cycle_time;
+ ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
+ ptr_array->area_ram_cells = uca->area_all_dataramcells;
+ ptr_array->area = uca->area.get_area();
+ ptr_array->height = uca->area.h;
+ ptr_array->width = uca->area.w;
+ ptr_array->mat_height = uca->bank.mat.area.h;
+ ptr_array->mat_length = uca->bank.mat.area.w;
+ ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
+ ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
+ ptr_array->power = uca->power;
+ ptr_array->delay_senseamp_mux_decoder =
+ MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
+ uca->delay_array_to_sa_mux_lev_2_decoder);
+ ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver;
+ ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;
+
+ ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
+ ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
+ ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
+ ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
+ ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
+ ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
+ ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
+ ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
+ ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
+ ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
+
+ ptr_array->all_banks_height = uca->area.h;
+ ptr_array->all_banks_width = uca->area.w;
+ ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());
+
+ ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
+ ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
+ ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
+// cout<<"power_data_input_htree"<<uca->bank.htree_in_data->power.readOp.leakage<<endl;
+ ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
+// cout<<"power_data_output_htree"<<uca->bank.htree_out_data->power.readOp.leakage<<endl;
+ ptr_array->power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
+ ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
+ ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
+ ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
+ ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power;
+ ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
+ ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
+ ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
+ ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bitlines = uca->bank.mat.power_bitline;
+ ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_sense_amps = uca->bank.mat.power_sa;
+ ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
+ ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
+ ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_comparators = uca->bank.mat.power_comparator;
+ ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
+
+// cout << " num of mats: " << dyn_p.num_mats << endl;
+ if (is_fa || pure_cam)
+ {
+ ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
+// cout<<"power_htree_in_search"<<uca->bank.htree_in_search->power.readOp.leakage<<endl;
+ ptr_array->power_htree_out_search = uca->bank.htree_out_search->power;
+// cout<<"power_htree_out_search"<<uca->bank.htree_out_search->power.readOp.leakage<<endl;
+ ptr_array->power_searchline = uca->bank.mat.power_searchline;
+// cout<<"power_searchlineh"<<uca->bank.mat.power_searchline.readOp.leakage<<endl;
+ ptr_array->power_searchline.searchOp.dynamic *= num_mats;
+ ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
+ ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchlines = uca->bank.mat.power_matchline;
+ ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
+ ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
+// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.power_matchline.searchOp.leakage<<endl;
+ }
+
+ ptr_array->activate_energy = uca->activate_energy;
+ ptr_array->read_energy = uca->read_energy;
+ ptr_array->write_energy = uca->write_energy;
+ ptr_array->precharge_energy = uca->precharge_energy;
+ ptr_array->refresh_power = uca->refresh_power;
+ ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
+ ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
+ ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;
+
+ ptr_array->precharge_delay = uca->precharge_delay;
+
+
+// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.<<endl;
+//
+// if (!(is_fa || pure_cam))
+// {
+// cout << " num of cols: " << dyn_p.num_c_subarray << endl;
+// }
+// else if (is_fa)
+// {
+// cout << " num of cols: " << dyn_p.tag_num_c_subarray+ dyn_p.data_num_c_subarray<< endl;
+// } else
+// cout << " num of cols: " << dyn_p.tag_num_c_subarray<< endl;
+// cout << uca->bank.mat.subarray.get_total_cell_area()<<endl;
+ }
+
+
+ delete uca;
+ return true;
+}
+
+
+
+bool check_uca_org(uca_org_t & u, min_values_t *minval)
+{
+ if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area)/minval->min_area)*100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
+}
+
+bool check_mem_org(mem_array & u, const min_values_t *minval)
+{
+ if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area)/minval->min_area)*100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
+}
+
+
+
+
+void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & ulist)
+{
+ double cost = 0;
+ double min_cost = BIGNUM;
+ float d, a, dp, lp, c;
+
+ dp = g_ip->dynamic_power_wt;
+ lp = g_ip->leakage_power_wt;
+ a = g_ip->area_wt;
+ d = g_ip->delay_wt;
+ c = g_ip->cycle_time_wt;
+
+ if (ulist.empty() == true)
+ {
+ cout << "ERROR: no valid cache organizations found" << endl;
+ exit(0);
+ }
+
+ for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
+ {
+ if (g_ip->ed == 1)
+ {
+ cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn);
+ if (min_cost > cost)
+ {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ }
+ else if (g_ip->ed == 2)
+ {
+ cost = ((niter)->access_time/minval->min_delay)*
+ ((niter)->access_time/minval->min_delay)*
+ ((niter)->power.readOp.dynamic/minval->min_dyn);
+ if (min_cost > cost)
+ {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ }
+ else
+ {
+ /*
+ * check whether the current organization
+ * meets the input deviation constraints
+ */
+ bool v = check_uca_org(*niter, minval);
+ //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
+
+ if (v)
+ {
+ cost = (d * ((niter)->access_time/minval->min_delay) +
+ c * ((niter)->cycle_time/minval->min_cyc) +
+ dp * ((niter)->power.readOp.dynamic/minval->min_dyn) +
+ lp * ((niter)->power.readOp.leakage/minval->min_leakage) +
+ a * ((niter)->area/minval->min_area));
+ //fprintf(stderr, "cost = %g\n", cost);
+
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ niter = ulist.erase(niter);
+ if (niter!=ulist.begin())
+ niter--;
+ }
+ }
+ else {
+ niter = ulist.erase(niter);
+ if (niter!=ulist.begin())
+ niter--;
+ }
+ }
+ }
+
+ if (min_cost == BIGNUM)
+ {
+ cout << "ERROR: no cache organizations met optimization criteria" << endl;
+ exit(0);
+ }
+}
+
+
+
+void filter_tag_arr(const min_values_t * min, list<mem_array *> & list)
+{
+ double cost = BIGNUM;
+ double cur_cost;
+ double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt;
+ mem_array * res = NULL;
+
+ if (list.empty() == true)
+ {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(1);
+ }
+
+
+ while (list.empty() != true)
+ {
+ bool v = check_mem_org(*list.back(), min);
+ if (v)
+ {
+ cur_cost = wt_delay * (list.back()->access_time/min->min_delay) +
+ wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) +
+ wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) +
+ wt_area * (list.back()->area/min->min_area) +
+ wt_cyc * (list.back()->cycle_time/min->min_cyc);
+ }
+ else
+ {
+ cur_cost = BIGNUM;
+ }
+ if (cur_cost < cost)
+ {
+ if (res != NULL)
+ {
+ delete res;
+ }
+ cost = cur_cost;
+ res = list.back();
+ }
+ else
+ {
+ delete list.back();
+ }
+ list.pop_back();
+ }
+ if(!res)
+ {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(0);
+ }
+
+ list.push_back(res);
+}
+
+
+
+void filter_data_arr(list<mem_array *> & curr_list)
+{
+ if (curr_list.empty() == true)
+ {
+ cout << "ERROR: no valid data array organizations found" << endl;
+ exit(1);
+ }
+
+ list<mem_array *>::iterator iter;
+
+ for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
+ {
+ mem_array * m = *iter;
+
+ if (m == NULL) exit(1);
+
+ if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) &&
+ ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5))
+ {
+ delete m;
+ iter = curr_list.erase(iter);
+ iter --;
+ }
+ }
+}
+
+
+
+/*
+ * Performs exhaustive search across different sub-array sizes,
+ * wire types and aspect ratios to find an optimal UCA organization
+ * 1. First different valid tag array organizations are calculated
+ * and stored in tag_arr array
+ * 2. The exhaustive search is repeated to find valid data array
+ * organizations and stored in data_arr array
+ * 3. Cache area, delay, power, and cycle time for different
+ * cache organizations are calculated based on the
+ * above results
+ * 4. Cache model with least cost is picked from sol_list
+ */
+void solve(uca_org_t *fin_res)
+{
+ bool is_dram = false;
+ int pure_ram = g_ip->pure_ram;
+ bool pure_cam = g_ip->pure_cam;
+
+ init_tech_params(g_ip->F_sz_um, false);
+
+
+ list<mem_array *> tag_arr (0);
+ list<mem_array *> data_arr(0);
+ list<mem_array *>::iterator miter;
+ list<uca_org_t> sol_list(1, uca_org_t());
+
+ fin_res->tag_array.access_time = 0;
+ fin_res->tag_array.Ndwl = 0;
+ fin_res->tag_array.Ndbl = 0;
+ fin_res->tag_array.Nspd = 0;
+ fin_res->tag_array.deg_bl_muxing = 0;
+ fin_res->tag_array.Ndsam_lev_1 = 0;
+ fin_res->tag_array.Ndsam_lev_2 = 0;
+
+
+ // distribute calculate_time() execution to multiple threads
+ calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
+ pthread_t threads[nthreads];
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].tid = t;
+ calc_array[t].pure_ram = pure_ram;
+ calc_array[t].pure_cam = pure_cam;
+ calc_array[t].data_res = new min_values_t();
+ calc_array[t].tag_res = new min_values_t();
+ }
+
+ bool is_tag;
+ uint32_t ram_cell_tech_type;
+
+ // If it's a cache, first calculate the area, delay and power for all tag array partitions.
+ if (!(pure_ram||pure_cam||g_ip->fully_assoc))
+ { //cache
+ is_tag = true;
+ ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
+ init_tech_params(g_ip->F_sz_um, is_tag);
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = false;
+ calc_array[t].Nspd_min = 0.125;
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+ }
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ pthread_join(threads[t], NULL);
+ }
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
+ calc_array[t].tag_arr.sort(mem_array::lt);
+ tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
+ }
+ }
+
+
+ // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
+// if (!g_ip->fully_assoc)
+// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion
+ is_tag = false;
+ ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
+ init_tech_params(g_ip->F_sz_um, is_tag);
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = g_ip->is_main_mem;
+ if (!(pure_cam||g_ip->fully_assoc))
+ {
+ calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8);
+ }
+ else
+ {
+ calc_array[t].Nspd_min = 1;
+ }
+
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+ }
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ pthread_join(threads[t], NULL);
+ }
+
+ data_arr.clear();
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
+ }
+// }
+
+
+ min_values_t * d_min = new min_values_t();
+ min_values_t * t_min = new min_values_t();
+ min_values_t * cache_min = new min_values_t();
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ d_min->update_min_values(calc_array[t].data_res);
+ t_min->update_min_values(calc_array[t].tag_res);
+ }
+
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
+ {
+ (*miter)->arr_min = d_min;
+ }
+
+
+ //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n";
+ filter_data_arr(data_arr);
+ if(!(pure_ram||pure_cam||g_ip->fully_assoc))
+ {
+ filter_tag_arr(t_min, tag_arr);
+ }
+ //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n";
+
+
+ if (pure_ram||pure_cam||g_ip->fully_assoc)
+ {
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
+ {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = NULL;
+ curr_org.data_array2 = (*miter);
+
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
+
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
+
+ sol_list.push_back(uca_org_t());
+ }
+ }
+ else
+ {
+ while (tag_arr.empty() != true)
+ {
+ mem_array * arr_temp = (tag_arr.back());
+ //delete tag_arr.back();
+ tag_arr.pop_back();
+
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
+ {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = arr_temp;
+ curr_org.data_array2 = (*miter);
+
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
+
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
+
+ sol_list.push_back(uca_org_t());
+ }
+ }
+ }
+
+ sol_list.pop_back();
+
+ find_optimal_uca(fin_res, cache_min, sol_list);
+
+ sol_list.clear();
+
+ for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
+ {
+ if (*miter != fin_res->data_array2)
+ {
+ delete *miter;
+ }
+ }
+ data_arr.clear();
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ delete calc_array[t].data_res;
+ delete calc_array[t].tag_res;
+ }
+
+ delete [] calc_array;
+ delete cache_min;
+ delete d_min;
+ delete t_min;
+}
+
+void update(uca_org_t *fin_res)
+{
+ if(fin_res->tag_array2)
+ {
+ init_tech_params(g_ip->F_sz_um,true);
+ DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ if(tag_arr_dyn_p.is_valid)
+ {
+ UCA * tag_arr = new UCA(tag_arr_dyn_p);
+ fin_res->tag_array2->power = tag_arr->power;
+ }
+ else
+ {
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ exit(1);
+ }
+ }
+ init_tech_params(g_ip->F_sz_um,false);
+ DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ if(data_arr_dyn_p.is_valid)
+ {
+ UCA * data_arr = new UCA(data_arr_dyn_p);
+ fin_res->data_array2->power = data_arr->power;
+ }
+ else
+ {
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ exit(1);
+ }
+
+ fin_res->find_energy();
+}
+
diff --git a/ext/mcpat/cacti/Ucache.h b/ext/mcpat/cacti/Ucache.h
new file mode 100644
index 000000000..20985fff1
--- /dev/null
+++ b/ext/mcpat/cacti/Ucache.h
@@ -0,0 +1,115 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#ifndef __UCACHE_H__
+#define __UCACHE_H__
+
+#include <list>
+
+#include "area.h"
+#include "nuca.h"
+#include "router.h"
+
+class min_values_t
+{
+ public:
+ double min_delay;
+ double min_dyn;
+ double min_leakage;
+ double min_area;
+ double min_cyc;
+
+ min_values_t() : min_delay(BIGNUM), min_dyn(BIGNUM), min_leakage(BIGNUM), min_area(BIGNUM), min_cyc(BIGNUM) { }
+
+ void update_min_values(const min_values_t * val);
+ void update_min_values(const uca_org_t & res);
+ void update_min_values(const nuca_org_t * res);
+ void update_min_values(const mem_array * res);
+};
+
+
+
+struct solution
+{
+ int tag_array_index;
+ int data_array_index;
+ list<mem_array *>::iterator tag_array_iter;
+ list<mem_array *>::iterator data_array_iter;
+ double access_time;
+ double cycle_time;
+ double area;
+ double efficiency;
+ powerDef total_power;
+};
+
+
+
+bool calculate_time(
+ bool is_tag,
+ int pure_ram,
+ bool pure_cam,
+ double Nspd,
+ unsigned int Ndwl,
+ unsigned int Ndbl,
+ unsigned int Ndcm,
+ unsigned int Ndsam_lev_1,
+ unsigned int Ndsam_lev_2,
+ mem_array *ptr_array,
+ int flag_results_populate,
+ results_mem_array *ptr_results,
+ uca_org_t *ptr_fin_res,
+ bool is_main_mem);
+void update(uca_org_t *fin_res);
+
+void solve(uca_org_t *fin_res);
+void init_tech_params(double tech, bool is_tag);
+
+
+struct calc_time_mt_wrapper_struct
+{
+ uint32_t tid;
+ bool is_tag;
+ bool pure_ram;
+ bool pure_cam;
+ bool is_main_mem;
+ double Nspd_min;
+
+ min_values_t * data_res;
+ min_values_t * tag_res;
+
+ list<mem_array *> data_arr;
+ list<mem_array *> tag_arr;
+};
+
+void *calc_time_mt_wrapper(void * void_obj);
+
+#endif
diff --git a/ext/mcpat/cacti/arbiter.cc b/ext/mcpat/cacti/arbiter.cc
new file mode 100644
index 000000000..6664abf13
--- /dev/null
+++ b/ext/mcpat/cacti/arbiter.cc
@@ -0,0 +1,130 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#include "arbiter.h"
+
+Arbiter::Arbiter(
+ double n_req,
+ double flit_size_,
+ double output_len,
+ TechnologyParameter::DeviceType *dt
+ ):R(n_req), flit_size(flit_size_),
+ o_len (output_len), deviceType(dt)
+{
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
+ Vdd = dt->Vdd;
+ double technology = g_ip->F_sz_um;
+ NTn1 = 13.5*technology/2;
+ PTn1 = 76*technology/2;
+ NTn2 = 13.5*technology/2;
+ PTn2 = 76*technology/2;
+ NTi = 12.5*technology/2;
+ PTi = 25*technology/2;
+ NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/
+ PTtr = 20*technology/2; /* pmos tr. length*/
+}
+
+Arbiter::~Arbiter(){}
+
+double
+Arbiter::arb_req() {
+ double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
+ gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
+ drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
+ return temp;
+}
+
+double
+Arbiter::arb_pri() {
+ double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
+ of flip-flop is ignored */
+ return temp;
+}
+
+
+double
+Arbiter::arb_grant() {
+ double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
+ return temp;
+}
+
+double
+Arbiter::arb_int() {
+ double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
+ 2*gate_C(NTn2, 0) + gate_C(PTn2, 0));
+ return temp;
+}
+
+void
+Arbiter::compute_power() {
+ power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
+ arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
+ double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
+ double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
+ double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
+ double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
+ double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
+ double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
+ power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
+ power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
+}
+
+double //wire cap with triple spacing
+Arbiter::Cw3(double length) {
+ Wire wc(g_ip->wt, length, 1, 3, 3);
+ double temp = (wc.wire_cap(length,true));
+ return temp;
+}
+
+double
+Arbiter::crossbar_ctrline() {
+ double temp = (Cw3(o_len * 1e-6 /* m */) +
+ drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
+ gate_C(NTi, 0) + gate_C(PTi, 0));
+ return temp;
+}
+
+double
+Arbiter::transmission_buf_ctrcap() {
+ double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0);
+ return temp;
+}
+
+
+void Arbiter::print_arbiter()
+{
+ cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
+ cout << "Flit size : " << flit_size << " bits" << endl;
+ cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
+ cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
+}
+
+
diff --git a/ext/mcpat/cacti/arbiter.h b/ext/mcpat/cacti/arbiter.h
new file mode 100644
index 000000000..32ada92c2
--- /dev/null
+++ b/ext/mcpat/cacti/arbiter.h
@@ -0,0 +1,79 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#ifndef __ARBITER__
+#define __ARBITER__
+
+#include <assert.h>
+
+#include <iostream>
+
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "mat.h"
+#include "parameter.h"
+#include "wire.h"
+
+class Arbiter : public Component
+{
+ public:
+ Arbiter(
+ double Req,
+ double flit_sz,
+ double output_len,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ ~Arbiter();
+
+ void print_arbiter();
+ double arb_req();
+ double arb_pri();
+ double arb_grant();
+ double arb_int();
+ void compute_power();
+ double Cw3(double len);
+ double crossbar_ctrline();
+ double transmission_buf_ctrcap();
+
+
+
+ private:
+ double NTn1, PTn1, NTn2, PTn2, R, PTi, NTi;
+ double flit_size;
+ double NTtr, PTtr;
+ double o_len;
+ TechnologyParameter::DeviceType *deviceType;
+ double TriS1, TriS2;
+ double min_w_pmos, Vdd;
+
+};
+
+#endif
diff --git a/ext/mcpat/cacti/area.cc b/ext/mcpat/cacti/area.cc
new file mode 100644
index 000000000..14ea4a9d7
--- /dev/null
+++ b/ext/mcpat/cacti/area.cc
@@ -0,0 +1,47 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+
+#include "area.h"
+#include "basic_circuit.h"
+#include "component.h"
+#include "decoder.h"
+#include "parameter.h"
+
+using namespace std;
+
+
+
diff --git a/ext/mcpat/cacti/area.h b/ext/mcpat/cacti/area.h
new file mode 100644
index 000000000..7705e6250
--- /dev/null
+++ b/ext/mcpat/cacti/area.h
@@ -0,0 +1,71 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __AREA_H__
+#define __AREA_H__
+
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+
+using namespace std;
+
+class Area
+{
+ public:
+ double w;
+ double h;
+
+ Area():w(0), h(0), area(0) { }
+ double get_w() const { return w; }
+ double get_h() const { return h; }
+ double get_area() const
+ {
+ if (w == 0 && h == 0)
+ {
+ return area;
+ }
+ else
+ {
+ return w*h;
+ }
+ }
+ void set_w(double w_) { w = w_; }
+ void set_h(double h_) { h = h_; }
+ void set_area(double a_) { area = a_; }
+
+ private:
+ double area;
+};
+
+#endif
+
diff --git a/ext/mcpat/cacti/bank.cc b/ext/mcpat/cacti/bank.cc
new file mode 100755
index 000000000..a18c7f1ed
--- /dev/null
+++ b/ext/mcpat/cacti/bank.cc
@@ -0,0 +1,198 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <iostream>
+
+#include "bank.h"
+
+Bank::Bank(const DynamicParameter & dyn_p):
+ dp(dyn_p), mat(dp),
+ num_addr_b_mat(dyn_p.number_addr_bits_mat),
+ num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir)
+{
+ int RWP;
+ int ERP;
+ int EWP;
+ int SCHP;
+
+ if (dp.use_inp_params)
+ {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ }
+ else
+ {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+ }
+
+ int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
+ int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
+ int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
+ int searchinbits;
+ int searchoutbits;
+
+ if (dp.fully_assoc || dp.pure_cam)
+ {
+ datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
+ dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
+ searchinbits = dp.num_si_b_bank_per_port * SCHP;
+ searchoutbits = dp.num_so_b_bank_per_port * SCHP;
+ }
+
+ if (!(dp.fully_assoc || dp.pure_cam))
+ {
+ if (g_ip->fast_access && dp.is_tag == false)
+ {
+ dataoutbits *= g_ip->data_assoc;
+ }
+
+ htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
+ htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
+ htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+
+// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
+// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+ }
+ else
+ {
+ htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
+ htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
+ htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+ htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true);
+ htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true);
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+ }
+
+ num_addr_b_row_dec = _log2(mat.subarray.num_rows);
+ num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
+ num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
+}
+
+
+
+Bank::~Bank()
+{
+ delete htree_in_add;
+ delete htree_out_data;
+ delete htree_in_data;
+ if (dp.fully_assoc || dp.pure_cam)
+ {
+ delete htree_in_search;
+ delete htree_out_search;
+ }
+}
+
+
+
+double Bank::compute_delays(double inrisetime)
+{
+ return mat.compute_delays(inrisetime);
+}
+
+
+
+void Bank::compute_power_energy()
+{
+ mat.compute_power_energy();
+
+ if (!(dp.fully_assoc || dp.pure_cam))
+ {
+ power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
+ power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
+ power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
+
+ power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
+ power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
+
+ power.readOp.leakage += htree_in_add->power.readOp.leakage;
+ power.readOp.leakage += htree_in_data->power.readOp.leakage;
+ power.readOp.leakage += htree_out_data->power.readOp.leakage;
+ power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
+ }
+ else
+ {
+
+ power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
+ power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
+ power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
+
+ power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
+ power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
+ mat.power_sa.searchOp.dynamic +
+ mat.power_bitline.searchOp.dynamic +
+ mat.power_subarray_out_drv.searchOp.dynamic+
+ mat.ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
+ power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
+
+ power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
+ power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
+
+ power.readOp.leakage += htree_in_add->power.readOp.leakage;
+ power.readOp.leakage += htree_in_data->power.readOp.leakage;
+ power.readOp.leakage += htree_out_data->power.readOp.leakage;
+ power.readOp.leakage += htree_in_search->power.readOp.leakage;
+ power.readOp.leakage += htree_out_search->power.readOp.leakage;
+
+
+ power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
+
+ }
+
+}
+
diff --git a/ext/mcpat/cacti/bank.h b/ext/mcpat/cacti/bank.h
new file mode 100755
index 000000000..153609ab0
--- /dev/null
+++ b/ext/mcpat/cacti/bank.h
@@ -0,0 +1,69 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __BANK_H__
+#define __BANK_H__
+
+#include "component.h"
+#include "decoder.h"
+#include "htree2.h"
+#include "mat.h"
+
+class Bank : public Component
+{
+ public:
+ Bank(const DynamicParameter & dyn_p);
+ ~Bank();
+ double compute_delays(double inrisetime); // return outrisetime
+ void compute_power_energy();
+
+ const DynamicParameter & dp;
+ Mat mat;
+ Htree2 *htree_in_add;
+ Htree2 *htree_in_data;
+ Htree2 *htree_out_data;
+ Htree2 *htree_in_search;
+ Htree2 *htree_out_search;
+
+ int num_addr_b_mat;
+ int num_mats_hor_dir;
+ int num_mats_ver_dir;
+
+ int num_addr_b_row_dec;
+ int num_addr_b_routed_to_mat_for_act;
+ int num_addr_b_routed_to_mat_for_rd_or_wr;
+};
+
+
+
+#endif
diff --git a/ext/mcpat/cacti/basic_circuit.cc b/ext/mcpat/cacti/basic_circuit.cc
new file mode 100644
index 000000000..6efd5dd27
--- /dev/null
+++ b/ext/mcpat/cacti/basic_circuit.cc
@@ -0,0 +1,829 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+
+#include "basic_circuit.h"
+#include "parameter.h"
+
+uint32_t _log2(uint64_t num)
+{
+ uint32_t log2 = 0;
+
+ if (num == 0)
+ {
+ std::cerr << "log0?" << std::endl;
+ exit(1);
+ }
+
+ while (num > 1)
+ {
+ num = (num >> 1);
+ log2++;
+ }
+
+ return log2;
+}
+
+
+bool is_pow2(int64_t val)
+{
+ if (val <= 0)
+ {
+ return false;
+ }
+ else if (val == 1)
+ {
+ return true;
+ }
+ else
+ {
+ return (_log2(val) != _log2(val-1));
+ }
+}
+
+
+int powers (int base, int n)
+{
+ int i, p;
+
+ p = 1;
+ for (i = 1; i <= n; ++i)
+ p *= base;
+ return p;
+}
+
+/*----------------------------------------------------------------------*/
+
+double logtwo (double x)
+{
+ assert(x > 0);
+ return ((double) (log (x) / log (2.0)));
+}
+
+/*----------------------------------------------------------------------*/
+
+
+double gate_C(
+ double width,
+ double wirelength,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ const TechnologyParameter::DeviceType * dt;
+
+ if (_is_dram && _is_cell)
+ {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ }
+ else if (_is_dram && _is_wl_tr)
+ {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ }
+ else if (!_is_dram && _is_cell)
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+}
+
+
+// returns gate capacitance in Farads
+// actually this function is the same as gate_C() now
+double gate_C_pass(
+ double width, // gate width in um (length is Lphy_periph_global)
+ double wirelength, // poly wire length going to gate in lambda
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ // v5.0
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ }
+ else if ((_is_dram) && (_is_wl_tr))
+ {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ }
+ else if ((!_is_dram) && _is_cell)
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+}
+
+
+
+double drain_C_(
+ double width,
+ int nchannel,
+ int stack,
+ int next_arg_thresh_folding_width_or_height_cell,
+ double fold_dimension,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ double w_folded_tr;
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.dram_acc; // DRAM cell access transistor
+ }
+ else if ((_is_dram) && (_is_wl_tr))
+ {
+ dt = &g_tp.dram_wl; // DRAM wordline transistor
+ }
+ else if ((!_is_dram) && _is_cell)
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ double c_junc_area = dt->C_junc;
+ double c_junc_sidewall = dt->C_junc_sidewall;
+ double c_fringe = 2*dt->C_fringe;
+ double c_overlap = 2*dt->C_overlap;
+ double drain_C_metal_connecting_folded_tr = 0;
+
+ // determine the width of the transistor after folding (if it is getting folded)
+ if (next_arg_thresh_folding_width_or_height_cell == 0)
+ { // interpret fold_dimension as the the folding width threshold
+ // i.e. the value of transistor width above which the transistor gets folded
+ w_folded_tr = fold_dimension;
+ }
+ else
+ { // interpret fold_dimension as the height of the cell that this transistor is part of.
+ double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
+ // TODO : w_folded_tr must come from Component::compute_gate_area()
+ double ratio_p_to_n = 2.0 / (2.0 + 1.0);
+ if (nchannel)
+ {
+ w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ }
+ else
+ {
+ w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ }
+ }
+ int num_folded_tr = (int) (ceil(width / w_folded_tr));
+
+ if (num_folded_tr < 2)
+ {
+ w_folded_tr = width;
+ }
+
+ double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
+ (stack - 1) * g_tp.spacing_poly_to_poly;
+ double drain_h_for_sidewall = w_folded_tr;
+ double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
+ if (num_folded_tr > 1)
+ {
+ total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
+ (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
+
+ if (num_folded_tr%2 == 0)
+ {
+ drain_h_for_sidewall = 0;
+ }
+ total_drain_height_for_cap_wrt_gate *= num_folded_tr;
+ drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
+ }
+
+ double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
+ double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
+ double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
+
+ return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
+}
+
+
+double tr_R_on(
+ double width,
+ int nchannel,
+ int stack,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ }
+ else if ((_is_dram) && (_is_wl_tr))
+ {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ }
+ else if ((!_is_dram) && _is_cell)
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+ return (stack * restrans / width);
+}
+
+
+/* This routine operates in reverse: given a resistance, it finds
+ * the transistor width that would have this R. It is used in the
+ * data wordline to estimate the wordline driver size. */
+
+// returns width in um
+double R_to_w(
+ double res,
+ int nchannel,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ }
+ else if ((_is_dram) && (_is_wl_tr))
+ {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ }
+ else if ((!_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+ return (restrans / res);
+}
+
+
+double pmos_to_nmos_sz_ratio(
+ bool _is_dram,
+ bool _is_wl_tr)
+{
+ double p_to_n_sizing_ratio;
+ if ((_is_dram) && (_is_wl_tr))
+ { //DRAM wordline transistor
+ p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
+ }
+ return p_to_n_sizing_ratio;
+}
+
+
+// "Timing Models for MOS Circuits" by Mark Horowitz, 1984
+double horowitz(
+ double inputramptime, // input rise time
+ double tf, // time constant of gate
+ double vs1, // threshold voltage
+ double vs2, // threshold voltage
+ int rise) // whether input rises or fall
+{
+ if (inputramptime == 0 && vs1 == vs2)
+ {
+ return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
+ }
+ double a, b, td;
+
+ a = inputramptime / tf;
+ if (rise == RISE)
+ {
+ b = 0.5;
+ td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2));
+ }
+ else
+ {
+ b = 0.4;
+ td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2));
+ }
+ return (td);
+}
+
+double cmos_Ileak(
+ double nWidth,
+ double pWidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
+}
+
+
+double simplified_nmos_leakage(
+ double nwidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nwidth * dt->I_off_n;
+}
+
+int factorial(int n, int m)
+{
+ int fa = m, i;
+ for (i=m+1; i<=n; i++)
+ fa *=i;
+ return fa;
+}
+
+int combination(int n, int m)
+{
+ int ret;
+ ret = factorial(n, m+1) / factorial(n - m);
+ return ret;
+}
+
+double simplified_pmos_leakage(
+ double pwidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return pwidth * dt->I_off_p;
+}
+
+double cmos_Ig_n(
+ double nWidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nWidth*dt->I_g_on_n;
+}
+
+double cmos_Ig_p(
+ double pWidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return pWidth*dt->I_g_on_p;
+}
+
+double cmos_Isub_leakage(
+ double nWidth,
+ double pWidth,
+ int fanin,
+ enum Gate_type g_type,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr,
+ enum Half_net_topology topo)
+{
+ assert (fanin>=1);
+ double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
+ double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
+ double Isub=0;
+ int num_states;
+ int num_off_tx;
+
+ num_states = int(pow(2.0, fanin));
+
+ switch (g_type)
+ {
+ case nmos:
+ if (fanin==1)
+ {
+ Isub = nmos_leak/num_states;
+ }
+ else
+ {
+ if (topo==parallel)
+ {
+ Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
+ }
+ else
+ {
+ for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
+ {
+ //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+ Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ }
+ Isub /=num_states;
+ }
+
+ }
+ break;
+ case pmos:
+ if (fanin==1)
+ {
+ Isub = pmos_leak/num_states;
+ }
+ else
+ {
+ if (topo==parallel)
+ {
+ Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
+ }
+ else
+ {
+ for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
+ {
+ //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+ Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ }
+ Isub /=num_states;
+ }
+
+ }
+ break;
+ case inv:
+ Isub = (nmos_leak + pmos_leak)/2;
+ break;
+ case nand:
+ Isub += fanin*pmos_leak;//the pullup network
+ for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network
+ {
+ //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+ Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ }
+ Isub /=num_states;
+ break;
+ case nor:
+ for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network
+ {
+ //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+ Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ }
+ Isub += fanin*nmos_leak;//the pulldown network
+ Isub /=num_states;
+ break;
+ case tri:
+ Isub += (nmos_leak + pmos_leak)/2;//enabled
+ Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power
+ Isub /=2;
+ break;
+ case tg:
+ Isub = (nmos_leak + pmos_leak)/2;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ return Isub;
+}
+
+
+double cmos_Ig_leakage(
+ double nWidth,
+ double pWidth,
+ int fanin,
+ enum Gate_type g_type,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr,
+ enum Half_net_topology topo)
+{
+ assert (fanin>=1);
+ double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
+ double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
+ double Ig_on=0;
+ int num_states;
+ int num_on_tx;
+
+ num_states = int(pow(2.0, fanin));
+
+ switch (g_type)
+ {
+ case nmos:
+ if (fanin==1)
+ {
+ Ig_on = nmos_leak/num_states;
+ }
+ else
+ {
+ if (topo==parallel)
+ {
+ for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
+ {
+ Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+ }
+ }
+ else
+ {
+ Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+ //num_on_tx is the number of on tx
+ for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
+ {
+ Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
+ }
+ Ig_on /=num_states;
+ }
+ }
+ break;
+ case pmos:
+ if (fanin==1)
+ {
+ Ig_on = pmos_leak/num_states;
+ }
+ else
+ {
+ if (topo==parallel)
+ {
+ for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
+ {
+ Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+ }
+ }
+ else
+ {
+ Ig_on += pmos_leak * fanin;//pull down network when all TXs are on.
+ //num_on_tx is the number of on tx
+ for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
+ {
+ Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
+ }
+ Ig_on /=num_states;
+ }
+ }
+ break;
+
+ case inv:
+ Ig_on = (nmos_leak + pmos_leak)/2;
+ break;
+ case nand:
+ //pull up network
+ for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
+ {
+ Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+ }
+
+ //pull down network
+ Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+ //num_on_tx is the number of on tx
+ for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
+ {
+ Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
+ }
+ Ig_on /=num_states;
+ break;
+ case nor:
+ // num_on_tx is the number of on tx in pull up network
+ Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
+ for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)
+ {
+ Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;
+
+ }
+ //pull down network
+ for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
+ {
+ Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+ }
+ Ig_on /=num_states;
+ break;
+ case tri:
+ Ig_on += (2*nmos_leak + 2*pmos_leak)/2;//enabled
+ Ig_on += (nmos_leak + pmos_leak)/2; //disabled upper bound of leakage power
+ Ig_on /=2;
+ break;
+ case tg:
+ Ig_on = (nmos_leak + pmos_leak)/2;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ return Ig_on;
+}
+
+double shortcircuit_simple(
+ double vt,
+ double velocity_index,
+ double c_in,
+ double c_out,
+ double w_nmos,
+ double w_pmos,
+ double i_on_n,
+ double i_on_p,
+ double i_on_n_in,
+ double i_on_p_in,
+ double vdd)
+{
+
+ double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
+ double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
+
+ fo_n = i_on_n/i_on_n_in;
+ fo_p = i_on_p/i_on_p_in;
+ fanout = c_out/c_in;
+ beta_ratio = i_on_p/i_on_n;
+ vt_to_vdd_ratio = vt/vdd;
+
+ //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+ p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+ p_short_circuit_charge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
+// double t1, t2, t3, t4, t5;
+// t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
+// t2=pow(velocity_index,2.0);
+// t3=pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio);
+// t4=t1/t2/t3;
+// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
+
+ p_short_circuit_discharge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_p/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+ p_short_circuit_charge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_n/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+
+// t1=pow(((vdd-vt)-vt_to_vdd_ratio),1.5);
+// t2=pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+// t3=t1/t2;
+// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
+// p_short_circuit_discharge = 1.0/(1.0/p_short_circuit_discharge_low + 1.0/p_short_circuit_discharge_high);
+// p_short_circuit_charge = 1/(1/p_short_circuit_charge_low + 1/p_short_circuit_charge_high); //harmmoic mean cannot be applied simple formulas.
+
+ p_short_circuit_discharge = p_short_circuit_discharge_low;
+ p_short_circuit_charge = p_short_circuit_charge_low;
+ p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge)/2;
+
+ return (p_short_circuit);
+}
+
+double shortcircuit(
+ double vt,
+ double velocity_index,
+ double c_in,
+ double c_out,
+ double w_nmos,
+ double w_pmos,
+ double i_on_n,
+ double i_on_p,
+ double i_on_n_in,
+ double i_on_p_in,
+ double vdd)
+{
+
+ double p_short_circuit=0, p_short_circuit_discharge;//, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
+ double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
+ double f_alpha, k_v, e, g_v_alpha, h_v_alpha;
+
+ fo_n = i_on_n/i_on_n_in;
+ fo_p = i_on_p/i_on_p_in;
+ fanout = 1;
+ beta_ratio = i_on_p/i_on_n;
+ vt_to_vdd_ratio = vt/vdd;
+ e = 2.71828;
+ f_alpha = 1/(velocity_index+2) -velocity_index/(2*(velocity_index+3)) +velocity_index/(velocity_index+4)*(velocity_index/2-1);
+ k_v = 0.9/0.8+(vdd-vt)/0.8*log(10*(vdd-vt)/e);
+ g_v_alpha = (velocity_index + 1)*pow((1-velocity_index),velocity_index)*pow((1-velocity_index),velocity_index/2)/f_alpha/pow((1-velocity_index-velocity_index),(velocity_index/2+velocity_index+2));
+ h_v_alpha = pow(2, velocity_index)*(velocity_index+1)*pow((1-velocity_index),velocity_index)/pow((1-velocity_index-velocity_index),(velocity_index+1));
+
+ //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+// p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+// p_short_circuit_charge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
+// double t1, t2, t3, t4, t5;
+// t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
+// t2=pow(velocity_index,2.0);
+// t3=pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio);
+// t4=t1/t2/t3;
+//
+// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
+//
+//
+// p_short_circuit_discharge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_p/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+// p_short_circuit_charge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_n/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+//
+// p_short_circuit_discharge = 1.0/(1.0/p_short_circuit_discharge_low + 1.0/p_short_circuit_discharge_high);
+// p_short_circuit_charge = 1/(1/p_short_circuit_charge_low + 1/p_short_circuit_charge_high);
+//
+// p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge)/2;
+//
+// p_short_circuit = p_short_circuit_discharge;
+
+ p_short_circuit_discharge = k_v*vdd*vdd*c_in*fo_p*fo_p/((vdd-vt)*g_v_alpha*fanout*beta_ratio/2/k_v + h_v_alpha*fo_p);
+ return (p_short_circuit);
+}
diff --git a/ext/mcpat/cacti/basic_circuit.h b/ext/mcpat/cacti/basic_circuit.h
new file mode 100644
index 000000000..aaab6c0ea
--- /dev/null
+++ b/ext/mcpat/cacti/basic_circuit.h
@@ -0,0 +1,248 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __BASIC_CIRCUIT_H__
+#define __BASIC_CIRCUIT_H__
+
+#include "cacti_interface.h"
+#include "const.h"
+
+using namespace std;
+
+#define UNI_LEAK_STACK_FACTOR 0.43
+
+int powers (int base, int n);
+bool is_pow2(int64_t val);
+uint32_t _log2(uint64_t num);
+int factorial(int n, int m = 1);
+int combination(int n, int m);
+
+//#define DBG
+#ifdef DBG
+ #define PRINTDW(a);\
+ a;
+#else
+ #define PRINTDW(a);\
+
+#endif
+
+
+enum Wire_placement {
+ outside_mat,
+ inside_mat,
+ local_wires
+};
+
+
+
+enum Htree_type {
+ Add_htree,
+ Data_in_htree,
+ Data_out_htree,
+ Search_in_htree,
+ Search_out_htree,
+};
+
+enum Gate_type {
+ nmos,
+ pmos,
+ inv,
+ nand,
+ nor,
+ tri,
+ tg
+};
+
+enum Half_net_topology {
+ parallel,
+ series
+};
+
+double logtwo (double x);
+
+double gate_C(
+ double width,
+ double wirelength,
+ bool _is_dram = false,
+ bool _is_sram = false,
+ bool _is_wl_tr = false);
+
+double gate_C_pass(
+ double width,
+ double wirelength,
+ bool _is_dram = false,
+ bool _is_sram = false,
+ bool _is_wl_tr = false);
+
+double drain_C_(
+ double width,
+ int nchannel,
+ int stack,
+ int next_arg_thresh_folding_width_or_height_cell,
+ double fold_dimension,
+ bool _is_dram = false,
+ bool _is_sram = false,
+ bool _is_wl_tr = false);
+
+double tr_R_on(
+ double width,
+ int nchannel,
+ int stack,
+ bool _is_dram = false,
+ bool _is_sram = false,
+ bool _is_wl_tr = false);
+
+double R_to_w(
+ double res,
+ int nchannel,
+ bool _is_dram = false,
+ bool _is_sram = false,
+ bool _is_wl_tr = false);
+
+double horowitz (
+ double inputramptime,
+ double tf,
+ double vs1,
+ double vs2,
+ int rise);
+
+double pmos_to_nmos_sz_ratio(
+ bool _is_dram = false,
+ bool _is_wl_tr = false);
+
+double simplified_nmos_leakage(
+ double nwidth,
+ bool _is_dram = false,
+ bool _is_cell = false,
+ bool _is_wl_tr = false);
+
+double simplified_pmos_leakage(
+ double pwidth,
+ bool _is_dram = false,
+ bool _is_cell = false,
+ bool _is_wl_tr = false);
+
+
+double cmos_Ileak(
+ double nWidth,
+ double pWidth,
+ bool _is_dram = false,
+ bool _is_cell = false,
+ bool _is_wl_tr = false);
+
+double cmos_Ig_n(
+ double nWidth,
+ bool _is_dram = false,
+ bool _is_cell = false,
+ bool _is_wl_tr= false);
+
+double cmos_Ig_p(
+ double pWidth,
+ bool _is_dram = false,
+ bool _is_cell = false,
+ bool _is_wl_tr= false);
+
+
+double cmos_Isub_leakage(
+ double nWidth,
+ double pWidth,
+ int fanin,
+ enum Gate_type g_type,
+ bool _is_dram = false,
+ bool _is_cell = false,
+ bool _is_wl_tr = false,
+ enum Half_net_topology topo = series);
+
+double cmos_Ig_leakage(
+ double nWidth,
+ double pWidth,
+ int fanin,
+ enum Gate_type g_type,
+ bool _is_dram = false,
+ bool _is_cell = false,
+ bool _is_wl_tr = false,
+ enum Half_net_topology topo = series);
+
+double shortcircuit(
+ double vt,
+ double velocity_index,
+ double c_in,
+ double c_out,
+ double w_nmos,
+ double w_pmos,
+ double i_on_n,
+ double i_on_p,
+ double i_on_n_in,
+ double i_on_p_in,
+ double vdd);
+
+double shortcircuit_simple(
+ double vt,
+ double velocity_index,
+ double c_in,
+ double c_out,
+ double w_nmos,
+ double w_pmos,
+ double i_on_n,
+ double i_on_p,
+ double i_on_n_in,
+ double i_on_p_in,
+ double vdd);
+//set power point product mask; strictly speaking this is not real point product
+inline void set_pppm(
+ double * pppv,
+ double a=1,
+ double b=1,
+ double c=1,
+ double d=1
+ ){
+ pppv[0]= a;
+ pppv[1]= b;
+ pppv[2]= c;
+ pppv[3]= d;
+
+}
+
+inline void set_sppm(
+ double * sppv,
+ double a=1,
+ double b=1,
+ double c=1,
+ double d=1
+ ){
+ sppv[0]= a;
+ sppv[1]= b;
+ sppv[2]= c;
+}
+
+#endif
diff --git a/ext/mcpat/cacti/batch_tests b/ext/mcpat/cacti/batch_tests
new file mode 100755
index 000000000..45a03898e
--- /dev/null
+++ b/ext/mcpat/cacti/batch_tests
@@ -0,0 +1,41 @@
+rm -rf ./out.csv
+./cacti 8192 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 16384 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 32768 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 65536 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 131072 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 262144 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 524288 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 1048576 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 2097152 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 4194304 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 8388608 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 16777216 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 0 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 8192 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 16384 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 32768 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 65536 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 131072 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 262144 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 524288 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 1048576 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 2097152 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 4194304 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 8388608 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 16777216 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 3 0 0 0 1 1 1 1 0 0 0 1 1
+./cacti 8192 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 16384 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 32768 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 65536 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 131072 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 262144 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 524288 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 1048576 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 2097152 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 4194304 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 8388608 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 16777216 64 1 1 0 0 0 1 65 512 0 0 0 0 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 2097152 64 1 1 0 0 0 1 65 512 0 0 0 1 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 4194304 64 1 1 0 0 0 1 65 512 0 0 0 1 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 8388608 64 1 1 0 0 0 1 65 512 0 0 0 1 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
+./cacti 16777216 64 1 1 0 0 0 1 65 512 0 0 0 1 0 1 0 0 0 0 10 1000 1000 1000 1000 360 4 1 1 1 1 1 1 1 0 0 0 1 1
diff --git a/ext/mcpat/cacti/cache.cfg b/ext/mcpat/cacti/cache.cfg
new file mode 100755
index 000000000..03de34a13
--- /dev/null
+++ b/ext/mcpat/cacti/cache.cfg
@@ -0,0 +1,175 @@
+# Cache size
+//-size (bytes) 2048
+//-size (bytes) 4096
+//-size (bytes) 32768
+//-size (bytes) 262144
+//-size (bytes) 1048576
+//-size (bytes) 2097152
+//-size (bytes) 4194304
+//-size (bytes) 8388608
+//-size (bytes) 16777216
+//-size (bytes) 33554432
+//-size (bytes) 134217728
+//-size (bytes) 67108864
+-size (bytes) 1073741824
+
+# Line size
+//-block size (bytes) 8
+-block size (bytes) 64
+
+# To model Fully Associative cache, set associativity to zero
+//-associativity 0
+//-associativity 2
+//-associativity 4
+-associativity 8
+//-associativity 16
+
+-read-write port 1
+-exclusive read port 0
+-exclusive write port 0
+-single ended read ports 0
+
+# Multiple banks connected using a bus
+-UCA bank count 1
+-technology (u) 0.022
+//-technology (u) 0.040
+//-technology (u) 0.032
+//-technology (u) 0.090
+
+# following three parameters are meaningful only for main memories
+
+-page size (bits) 8192
+-burst length 8
+-internal prefetch width 8
+
+# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
+-Data array cell type - "itrs-hp"
+//-Data array cell type - "itrs-lstp"
+//-Data array cell type - "itrs-lop"
+
+# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
+-Data array peripheral type - "itrs-hp"
+//-Data array peripheral type - "itrs-lstp"
+//-Data array peripheral type - "itrs-lop"
+
+# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
+-Tag array cell type - "itrs-hp"
+//-Tag array cell type - "itrs-lstp"
+//-Tag array cell type - "itrs-lop"
+
+# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
+-Tag array peripheral type - "itrs-hp"
+//-Tag array peripheral type - "itrs-lstp"
+//-Tag array peripheral type - "itrs-lop
+
+# Bus width include data bits and address bits required by the decoder
+//-output/input bus width 16
+-output/input bus width 512
+
+// 300-400 in steps of 10
+-operating temperature (K) 360
+
+# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
+# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
+-cache type "cache"
+//-cache type "ram"
+//-cache type "main memory"
+
+# to model special structure like branch target buffers, directory, etc.
+# change the tag size parameter
+# if you want cacti to calculate the tagbits, set the tag size to "default"
+-tag size (b) "default"
+//-tag size (b) 22
+
+# fast - data and tag access happen in parallel
+# sequential - data array is accessed after accessing the tag array
+# normal - data array lookup and tag access happen in parallel
+# final data block is broadcasted in data array h-tree
+# after getting the signal from the tag array
+//-access mode (normal, sequential, fast) - "fast"
+-access mode (normal, sequential, fast) - "normal"
+//-access mode (normal, sequential, fast) - "sequential"
+
+
+# DESIGN OBJECTIVE for UCA (or banks in NUCA)
+-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
+
+# Percentage deviation from the minimum value
+# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
+# that compromises at most 10% delay.
+# NOTE: Try reasonable values for % deviation. Inconsistent deviation
+# percentage values will not produce any valid organizations. For example,
+# 0:0:100:100:100 will try to identify an organization that has both
+# least delay and dynamic power. Since such an organization is not possible, CACTI will
+# throw an error. Refer CACTI-6 Technical report for more details
+-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
+
+# Objective for NUCA
+-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
+-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
+
+# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
+# energy-delay or energy-delay sq. product
+# Note: Optimize tag will disable weight or deviate values mentioned above
+# Set it to NONE to let weight and deviate values determine the
+# appropriate cache configuration
+//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
+-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
+//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
+
+-Cache model (NUCA, UCA) - "UCA"
+//-Cache model (NUCA, UCA) - "NUCA"
+
+# In order for CACTI to find the optimal NUCA bank value the following
+# variable should be assigned 0.
+-NUCA bank count 0
+
+# NOTE: for nuca network frequency is set to a default value of
+# 5GHz in time.c. CACTI automatically
+# calculates the maximum possible frequency and downgrades this value if necessary
+
+# By default CACTI considers both full-swing and low-swing
+# wires to find an optimal configuration. However, it is possible to
+# restrict the search space by changing the signalling from "default" to
+# "fullswing" or "lowswing" type.
+//-Wire signalling (fullswing, lowswing, default) - "Global_10"
+-Wire signalling (fullswing, lowswing, default) - "default"
+//-Wire signalling (fullswing, lowswing, default) - "lowswing"
+
+//-Wire inside mat - "global"
+-Wire inside mat - "semi-global"
+//-Wire outside mat - "global"
+-Wire outside mat - "semi-global"
+
+//-Interconnect projection - "conservative"
+-Interconnect projection - "aggressive"
+
+# Contention in network (which is a function of core count and cache level) is one of
+# the critical factor used for deciding the optimal bank count value
+# core count can be 4, 8, or 16
+//-Core count 4
+-Core count 8
+//-Core count 16
+-Cache level (L2/L3) - "L3"
+
+-Add ECC - "true"
+
+//-Print level (DETAILED, CONCISE) - "CONCISE"
+-Print level (DETAILED, CONCISE) - "DETAILED"
+
+# for debugging
+//-Print input parameters - "true"
+-Print input parameters - "false"
+# force CACTI to model the cache with the
+# following Ndbl, Ndwl, Nspd, Ndsam,
+# and Ndcm values
+//-Force cache config - "true"
+-Force cache config - "false"
+-Ndwl 1
+-Ndbl 1
+-Nspd 0
+-Ndcm 1
+-Ndsam1 0
+-Ndsam2 0
+
+
diff --git a/ext/mcpat/cacti/cacti.i b/ext/mcpat/cacti/cacti.i
new file mode 100644
index 000000000..796413872
--- /dev/null
+++ b/ext/mcpat/cacti/cacti.i
@@ -0,0 +1,8 @@
+%module cacti
+%{
+/* Includes the header in the wrapper code */
+#include "cacti_interface.h"
+%}
+
+/* Parse the header file to generate wrappers */
+%include "cacti_interface.h" \ No newline at end of file
diff --git a/ext/mcpat/cacti/cacti.mk b/ext/mcpat/cacti/cacti.mk
new file mode 100644
index 000000000..4d6de8db8
--- /dev/null
+++ b/ext/mcpat/cacti/cacti.mk
@@ -0,0 +1,51 @@
+TARGET = cacti
+SHELL = /bin/sh
+.PHONY: all depend clean
+.SUFFIXES: .cc .o
+
+ifndef NTHREADS
+ NTHREADS = 8
+endif
+
+
+LIBS =
+INCS = -lm
+
+ifeq ($(TAG),dbg)
+ DBG = -Wall
+ OPT = -ggdb -g -O0 -DNTHREADS=1 -gstabs+
+else
+ DBG =
+ OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS)
+endif
+
+#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT)
+CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT)
+CXX = g++ -m32
+CC = gcc -m32
+
+SRCS = area.cc bank.cc mat.cc main.cc Ucache.cc io.cc technology.cc basic_circuit.cc parameter.cc \
+ decoder.cc component.cc uca.cc subarray.cc wire.cc htree2.cc \
+ cacti_interface.cc router.cc nuca.cc crossbar.cc arbiter.cc
+
+OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS))
+PYTHONLIB_SRCS = $(patsubst main.cc, ,$(SRCS)) obj_$(TAG)/cacti_wrap.cc
+PYTHONLIB_OBJS = $(patsubst %.cc,%.o,$(PYTHONLIB_SRCS))
+INCLUDES = -I /usr/include/python2.4 -I /usr/lib/python2.4/config
+
+all: obj_$(TAG)/$(TARGET)
+ cp -f obj_$(TAG)/$(TARGET) $(TARGET)
+
+obj_$(TAG)/$(TARGET) : $(OBJS)
+ $(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread
+
+#obj_$(TAG)/%.o : %.cc
+# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $<
+
+obj_$(TAG)/%.o : %.cc
+ $(CXX) $(CXXFLAGS) -c $< -o $@
+
+clean:
+ -rm -f *.o _cacti.so cacti.py $(TARGET)
+
+
diff --git a/ext/mcpat/cacti/cacti_interface.cc b/ext/mcpat/cacti/cacti_interface.cc
new file mode 100644
index 000000000..b6d0d13de
--- /dev/null
+++ b/ext/mcpat/cacti/cacti_interface.cc
@@ -0,0 +1,173 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#include <pthread.h>
+
+#include <algorithm>
+#include <cmath>
+#include <ctime>
+#include <iostream>
+
+#include "Ucache.h"
+#include "area.h"
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "const.h"
+#include "parameter.h"
+
+using namespace std;
+
+
+bool mem_array::lt(const mem_array * m1, const mem_array * m2)
+{
+ if (m1->Nspd < m2->Nspd) return true;
+ else if (m1->Nspd > m2->Nspd) return false;
+ else if (m1->Ndwl < m2->Ndwl) return true;
+ else if (m1->Ndwl > m2->Ndwl) return false;
+ else if (m1->Ndbl < m2->Ndbl) return true;
+ else if (m1->Ndbl > m2->Ndbl) return false;
+ else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
+ else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
+ else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
+ else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
+ else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
+ else return false;
+}
+
+
+
+void uca_org_t::find_delay()
+{
+ mem_array * data_arr = data_array2;
+ mem_array * tag_arr = tag_array2;
+
+ // check whether it is a regular cache or scratch ram
+ if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
+ {
+ access_time = data_arr->access_time;
+ }
+ // Both tag and data lookup happen in parallel
+ // and the entire set is sent over the data array h-tree without
+ // waiting for the way-select signal --TODO add the corresponding
+ // power overhead Nav
+ else if (g_ip->fast_access == true)
+ {
+ access_time = MAX(tag_arr->access_time, data_arr->access_time);
+ }
+ // Tag is accessed first. On a hit, way-select signal along with the
+ // address is sent to read/write the appropriate block in the data
+ // array
+ else if (g_ip->is_seq_acc == true)
+ {
+ access_time = tag_arr->access_time + data_arr->access_time;
+ }
+ // Normal access: tag array access and data array access happen in parallel.
+ // But, the data array will wait for the way-select and transfer only the
+ // appropriate block over the h-tree.
+ else
+ {
+ access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
+ data_arr->delay_before_subarray_output_driver) +
+ data_arr->delay_from_subarray_output_driver_to_output;
+ }
+}
+
+
+
+void uca_org_t::find_energy()
+{
+ if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache)
+ power = data_array2->power + tag_array2->power;
+ else
+ power = data_array2->power;
+}
+
+
+
+void uca_org_t::find_area()
+{
+ if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false)
+ {
+ cache_ht = data_array2->height;
+ cache_len = data_array2->width;
+ }
+ else
+ {
+ cache_ht = MAX(tag_array2->height, data_array2->height);
+ cache_len = tag_array2->width + data_array2->width;
+ }
+ area = cache_ht * cache_len;
+}
+
+void uca_org_t::adjust_area()
+{
+ double area_adjust;
+ if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
+ {
+ if (data_array2->area_efficiency/100.0<0.2)
+ {
+ //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
+ area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0));
+ cache_ht = cache_ht/area_adjust;
+ cache_len = cache_len/area_adjust;
+ }
+ }
+ area = cache_ht * cache_len;
+}
+
+void uca_org_t::find_cyc()
+{
+ if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false)
+ {
+ cycle_time = data_array2->cycle_time;
+ }
+ else
+ {
+ cycle_time = MAX(tag_array2->cycle_time,
+ data_array2->cycle_time);
+ }
+}
+
+uca_org_t :: uca_org_t()
+:tag_array2(0),
+ data_array2(0)
+{
+
+}
+
+void uca_org_t :: cleanup()
+{
+ if (data_array2!=0)
+ delete data_array2;
+ if (tag_array2!=0)
+ delete tag_array2;
+}
diff --git a/ext/mcpat/cacti/cacti_interface.h b/ext/mcpat/cacti/cacti_interface.h
new file mode 100644
index 000000000..f37596554
--- /dev/null
+++ b/ext/mcpat/cacti/cacti_interface.h
@@ -0,0 +1,633 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __CACTI_INTERFACE_H__
+#define __CACTI_INTERFACE_H__
+
+#include <iostream>
+#include <list>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "const.h"
+
+using namespace std;
+
+
+class min_values_t;
+class mem_array;
+class uca_org_t;
+
+
+class powerComponents
+{
+ public:
+ double dynamic;
+ double leakage;
+ double gate_leakage;
+ double short_circuit;
+ double longer_channel_leakage;
+
+ powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { }
+ powerComponents(const powerComponents & obj) { *this = obj; }
+ powerComponents & operator=(const powerComponents & rhs)
+ {
+ dynamic = rhs.dynamic;
+ leakage = rhs.leakage;
+ gate_leakage = rhs.gate_leakage;
+ short_circuit = rhs.short_circuit;
+ longer_channel_leakage = rhs.longer_channel_leakage;
+ return *this;
+ }
+ void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;}
+
+ friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
+ friend powerComponents operator*(const powerComponents & x, double const * const y);
+};
+
+
+
+class powerDef
+{
+ public:
+ powerComponents readOp;
+ powerComponents writeOp;
+ powerComponents searchOp;//Sheng: for CAM and FA
+
+ powerDef() : readOp(), writeOp(), searchOp() { }
+ void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
+
+ friend powerDef operator+(const powerDef & x, const powerDef & y);
+ friend powerDef operator*(const powerDef & x, double const * const y);
+};
+
+enum Wire_type
+{
+ Global /* gloabl wires with repeaters */,
+ Global_5 /* 5% delay penalty */,
+ Global_10 /* 10% delay penalty */,
+ Global_20 /* 20% delay penalty */,
+ Global_30 /* 30% delay penalty */,
+ Low_swing /* differential low power wires with high area overhead */,
+ Semi_global /* mid-level wires with repeaters*/,
+ Transmission /* tranmission lines with high area overhead */,
+ Optical /* optical wires */,
+ Invalid_wtype
+};
+
+
+
+class InputParameter
+{
+ public:
+ void parse_cfg(const string & infile);
+
+ bool error_checking(); // return false if the input parameters are problematic
+ void display_ip();
+
+ unsigned int cache_sz; // in bytes
+ unsigned int line_sz;
+ unsigned int assoc;
+ unsigned int nbanks;
+ unsigned int out_w;// == nr_bits_out
+ bool specific_tag;
+ unsigned int tag_w;
+ unsigned int access_mode;
+ unsigned int obj_func_dyn_energy;
+ unsigned int obj_func_dyn_power;
+ unsigned int obj_func_leak_power;
+ unsigned int obj_func_cycle_t;
+
+ double F_sz_nm; // feature size in nm
+ double F_sz_um; // feature size in um
+ unsigned int num_rw_ports;
+ unsigned int num_rd_ports;
+ unsigned int num_wr_ports;
+ unsigned int num_se_rd_ports; // number of single ended read ports
+ unsigned int num_search_ports; // Sheng: number of search ports for CAM
+ bool is_main_mem;
+ bool is_cache;
+ bool pure_ram;
+ bool pure_cam;
+ bool rpters_in_htree; // if there are repeaters in htree segment
+ unsigned int ver_htree_wires_over_array;
+ unsigned int broadcast_addr_din_over_ver_htrees;
+ unsigned int temp;
+
+ unsigned int ram_cell_tech_type;
+ unsigned int peri_global_tech_type;
+ unsigned int data_arr_ram_cell_tech_type;
+ unsigned int data_arr_peri_global_tech_type;
+ unsigned int tag_arr_ram_cell_tech_type;
+ unsigned int tag_arr_peri_global_tech_type;
+
+ unsigned int burst_len;
+ unsigned int int_prefetch_w;
+ unsigned int page_sz_bits;
+
+ unsigned int ic_proj_type; // interconnect_projection_type
+ unsigned int wire_is_mat_type; // wire_inside_mat_type
+ unsigned int wire_os_mat_type; // wire_outside_mat_type
+ enum Wire_type wt;
+ int force_wiretype;
+ bool print_input_args;
+ unsigned int nuca_cache_sz; // TODO
+ int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm;
+ bool force_cache_config;
+
+ int cache_level;
+ int cores;
+ int nuca_bank_count;
+ int force_nuca_bank;
+
+ int delay_wt, dynamic_power_wt, leakage_power_wt,
+ cycle_time_wt, area_wt;
+ int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
+ cycle_time_wt_nuca, area_wt_nuca;
+
+ int delay_dev, dynamic_power_dev, leakage_power_dev,
+ cycle_time_dev, area_dev;
+ int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
+ cycle_time_dev_nuca, area_dev_nuca;
+ int ed; //ED or ED2 optimization
+ int nuca;
+
+ bool fast_access;
+ unsigned int block_sz; // bytes
+ unsigned int tag_assoc;
+ unsigned int data_assoc;
+ bool is_seq_acc;
+ bool fully_assoc;
+ unsigned int nsets; // == number_of_sets
+ int print_detail;
+
+
+ bool add_ecc_b_;
+ //parameters for design constraint
+ double throughput;
+ double latency;
+ bool pipelinable;
+ int pipeline_stages;
+ int per_stage_vector;
+ bool with_clock_grid;
+};
+
+
+typedef struct{
+ int Ndwl;
+ int Ndbl;
+ double Nspd;
+ int deg_bl_muxing;
+ int Ndsam_lev_1;
+ int Ndsam_lev_2;
+ int number_activated_mats_horizontal_direction;
+ int number_subbanks;
+ int page_size_in_bits;
+ double delay_route_to_bank;
+ double delay_crossbar;
+ double delay_addr_din_horizontal_htree;
+ double delay_addr_din_vertical_htree;
+ double delay_row_predecode_driver_and_block;
+ double delay_row_decoder;
+ double delay_bitlines;
+ double delay_sense_amp;
+ double delay_subarray_output_driver;
+ double delay_bit_mux_predecode_driver_and_block;
+ double delay_bit_mux_decoder;
+ double delay_senseamp_mux_lev_1_predecode_driver_and_block;
+ double delay_senseamp_mux_lev_1_decoder;
+ double delay_senseamp_mux_lev_2_predecode_driver_and_block;
+ double delay_senseamp_mux_lev_2_decoder;
+ double delay_input_htree;
+ double delay_output_htree;
+ double delay_dout_vertical_htree;
+ double delay_dout_horizontal_htree;
+ double delay_comparator;
+ double access_time;
+ double cycle_time;
+ double multisubbank_interleave_cycle_time;
+ double delay_request_network;
+ double delay_inside_mat;
+ double delay_reply_network;
+ double trcd;
+ double cas_latency;
+ double precharge_delay;
+ powerDef power_routing_to_bank;
+ powerDef power_addr_input_htree;
+ powerDef power_data_input_htree;
+ powerDef power_data_output_htree;
+ powerDef power_addr_horizontal_htree;
+ powerDef power_datain_horizontal_htree;
+ powerDef power_dataout_horizontal_htree;
+ powerDef power_addr_vertical_htree;
+ powerDef power_datain_vertical_htree;
+ powerDef power_row_predecoder_drivers;
+ powerDef power_row_predecoder_blocks;
+ powerDef power_row_decoders;
+ powerDef power_bit_mux_predecoder_drivers;
+ powerDef power_bit_mux_predecoder_blocks;
+ powerDef power_bit_mux_decoders;
+ powerDef power_senseamp_mux_lev_1_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_1_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_1_decoders;
+ powerDef power_senseamp_mux_lev_2_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_2_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_2_decoders;
+ powerDef power_bitlines;
+ powerDef power_sense_amps;
+ powerDef power_prechg_eq_drivers;
+ powerDef power_output_drivers_at_subarray;
+ powerDef power_dataout_vertical_htree;
+ powerDef power_comparators;
+ powerDef power_crossbar;
+ powerDef total_power;
+ double area;
+ double all_banks_height;
+ double all_banks_width;
+ double bank_height;
+ double bank_width;
+ double subarray_memory_cell_area_height;
+ double subarray_memory_cell_area_width;
+ double mat_height;
+ double mat_width;
+ double routing_area_height_within_bank;
+ double routing_area_width_within_bank;
+ double area_efficiency;
+// double perc_power_dyn_routing_to_bank;
+// double perc_power_dyn_addr_horizontal_htree;
+// double perc_power_dyn_datain_horizontal_htree;
+// double perc_power_dyn_dataout_horizontal_htree;
+// double perc_power_dyn_addr_vertical_htree;
+// double perc_power_dyn_datain_vertical_htree;
+// double perc_power_dyn_row_predecoder_drivers;
+// double perc_power_dyn_row_predecoder_blocks;
+// double perc_power_dyn_row_decoders;
+// double perc_power_dyn_bit_mux_predecoder_drivers;
+// double perc_power_dyn_bit_mux_predecoder_blocks;
+// double perc_power_dyn_bit_mux_decoders;
+// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
+// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
+// double perc_power_dyn_senseamp_mux_lev_1_decoders;
+// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
+// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
+// double perc_power_dyn_senseamp_mux_lev_2_decoders;
+// double perc_power_dyn_bitlines;
+// double perc_power_dyn_sense_amps;
+// double perc_power_dyn_prechg_eq_drivers;
+// double perc_power_dyn_subarray_output_drivers;
+// double perc_power_dyn_dataout_vertical_htree;
+// double perc_power_dyn_comparators;
+// double perc_power_dyn_crossbar;
+// double perc_power_dyn_spent_outside_mats;
+// double perc_power_leak_routing_to_bank;
+// double perc_power_leak_addr_horizontal_htree;
+// double perc_power_leak_datain_horizontal_htree;
+// double perc_power_leak_dataout_horizontal_htree;
+// double perc_power_leak_addr_vertical_htree;
+// double perc_power_leak_datain_vertical_htree;
+// double perc_power_leak_row_predecoder_drivers;
+// double perc_power_leak_row_predecoder_blocks;
+// double perc_power_leak_row_decoders;
+// double perc_power_leak_bit_mux_predecoder_drivers;
+// double perc_power_leak_bit_mux_predecoder_blocks;
+// double perc_power_leak_bit_mux_decoders;
+// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
+// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
+// double perc_power_leak_senseamp_mux_lev_1_decoders;
+// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
+// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
+// double perc_power_leak_senseamp_mux_lev_2_decoders;
+// double perc_power_leak_bitlines;
+// double perc_power_leak_sense_amps;
+// double perc_power_leak_prechg_eq_drivers;
+// double perc_power_leak_subarray_output_drivers;
+// double perc_power_leak_dataout_vertical_htree;
+// double perc_power_leak_comparators;
+// double perc_power_leak_crossbar;
+// double perc_leak_mats;
+// double perc_active_mats;
+ double refresh_power;
+ double dram_refresh_period;
+ double dram_array_availability;
+ double dyn_read_energy_from_closed_page;
+ double dyn_read_energy_from_open_page;
+ double leak_power_subbank_closed_page;
+ double leak_power_subbank_open_page;
+ double leak_power_request_and_reply_networks;
+ double activate_energy;
+ double read_energy;
+ double write_energy;
+ double precharge_energy;
+} results_mem_array;
+
+
+class uca_org_t
+{
+ public:
+ mem_array * tag_array2;
+ mem_array * data_array2;
+ double access_time;
+ double cycle_time;
+ double area;
+ double area_efficiency;
+ powerDef power;
+ double leak_power_with_sleep_transistors_in_mats;
+ double cache_ht;
+ double cache_len;
+ char file_n[100];
+ double vdd_periph_global;
+ bool valid;
+ results_mem_array tag_array;
+ results_mem_array data_array;
+
+ uca_org_t();
+ void find_delay();
+ void find_energy();
+ void find_area();
+ void find_cyc();
+ void adjust_area();//for McPAT only to adjust routing overhead
+ void cleanup();
+ ~uca_org_t(){};
+};
+
+void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
+
+uca_org_t cacti_interface(const string & infile_name);
+//McPAT's plain interface, please keep !!!
+uca_org_t cacti_interface(InputParameter * const local_interface);
+//McPAT's plain interface, please keep !!!
+uca_org_t init_interface(InputParameter * const local_interface);
+//McPAT's plain interface, please keep !!!
+uca_org_t cacti_interface(
+ int cache_size,
+ int line_size,
+ int associativity,
+ int rw_ports,
+ int excl_read_ports,
+ int excl_write_ports,
+ int single_ended_read_ports,
+ int search_ports,
+ int banks,
+ double tech_node,
+ int output_width,
+ int specific_tag,
+ int tag_width,
+ int access_mode,
+ int cache,
+ int main_mem,
+ int obj_func_delay,
+ int obj_func_dynamic_power,
+ int obj_func_leakage_power,
+ int obj_func_cycle_time,
+ int obj_func_area,
+ int dev_func_delay,
+ int dev_func_dynamic_power,
+ int dev_func_leakage_power,
+ int dev_func_area,
+ int dev_func_cycle_time,
+ int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
+ int temp,
+ int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
+ int data_arr_ram_cell_tech_flavor_in,
+ int data_arr_peri_global_tech_flavor_in,
+ int tag_arr_ram_cell_tech_flavor_in,
+ int tag_arr_peri_global_tech_flavor_in,
+ int interconnect_projection_type_in,
+ int wire_inside_mat_type_in,
+ int wire_outside_mat_type_in,
+ int REPEATERS_IN_HTREE_SEGMENTS_in,
+ int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
+ int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
+ int PAGE_SIZE_BITS_in,
+ int BURST_LENGTH_in,
+ int INTERNAL_PREFETCH_WIDTH_in,
+ int force_wiretype,
+ int wiretype,
+ int force_config,
+ int ndwl,
+ int ndbl,
+ int nspd,
+ int ndcm,
+ int ndsam1,
+ int ndsam2,
+ int ecc);
+// int cache_size,
+// int line_size,
+// int associativity,
+// int rw_ports,
+// int excl_read_ports,
+// int excl_write_ports,
+// int single_ended_read_ports,
+// int banks,
+// double tech_node,
+// int output_width,
+// int specific_tag,
+// int tag_width,
+// int access_mode,
+// int cache,
+// int main_mem,
+// int obj_func_delay,
+// int obj_func_dynamic_power,
+// int obj_func_leakage_power,
+// int obj_func_area,
+// int obj_func_cycle_time,
+// int dev_func_delay,
+// int dev_func_dynamic_power,
+// int dev_func_leakage_power,
+// int dev_func_area,
+// int dev_func_cycle_time,
+// int temp,
+// int data_arr_ram_cell_tech_flavor_in,
+// int data_arr_peri_global_tech_flavor_in,
+// int tag_arr_ram_cell_tech_flavor_in,
+// int tag_arr_peri_global_tech_flavor_in,
+// int interconnect_projection_type_in,
+// int wire_inside_mat_type_in,
+// int wire_outside_mat_type_in,
+// int REPEATERS_IN_HTREE_SEGMENTS_in,
+// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
+// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
+//// double MAXAREACONSTRAINT_PERC_in,
+//// double MAXACCTIMECONSTRAINT_PERC_in,
+//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in,
+// int PAGE_SIZE_BITS_in,
+// int BURST_LENGTH_in,
+// int INTERNAL_PREFETCH_WIDTH_in);
+
+//Naveen's interface
+uca_org_t cacti_interface(
+ int cache_size,
+ int line_size,
+ int associativity,
+ int rw_ports,
+ int excl_read_ports,
+ int excl_write_ports,
+ int single_ended_read_ports,
+ int banks,
+ double tech_node,
+ int page_sz,
+ int burst_length,
+ int pre_width,
+ int output_width,
+ int specific_tag,
+ int tag_width,
+ int access_mode, //0 normal, 1 seq, 2 fast
+ int cache, //scratch ram or cache
+ int main_mem,
+ int obj_func_delay,
+ int obj_func_dynamic_power,
+ int obj_func_leakage_power,
+ int obj_func_area,
+ int obj_func_cycle_time,
+ int dev_func_delay,
+ int dev_func_dynamic_power,
+ int dev_func_leakage_power,
+ int dev_func_area,
+ int dev_func_cycle_time,
+ int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
+ int temp,
+ int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
+ int data_arr_ram_cell_tech_flavor_in,
+ int data_arr_peri_global_tech_flavor_in,
+ int tag_arr_ram_cell_tech_flavor_in,
+ int tag_arr_peri_global_tech_flavor_in,
+ int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
+ int wire_inside_mat_type_in,
+ int wire_outside_mat_type_in,
+ int is_nuca, // 0 - UCA, 1 - NUCA
+ int core_count,
+ int cache_level, // 0 - L2, 1 - L3
+ int nuca_bank_count,
+ int nuca_obj_func_delay,
+ int nuca_obj_func_dynamic_power,
+ int nuca_obj_func_leakage_power,
+ int nuca_obj_func_area,
+ int nuca_obj_func_cycle_time,
+ int nuca_dev_func_delay,
+ int nuca_dev_func_dynamic_power,
+ int nuca_dev_func_leakage_power,
+ int nuca_dev_func_area,
+ int nuca_dev_func_cycle_time,
+ int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
+ int p_input);
+
+class mem_array
+{
+ public:
+ int Ndcm;
+ int Ndwl;
+ int Ndbl;
+ double Nspd;
+ int deg_bl_muxing;
+ int Ndsam_lev_1;
+ int Ndsam_lev_2;
+ double access_time;
+ double cycle_time;
+ double multisubbank_interleave_cycle_time;
+ double area_ram_cells;
+ double area;
+ powerDef power;
+ double delay_senseamp_mux_decoder;
+ double delay_before_subarray_output_driver;
+ double delay_from_subarray_output_driver_to_output;
+ double height;
+ double width;
+
+ double mat_height;
+ double mat_length;
+ double subarray_length;
+ double subarray_height;
+
+ double delay_route_to_bank,
+ delay_input_htree,
+ delay_row_predecode_driver_and_block,
+ delay_row_decoder,
+ delay_bitlines,
+ delay_sense_amp,
+ delay_subarray_output_driver,
+ delay_dout_htree,
+ delay_comparator,
+ delay_matchlines;
+
+ double all_banks_height,
+ all_banks_width,
+ area_efficiency;
+
+ powerDef power_routing_to_bank;
+ powerDef power_addr_input_htree;
+ powerDef power_data_input_htree;
+ powerDef power_data_output_htree;
+ powerDef power_htree_in_search;
+ powerDef power_htree_out_search;
+ powerDef power_row_predecoder_drivers;
+ powerDef power_row_predecoder_blocks;
+ powerDef power_row_decoders;
+ powerDef power_bit_mux_predecoder_drivers;
+ powerDef power_bit_mux_predecoder_blocks;
+ powerDef power_bit_mux_decoders;
+ powerDef power_senseamp_mux_lev_1_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_1_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_1_decoders;
+ powerDef power_senseamp_mux_lev_2_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_2_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_2_decoders;
+ powerDef power_bitlines;
+ powerDef power_sense_amps;
+ powerDef power_prechg_eq_drivers;
+ powerDef power_output_drivers_at_subarray;
+ powerDef power_dataout_vertical_htree;
+ powerDef power_comparators;
+
+ powerDef power_cam_bitline_precharge_eq_drv;
+ powerDef power_searchline;
+ powerDef power_searchline_precharge;
+ powerDef power_matchlines;
+ powerDef power_matchline_precharge;
+ powerDef power_matchline_to_wordline_drv;
+
+ min_values_t *arr_min;
+ enum Wire_type wt;
+
+ // dram stats
+ double activate_energy, read_energy, write_energy, precharge_energy,
+ refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
+ leak_power_request_and_reply_networks;
+
+ double precharge_delay;
+
+ static bool lt(const mem_array * m1, const mem_array * m2);
+};
+
+
+#endif
diff --git a/ext/mcpat/cacti/component.cc b/ext/mcpat/cacti/component.cc
new file mode 100644
index 000000000..733108407
--- /dev/null
+++ b/ext/mcpat/cacti/component.cc
@@ -0,0 +1,236 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+
+#include "bank.h"
+#include "component.h"
+#include "decoder.h"
+
+using namespace std;
+
+
+
+Component::Component()
+ :area(), power(), rt_power(),delay(0)
+{
+}
+
+
+
+Component::~Component()
+{
+}
+
+
+
+double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr)
+{
+ double w_poly = g_ip->F_sz_um;
+ double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
+ double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
+ num_stacked_in * w_poly +
+ (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
+
+ if (num_folded_tr > 1)
+ {
+ total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
+ (num_folded_tr - 1) * num_stacked_in * w_poly +
+ (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
+ }
+
+ return total_diff_w;
+}
+
+
+
+double Component::compute_gate_area(
+ int gate_type,
+ int num_inputs,
+ double w_pmos,
+ double w_nmos,
+ double h_gate)
+{
+ if (w_pmos <= 0.0 || w_nmos <= 0.0)
+ {
+ return 0.0;
+ }
+
+ double w_folded_pmos, w_folded_nmos;
+ int num_folded_pmos, num_folded_nmos;
+ double total_ndiff_w, total_pdiff_w;
+ Area gate;
+
+ double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
+ double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
+
+ if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0)
+ {
+ return 0.0;
+ }
+
+ w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
+ w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
+ assert(w_folded_pmos > 0);
+
+ num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
+ num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
+
+ switch (gate_type)
+ {
+ case INV:
+ total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
+ break;
+
+ case NOR:
+ total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
+ break;
+
+ case NAND:
+ total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
+ break;
+ default:
+ cout << "Unknown gate type: " << gate_type << endl;
+ exit(1);
+ }
+
+ gate.w = MAX(total_ndiff_w, total_pdiff_w);
+
+ if (w_folded_nmos > w_nmos)
+ {
+ //means that the height of the gate can
+ //be made smaller than the input height specified, so calculate the height of the gate.
+ gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
+ }
+ else
+ {
+ gate.h = h_gate;
+ }
+ return gate.get_area();
+}
+
+
+
+double Component::compute_tr_width_after_folding(
+ double input_width,
+ double threshold_folding_width)
+{//This is actually the width of the cell not the width of a device.
+//The width of a cell and the width of a device is orthogonal.
+ if (input_width <= 0)
+ {
+ return 0;
+ }
+
+ int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
+ double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
+ double width_poly = g_ip->F_sz_um;
+ double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
+
+ return total_diff_width;
+}
+
+
+
+double Component::height_sense_amplifier(double pitch_sense_amp)
+{
+ // compute the height occupied by all PMOS transistors
+ double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
+ compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
+ 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
+
+ // compute the height occupied by all NMOS transistors
+ double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
+ compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
+ 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
+
+ // compute total height by considering gap between the p and n diffusion areas
+ return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
+}
+
+
+
+int Component::logical_effort(
+ int num_gates_min,
+ double g,
+ double F,
+ double * w_n,
+ double * w_p,
+ double C_load,
+ double p_to_n_sz_ratio,
+ bool is_dram_,
+ bool is_wl_tr_,
+ double max_w_nmos)
+{
+ int num_gates = (int) (log(F) / log(fopt));
+
+ // check if num_gates is odd. if so, add 1 to make it even
+ num_gates+= (num_gates % 2) ? 1 : 0;
+ num_gates = MAX(num_gates, num_gates_min);
+
+ // recalculate the effective fanout of each stage
+ double f = pow(F, 1.0 / num_gates);
+ int i = num_gates - 1;
+ double C_in = C_load / f;
+ w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
+ w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
+
+ if (w_n[i] > max_w_nmos)
+ {
+ double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
+ F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
+ num_gates = (int) (log(F) / log(fopt)) + 1;
+ num_gates+= (num_gates % 2) ? 1 : 0;
+ num_gates = MAX(num_gates, num_gates_min);
+ f = pow(F, 1.0 / (num_gates - 1));
+ i = num_gates - 1;
+ w_n[i] = max_w_nmos;
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
+ }
+
+ for (i = num_gates - 2; i >= 1; i--)
+ {
+ w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
+ }
+
+ assert(num_gates <= MAX_NUMBER_GATES_STAGE);
+ return num_gates;
+}
+
diff --git a/ext/mcpat/cacti/component.h b/ext/mcpat/cacti/component.h
new file mode 100644
index 000000000..75e2cb075
--- /dev/null
+++ b/ext/mcpat/cacti/component.h
@@ -0,0 +1,84 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __COMPONENT_H__
+#define __COMPONENT_H__
+
+#include "area.h"
+#include "parameter.h"
+
+using namespace std;
+
+class Crossbar;
+class Bank;
+
+class Component
+{
+ public:
+ Component();
+ ~Component();
+
+ Area area;
+ powerDef power,rt_power;
+ double delay;
+ double cycle_time;
+
+ double compute_gate_area(
+ int gate_type,
+ int num_inputs,
+ double w_pmos,
+ double w_nmos,
+ double h_gate);
+
+ double compute_tr_width_after_folding(double input_width, double threshold_folding_width);
+ double height_sense_amplifier(double pitch_sense_amp);
+
+ protected:
+ int logical_effort(
+ int num_gates_min,
+ double g,
+ double F,
+ double * w_n,
+ double * w_p,
+ double C_load,
+ double p_to_n_sz_ratio,
+ bool is_dram_,
+ bool is_wl_tr_,
+ double max_w_nmos);
+
+ private:
+ double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
+};
+
+#endif
+
diff --git a/ext/mcpat/cacti/const.h b/ext/mcpat/cacti/const.h
new file mode 100644
index 000000000..aef7d019b
--- /dev/null
+++ b/ext/mcpat/cacti/const.h
@@ -0,0 +1,270 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#ifndef __CONST_H__
+#define __CONST_H__
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* The following are things you might want to change
+ * when compiling
+ */
+
+/*
+ * Address bits in a word, and number of output bits from the cache
+ */
+
+/*
+was: #define ADDRESS_BITS 32
+now: I'm using 42 bits as in the Power4,
+since that's bigger then the 36 bits on the Pentium 4
+and 40 bits on the Opteron
+*/
+const int ADDRESS_BITS = 42;
+
+/*dt: In addition to the tag bits, the tags also include 1 valid bit, 1 dirty bit, 2 bits for a 4-state
+ cache coherency protocoll (MESI), 1 bit for MRU (change this to log(ways) for full LRU).
+ So in total we have 1 + 1 + 2 + 1 = 5 */
+const int EXTRA_TAG_BITS = 5;
+
+/* limits on the various N parameters */
+
+const unsigned int MAXDATAN = 512; // maximum for Ndwl and Ndbl
+const unsigned int MAXSUBARRAYS = 1048576; // maximum subarrays for data and tag arrays
+const unsigned int MAXDATASPD = 256; // maximum for Nspd
+const unsigned int MAX_COL_MUX = 256;
+
+
+
+#define ROUTER_TYPES 3
+#define WIRE_TYPES 6
+
+const double Cpolywire = 0;
+
+
+/* Threshold voltages (as a proportion of Vdd)
+ If you don't know them, set all values to 0.5 */
+#define VTHFA1 0.452
+#define VTHFA2 0.304
+#define VTHFA3 0.420
+#define VTHFA4 0.413
+#define VTHFA5 0.405
+#define VTHFA6 0.452
+#define VSINV 0.452
+#define VTHCOMPINV 0.437
+#define VTHMUXNAND 0.548 // TODO : this constant must be revisited
+#define VTHEVALINV 0.452
+#define VTHSENSEEXTDRV 0.438
+
+
+//WmuxdrvNANDn and WmuxdrvNANDp are no longer being used but it's part of the old
+//delay_comparator function which we are using exactly as it used to be, so just setting these to 0
+const double WmuxdrvNANDn = 0;
+const double WmuxdrvNANDp = 0;
+
+
+/*===================================================================*/
+/*
+ * The following are things you probably wouldn't want to change.
+ */
+
+#define BIGNUM 1e30
+#define INF 9999999
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#define MIN(a,b) (((a)<(b))?(a):(b))
+
+/* Used to communicate with the horowitz model */
+#define RISE 1
+#define FALL 0
+#define NCH 1
+#define PCH 0
+
+
+#define EPSILON 0.5 //v4.1: This constant is being used in order to fix floating point -> integer
+//conversion problems that were occuring within CACTI. Typical problem that was occuring was
+//that with different compilers a floating point number like 3.0 would get represented as either
+//2.9999....or 3.00000001 and then the integer part of the floating point number (3.0) would
+//be computed differently depending on the compiler. What we are doing now is to replace
+//int (x) with (int) (x+EPSILON) where EPSILON is 0.5. This would fix such problems. Note that
+//this works only when x is an integer >= 0.
+/*
+ * Sheng thinks this is more a solution to solve the simple truncate problem
+ * (http://www.cs.tut.fi/~jkorpela/round.html) rather than the problem mentioned above.
+ * Unfortunately, this solution causes nasty bugs (different results when using O0 and O3).
+ * Moreover, round is not correct in CACTI since when an extra fraction of bit/line is needed,
+ * we need to provide a complete bit/line even the fraction is just 0.01.
+ * So, in later version than 6.5 we use (int)ceil() to get double to int conversion.
+ */
+
+#define EPSILON2 0.1
+#define EPSILON3 0.6
+
+
+#define MINSUBARRAYROWS 16 //For simplicity in modeling, for the row decoding structure, we assume
+//that each row predecode block is composed of at least one 2-4 decoder. When the outputs from the
+//row predecode blocks are combined this means that there are at least 4*4=16 row decode outputs
+#define MAXSUBARRAYROWS 262144 //Each row predecode block produces a max of 2^9 outputs. So
+//the maximum number of row decode outputs will be 2^9*2^9
+#define MINSUBARRAYCOLS 2
+#define MAXSUBARRAYCOLS 262144
+
+
+#define INV 0
+#define NOR 1
+#define NAND 2
+
+
+#define NUMBER_TECH_FLAVORS 4
+
+#define NUMBER_INTERCONNECT_PROJECTION_TYPES 2 //aggressive and conservative
+//0 = Aggressive projections, 1 = Conservative projections
+#define NUMBER_WIRE_TYPES 4 //local, semi-global and global
+//1 = 'Semi-global' wire type, 2 = 'Global' wire type
+
+
+const int dram_cell_tech_flavor = 3;
+
+
+#define VBITSENSEMIN 0.08 //minimum bitline sense voltage is fixed to be 80 mV.
+
+#define fopt 4.0
+
+#define INPUT_WIRE_TO_INPUT_GATE_CAP_RATIO 0
+#define BUFFER_SEPARATION_LENGTH_MULTIPLIER 1
+#define NUMBER_MATS_PER_REDUNDANT_MAT 8
+
+#define NUMBER_STACKED_DIE_LAYERS 1
+
+// this variable can be set to carry out solution optimization for
+// a maximum area allocation.
+#define STACKED_DIE_LAYER_ALLOTED_AREA_mm2 0 //6.24 //6.21//71.5
+
+// this variable can also be employed when solution optimization
+// with maximum area allocation is carried out.
+#define MAX_PERCENT_AWAY_FROM_ALLOTED_AREA 50
+
+// this variable can also be employed when solution optimization
+// with maximum area allocation is carried out.
+#define MIN_AREA_EFFICIENCY 20
+
+// this variable can be employed when solution with a desired
+// aspect ratio is required.
+#define STACKED_DIE_LAYER_ASPECT_RATIO 1
+
+// this variable can be employed when solution with a desired
+// aspect ratio is required.
+#define MAX_PERCENT_AWAY_FROM_ASPECT_RATIO 101
+
+// this variable can be employed to carry out solution optimization
+// for a certain target random cycle time.
+#define TARGET_CYCLE_TIME_ns 1000000000
+
+#define NUMBER_PIPELINE_STAGES 4
+
+// this can be used to model the length of interconnect
+// between a bank and a crossbar
+#define LENGTH_INTERCONNECT_FROM_BANK_TO_CROSSBAR 0 //3791 // 2880//micron
+
+#define IS_CROSSBAR 0
+#define NUMBER_INPUT_PORTS_CROSSBAR 8
+#define NUMBER_OUTPUT_PORTS_CROSSBAR 8
+#define NUMBER_SIGNALS_PER_PORT_CROSSBAR 256
+
+
+#define MAT_LEAKAGE_REDUCTION_DUE_TO_SLEEP_TRANSISTORS_FACTOR 1
+#define LEAKAGE_REDUCTION_DUE_TO_LONG_CHANNEL_HP_TRANSISTORS_FACTOR 1
+
+#define PAGE_MODE 0
+
+#define MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA 60
+// We are actually not using this variable in the CACTI code. We just want to acknowledge that
+// this current should be multiplied by the DDR(n) system VDD value to compute the standby power
+// consumed during precharge.
+
+
+const double VDD_STORAGE_LOSS_FRACTION_WORST = 0.125;
+const double CU_RESISTIVITY = 0.022; //ohm-micron
+const double BULK_CU_RESISTIVITY = 0.018; //ohm-micron
+const double PERMITTIVITY_FREE_SPACE = 8.854e-18; //F/micron
+
+const static uint32_t sram_num_cells_wl_stitching_ = 16;
+const static uint32_t dram_num_cells_wl_stitching_ = 64;
+const static uint32_t comm_dram_num_cells_wl_stitching_ = 256;
+const static double num_bits_per_ecc_b_ = 8.0;
+
+const double bit_to_byte = 8.0;
+
+#define MAX_NUMBER_GATES_STAGE 20
+#define MAX_NUMBER_HTREE_NODES 20
+#define NAND2_LEAK_STACK_FACTOR 0.2
+#define NAND3_LEAK_STACK_FACTOR 0.2
+#define NOR2_LEAK_STACK_FACTOR 0.2
+#define INV_LEAK_STACK_FACTOR 0.5
+#define MAX_NUMBER_ARRAY_PARTITIONS 1000000
+
+// abbreviations used in this project
+// ----------------------------------
+//
+// num : number
+// rw : read/write
+// rd : read
+// wr : write
+// se : single-ended
+// sz : size
+// F : feature
+// w : width
+// h : height or horizontal
+// v : vertical or velocity
+
+
+enum ram_cell_tech_type_num
+{
+ itrs_hp = 0,
+ itrs_lstp = 1,
+ itrs_lop = 2,
+ lp_dram = 3,
+ comm_dram = 4
+};
+
+const double pppm[4] = {1,1,1,1};
+const double pppm_lkg[4] = {0,1,1,0};
+const double pppm_dyn[4] = {1,0,0,0};
+const double pppm_Isub[4] = {0,1,0,0};
+const double pppm_Ig[4] = {0,0,1,0};
+const double pppm_sc[4] = {0,0,0,1};
+
+
+
+#endif
diff --git a/ext/mcpat/cacti/contention.dat b/ext/mcpat/cacti/contention.dat
new file mode 100755
index 000000000..826553e7e
--- /dev/null
+++ b/ext/mcpat/cacti/contention.dat
@@ -0,0 +1,126 @@
+l34c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l34c64l2b: 9 11 19 29 43 62 81 102
+l34c64l4b: 6 8 12 17 24 29 39 47
+l34c64l8b: 7 8 10 14 18 22 25 30
+l34c64l16b: 7 7 9 12 14 17 20 24
+l34c64l32b: 7 7 9 12 14 17 20 24 -r
+l34c64l64b: 7 7 9 12 14 17 20 24 -r
+l34c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l34c128l2b: 4 10 19 30 44 64 82 103
+l34c128l4b: 3 6 11 17 24 31 38 47
+l34c128l8b: 3 5 9 13 17 21 25 29
+l34c128l16b: 4 5 7 10 13 16 19 22
+l34c128l32b: 4 5 7 10 13 16 19 22 -r
+l34c128l64b: 4 5 7 10 13 16 19 22 -r
+l34c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l34c256l2b: 3 10 19 30 44 63 82 103
+l34c256l4b: 3 6 11 17 24 31 38 47
+l34c256l8b: 2 5 8 12 16 20 24 29
+l34c256l16b: 2 4 7 9 12 15 18 21
+l34c256l32b: 2 4 7 9 12 15 18 21 -r
+l34c256l64b: 2 4 7 9 12 15 18 21 -r
+l38c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l38c64l2b: 57 59 77 90 137 187 219 245
+l38c64l4b: 35 40 48 56 43 61 80 101
+l38c64l8b: 18 27 41 45 52 58 58 58 -r
+l38c64l16b: 16 17 19 35 40 49 53 53 -r
+l38c64l32b: 15 15 17 19 22 25 30 30 -r
+l38c64l64b: 15 15 17 19 22 25 30 30 -r
+l38c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l38c128l2b: 38 50 78 93 139 188 220 245
+l38c128l4b: 29 37 46 56 43 61 81 102
+l38c128l8b: 16 30 39 44 50 57 57 57 -r
+l38c128l16b: 14 16 19 33 40 47 52 52 -r
+l38c128l32b: 14 15 17 20 23 27 31 31 -r
+l38c128l64b: 14 15 17 20 23 27 31 31 -r
+l38c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l38c256l2b: 35 50 78 94 139 188 220 246
+l38c256l4b: 28 36 45 55 55 61 81 102
+l38c256l8b: 17 30 38 43 50 57 57 57 -r
+l38c256l16b: 15 17 21 32 40 47 51 51
+l38c256l32b: 15 17 19 21 24 29 33 33
+l38c256l64b: 15 17 19 21 24 29 33 33 -r
+l316c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l316c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000
+l316c64l4b: 34 35 78 126 178 220 252 274
+l316c64l8b: 9 11 23 43 62 87 105 130
+l316c64l16b: 7 9 13 23 33 45 56 67
+l316c64l32b: 5 6 7 10 13 19 25 30
+l316c64l64b: 4 5 6 8 10 14 18 21
+l316c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l316c128l2b: 25 131 243 1000 1000 1000 1000 1000
+l316c128l4b: 8 28 79 127 179 221 253 274
+l316c128l8b: 4 9 22 43 62 88 106 131
+l316c128l16b: 4 6 11 21 32 44 55 67
+l316c128l32b: 4 6 11 12 12 18 24 29
+l316c128l64b: 2 3 5 7 9 13 17 21
+l316c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l316c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
+l316c256l4b: 5 28 80 128 180 221 253 274
+l316c256l8b: 3 8 22 43 63 88 107 131
+l316c256l16b: 2 5 11 21 32 44 55 67
+l316c256l32b: 2 3 5 8 12 18 24 29
+l316c256l64b: 2 3 4 6 9 13 17 21
+l24c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l24c64l2b: 10 12 24 41 60 86 105 122
+l24c64l4b: 5 7 13 20 29 38 47 56
+l24c64l8b: 5 6 9 14 18 24 29 35
+l24c64l16b: 4 5 7 10 12 16 19 22
+l24c64l32b: 5 5 6 8 10 12 14 17
+l24c64l64b: 5 5 6 8 10 12 14 16
+l24c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l24c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000
+l24c128l4b: 3 7 13 20 29 38 47 57
+l24c128l8b: 3 5 9 13 18 23 29 35
+l24c128l16b: 3 4 6 9 12 15 19 22
+l24c128l32b: 3 4 5 7 9 11 14 16
+l24c128l64b: 1000 1000 1000 1000 1000 1000 1000 1000
+l24c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l24c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
+l24c256l4b: 2 6 13 20 29 38 47 57
+l24c256l8b: 2 4 8 13 18 23 28 35
+l24c256l16b: 2 3 6 8 11 15 18 22
+l24c256l32b: 2 3 5 6 8 11 14 16
+l24c256l64b: 1000 1000 1000 1000 1000 1000 1000 1000
+l28c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l28c64l2b: 46 52 117 157 188 225 246 261
+l28c64l4b: 19 25 39 54 96 107 120 150
+l28c64l8b: 9 12 21 30 39 47 58 79
+l28c64l16b: 8 9 11 16 25 32 37 42
+l28c64l32b: 7 8 9 11 14 19 23 28
+l28c64l64b: 7 7 8 10 12 14 18 22
+l28c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l28c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000
+l28c128l4b: 12 22 39 54 98 108 130 151
+l28c128l8b: 7 12 21 30 39 48 59 80
+l28c128l16b: 6 8 11 16 24 31 37 42
+l28c128l32b: 6 7 9 11 14 19 24 28
+l28c128l64b: 6 7 9 11 14 19 24 28
+l28c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l28c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
+l28c256l4b: 12 22 39 54 100 108 130 152
+l28c256l8b: 7 12 21 30 39 48 59 81
+l28c256l16b: 6 8 11 16 24 31 37 42
+l28c256l32b: 6 7 9 11 14 19 24 28
+l28c256l64b: 6 7 9 11 14 19 24 28
+l216c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l216c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000
+l216c64l4b: 34 35 78 126 178 220 252 274
+l216c64l8b: 9 11 23 43 62 87 105 130
+l216c64l16b: 7 9 13 23 33 45 56 67
+l216c64l32b: 5 6 7 10 13 19 25 30
+l216c64l64b: 4 5 6 8 10 14 18 21
+l216c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l216c128l2b: 25 131 243 1000 1000 1000 1000 1000
+l216c128l4b: 8 28 79 127 179 221 253 274
+l216c128l8b: 4 9 22 43 62 88 106 131
+l216c128l16b: 4 6 11 21 32 44 55 67
+l216c128l32b: 4 6 11 12 12 18 24 29
+l216c128l64b: 2 3 5 7 9 13 17 21
+l216c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
+l216c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
+l216c256l4b: 5 28 80 128 180 221 253 274
+l216c256l8b: 3 8 22 43 63 88 107 131
+l216c256l16b: 2 5 11 21 32 44 55 67
+l216c256l32b: 2 3 5 8 12 18 24 29
+l216c256l64b: 2 3 4 6 9 13 17 21
diff --git a/ext/mcpat/cacti/crossbar.cc b/ext/mcpat/cacti/crossbar.cc
new file mode 100644
index 000000000..a3d8532d5
--- /dev/null
+++ b/ext/mcpat/cacti/crossbar.cc
@@ -0,0 +1,161 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#include "crossbar.h"
+
+#define ASPECT_THRESHOLD .8
+#define ADJ 1
+
+Crossbar::Crossbar(
+ double n_inp_,
+ double n_out_,
+ double flit_size_,
+ TechnologyParameter::DeviceType *dt
+ ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
+{
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
+ Vdd = dt->Vdd;
+ CB_ADJ = 1;
+}
+
+Crossbar::~Crossbar(){}
+
+double Crossbar::output_buffer()
+{
+
+ //Wire winit(4, 4);
+ double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
+ Wire w1(g_ip->wt, l_eff);
+ //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
+ double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
+ TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
+ TriS2 = s1; //driver transistor
+
+ if (TriS1 < 1)
+ TriS1 = 1;
+
+ double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
+ gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
+// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
+// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
+// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
+// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+// gate_C(TriS2*min_w_pmos, 0);
+ tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
+ gate_C(TriS2*g_tp.min_w_nmos_, 0)+
+ drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
+ drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(TriS2*min_w_pmos, 0);
+ double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
+ double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
+
+ tri_inp_cap = input_cap;
+ tri_out_cap = output_cap;
+ tri_ctr_cap = ctr_cap;
+ return input_cap + output_cap + ctr_cap;
+}
+
+void Crossbar::compute_power()
+{
+
+ Wire winit(4, 4);
+ double tri_cap = output_buffer();
+ assert(tri_cap > 0);
+ //area of a tristate logic
+ double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
+ g_area *= 2; // to model area of output transistors
+ g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
+ g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
+ double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
+ // effective no. of tristate buffers that need to be laid side by side
+ int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
+ double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
+ Wire w1(g_ip->wt, wire_len);
+
+ area.w = wire_len;
+ area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
+ Wire w2(g_ip->wt, area.h);
+
+ double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
+ if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
+
+ if (aspect_ratio_cb < ASPECT_THRESHOLD) {
+ if (n_out > 2 && n_inp > 2) {
+ CB_ADJ+=0.2;
+ //cout << "CB ADJ " << CB_ADJ << endl;
+ if (CB_ADJ < 4) {
+ this->compute_power();
+ }
+ }
+ }
+
+
+
+ power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
+ power.readOp.leakage = n_inp * n_out * flit_size * (
+ cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
+ cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
+ cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
+ w1.power.readOp.leakage + w2.power.readOp.leakage);
+ power.readOp.gate_leakage = n_inp * n_out * flit_size * (
+ cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
+ cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
+ cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
+ w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
+
+ // delay calculation
+ double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
+ Wire wdriver(g_ip->wt, l_eff);
+ double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
+ double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
+ delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
+
+ Wire wreset();
+}
+
+void Crossbar::print_crossbar()
+{
+ cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
+ cout << "Flit size : " << flit_size << " bits" << endl;
+ cout << "Width : " << area.w << " u" << endl;
+ cout << "Height : " << area.h << " u" << endl;
+ cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
+ cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
+ cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
+ cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
+}
+
+
diff --git a/ext/mcpat/cacti/crossbar.h b/ext/mcpat/cacti/crossbar.h
new file mode 100644
index 000000000..3b926517c
--- /dev/null
+++ b/ext/mcpat/cacti/crossbar.h
@@ -0,0 +1,85 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#ifndef __CROSSBAR__
+#define __CROSSBAR__
+
+#include <assert.h>
+
+#include <iostream>
+
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "mat.h"
+#include "parameter.h"
+#include "wire.h"
+
+class Crossbar : public Component
+{
+ public:
+ Crossbar(
+ double in,
+ double out,
+ double flit_sz,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ ~Crossbar();
+
+ void print_crossbar();
+ double output_buffer();
+ void compute_power();
+
+ double n_inp, n_out;
+ double flit_size;
+ double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap;
+
+ private:
+ double CB_ADJ;
+ /*
+ * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar
+ * buffer is adjusted to get an aspect ratio of whole cross bar close to one;
+ * when adjust the ratio, the number of wires route over the tri-state buffers does not change,
+ * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase
+ * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch
+ * will increase. As a result, the height of the crossbar (area.h) will increase.
+ */
+
+ TechnologyParameter::DeviceType *deviceType;
+ double TriS1, TriS2;
+ double min_w_pmos, Vdd;
+
+};
+
+
+
+
+#endif
diff --git a/ext/mcpat/cacti/decoder.cc b/ext/mcpat/cacti/decoder.cc
new file mode 100644
index 000000000..0de6f6157
--- /dev/null
+++ b/ext/mcpat/cacti/decoder.cc
@@ -0,0 +1,1577 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+
+#include "area.h"
+#include "decoder.h"
+#include "parameter.h"
+
+using namespace std;
+
+
+Decoder::Decoder(
+ int _num_dec_signals,
+ bool flag_way_select,
+ double _C_ld_dec_out,
+ double _R_wire_dec_out,
+ bool fully_assoc_,
+ bool is_dram_,
+ bool is_wl_tr_,
+ const Area & cell_)
+:exist(false),
+ C_ld_dec_out(_C_ld_dec_out),
+ R_wire_dec_out(_R_wire_dec_out),
+ num_gates(0), num_gates_min(2),
+ delay(0),
+ //power(),
+ fully_assoc(fully_assoc_), is_dram(is_dram_),
+ is_wl_tr(is_wl_tr_), cell(cell_)
+{
+
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
+ {
+ w_dec_n[i] = 0;
+ w_dec_p[i] = 0;
+ }
+
+ /*
+ * _num_dec_signals is the number of decoded signal as output
+ * num_addr_bits_dec is the number of signal to be decoded
+ * as the decoders input.
+ */
+ int num_addr_bits_dec = _log2(_num_dec_signals);
+
+ if (num_addr_bits_dec < 4)
+ {
+ if (flag_way_select)
+ {
+ exist = true;
+ num_in_signals = 2;
+ }
+ else
+ {
+ num_in_signals = 0;
+ }
+ }
+ else
+ {
+ exist = true;
+
+ if (flag_way_select)
+ {
+ num_in_signals = 3;
+ }
+ else
+ {
+ num_in_signals = 2;
+ }
+ }
+
+ assert(cell.h>0);
+ assert(cell.w>0);
+ // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
+ //area.h = 4 * cell.h;
+ area.h = g_tp.h_dec * cell.h;
+
+ compute_widths();
+ compute_area();
+}
+
+
+
+void Decoder::compute_widths()
+{
+ double F;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
+ double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+ double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+
+ if (exist)
+ {
+ if (num_in_signals == 2 || fully_assoc)
+ {
+ w_dec_n[0] = 2 * g_tp.min_w_nmos_;
+ w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2;
+ }
+ else
+ {
+ w_dec_n[0] = 3 * g_tp.min_w_nmos_;
+ w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3;
+ }
+
+ F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
+ gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
+ num_gates = logical_effort(
+ num_gates_min,
+ num_in_signals == 2 ? gnand2 : gnand3,
+ F,
+ w_dec_n,
+ w_dec_p,
+ C_ld_dec_out,
+ p_to_n_sz_ratio,
+ is_dram,
+ is_wl_tr,
+ g_tp.max_w_nmos_dec);
+ }
+}
+
+
+
+void Decoder::compute_area()
+{
+ double cumulative_area = 0;
+ double cumulative_curr = 0; // cumulative leakage current
+ double cumulative_curr_Ig = 0; // cumulative leakage current
+
+ if (exist)
+ { // First check if this decoder exists
+ if (num_in_signals == 2)
+ {
+ cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
+ cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
+ cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
+ }
+ else if (num_in_signals == 3)
+ {
+ cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
+ cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
+ cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
+ }
+
+ for (int i = 1; i < num_gates; i++)
+ {
+ cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
+ cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+ cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+ }
+ power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
+
+ area.w = (cumulative_area / area.h);
+ }
+}
+
+
+
+double Decoder::compute_delays(double inrisetime)
+{
+ if (exist)
+ {
+ double ret_val = 0; // outrisetime
+ int i;
+ double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
+ double Vdd = g_tp.peri_global.Vdd;
+
+ if ((is_wl_tr) && (is_dram))
+ {
+ Vpp = g_tp.vpp;
+ }
+ else if (is_wl_tr)
+ {
+ Vpp = g_tp.sram_cell.Vdd;
+ }
+ else
+ {
+ Vpp = g_tp.peri_global.Vdd;
+ }
+
+ // first check whether a decoder is required at all
+ rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
+ c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
+ drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+
+ for (i = 1; i < num_gates - 1; ++i)
+ {
+ rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
+ c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
+ drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+ }
+
+ // add delay of final inverter that drives the wordline
+ i = num_gates - 1;
+ c_load = C_ld_dec_out;
+ rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
+ drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ ret_val = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
+
+ return ret_val;
+ }
+ else
+ {
+ return 0.0;
+ }
+}
+
+void Decoder::leakage_feedback(double temperature)
+{
+ double cumulative_curr = 0; // cumulative leakage current
+ double cumulative_curr_Ig = 0; // cumulative leakage current
+
+ if (exist)
+ { // First check if this decoder exists
+ if (num_in_signals == 2)
+ {
+ cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
+ cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
+ }
+ else if (num_in_signals == 3)
+ {
+ cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
+ cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
+ }
+
+ for (int i = 1; i < num_gates; i++)
+ {
+ cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+ cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+ }
+
+ power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
+ }
+}
+
+PredecBlk::PredecBlk(
+ int num_dec_signals,
+ Decoder * dec_,
+ double C_wire_predec_blk_out,
+ double R_wire_predec_blk_out_,
+ int num_dec_per_predec,
+ bool is_dram,
+ bool is_blk1)
+ :dec(dec_),
+ exist(false),
+ number_input_addr_bits(0),
+ C_ld_predec_blk_out(0),
+ R_wire_predec_blk_out(0),
+ branch_effort_nand2_gate_output(1),
+ branch_effort_nand3_gate_output(1),
+ flag_two_unique_paths(false),
+ flag_L2_gate(0),
+ number_inputs_L1_gate(0),
+ number_gates_L1_nand2_path(0),
+ number_gates_L1_nand3_path(0),
+ number_gates_L2(0),
+ min_number_gates_L1(2),
+ min_number_gates_L2(2),
+ num_L1_active_nand2_path(0),
+ num_L1_active_nand3_path(0),
+ delay_nand2_path(0),
+ delay_nand3_path(0),
+ power_nand2_path(),
+ power_nand3_path(),
+ power_L2(),
+ is_dram_(is_dram)
+{
+ int branch_effort_predec_out;
+ double C_ld_dec_gate;
+ int num_addr_bits_dec = _log2(num_dec_signals);
+ int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
+ int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
+
+ w_L1_nand2_n[0] = 0;
+ w_L1_nand2_p[0] = 0;
+ w_L1_nand3_n[0] = 0;
+ w_L1_nand3_p[0] = 0;
+
+ if (is_blk1 == true)
+ {
+ if (num_addr_bits_dec <= 0)
+ {
+ return;
+ }
+ else if (num_addr_bits_dec < 4)
+ {
+ // Just one predecoder block is required with NAND2 gates. No decoder required.
+ // The first level of predecoding directly drives the decoder output load
+ exist = true;
+ number_input_addr_bits = num_addr_bits_dec;
+ R_wire_predec_blk_out = dec->R_wire_dec_out;
+ C_ld_predec_blk_out = dec->C_ld_dec_out;
+ }
+ else
+ {
+ exist = true;
+ number_input_addr_bits = blk1_num_input_addr_bits;
+ branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
+ C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+ R_wire_predec_blk_out = R_wire_predec_blk_out_;
+ C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+ }
+ }
+ else
+ {
+ if (num_addr_bits_dec >= 4)
+ {
+ exist = true;
+ number_input_addr_bits = blk2_num_input_addr_bits;
+ branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
+ C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+ R_wire_predec_blk_out = R_wire_predec_blk_out_;
+ C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+ }
+ }
+
+ compute_widths();
+ compute_area();
+}
+
+
+
+void PredecBlk::compute_widths()
+{
+ double F, c_load_nand3_path, c_load_nand2_path;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+ double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+ double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+
+ if (exist == false) return;
+
+
+ switch (number_input_addr_bits)
+ {
+ case 1:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 0;
+ break;
+ case 2:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 0;
+ break;
+ case 3:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 0;
+ break;
+ case 4:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 2;
+ branch_effort_nand2_gate_output = 4;
+ break;
+ case 5:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 2;
+ branch_effort_nand2_gate_output = 8;
+ branch_effort_nand3_gate_output = 4;
+ break;
+ case 6:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 2;
+ branch_effort_nand3_gate_output = 8;
+ break;
+ case 7:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 3;
+ branch_effort_nand2_gate_output = 32;
+ branch_effort_nand3_gate_output = 16;
+ break;
+ case 8:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 3;
+ branch_effort_nand2_gate_output = 64;
+ branch_effort_nand3_gate_output = 32;
+ break;
+ case 9:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 3;
+ branch_effort_nand3_gate_output = 64;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ // find the number of gates and sizing in second level of predecoder (if there is a second level)
+ if (flag_L2_gate)
+ {
+ if (flag_L2_gate == 2)
+ { // 2nd level is a NAND2 gate
+ w_L2_n[0] = 2 * g_tp.min_w_nmos_;
+ F = gnand2;
+ }
+ else
+ { // 2nd level is a NAND3 gate
+ w_L2_n[0] = 3 * g_tp.min_w_nmos_;
+ F = gnand3;
+ }
+ w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
+ number_gates_L2 = logical_effort(
+ min_number_gates_L2,
+ flag_L2_gate == 2 ? gnand2 : gnand3,
+ F,
+ w_L2_n,
+ w_L2_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+
+ // Now find the number of gates and widths in first level of predecoder
+ if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2))
+ { // Whenever flag_two_unique_paths is true, it means first level of decoder employs
+ // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means
+ // a NAND2 gate is used in the first level of the predecoder
+ c_load_nand2_path = branch_effort_nand2_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+ w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2 * c_load_nand2_path /
+ (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand2_p[0], 0, is_dram_));
+ number_gates_L1_nand2_path = logical_effort(
+ min_number_gates_L1,
+ gnand2,
+ F,
+ w_L1_nand2_n,
+ w_L1_nand2_p,
+ c_load_nand2_path,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+
+ //Now find widths of gates along path in which first gate is a NAND3
+ if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3))
+ { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
+ // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
+ // a NAND3 gate is used in the first level of the predecoder
+ c_load_nand3_path = branch_effort_nand3_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+ w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3 * c_load_nand3_path /
+ (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand3_p[0], 0, is_dram_));
+ number_gates_L1_nand3_path = logical_effort(
+ min_number_gates_L1,
+ gnand3,
+ F,
+ w_L1_nand3_n,
+ w_L1_nand3_p,
+ c_load_nand3_path,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+ }
+ else
+ { // find number of gates and widths in first level of predecoder block when there is no second level
+ if (number_inputs_L1_gate == 2)
+ {
+ w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+ w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2*C_ld_predec_blk_out /
+ (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand2_p[0], 0, is_dram_));
+ number_gates_L1_nand2_path = logical_effort(
+ min_number_gates_L1,
+ gnand2,
+ F,
+ w_L1_nand2_n,
+ w_L1_nand2_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+ else if (number_inputs_L1_gate == 3)
+ {
+ w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+ w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3*C_ld_predec_blk_out /
+ (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand3_p[0], 0, is_dram_));
+ number_gates_L1_nand3_path = logical_effort(
+ min_number_gates_L1,
+ gnand3,
+ F,
+ w_L1_nand3_n,
+ w_L1_nand3_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+ }
+}
+
+
+
+void PredecBlk::compute_area()
+{
+ if (exist)
+ { // First check whether a predecoder block is needed
+ int num_L1_nand2 = 0;
+ int num_L1_nand3 = 0;
+ int num_L2 = 0;
+ double tot_area_L1_nand3 =0;
+ double leak_L1_nand3 =0;
+ double gate_leak_L1_nand3 =0;
+
+ double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
+ double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ if (number_inputs_L1_gate != 3) {
+ tot_area_L1_nand3 = 0;
+ leak_L1_nand3 = 0;
+ gate_leak_L1_nand3 =0;
+ }
+ else {
+ tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
+ leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
+ gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
+ }
+
+ switch (number_input_addr_bits)
+ {
+ case 1: //2 NAND2 gates
+ num_L1_nand2 = 2;
+ num_L2 = 0;
+ num_L1_active_nand2_path =1;
+ num_L1_active_nand3_path =0;
+ break;
+ case 2: //4 NAND2 gates
+ num_L1_nand2 = 4;
+ num_L2 = 0;
+ num_L1_active_nand2_path =1;
+ num_L1_active_nand3_path =0;
+ break;
+ case 3: //8 NAND3 gates
+ num_L1_nand3 = 8;
+ num_L2 = 0;
+ num_L1_active_nand2_path =0;
+ num_L1_active_nand3_path =1;
+ break;
+ case 4: //4 + 4 NAND2 gates
+ num_L1_nand2 = 8;
+ num_L2 = 16;
+ num_L1_active_nand2_path =2;
+ num_L1_active_nand3_path =0;
+ break;
+ case 5: //4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 8;
+ num_L2 = 32;
+ num_L1_active_nand2_path =1;
+ num_L1_active_nand3_path =1;
+ break;
+ case 6: //8 + 8 NAND3 gates
+ num_L1_nand3 = 16;
+ num_L2 = 64;
+ num_L1_active_nand2_path =0;
+ num_L1_active_nand3_path =2;
+ break;
+ case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 8;
+ num_L1_nand3 = 8;
+ num_L2 = 128;
+ num_L1_active_nand2_path =2;
+ num_L1_active_nand3_path =1;
+ break;
+ case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 16;
+ num_L2 = 256;
+ num_L1_active_nand2_path =2;
+ num_L1_active_nand3_path =2;
+ break;
+ case 9: //8 + 8 + 8 NAND3 gates
+ num_L1_nand3 = 24;
+ num_L2 = 512;
+ num_L1_active_nand2_path =0;
+ num_L1_active_nand3_path =3;
+ break;
+ default:
+ break;
+ }
+
+ for (int i = 1; i < number_gates_L1_nand2_path; ++i)
+ {
+ tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
+ leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ }
+ tot_area_L1_nand2 *= num_L1_nand2;
+ leak_L1_nand2 *= num_L1_nand2;
+ gate_leak_L1_nand2 *= num_L1_nand2;
+
+ for (int i = 1; i < number_gates_L1_nand3_path; ++i)
+ {
+ tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
+ leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ }
+ tot_area_L1_nand3 *= num_L1_nand3;
+ leak_L1_nand3 *= num_L1_nand3;
+ gate_leak_L1_nand3 *= num_L1_nand3;
+
+ double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
+ double cumulative_area_L2 = 0.0;
+ double leakage_L2 = 0.0;
+ double gate_leakage_L2 = 0.0;
+
+ if (flag_L2_gate == 2)
+ {
+ cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ }
+ else if (flag_L2_gate == 3)
+ {
+ cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ }
+
+ for (int i = 1; i < number_gates_L2; ++i)
+ {
+ cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
+ leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ }
+ cumulative_area_L2 *= num_L2;
+ leakage_L2 *= num_L2;
+ gate_leakage_L2 *= num_L2;
+
+ power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
+ area.set_area(cumulative_area_L1 + cumulative_area_L2);
+ power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
+ }
+}
+
+
+
+pair<double, double> PredecBlk::compute_delays(
+ pair<double, double> inrisetime) // <nand2, nand3>
+{
+ pair<double, double> ret_val;
+ ret_val.first = 0; // outrisetime_nand2_path
+ ret_val.second = 0; // outrisetime_nand3_path
+
+ double inrisetime_nand2_path = inrisetime.first;
+ double inrisetime_nand3_path = inrisetime.second;
+ int i;
+ double rd, c_load, c_intrinsic, tf, this_delay;
+ double Vdd = g_tp.peri_global.Vdd;
+
+ // TODO: following delay calculation part can be greatly simplified.
+ // first check whether a predecoder block is required
+ if (exist)
+ {
+ //Find delay in first level of predecoder block
+ //First find delay in path
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2))
+ {
+ //First gate is a NAND2 gate
+ rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
+ c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
+ c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+
+ //Add delays of all but the last inverter in the chain
+ for (i = 1; i < number_gates_L1_nand2_path - 1; ++i)
+ {
+ rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of the last inverter
+ i = number_gates_L1_nand2_path - 1;
+ rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
+ if (flag_L2_gate)
+ {
+ c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ else
+ { //First level directly drives decoder output load
+ c_load = C_ld_predec_blk_out;
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3))
+ { //Check if the number of gates in the first level is more than 1.
+ //First gate is a NAND3 gate
+ rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
+ c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
+ c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+
+ //Add delays of all but the last inverter in the chain
+ for (i = 1; i < number_gates_L1_nand3_path - 1; ++i)
+ {
+ rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of the last inverter
+ i = number_gates_L1_nand3_path - 1;
+ rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
+ if (flag_L2_gate)
+ {
+ c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ else
+ { //First level directly drives decoder output load
+ c_load = C_ld_predec_blk_out;
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ // Find delay through second level
+ if (flag_L2_gate)
+ {
+ if (flag_L2_gate == 2)
+ {
+ rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
+ c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
+ c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ else
+ { // flag_L2_gate = 3
+ rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
+ c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
+ c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ for (i = 1; i < number_gates_L2 - 1; ++i)
+ {
+ rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of final inverter that drives the wordline decoders
+ i = number_gates_L2 - 1;
+ c_load = C_ld_predec_blk_out;
+ rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
+ return ret_val;
+}
+
+void PredecBlk::leakage_feedback(double temperature)
+{
+ if (exist)
+ { // First check whether a predecoder block is needed
+ int num_L1_nand2 = 0;
+ int num_L1_nand3 = 0;
+ int num_L2 = 0;
+ double leak_L1_nand3 =0;
+ double gate_leak_L1_nand3 =0;
+
+ double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ if (number_inputs_L1_gate != 3) {
+ leak_L1_nand3 = 0;
+ gate_leak_L1_nand3 =0;
+ }
+ else {
+ leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
+ gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
+ }
+
+ switch (number_input_addr_bits)
+ {
+ case 1: //2 NAND2 gates
+ num_L1_nand2 = 2;
+ num_L2 = 0;
+ num_L1_active_nand2_path =1;
+ num_L1_active_nand3_path =0;
+ break;
+ case 2: //4 NAND2 gates
+ num_L1_nand2 = 4;
+ num_L2 = 0;
+ num_L1_active_nand2_path =1;
+ num_L1_active_nand3_path =0;
+ break;
+ case 3: //8 NAND3 gates
+ num_L1_nand3 = 8;
+ num_L2 = 0;
+ num_L1_active_nand2_path =0;
+ num_L1_active_nand3_path =1;
+ break;
+ case 4: //4 + 4 NAND2 gates
+ num_L1_nand2 = 8;
+ num_L2 = 16;
+ num_L1_active_nand2_path =2;
+ num_L1_active_nand3_path =0;
+ break;
+ case 5: //4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 8;
+ num_L2 = 32;
+ num_L1_active_nand2_path =1;
+ num_L1_active_nand3_path =1;
+ break;
+ case 6: //8 + 8 NAND3 gates
+ num_L1_nand3 = 16;
+ num_L2 = 64;
+ num_L1_active_nand2_path =0;
+ num_L1_active_nand3_path =2;
+ break;
+ case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 8;
+ num_L1_nand3 = 8;
+ num_L2 = 128;
+ num_L1_active_nand2_path =2;
+ num_L1_active_nand3_path =1;
+ break;
+ case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 16;
+ num_L2 = 256;
+ num_L1_active_nand2_path =2;
+ num_L1_active_nand3_path =2;
+ break;
+ case 9: //8 + 8 + 8 NAND3 gates
+ num_L1_nand3 = 24;
+ num_L2 = 512;
+ num_L1_active_nand2_path =0;
+ num_L1_active_nand3_path =3;
+ break;
+ default:
+ break;
+ }
+
+ for (int i = 1; i < number_gates_L1_nand2_path; ++i)
+ {
+ leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ }
+ leak_L1_nand2 *= num_L1_nand2;
+ gate_leak_L1_nand2 *= num_L1_nand2;
+
+ for (int i = 1; i < number_gates_L1_nand3_path; ++i)
+ {
+ leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ }
+ leak_L1_nand3 *= num_L1_nand3;
+ gate_leak_L1_nand3 *= num_L1_nand3;
+
+ double leakage_L2 = 0.0;
+ double gate_leakage_L2 = 0.0;
+
+ if (flag_L2_gate == 2)
+ {
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ }
+ else if (flag_L2_gate == 3)
+ {
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ }
+
+ for (int i = 1; i < number_gates_L2; ++i)
+ {
+ leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ }
+ leakage_L2 *= num_L2;
+ gate_leakage_L2 *= num_L2;
+
+ power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
+
+ power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
+ }
+}
+
+PredecBlkDrv::PredecBlkDrv(
+ int way_select_,
+ PredecBlk * blk_,
+ bool is_dram)
+ :flag_driver_exists(0),
+ number_gates_nand2_path(0),
+ number_gates_nand3_path(0),
+ min_number_gates(2),
+ num_buffers_driving_1_nand2_load(0),
+ num_buffers_driving_2_nand2_load(0),
+ num_buffers_driving_4_nand2_load(0),
+ num_buffers_driving_2_nand3_load(0),
+ num_buffers_driving_8_nand3_load(0),
+ num_buffers_nand3_path(0),
+ c_load_nand2_path_out(0),
+ c_load_nand3_path_out(0),
+ r_load_nand2_path_out(0),
+ r_load_nand3_path_out(0),
+ delay_nand2_path(0),
+ delay_nand3_path(0),
+ power_nand2_path(),
+ power_nand3_path(),
+ blk(blk_), dec(blk->dec),
+ is_dram_(is_dram),
+ way_select(way_select_)
+{
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
+ {
+ width_nand2_path_n[i] = 0;
+ width_nand2_path_p[i] = 0;
+ width_nand3_path_n[i] = 0;
+ width_nand3_path_p[i] = 0;
+ }
+
+ number_input_addr_bits = blk->number_input_addr_bits;
+
+ if (way_select > 1)
+ {
+ flag_driver_exists = 1;
+ number_input_addr_bits = way_select;
+ if (dec->num_in_signals == 2)
+ {
+ c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
+ num_buffers_driving_2_nand2_load = number_input_addr_bits;
+ }
+ else if (dec->num_in_signals == 3)
+ {
+ c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
+ num_buffers_driving_2_nand3_load = number_input_addr_bits;
+ }
+ }
+ else if (way_select == 0)
+ {
+ if (blk->exist)
+ {
+ flag_driver_exists = 1;
+ }
+ }
+
+ compute_widths();
+ compute_area();
+}
+
+
+
+void PredecBlkDrv::compute_widths()
+{
+ // The predecode block driver accepts as input the address bits from the h-tree network. For
+ // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
+ // inversion to generate addrbar and simply treat addrbar as addr.
+
+ double F;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+
+ if (flag_driver_exists)
+ {
+ double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
+ double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
+
+ if (way_select == 0)
+ {
+ if (blk->number_input_addr_bits == 1)
+ { //2 NAND2 gates
+ num_buffers_driving_2_nand2_load = 1;
+ c_load_nand2_path_out = 2 * C_nand2_gate_blk;
+ }
+ else if (blk->number_input_addr_bits == 2)
+ { //4 NAND2 gates one 2-4 decoder
+ num_buffers_driving_4_nand2_load = 2;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ }
+ else if (blk->number_input_addr_bits == 3)
+ { //8 NAND3 gates one 3-8 decoder
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ else if (blk->number_input_addr_bits == 4)
+ { //4 + 4 NAND2 gates two 2-4 decoder
+ num_buffers_driving_4_nand2_load = 4;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ }
+ else if (blk->number_input_addr_bits == 5)
+ { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
+ num_buffers_driving_4_nand2_load = 2;
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ else if (blk->number_input_addr_bits == 6)
+ { //8 + 8 NAND3 gates two 3-8 decoder
+ num_buffers_driving_8_nand3_load = 6;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ else if (blk->number_input_addr_bits == 7)
+ { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
+ num_buffers_driving_4_nand2_load = 4;
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ else if (blk->number_input_addr_bits == 8)
+ { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
+ num_buffers_driving_4_nand2_load = 2;
+ num_buffers_driving_8_nand3_load = 6;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ else if (blk->number_input_addr_bits == 9)
+ { //8 + 8 + 8 NAND3 gates three 3-8 decoder
+ num_buffers_driving_8_nand3_load = 9;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ }
+
+ if ((blk->flag_two_unique_paths) ||
+ (blk->number_inputs_L1_gate == 2) ||
+ (number_input_addr_bits == 0) ||
+ ((way_select)&&(dec->num_in_signals == 2)))
+ { //this means that way_select is driving NAND2 in decoder.
+ width_nand2_path_n[0] = g_tp.min_w_nmos_;
+ width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
+ F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
+ number_gates_nand2_path = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_nand2_path_n,
+ width_nand2_path_p,
+ c_load_nand2_path_out,
+ p_to_n_sz_ratio,
+ is_dram_, false, g_tp.max_w_nmos_);
+ }
+
+ if ((blk->flag_two_unique_paths) ||
+ (blk->number_inputs_L1_gate == 3) ||
+ ((way_select)&&(dec->num_in_signals == 3)))
+ { //this means that way_select is driving NAND3 in decoder.
+ width_nand3_path_n[0] = g_tp.min_w_nmos_;
+ width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
+ F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
+ number_gates_nand3_path = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_nand3_path_n,
+ width_nand3_path_p,
+ c_load_nand3_path_out,
+ p_to_n_sz_ratio,
+ is_dram_, false, g_tp.max_w_nmos_);
+ }
+ }
+}
+
+
+
+void PredecBlkDrv::compute_area()
+{
+ double area_nand2_path = 0;
+ double area_nand3_path = 0;
+ double leak_nand2_path = 0;
+ double leak_nand3_path = 0;
+ double gate_leak_nand2_path = 0;
+ double gate_leak_nand3_path = 0;
+
+ if (flag_driver_exists)
+ { // first check whether a predecoder block driver is needed
+ for (int i = 0; i < number_gates_nand2_path; ++i)
+ {
+ area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
+ leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
+ gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
+ }
+ area_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+ leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+ gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+
+ for (int i = 0; i < number_gates_nand3_path; ++i)
+ {
+ area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
+ leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
+ gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
+ }
+ area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+
+ power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
+ power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
+ area.set_area(area_nand2_path + area_nand3_path);
+ }
+}
+
+
+
+pair<double, double> PredecBlkDrv::compute_delays(
+ double inrisetime_nand2_path,
+ double inrisetime_nand3_path)
+{
+ pair<double, double> ret_val;
+ ret_val.first = 0; // outrisetime_nand2_path
+ ret_val.second = 0; // outrisetime_nand3_path
+ int i;
+ double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
+ double Vdd = g_tp.peri_global.Vdd;
+
+ if (flag_driver_exists)
+ {
+ for (i = 0; i < number_gates_nand2_path - 1; ++i)
+ {
+ rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
+ c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
+ }
+
+ // Final inverter drives the predecoder block or the decoder output load
+ if (number_gates_nand2_path != 0)
+ {
+ i = number_gates_nand2_path - 1;
+ rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ c_load = c_load_nand2_path_out;
+ tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
+// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
+ }
+
+ for (i = 0; i < number_gates_nand3_path - 1; ++i)
+ {
+ rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
+ c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
+ }
+
+ // Final inverter drives the predecoder block or the decoder output load
+ if (number_gates_nand3_path != 0)
+ {
+ i = number_gates_nand3_path - 1;
+ rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ c_load = c_load_nand3_path_out;
+ tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2;
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
+ }
+ }
+ return ret_val;
+}
+
+
+double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
+{
+ return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
+ num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
+}
+
+
+
+Predec::Predec(
+ PredecBlkDrv * drv1_,
+ PredecBlkDrv * drv2_)
+:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
+{
+ driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
+ drv1->power_nand3_path.readOp.leakage +
+ drv2->power_nand2_path.readOp.leakage +
+ drv2->power_nand3_path.readOp.leakage;
+ block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
+ blk1->power_nand3_path.readOp.leakage +
+ blk1->power_L2.readOp.leakage +
+ blk2->power_nand2_path.readOp.leakage +
+ blk2->power_nand3_path.readOp.leakage +
+ blk2->power_L2.readOp.leakage;
+ power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
+
+ driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
+ drv1->power_nand3_path.readOp.gate_leakage +
+ drv2->power_nand2_path.readOp.gate_leakage +
+ drv2->power_nand3_path.readOp.gate_leakage;
+ block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
+ blk1->power_nand3_path.readOp.gate_leakage +
+ blk1->power_L2.readOp.gate_leakage +
+ blk2->power_nand2_path.readOp.gate_leakage +
+ blk2->power_nand3_path.readOp.gate_leakage +
+ blk2->power_L2.readOp.gate_leakage;
+ power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
+}
+
+void PredecBlkDrv::leakage_feedback(double temperature)
+{
+ double leak_nand2_path = 0;
+ double leak_nand3_path = 0;
+ double gate_leak_nand2_path = 0;
+ double gate_leak_nand3_path = 0;
+
+ if (flag_driver_exists)
+ { // first check whether a predecoder block driver is needed
+ for (int i = 0; i < number_gates_nand2_path; ++i)
+ {
+ leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
+ gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
+ }
+ leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+ gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+
+ for (int i = 0; i < number_gates_nand3_path; ++i)
+ {
+ leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
+ gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
+ }
+ leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+
+ power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
+ power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
+ }
+}
+
+double Predec::compute_delays(double inrisetime)
+{
+ // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
+ pair<double, double> tmp_pair1, tmp_pair2;
+ tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
+ tmp_pair1 = blk1->compute_delays(tmp_pair1);
+ tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
+ tmp_pair2 = blk2->compute_delays(tmp_pair2);
+ tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
+
+ driver_power.readOp.dynamic =
+ drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
+ drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
+ drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
+ drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
+
+ block_power.readOp.dynamic =
+ blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
+ blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
+ blk1->power_L2.readOp.dynamic +
+ blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
+ blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
+ blk2->power_L2.readOp.dynamic;
+
+ power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
+
+ delay = tmp_pair1.first;
+ return tmp_pair1.second;
+}
+
+
+void Predec::leakage_feedback(double temperature)
+{
+ drv1->leakage_feedback(temperature);
+ drv2->leakage_feedback(temperature);
+ blk1->leakage_feedback(temperature);
+ blk2->leakage_feedback(temperature);
+
+ driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
+ drv1->power_nand3_path.readOp.leakage +
+ drv2->power_nand2_path.readOp.leakage +
+ drv2->power_nand3_path.readOp.leakage;
+ block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
+ blk1->power_nand3_path.readOp.leakage +
+ blk1->power_L2.readOp.leakage +
+ blk2->power_nand2_path.readOp.leakage +
+ blk2->power_nand3_path.readOp.leakage +
+ blk2->power_L2.readOp.leakage;
+ power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
+
+ driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
+ drv1->power_nand3_path.readOp.gate_leakage +
+ drv2->power_nand2_path.readOp.gate_leakage +
+ drv2->power_nand3_path.readOp.gate_leakage;
+ block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
+ blk1->power_nand3_path.readOp.gate_leakage +
+ blk1->power_L2.readOp.gate_leakage +
+ blk2->power_nand2_path.readOp.gate_leakage +
+ blk2->power_nand3_path.readOp.gate_leakage +
+ blk2->power_L2.readOp.gate_leakage;
+ power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
+}
+
+// returns <delay, risetime>
+pair<double, double> Predec::get_max_delay_before_decoder(
+ pair<double, double> input_pair1,
+ pair<double, double> input_pair2)
+{
+ pair<double, double> ret_val;
+ double delay;
+
+ delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
+ ret_val.first = delay;
+ ret_val.second = input_pair1.first;
+ delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
+ if (ret_val.first < delay)
+ {
+ ret_val.first = delay;
+ ret_val.second = input_pair1.second;
+ }
+ delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
+ if (ret_val.first < delay)
+ {
+ ret_val.first = delay;
+ ret_val.second = input_pair2.first;
+ }
+ delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
+ if (ret_val.first < delay)
+ {
+ ret_val.first = delay;
+ ret_val.second = input_pair2.second;
+ }
+
+ return ret_val;
+}
+
+
+
+Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram)
+:number_gates(0),
+ min_number_gates(2),
+ c_gate_load(c_gate_load_),
+ c_wire_load(c_wire_load_),
+ r_wire_load(r_wire_load_),
+ delay(0),
+ power(),
+ is_dram_(is_dram)
+{
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
+ {
+ width_n[i] = 0;
+ width_p[i] = 0;
+ }
+
+ compute_widths();
+}
+
+
+void Driver::compute_widths()
+{
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+ double c_load = c_gate_load + c_wire_load;
+ width_n[0] = g_tp.min_w_nmos_;
+ width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+
+ double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
+ number_gates = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_n,
+ width_p,
+ c_load,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+}
+
+
+
+double Driver::compute_delay(double inrisetime)
+{
+ int i;
+ double rd, c_load, c_intrinsic, tf;
+ double this_delay = 0;
+
+ for (i = 0; i < number_gates - 1; ++i)
+ {
+ rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
+ }
+
+ i = number_gates - 1;
+ c_load = c_gate_load + c_wire_load;
+ rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
+
+ return this_delay / (1.0 - 0.5);
+}
+
diff --git a/ext/mcpat/cacti/decoder.h b/ext/mcpat/cacti/decoder.h
new file mode 100644
index 000000000..35631e84b
--- /dev/null
+++ b/ext/mcpat/cacti/decoder.h
@@ -0,0 +1,247 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#ifndef __DECODER_H__
+#define __DECODER_H__
+
+#include <vector>
+
+#include "area.h"
+#include "component.h"
+#include "parameter.h"
+
+using namespace std;
+
+
+class Decoder : public Component
+{
+ public:
+ Decoder(
+ int _num_dec_signals,
+ bool flag_way_select,
+ double _C_ld_dec_out,
+ double _R_wire_dec_out,
+ bool fully_assoc_,
+ bool is_dram_,
+ bool is_wl_tr_,
+ const Area & cell_);
+
+ bool exist;
+ int num_in_signals;
+ double C_ld_dec_out;
+ double R_wire_dec_out;
+ int num_gates;
+ int num_gates_min;
+ double w_dec_n[MAX_NUMBER_GATES_STAGE];
+ double w_dec_p[MAX_NUMBER_GATES_STAGE];
+ double delay;
+ //powerDef power;
+ bool fully_assoc;
+ bool is_dram;
+ bool is_wl_tr;
+ const Area & cell;
+
+
+ void compute_widths();
+ void compute_area();
+ double compute_delays(double inrisetime); // return outrisetime
+
+ void leakage_feedback(double temperature);
+};
+
+
+
+class PredecBlk : public Component
+{
+ public:
+ PredecBlk(
+ int num_dec_signals,
+ Decoder * dec,
+ double C_wire_predec_blk_out,
+ double R_wire_predec_blk_out,
+ int num_dec_per_predec,
+ bool is_dram_,
+ bool is_blk1);
+
+ Decoder * dec;
+ bool exist;
+ int number_input_addr_bits;
+ double C_ld_predec_blk_out;
+ double R_wire_predec_blk_out;
+ int branch_effort_nand2_gate_output;
+ int branch_effort_nand3_gate_output;
+ bool flag_two_unique_paths;
+ int flag_L2_gate;
+ int number_inputs_L1_gate;
+ int number_gates_L1_nand2_path;
+ int number_gates_L1_nand3_path;
+ int number_gates_L2;
+ int min_number_gates_L1;
+ int min_number_gates_L2;
+ int num_L1_active_nand2_path;
+ int num_L1_active_nand3_path;
+ double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
+ double w_L2_n[MAX_NUMBER_GATES_STAGE];
+ double w_L2_p[MAX_NUMBER_GATES_STAGE];
+ double delay_nand2_path;
+ double delay_nand3_path;
+ powerDef power_nand2_path;
+ powerDef power_nand3_path;
+ powerDef power_L2;
+
+ bool is_dram_;
+
+ void compute_widths();
+ void compute_area();
+
+ void leakage_feedback(double temperature);
+
+ pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
+ // return <outrise_nand2, outrise_nand3>
+};
+
+
+class PredecBlkDrv : public Component
+{
+ public:
+ PredecBlkDrv(
+ int way_select,
+ PredecBlk * blk_,
+ bool is_dram);
+
+ int flag_driver_exists;
+ int number_input_addr_bits;
+ int number_gates_nand2_path;
+ int number_gates_nand3_path;
+ int min_number_gates;
+ int num_buffers_driving_1_nand2_load;
+ int num_buffers_driving_2_nand2_load;
+ int num_buffers_driving_4_nand2_load;
+ int num_buffers_driving_2_nand3_load;
+ int num_buffers_driving_8_nand3_load;
+ int num_buffers_nand3_path;
+ double c_load_nand2_path_out;
+ double c_load_nand3_path_out;
+ double r_load_nand2_path_out;
+ double r_load_nand3_path_out;
+ double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
+ double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
+ double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
+ double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
+ double delay_nand2_path;
+ double delay_nand3_path;
+ powerDef power_nand2_path;
+ powerDef power_nand3_path;
+
+ PredecBlk * blk;
+ Decoder * dec;
+ bool is_dram_;
+ int way_select;
+
+ void compute_widths();
+ void compute_area();
+
+ void leakage_feedback(double temperature);
+
+
+ pair<double, double> compute_delays(
+ double inrisetime_nand2_path,
+ double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
+
+ inline int num_addr_bits_nand2_path()
+ {
+ return num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load;
+ }
+ inline int num_addr_bits_nand3_path()
+ {
+ return num_buffers_driving_2_nand3_load +
+ num_buffers_driving_8_nand3_load;
+ }
+ double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
+};
+
+
+
+class Predec : public Component
+{
+ public:
+ Predec(
+ PredecBlkDrv * drv1,
+ PredecBlkDrv * drv2);
+
+ double compute_delays(double inrisetime); // return outrisetime
+
+ void leakage_feedback(double temperature);
+ PredecBlk * blk1;
+ PredecBlk * blk2;
+ PredecBlkDrv * drv1;
+ PredecBlkDrv * drv2;
+
+ powerDef block_power;
+ powerDef driver_power;
+
+ private:
+ // returns <delay, risetime>
+ pair<double, double> get_max_delay_before_decoder(
+ pair<double, double> input_pair1,
+ pair<double, double> input_pair2);
+};
+
+
+
+class Driver : public Component
+{
+ public:
+ Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
+
+ int number_gates;
+ int min_number_gates;
+ double width_n[MAX_NUMBER_GATES_STAGE];
+ double width_p[MAX_NUMBER_GATES_STAGE];
+ double c_gate_load;
+ double c_wire_load;
+ double r_wire_load;
+ double delay;
+ powerDef power;
+ bool is_dram_;
+
+ void compute_widths();
+ double compute_delay(double inrisetime);
+};
+
+
+#endif
diff --git a/ext/mcpat/cacti/htree2.cc b/ext/mcpat/cacti/htree2.cc
new file mode 100644
index 000000000..817ea6a7c
--- /dev/null
+++ b/ext/mcpat/cacti/htree2.cc
@@ -0,0 +1,641 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <cassert>
+#include <iostream>
+
+#include "htree2.h"
+#include "wire.h"
+
+Htree2::Htree2(
+ enum Wire_type wire_model, double mat_w, double mat_h,
+ int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type,
+ bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
+ :in_rise_time(0), out_rise_time(0),
+ tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
+ add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits),
+ search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
+ uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt)
+{
+ assert(ndbl >= 2 && ndwl >= 2);
+
+// if (ndbl == 1 && ndwl == 1)
+// {
+// delay = 0;
+// power.readOp.dynamic = 0;
+// power.readOp.leakage = 0;
+// area.w = mat_w;
+// area.h = mat_h;
+// return;
+// }
+// if (ndwl == 1) ndwl++;
+// if (ndbl == 1) ndbl++;
+
+ max_unpipelined_link_delay = 0; //TODO
+ min_w_nmos = g_tp.min_w_nmos_;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
+
+ switch (htree_type)
+ {
+ case Add_htree:
+ wire_bw = init_wire_bw = add_bits;
+ in_htree();
+ break;
+ case Data_in_htree:
+ wire_bw = init_wire_bw = data_in_bits;
+ in_htree();
+ break;
+ case Data_out_htree:
+ wire_bw = init_wire_bw = data_out_bits;
+ out_htree();
+ break;
+ case Search_in_htree:
+ wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
+ in_htree();
+ break;
+ case Search_out_htree:
+ wire_bw = init_wire_bw = search_data_out_bits;
+ out_htree();
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ power_bit = power;
+ power.readOp.dynamic *= init_wire_bw;
+
+ assert(power.readOp.dynamic >= 0);
+ assert(power.readOp.leakage >= 0);
+}
+
+
+
+// nand gate sizing calculation
+void Htree2::input_nand(double s1, double s2, double l_eff)
+{
+ Wire w1(wt, l_eff);
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // input capacitance of a repeater = input capacitance of nand.
+ double nsize = s1*(1 + pton_size)/(2 + pton_size);
+ nsize = (nsize < 1) ? 1 : nsize;
+
+ double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) *
+ (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
+ 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0));
+ delay+= horowitz (w1.out_rise_time, tc,
+ deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
+ power.readOp.dynamic += 0.5 *
+ (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
+
+ power.searchOp.dynamic += 0.5 *
+ (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * wire_bw ;
+ power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
+ power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
+}
+
+
+
+// tristate buffer model consisting of not, nand, nor, and driver transistors
+void Htree2::output_buffer(double s1, double s2, double l_eff)
+{
+ Wire w1(wt, l_eff);
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // input capacitance of repeater = input capacitance of nand + nor.
+ double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
+ double s_eff = //stage eff of a repeater in a wire
+ (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/
+ gate_C(s2*(min_w_nmos + min_w_pmos), 0);
+ double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0));
+ size = (size < 1) ? 1 : size;
+
+ double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1);
+ double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1);
+ double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
+ gate_C(tr_size*min_w_pmos, 0);
+ double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
+ gate_C(s1*(min_w_nmos + min_w_pmos), 0);
+
+ double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
+
+
+ delay += horowitz (w1.out_rise_time, tc,
+ deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
+
+ //nand
+ power.readOp.dynamic += 0.5 *
+ (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(tr_size*(min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
+
+ power.searchOp.dynamic += 0.5 *
+ (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(tr_size*(min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd*init_wire_bw;
+
+ //not
+ power.readOp.dynamic += 0.5 *
+ (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
+
+ power.searchOp.dynamic += 0.5 *
+ (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd*init_wire_bw;
+
+ //nor
+ power.readOp.dynamic += 0.5 *
+ (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
+
+ power.searchOp.dynamic += 0.5 *
+ (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd*init_wire_bw;
+
+ //output transistor
+ power.readOp.dynamic += 0.5 *
+ ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
+ + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
+
+ power.searchOp.dynamic += 0.5 *
+ ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
+ + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd*init_wire_bw;
+
+ if(uca_tree) {
+ power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
+ power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
+ power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
+
+ power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
+ power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
+ power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
+ //power.readOp.gate_leakage *=;
+ }
+ else {
+ power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
+ power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
+ power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
+
+ power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
+ power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
+ power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
+ //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw;
+ }
+}
+
+
+
+/* calculates the input h-tree delay/power
+ * A nand gate is used at each node to
+ * limit the signal
+ * The area of an unbalanced htree (rows != columns)
+ * depends on how data is traversed.
+ * In the following function, if ( no. of rows < no. of columns),
+ * then data first traverse in excess hor. links until vertical
+ * and horizontal nodes are same.
+ * If no. of rows is bigger, then data traverse in
+ * a hor. link followed by a ver. link in a repeated
+ * fashion (similar to a balanced tree) until there are no
+ * hor. links left. After this it goes through the remaining vertical
+ * links.
+ */
+ void
+Htree2::in_htree()
+{
+ //temp var
+ double s1 = 0, s2 = 0, s3 = 0;
+ double l_eff = 0;
+ Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
+ double len = 0, ht = 0;
+ int option = 0;
+
+ int h = (int) _log2(ndwl/2); // horizontal nodes
+ int v = (int) _log2(ndbl/2); // vertical nodes
+ double len_temp;
+ double ht_temp;
+ if (uca_tree)
+ {//Sheng: this computation do not consider the wires that route from edge to middle.
+ ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
+ ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
+ 2 * (1-pow(0.5,h))))/2;
+ len_temp = (mat_width*ndwl/2 +
+ ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
+ 2 * (1-pow(0.5,v))))/2;
+ }
+ else
+ {
+ if (ndwl == ndbl) {
+ ht_temp = ((mat_height*ndbl/2) +
+ ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
+ )/2;
+ len_temp = (mat_width*ndwl/2 +
+ ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
+ }
+ else if (ndwl > ndbl) {
+ double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
+ ht_temp = ((mat_height*ndbl/2) +
+ ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
+ (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
+ len_temp = (mat_width*ndwl/2 +
+ ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
+ }
+ else {
+ double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
+ ht_temp = ((mat_height*ndbl/2) +
+ ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
+ )/2;
+ len_temp = (mat_width*ndwl/2 +
+ ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
+ }
+ }
+
+ area.h = ht_temp * 2;
+ area.w = len_temp * 2;
+ delay = 0;
+ power.readOp.dynamic = 0;
+ power.readOp.leakage = 0;
+ power.searchOp.dynamic =0;
+ len = len_temp;
+ ht = ht_temp/2;
+
+ while (v > 0 || h > 0)
+ {
+ if (wtemp1) delete wtemp1;
+ if (wtemp2) delete wtemp2;
+ if (wtemp3) delete wtemp3;
+
+ if (h > v)
+ {
+ //the iteration considers only one horizontal link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, len/2); // ver
+ len_temp = len;
+ len /= 2;
+ wtemp3 = 0;
+ h--;
+ option = 0;
+ }
+ else if (v>0 && h>0)
+ {
+ //considers one horizontal link and one vertical link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, ht); // ver
+ wtemp3 = new Wire(wt, len/2); // next hor
+ len_temp = len;
+ ht_temp = ht;
+ len /= 2;
+ ht /= 2;
+ v--;
+ h--;
+ option = 1;
+ }
+ else
+ {
+ // considers only one vertical link
+ assert(h == 0);
+ wtemp1 = new Wire(wt, ht); // ver
+ wtemp2 = new Wire(wt, ht/2); // hor
+ ht_temp = ht;
+ ht /= 2;
+ wtemp3 = 0;
+ v--;
+ option = 2;
+ }
+
+ delay += wtemp1->delay;
+ power.readOp.dynamic += wtemp1->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw;
+ power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
+ power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
+ if ((uca_tree == false && option == 2) || search_tree==true)
+ {
+ wire_bw*=2; // wire bandwidth doubles only for vertical branches
+ }
+
+ if (uca_tree == false)
+ {
+ if (len_temp > wtemp1->repeater_spacing)
+ {
+ s1 = wtemp1->repeater_size;
+ l_eff = wtemp1->repeater_spacing;
+ }
+ else
+ {
+ s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
+ l_eff = len_temp;
+ }
+
+ if (ht_temp > wtemp2->repeater_spacing)
+ {
+ s2 = wtemp2->repeater_size;
+ }
+ else
+ {
+ s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
+ }
+ // first level
+ input_nand(s1, s2, l_eff);
+ }
+
+
+ if (option != 1)
+ {
+ continue;
+ }
+
+ // second level
+ delay += wtemp2->delay;
+ power.readOp.dynamic += wtemp2->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw;
+ power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
+
+ if (uca_tree)
+ {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
+ }
+ else
+ {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
+ wire_bw*=2;
+
+ if (ht_temp > wtemp3->repeater_spacing)
+ {
+ s3 = wtemp3->repeater_size;
+ l_eff = wtemp3->repeater_spacing;
+ }
+ else
+ {
+ s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
+ l_eff = ht_temp;
+ }
+
+ input_nand(s2, s3, l_eff);
+ }
+ }
+
+ if (wtemp1) delete wtemp1;
+ if (wtemp2) delete wtemp2;
+ if (wtemp3) delete wtemp3;
+}
+
+
+
+/* a tristate buffer is used to handle fan-ins
+ * The area of an unbalanced htree (rows != columns)
+ * depends on how data is traversed.
+ * In the following function, if ( no. of rows < no. of columns),
+ * then data first traverse in excess hor. links until vertical
+ * and horizontal nodes are same.
+ * If no. of rows is bigger, then data traverse in
+ * a hor. link followed by a ver. link in a repeated
+ * fashion (similar to a balanced tree) until there are no
+ * hor. links left. After this it goes through the remaining vertical
+ * links.
+ */
+void Htree2::out_htree()
+{
+ //temp var
+ double s1 = 0, s2 = 0, s3 = 0;
+ double l_eff = 0;
+ Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
+ double len = 0, ht = 0;
+ int option = 0;
+
+ int h = (int) _log2(ndwl/2);
+ int v = (int) _log2(ndbl/2);
+ double len_temp;
+ double ht_temp;
+ if (uca_tree)
+ {
+ ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
+ ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
+ 2 * (1-pow(0.5,h))))/2;
+ len_temp = (mat_width*ndwl/2 +
+ ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
+ 2 * (1-pow(0.5,v))))/2;
+ }
+ else
+ {
+ if (ndwl == ndbl) {
+ ht_temp = ((mat_height*ndbl/2) +
+ ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
+ )/2;
+ len_temp = (mat_width*ndwl/2 +
+ ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
+
+ }
+ else if (ndwl > ndbl) {
+ double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
+ ht_temp = ((mat_height*ndbl/2) +
+ ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
+ (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
+ len_temp = (mat_width*ndwl/2 +
+ ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
+ }
+ else {
+ double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
+ ht_temp = ((mat_height*ndbl/2) +
+ ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
+ )/2;
+ len_temp = (mat_width*ndwl/2 +
+ ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
+ }
+ }
+ area.h = ht_temp * 2;
+ area.w = len_temp * 2;
+ delay = 0;
+ power.readOp.dynamic = 0;
+ power.readOp.leakage = 0;
+ power.readOp.gate_leakage = 0;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ len = len_temp;
+ ht = ht_temp/2;
+
+ while (v > 0 || h > 0)
+ { //finds delay/power of each link in the tree
+ if (wtemp1) delete wtemp1;
+ if (wtemp2) delete wtemp2;
+ if (wtemp3) delete wtemp3;
+
+ if(h > v) {
+ //the iteration considers only one horizontal link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, len/2); // ver
+ len_temp = len;
+ len /= 2;
+ wtemp3 = 0;
+ h--;
+ option = 0;
+ }
+ else if (v>0 && h>0) {
+ //considers one horizontal link and one vertical link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, ht); // ver
+ wtemp3 = new Wire(wt, len/2); // next hor
+ len_temp = len;
+ ht_temp = ht;
+ len /= 2;
+ ht /= 2;
+ v--;
+ h--;
+ option = 1;
+ }
+ else {
+ // considers only one vertical link
+ assert(h == 0);
+ wtemp1 = new Wire(wt, ht); // hor
+ wtemp2 = new Wire(wt, ht/2); // ver
+ ht_temp = ht;
+ ht /= 2;
+ wtemp3 = 0;
+ v--;
+ option = 2;
+ }
+ delay += wtemp1->delay;
+ power.readOp.dynamic += wtemp1->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw;
+ power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
+ power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ if ((uca_tree == false && option == 2) || search_tree==true)
+ {
+ wire_bw*=2;
+ }
+
+ if (uca_tree == false)
+ {
+ if (len_temp > wtemp1->repeater_spacing)
+ {
+ s1 = wtemp1->repeater_size;
+ l_eff = wtemp1->repeater_spacing;
+ }
+ else
+ {
+ s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
+ l_eff = len_temp;
+ }
+ if (ht_temp > wtemp2->repeater_spacing)
+ {
+ s2 = wtemp2->repeater_size;
+ }
+ else
+ {
+ s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
+ }
+ // first level
+ output_buffer(s1, s2, l_eff);
+ }
+
+
+ if (option != 1)
+ {
+ continue;
+ }
+
+ // second level
+ delay += wtemp2->delay;
+ power.readOp.dynamic += wtemp2->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw;
+ power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ if (uca_tree)
+ {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
+ }
+ else
+ {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
+ wire_bw*=2;
+
+ if (ht_temp > wtemp3->repeater_spacing)
+ {
+ s3 = wtemp3->repeater_size;
+ l_eff = wtemp3->repeater_spacing;
+ }
+ else
+ {
+ s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
+ l_eff = ht_temp;
+ }
+
+ output_buffer(s2, s3, l_eff);
+ }
+ //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
+ }
+
+ if (wtemp1) delete wtemp1;
+ if (wtemp2) delete wtemp2;
+ if (wtemp3) delete wtemp3;
+}
+
diff --git a/ext/mcpat/cacti/htree2.h b/ext/mcpat/cacti/htree2.h
new file mode 100644
index 000000000..053e43a27
--- /dev/null
+++ b/ext/mcpat/cacti/htree2.h
@@ -0,0 +1,97 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#ifndef __HTREE2_H__
+#define __HTREE2_H__
+
+#include "assert.h"
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "parameter.h"
+#include "subarray.h"
+#include "wire.h"
+
+// leakge power includes entire htree in a bank (when uca_tree == false)
+// leakge power includes only part to one bank when uca_tree == true
+
+class Htree2 : public Component
+{
+ public:
+ Htree2(enum Wire_type wire_model,
+ double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
+ enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ ~Htree2() {};
+
+ void in_htree();
+ void out_htree();
+
+ // repeaters only at h-tree nodes
+ void limited_in_htree();
+ void limited_out_htree();
+ void input_nand(double s1, double s2, double l);
+ void output_buffer(double s1, double s2, double l);
+
+ double in_rise_time, out_rise_time;
+
+ void set_in_rise_time(double rt)
+ {
+ in_rise_time = rt;
+ }
+
+ double max_unpipelined_link_delay;
+ powerDef power_bit;
+
+
+ private:
+ double wire_bw;
+ double init_wire_bw; // bus width at root
+ enum Htree_type tree_type;
+ double htree_hnodes;
+ double htree_vnodes;
+ double mat_width;
+ double mat_height;
+ int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
+ int ndbl, ndwl;
+ bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
+ bool search_tree;
+
+ enum Wire_type wt;
+ double min_w_nmos;
+ double min_w_pmos;
+
+ TechnologyParameter::DeviceType *deviceType;
+
+};
+
+#endif
diff --git a/ext/mcpat/cacti/io.cc b/ext/mcpat/cacti/io.cc
new file mode 100644
index 000000000..56725ab7c
--- /dev/null
+++ b/ext/mcpat/cacti/io.cc
@@ -0,0 +1,2350 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include "Ucache.h"
+#include "arbiter.h"
+#include "area.h"
+#include "basic_circuit.h"
+#include "crossbar.h"
+#include "io.h"
+#include "nuca.h"
+#include "parameter.h"
+//#include "highradix.h"
+
+using namespace std;
+
+
+/* Parses "cache.cfg" file */
+ void
+InputParameter::parse_cfg(const string & in_file)
+{
+ FILE *fp = fopen(in_file.c_str(), "r");
+ char line[5000];
+ char jk[5000];
+ char temp_var[5000];
+
+ if(!fp) {
+ cout << in_file << " is missing!\n";
+ exit(-1);
+ }
+
+ while(fscanf(fp, "%[^\n]\n", line) != EOF) {
+
+ if (!strncmp("-size", line, strlen("-size"))) {
+ sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz));
+ continue;
+ }
+
+ if (!strncmp("-page size", line, strlen("-page size"))) {
+ sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits));
+ continue;
+ }
+
+ if (!strncmp("-burst length", line, strlen("-burst length"))) {
+ sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len));
+ continue;
+ }
+
+ if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) {
+ sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w));
+ continue;
+ }
+
+ if (!strncmp("-block", line, strlen("-block"))) {
+ sscanf(line, "-block size (bytes) %d", &(line_sz));
+ continue;
+ }
+
+ if (!strncmp("-associativity", line, strlen("-associativity"))) {
+ sscanf(line, "-associativity %d", &(assoc));
+ continue;
+ }
+
+ if (!strncmp("-read-write", line, strlen("-read-write"))) {
+ sscanf(line, "-read-write port %d", &(num_rw_ports));
+ continue;
+ }
+
+ if (!strncmp("-exclusive read", line, strlen("exclusive read"))) {
+ sscanf(line, "-exclusive read port %d", &(num_rd_ports));
+ continue;
+ }
+
+ if(!strncmp("-exclusive write", line, strlen("-exclusive write"))) {
+ sscanf(line, "-exclusive write port %d", &(num_wr_ports));
+ continue;
+ }
+
+ if (!strncmp("-single ended", line, strlen("-single ended"))) {
+ sscanf(line, "-single %[(:-~)*]%d", jk,
+ &(num_se_rd_ports));
+ continue;
+ }
+
+ if (!strncmp("-search", line, strlen("-search"))) {
+ sscanf(line, "-search port %d", &(num_search_ports));
+ continue;
+ }
+
+ if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) {
+ sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks));
+ continue;
+ }
+
+ if (!strncmp("-technology", line, strlen("-technology"))) {
+ sscanf(line, "-technology (u) %lf", &(F_sz_um));
+ F_sz_nm = F_sz_um*1000;
+ continue;
+ }
+
+ if (!strncmp("-output/input", line, strlen("-output/input"))) {
+ sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w));
+ continue;
+ }
+
+ if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) {
+ sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp));
+ continue;
+ }
+
+ if (!strncmp("-cache type", line, strlen("-cache type"))) {
+ sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("cache", temp_var, sizeof("cache"))) {
+ is_cache = true;
+ }
+ else
+ {
+ is_cache = false;
+ }
+
+ if (!strncmp("main memory", temp_var, sizeof("main memory"))) {
+ is_main_mem = true;
+ }
+ else {
+ is_main_mem = false;
+ }
+
+ if (!strncmp("cam", temp_var, sizeof("cam"))) {
+ pure_cam = true;
+ }
+ else {
+ pure_cam = false;
+ }
+
+ if (!strncmp("ram", temp_var, sizeof("ram"))) {
+ pure_ram = true;
+ }
+ else {
+ if (!is_main_mem)
+ pure_ram = false;
+ else
+ pure_ram = true;
+ }
+
+ continue;
+ }
+
+
+ if (!strncmp("-tag size", line, strlen("-tag size"))) {
+ sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("default", temp_var, sizeof("default"))) {
+ specific_tag = false;
+ tag_w = 42; /* the acutal value is calculated
+ * later based on the cache size, bank count, and associativity
+ */
+ }
+ else {
+ specific_tag = true;
+ sscanf(line, "-tag size (b) %d", &(tag_w));
+ }
+ continue;
+ }
+
+ if (!strncmp("-access mode", line, strlen("-access mode"))) {
+ sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("fast", temp_var, strlen("fast"))) {
+ access_mode = 2;
+ }
+ else if (!strncmp("sequential", temp_var, strlen("sequential"))) {
+ access_mode = 1;
+ }
+ else if(!strncmp("normal", temp_var, strlen("normal"))) {
+ access_mode = 0;
+ }
+ else {
+ cout << "ERROR: Invalid access mode!\n";
+ exit(0);
+ }
+ continue;
+ }
+
+ if (!strncmp("-Data array cell type", line, strlen("-Data array cell type"))) {
+ sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ data_arr_ram_cell_tech_type = 0;
+ }
+ else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ data_arr_ram_cell_tech_type = 1;
+ }
+ else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ data_arr_ram_cell_tech_type = 2;
+ }
+ else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
+ data_arr_ram_cell_tech_type = 3;
+ }
+ else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
+ data_arr_ram_cell_tech_type = 4;
+ }
+ else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
+
+ if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) {
+ sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ data_arr_peri_global_tech_type = 0;
+ }
+ else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ data_arr_peri_global_tech_type = 1;
+ }
+ else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ data_arr_peri_global_tech_type = 2;
+ }
+ else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
+
+ if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) {
+ sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ tag_arr_ram_cell_tech_type = 0;
+ }
+ else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ tag_arr_ram_cell_tech_type = 1;
+ }
+ else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ tag_arr_ram_cell_tech_type = 2;
+ }
+ else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
+ tag_arr_ram_cell_tech_type = 3;
+ }
+ else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
+ tag_arr_ram_cell_tech_type = 4;
+ }
+ else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
+
+ if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) {
+ sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ tag_arr_peri_global_tech_type = 0;
+ }
+ else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ tag_arr_peri_global_tech_type = 1;
+ }
+ else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ tag_arr_peri_global_tech_type = 2;
+ }
+ else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
+ if(!strncmp("-design", line, strlen("-design"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_wt), &(dynamic_power_wt),
+ &(leakage_power_wt),
+ &(cycle_time_wt), &(area_wt));
+ continue;
+ }
+
+ if(!strncmp("-deviate", line, strlen("-deviate"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_dev), &(dynamic_power_dev),
+ &(leakage_power_dev),
+ &(cycle_time_dev), &(area_dev));
+ continue;
+ }
+
+ if(!strncmp("-Optimize", line, strlen("-Optimize"))) {
+ sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if(!strncmp("ED^2", temp_var, strlen("ED^2"))) {
+ ed = 2;
+ }
+ else if(!strncmp("ED", temp_var, strlen("ED"))) {
+ ed = 1;
+ }
+ else {
+ ed = 0;
+ }
+ }
+
+ if(!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_wt_nuca), &(dynamic_power_wt_nuca),
+ &(leakage_power_wt_nuca),
+ &(cycle_time_wt_nuca), &(area_wt_nuca));
+ continue;
+ }
+
+ if(!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_dev_nuca), &(dynamic_power_dev_nuca),
+ &(leakage_power_dev_nuca),
+ &(cycle_time_dev_nuca), &(area_dev_nuca));
+ continue;
+ }
+
+ if(!strncmp("-Cache model", line, strlen("-cache model"))) {
+ sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("UCA", temp_var, strlen("UCA"))) {
+ nuca = 0;
+ }
+ else {
+ nuca = 1;
+ }
+ continue;
+ }
+
+ if(!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) {
+ sscanf(line, "-NUCA bank count %d", &(nuca_bank_count));
+
+ if (nuca_bank_count != 0) {
+ force_nuca_bank = 1;
+ }
+ continue;
+ }
+
+ if(!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("global", temp_var, strlen("global"))) {
+ wire_is_mat_type = 2;
+ continue;
+ }
+ else if (!strncmp("local", temp_var, strlen("local"))) {
+ wire_is_mat_type = 0;
+ continue;
+ }
+ else {
+ wire_is_mat_type = 1;
+ continue;
+ }
+ }
+
+ if(!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("global", temp_var, strlen("global"))) {
+ wire_os_mat_type = 2;
+ }
+ else {
+ wire_os_mat_type = 1;
+ }
+ continue;
+ }
+
+ if(!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) {
+ sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("aggressive", temp_var, strlen("aggressive"))) {
+ ic_proj_type = 0;
+ }
+ else {
+ ic_proj_type = 1;
+ }
+ continue;
+ }
+
+ if(!strncmp("-Wire signalling", line, strlen("-wire signalling"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("default", temp_var, strlen("default"))) {
+ force_wiretype = 0;
+ wt = Global;
+ }
+ else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) {
+ force_wiretype = 1;
+ wt = Global_10;
+ }
+ else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) {
+ force_wiretype = 1;
+ wt = Global_20;
+ }
+ else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) {
+ force_wiretype = 1;
+ wt = Global_30;
+ }
+ else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) {
+ force_wiretype = 1;
+ wt = Global_5;
+ }
+ else if (!(strncmp("Global", temp_var, strlen("Global")))) {
+ force_wiretype = 1;
+ wt = Global;
+ }
+ else {
+ wt = Low_swing;
+ force_wiretype = 1;
+ }
+ continue;
+ }
+
+
+
+ if(!strncmp("-Core", line, strlen("-Core"))) {
+ sscanf(line, "-Core count %d\n", &(cores));
+ if (cores > 16) {
+ printf("No. of cores should be less than 16!\n");
+ }
+ continue;
+ }
+
+ if(!strncmp("-Cache level", line, strlen("-Cache level"))) {
+ sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("L2", temp_var, strlen("L2"))) {
+ cache_level = 0;
+ }
+ else {
+ cache_level = 1;
+ }
+ }
+
+ if(!strncmp("-Print level", line, strlen("-Print level"))) {
+ sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) {
+ print_detail = 1;
+ }
+ else {
+ print_detail = 0;
+ }
+
+ }
+ if(!strncmp("-Add ECC", line, strlen("-Add ECC"))) {
+ sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ add_ecc_b_ = true;
+ }
+ else {
+ add_ecc_b_ = false;
+ }
+ }
+
+ if(!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) {
+ sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ print_input_args = true;
+ }
+ else {
+ print_input_args = false;
+ }
+ }
+
+ if(!strncmp("-Force cache config", line, strlen("-Force cache config"))) {
+ sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ force_cache_config = true;
+ }
+ else {
+ force_cache_config = false;
+ }
+ }
+
+ if(!strncmp("-Ndbl", line, strlen("-Ndbl"))) {
+ sscanf(line, "-Ndbl %d\n", &(ndbl));
+ continue;
+ }
+ if(!strncmp("-Ndwl", line, strlen("-Ndwl"))) {
+ sscanf(line, "-Ndwl %d\n", &(ndwl));
+ continue;
+ }
+ if(!strncmp("-Nspd", line, strlen("-Nspd"))) {
+ sscanf(line, "-Nspd %d\n", &(nspd));
+ continue;
+ }
+ if(!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) {
+ sscanf(line, "-Ndsam1 %d\n", &(ndsam1));
+ continue;
+ }
+ if(!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) {
+ sscanf(line, "-Ndsam2 %d\n", &(ndsam2));
+ continue;
+ }
+ if(!strncmp("-Ndcm", line, strlen("-Ndcm"))) {
+ sscanf(line, "-Ndcm %d\n", &(ndcm));
+ continue;
+ }
+
+ }
+ rpters_in_htree = true;
+ fclose(fp);
+}
+
+ void
+InputParameter::display_ip()
+{
+ cout << "Cache size : " << cache_sz << endl;
+ cout << "Block size : " << line_sz << endl;
+ cout << "Associativity : " << assoc << endl;
+ cout << "Read only ports : " << num_rd_ports << endl;
+ cout << "Write only ports : " << num_wr_ports << endl;
+ cout << "Read write ports : " << num_rw_ports << endl;
+ cout << "Single ended read ports : " << num_se_rd_ports << endl;
+ if (fully_assoc||pure_cam)
+ {
+ cout << "Search ports : " << num_search_ports << endl;
+ }
+ cout << "Cache banks (UCA) : " << nbanks << endl;
+ cout << "Technology : " << F_sz_um << endl;
+ cout << "Temperature : " << temp << endl;
+ cout << "Tag size : " << tag_w << endl;
+ if (is_cache) {
+ cout << "array type : " << "Cache" << endl;
+ }
+ if (pure_ram) {
+ cout << "array type : " << "Scratch RAM" << endl;
+ }
+ if (pure_cam)
+ {
+ cout << "array type : " << "CAM" << endl;
+ }
+ cout << "Model as memory : " << is_main_mem << endl;
+ cout << "Access mode : " << access_mode << endl;
+ cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl;
+ cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl;
+ cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl;
+ cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl;
+ cout << "Optimization target : " << ed << endl;
+ cout << "Design objective (UCA wt) : " << delay_wt << " "
+ << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt
+ << " " << area_wt << endl;
+ cout << "Design objective (UCA dev) : " << delay_dev << " "
+ << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev
+ << " " << area_dev << endl;
+ if (nuca)
+ {
+ cout << "Cores : " << cores << endl;
+
+
+ cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " "
+ << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca
+ << " " << area_wt_nuca << endl;
+ cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " "
+ << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca
+ << " " << area_dev_nuca << endl;
+ }
+ cout << "Cache model : " << nuca << endl;
+ cout << "Nuca bank : " << nuca_bank_count << endl;
+ cout << "Wire inside mat : " << wire_is_mat_type << endl;
+ cout << "Wire outside mat : " << wire_os_mat_type << endl;
+ cout << "Interconnect projection : " << ic_proj_type << endl;
+ cout << "Wire signalling : " << force_wiretype << endl;
+ cout << "Print level : " << print_detail << endl;
+ cout << "ECC overhead : " << add_ecc_b_ << endl;
+ cout << "Page size : " << page_sz_bits << endl;
+ cout << "Burst length : " << burst_len << endl;
+ cout << "Internal prefetch width : " << int_prefetch_w << endl;
+ cout << "Force cache config : " << g_ip->force_cache_config << endl;
+ if (g_ip->force_cache_config) {
+ cout << "Ndwl : " << g_ip->ndwl << endl;
+ cout << "Ndbl : " << g_ip->ndbl << endl;
+ cout << "Nspd : " << g_ip->nspd << endl;
+ cout << "Ndcm : " << g_ip->ndcm << endl;
+ cout << "Ndsam1 : " << g_ip->ndsam1 << endl;
+ cout << "Ndsam2 : " << g_ip->ndsam2 << endl;
+ }
+}
+
+
+
+powerComponents operator+(const powerComponents & x, const powerComponents & y)
+{
+ powerComponents z;
+
+ z.dynamic = x.dynamic + y.dynamic;
+ z.leakage = x.leakage + y.leakage;
+ z.gate_leakage = x.gate_leakage + y.gate_leakage;
+ z.short_circuit = x.short_circuit + y.short_circuit;
+ z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage;
+
+ return z;
+}
+
+powerComponents operator*(const powerComponents & x, double const * const y)
+{
+ powerComponents z;
+
+ z.dynamic = x.dynamic*y[0];
+ z.leakage = x.leakage*y[1];
+ z.gate_leakage = x.gate_leakage*y[2];
+ z.short_circuit = x.short_circuit*y[3];
+ z.longer_channel_leakage = x.longer_channel_leakage*y[1];//longer channel leakage has the same behavior as normal leakage
+
+ return z;
+}
+
+
+powerDef operator+(const powerDef & x, const powerDef & y)
+{
+ powerDef z;
+
+ z.readOp = x.readOp + y.readOp;
+ z.writeOp = x.writeOp + y.writeOp;
+ z.searchOp = x.searchOp + y.searchOp;
+ return z;
+}
+
+powerDef operator*(const powerDef & x, double const * const y)
+{
+ powerDef z;
+
+ z.readOp = x.readOp*y;
+ z.writeOp = x.writeOp*y;
+ z.searchOp = x.searchOp*y;
+ return z;
+}
+
+uca_org_t cacti_interface(const string & infile_name)
+{
+
+ uca_org_t fin_res;
+ //uca_org_t result;
+ fin_res.valid = false;
+
+ g_ip = new InputParameter();
+ g_ip->parse_cfg(infile_name);
+ if(!g_ip->error_checking())
+ exit(0);
+ if (g_ip->print_input_args)
+ g_ip->display_ip();
+
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+
+
+// For HighRadix Only
+// //// Wire wirea(g_ip->wt, 1000);
+// //// wirea.print_wire();
+// //// cout << "Wire Area " << wirea.area.get_area() << " sq. u" << endl;
+// // winit.print_wire();
+// //
+// HighRadix *hr;
+// hr = new HighRadix();
+// hr->compute_power();
+// hr->print_router();
+// exit(0);
+//
+// double sub_switch_sz = 2;
+// double rows = 32;
+// for (int i=0; i<6; i++) {
+// sub_switch_sz = pow(2, i);
+// rows = 64/sub_switch_sz;
+// hr = new HighRadix(sub_switch_sz, rows, .8/* freq */, 64, 2, 64, 0.7);
+// hr->compute_power();
+// hr->print_router();
+// delete hr;
+// }
+// // HighRadix yarc;
+// // yarc.compute_power();
+// // yarc.print_router();
+// winit.print_wire();
+// exit(0);
+// For HighRadix Only End
+
+ if (g_ip->nuca == 1)
+ {
+ Nuca n(&g_tp.peri_global);
+ n.sim_nuca();
+ }
+ g_ip->display_ip();
+ solve(&fin_res);
+
+ output_UCA(&fin_res);
+ output_data_csv(fin_res);
+
+ delete (g_ip);
+ return fin_res;
+}
+
+//cacti6.5's plain interface, please keep !!!
+uca_org_t cacti_interface(
+ int cache_size,
+ int line_size,
+ int associativity,
+ int rw_ports,
+ int excl_read_ports,
+ int excl_write_ports,
+ int single_ended_read_ports,
+ int banks,
+ double tech_node, // in nm
+ int page_sz,
+ int burst_length,
+ int pre_width,
+ int output_width,
+ int specific_tag,
+ int tag_width,
+ int access_mode, //0 normal, 1 seq, 2 fast
+ int cache, //scratch ram or cache
+ int main_mem,
+ int obj_func_delay,
+ int obj_func_dynamic_power,
+ int obj_func_leakage_power,
+ int obj_func_area,
+ int obj_func_cycle_time,
+ int dev_func_delay,
+ int dev_func_dynamic_power,
+ int dev_func_leakage_power,
+ int dev_func_area,
+ int dev_func_cycle_time,
+ int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
+ int temp,
+ int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
+ int data_arr_ram_cell_tech_flavor_in, // 0-4
+ int data_arr_peri_global_tech_flavor_in,
+ int tag_arr_ram_cell_tech_flavor_in,
+ int tag_arr_peri_global_tech_flavor_in,
+ int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
+ int wire_inside_mat_type_in,
+ int wire_outside_mat_type_in,
+ int is_nuca, // 0 - UCA, 1 - NUCA
+ int core_count,
+ int cache_level, // 0 - L2, 1 - L3
+ int nuca_bank_count,
+ int nuca_obj_func_delay,
+ int nuca_obj_func_dynamic_power,
+ int nuca_obj_func_leakage_power,
+ int nuca_obj_func_area,
+ int nuca_obj_func_cycle_time,
+ int nuca_dev_func_delay,
+ int nuca_dev_func_dynamic_power,
+ int nuca_dev_func_leakage_power,
+ int nuca_dev_func_area,
+ int nuca_dev_func_cycle_time,
+ int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
+ int p_input)
+{
+ g_ip = new InputParameter();
+ g_ip->add_ecc_b_ = true;
+
+ g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
+ g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
+ g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
+ g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
+
+ g_ip->ic_proj_type = interconnect_projection_type_in;
+ g_ip->wire_is_mat_type = wire_inside_mat_type_in;
+ g_ip->wire_os_mat_type = wire_outside_mat_type_in;
+ g_ip->burst_len = burst_length;
+ g_ip->int_prefetch_w = pre_width;
+ g_ip->page_sz_bits = page_sz;
+
+ g_ip->cache_sz = cache_size;
+ g_ip->line_sz = line_size;
+ g_ip->assoc = associativity;
+ g_ip->nbanks = banks;
+ g_ip->out_w = output_width;
+ g_ip->specific_tag = specific_tag;
+ if (tag_width == 0) {
+ g_ip->tag_w = 42;
+ }
+ else {
+ g_ip->tag_w = tag_width;
+ }
+
+ g_ip->access_mode = access_mode;
+ g_ip->delay_wt = obj_func_delay;
+ g_ip->dynamic_power_wt = obj_func_dynamic_power;
+ g_ip->leakage_power_wt = obj_func_leakage_power;
+ g_ip->area_wt = obj_func_area;
+ g_ip->cycle_time_wt = obj_func_cycle_time;
+ g_ip->delay_dev = dev_func_delay;
+ g_ip->dynamic_power_dev = dev_func_dynamic_power;
+ g_ip->leakage_power_dev = dev_func_leakage_power;
+ g_ip->area_dev = dev_func_area;
+ g_ip->cycle_time_dev = dev_func_cycle_time;
+ g_ip->ed = ed_ed2_none;
+
+ switch(wt) {
+ case (0):
+ g_ip->force_wiretype = 0;
+ g_ip->wt = Global;
+ break;
+ case (1):
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global;
+ break;
+ case (2):
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_5;
+ break;
+ case (3):
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_10;
+ break;
+ case (4):
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_20;
+ break;
+ case (5):
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_30;
+ break;
+ case (6):
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Low_swing;
+ break;
+ default:
+ cout << "Unknown wire type!\n";
+ exit(0);
+ }
+
+ g_ip->delay_wt_nuca = nuca_obj_func_delay;
+ g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power;
+ g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power;
+ g_ip->area_wt_nuca = nuca_obj_func_area;
+ g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time;
+ g_ip->delay_dev_nuca = dev_func_delay;
+ g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power;
+ g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power;
+ g_ip->area_dev_nuca = nuca_dev_func_area;
+ g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time;
+ g_ip->nuca = is_nuca;
+ g_ip->nuca_bank_count = nuca_bank_count;
+ if(nuca_bank_count > 0) {
+ g_ip->force_nuca_bank = 1;
+ }
+ g_ip->cores = core_count;
+ g_ip->cache_level = cache_level;
+
+ g_ip->temp = temp;
+
+ g_ip->F_sz_nm = tech_node;
+ g_ip->F_sz_um = tech_node / 1000;
+ g_ip->is_main_mem = (main_mem != 0) ? true : false;
+ g_ip->is_cache = (cache != 0) ? true : false;
+ g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
+
+ g_ip->num_rw_ports = rw_ports;
+ g_ip->num_rd_ports = excl_read_ports;
+ g_ip->num_wr_ports = excl_write_ports;
+ g_ip->num_se_rd_ports = single_ended_read_ports;
+ g_ip->print_detail = 1;
+ g_ip->nuca = 0;
+
+ g_ip->wt = Global_5;
+ g_ip->force_cache_config = false;
+ g_ip->force_wiretype = false;
+ g_ip->print_input_args = p_input;
+
+
+ uca_org_t fin_res;
+ fin_res.valid = false;
+
+ if (g_ip->error_checking() == false) exit(0);
+ if (g_ip->print_input_args)
+ g_ip->display_ip();
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+
+ if (g_ip->nuca == 1)
+ {
+ Nuca n(&g_tp.peri_global);
+ n.sim_nuca();
+ }
+ solve(&fin_res);
+
+ output_UCA(&fin_res);
+
+ delete (g_ip);
+ return fin_res;
+}
+
+//McPAT's plain interface, please keep !!!
+uca_org_t cacti_interface(
+ int cache_size,
+ int line_size,
+ int associativity,
+ int rw_ports,
+ int excl_read_ports,// para5
+ int excl_write_ports,
+ int single_ended_read_ports,
+ int search_ports,
+ int banks,
+ double tech_node,//para10
+ int output_width,
+ int specific_tag,
+ int tag_width,
+ int access_mode,
+ int cache, //para15
+ int main_mem,
+ int obj_func_delay,
+ int obj_func_dynamic_power,
+ int obj_func_leakage_power,
+ int obj_func_cycle_time, //para20
+ int obj_func_area,
+ int dev_func_delay,
+ int dev_func_dynamic_power,
+ int dev_func_leakage_power,
+ int dev_func_area, //para25
+ int dev_func_cycle_time,
+ int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
+ int temp,
+ int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
+ int data_arr_ram_cell_tech_flavor_in,//para30
+ int data_arr_peri_global_tech_flavor_in,
+ int tag_arr_ram_cell_tech_flavor_in,
+ int tag_arr_peri_global_tech_flavor_in,
+ int interconnect_projection_type_in,
+ int wire_inside_mat_type_in,//para35
+ int wire_outside_mat_type_in,
+ int REPEATERS_IN_HTREE_SEGMENTS_in,
+ int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
+ int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
+ int PAGE_SIZE_BITS_in,//para40
+ int BURST_LENGTH_in,
+ int INTERNAL_PREFETCH_WIDTH_in,
+ int force_wiretype,
+ int wiretype,
+ int force_config,//para45
+ int ndwl,
+ int ndbl,
+ int nspd,
+ int ndcm,
+ int ndsam1,//para50
+ int ndsam2,
+ int ecc)
+{
+ g_ip = new InputParameter();
+
+ uca_org_t fin_res;
+ fin_res.valid = false;
+
+ g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
+ g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
+ g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
+ g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
+
+ g_ip->ic_proj_type = interconnect_projection_type_in;
+ g_ip->wire_is_mat_type = wire_inside_mat_type_in;
+ g_ip->wire_os_mat_type = wire_outside_mat_type_in;
+ g_ip->burst_len = BURST_LENGTH_in;
+ g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
+ g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
+
+ g_ip->cache_sz = cache_size;
+ g_ip->line_sz = line_size;
+ g_ip->assoc = associativity;
+ g_ip->nbanks = banks;
+ g_ip->out_w = output_width;
+ g_ip->specific_tag = specific_tag;
+ if (specific_tag == 0) {
+ g_ip->tag_w = 42;
+ }
+ else {
+ g_ip->tag_w = tag_width;
+ }
+
+ g_ip->access_mode = access_mode;
+ g_ip->delay_wt = obj_func_delay;
+ g_ip->dynamic_power_wt = obj_func_dynamic_power;
+ g_ip->leakage_power_wt = obj_func_leakage_power;
+ g_ip->area_wt = obj_func_area;
+ g_ip->cycle_time_wt = obj_func_cycle_time;
+ g_ip->delay_dev = dev_func_delay;
+ g_ip->dynamic_power_dev = dev_func_dynamic_power;
+ g_ip->leakage_power_dev = dev_func_leakage_power;
+ g_ip->area_dev = dev_func_area;
+ g_ip->cycle_time_dev = dev_func_cycle_time;
+ g_ip->temp = temp;
+ g_ip->ed = ed_ed2_none;
+
+ g_ip->F_sz_nm = tech_node;
+ g_ip->F_sz_um = tech_node / 1000;
+ g_ip->is_main_mem = (main_mem != 0) ? true : false;
+ g_ip->is_cache = (cache ==1) ? true : false;
+ g_ip->pure_ram = (cache ==0) ? true : false;
+ g_ip->pure_cam = (cache ==2) ? true : false;
+ g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
+ g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
+ g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
+
+ g_ip->num_rw_ports = rw_ports;
+ g_ip->num_rd_ports = excl_read_ports;
+ g_ip->num_wr_ports = excl_write_ports;
+ g_ip->num_se_rd_ports = single_ended_read_ports;
+ g_ip->num_search_ports = search_ports;
+
+ g_ip->print_detail = 1;
+ g_ip->nuca = 0;
+
+ if (force_wiretype == 0)
+ {
+ g_ip->wt = Global;
+ g_ip->force_wiretype = false;
+ }
+ else
+ { g_ip->force_wiretype = true;
+ if (wiretype==10) {
+ g_ip->wt = Global_10;
+ }
+ if (wiretype==20) {
+ g_ip->wt = Global_20;
+ }
+ if (wiretype==30) {
+ g_ip->wt = Global_30;
+ }
+ if (wiretype==5) {
+ g_ip->wt = Global_5;
+ }
+ if (wiretype==0) {
+ g_ip->wt = Low_swing;
+ }
+ }
+ //g_ip->wt = Global_5;
+ if (force_config == 0)
+ {
+ g_ip->force_cache_config = false;
+ }
+ else
+ {
+ g_ip->force_cache_config = true;
+ g_ip->ndbl=ndbl;
+ g_ip->ndwl=ndwl;
+ g_ip->nspd=nspd;
+ g_ip->ndcm=ndcm;
+ g_ip->ndsam1=ndsam1;
+ g_ip->ndsam2=ndsam2;
+
+
+ }
+
+ if (ecc==0){
+ g_ip->add_ecc_b_=false;
+ }
+ else
+ {
+ g_ip->add_ecc_b_=true;
+ }
+
+
+ if(!g_ip->error_checking())
+ exit(0);
+
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+
+ g_ip->display_ip();
+ solve(&fin_res);
+ output_UCA(&fin_res);
+ output_data_csv(fin_res);
+ delete (g_ip);
+
+ return fin_res;
+}
+
+
+
+bool InputParameter::error_checking()
+{
+ int A;
+ bool seq_access = false;
+ fast_access = true;
+
+ switch (access_mode)
+ {
+ case 0:
+ seq_access = false;
+ fast_access = false;
+ break;
+ case 1:
+ seq_access = true;
+ fast_access = false;
+ break;
+ case 2:
+ seq_access = false;
+ fast_access = true;
+ break;
+ }
+
+ if(is_main_mem)
+ {
+ if(ic_proj_type == 0)
+ {
+ cerr << "DRAM model supports only conservative interconnect projection!\n\n";
+ return false;
+ }
+ }
+
+
+ uint32_t B = line_sz;
+
+ if (B < 1)
+ {
+ cerr << "Block size must >= 1" << endl;
+ return false;
+ }
+ else if (B*8 < out_w)
+ {
+ cerr << "Block size must be at least " << out_w/8 << endl;
+ return false;
+ }
+
+ if (F_sz_um <= 0)
+ {
+ cerr << "Feature size must be > 0" << endl;
+ return false;
+ }
+ else if (F_sz_um > 0.091)
+ {
+ cerr << "Feature size must be <= 90 nm" << endl;
+ return false;
+ }
+
+
+ uint32_t RWP = num_rw_ports;
+ uint32_t ERP = num_rd_ports;
+ uint32_t EWP = num_wr_ports;
+ uint32_t NSER = num_se_rd_ports;
+ uint32_t SCHP = num_search_ports;
+
+//TODO: revisit this. This is an important feature. Sheng thought this should be used
+// // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to
+// // the number of banks, we assume that the multiple ports are implemented via the multiple banks.
+// // In such a case we assume that each bank has 1 RWP port.
+// if ((RWP + ERP + EWP) <= nbanks && nbanks>1)
+// {
+// RWP = 1;
+// ERP = 0;
+// EWP = 0;
+// NSER = 0;
+// }
+// else if ((RWP < 0) || (EWP < 0) || (ERP < 0))
+// {
+// cerr << "Ports must >=0" << endl;
+// return false;
+// }
+// else if (RWP > 2)
+// {
+// cerr << "Maximum of 2 read/write ports" << endl;
+// return false;
+// }
+// else if ((RWP+ERP+EWP) < 1)
+ // Changed to new implementation:
+ // The number of ports specified at input is per bank
+ if ((RWP+ERP+EWP) < 1)
+ {
+ cerr << "Must have at least one port" << endl;
+ return false;
+ }
+
+ if (is_pow2(nbanks) == false)
+ {
+ cerr << "Number of subbanks should be greater than or equal to 1 and should be a power of 2" << endl;
+ return false;
+ }
+
+ int C = cache_sz/nbanks;
+ if (C < 64)
+ {
+ cerr << "Cache size must >=64" << endl;
+ return false;
+ }
+
+//TODO: revisit this
+// if (pure_ram==true && assoc!=1)
+// {
+// cerr << "Pure RAM must have assoc as 1" << endl;
+// return false;
+// }
+
+ //fully assoc and cam check
+ if (is_cache && assoc==0)
+ fully_assoc =true;
+ else
+ fully_assoc = false;
+
+ if (pure_cam==true && assoc!=0)
+ {
+ cerr << "Pure CAM must have associativity as 0" << endl;
+ return false;
+ }
+
+ if (assoc==0 && (pure_cam==false && is_cache ==false))
+ {
+ cerr << "Only CAM or Fully associative cache can have associativity as 0" << endl;
+ return false;
+ }
+
+ if ((fully_assoc==true || pure_cam==true)
+ && (data_arr_ram_cell_tech_type!= tag_arr_ram_cell_tech_type
+ || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type ))
+ {
+ cerr << "CAM and fully associative cache must have same device type for both data and tag array" << endl;
+ return false;
+ }
+
+ if ((fully_assoc==true || pure_cam==true)
+ && (data_arr_ram_cell_tech_type== lp_dram || data_arr_ram_cell_tech_type== comm_dram))
+ {
+ cerr << "DRAM based CAM and fully associative cache are not supported" << endl;
+ return false;
+ }
+
+ if ((fully_assoc==true || pure_cam==true)
+ && (is_main_mem==true))
+ {
+ cerr << "CAM and fully associative cache cannot be as main memory" << endl;
+ return false;
+ }
+
+ if ((fully_assoc || pure_cam) && SCHP<1)
+ {
+ cerr << "CAM and fully associative must have at least 1 search port" << endl;
+ return false;
+ }
+
+ if (RWP==0 && ERP==0 && SCHP>0 && ((fully_assoc || pure_cam)))
+ {
+ ERP=SCHP;
+ }
+
+// if ((!(fully_assoc || pure_cam)) && SCHP>=1)
+// {
+// cerr << "None CAM and fully associative cannot have search ports" << endl;
+// return false;
+// }
+
+ if (assoc == 0)
+ {
+ A = C/B;
+ //fully_assoc = true;
+ }
+ else
+ {
+ if (assoc == 1)
+ {
+ A = 1;
+ //fully_assoc = false;
+ }
+ else
+ {
+ //fully_assoc = false;
+ A = assoc;
+ if (is_pow2(A) == false)
+ {
+ cerr << "Associativity must be a power of 2" << endl;
+ return false;
+ }
+ }
+ }
+
+ if (C/(B*A) <= 1 && assoc!=0)
+ {
+ cerr << "Number of sets is too small: " << endl;
+ cerr << " Need to either increase cache size, or decrease associativity or block size" << endl;
+ cerr << " (or use fully associative cache)" << endl;
+ return false;
+ }
+
+ block_sz = B;
+
+ /*dt: testing sequential access mode*/
+ if(seq_access)
+ {
+ tag_assoc = A;
+ data_assoc = 1;
+ is_seq_acc = true;
+ }
+ else
+ {
+ tag_assoc = A;
+ data_assoc = A;
+ is_seq_acc = false;
+ }
+
+ if (assoc==0)
+ {
+ data_assoc = 1;
+ }
+ num_rw_ports = RWP;
+ num_rd_ports = ERP;
+ num_wr_ports = EWP;
+ num_se_rd_ports = NSER;
+ if (!(fully_assoc || pure_cam))
+ num_search_ports = 0;
+ nsets = C/(B*A);
+
+ if (temp < 300 || temp > 400 || temp%10 != 0)
+ {
+ cerr << temp << " Temperature must be between 300 and 400 Kelvin and multiple of 10." << endl;
+ return false;
+ }
+
+ if (nsets < 1)
+ {
+ cerr << "Less than one set..." << endl;
+ return false;
+ }
+
+ return true;
+}
+
+
+
+void output_data_csv(const uca_org_t & fin_res)
+{
+ //TODO: the csv output should remain
+ fstream file("out.csv", ios::in);
+ bool print_index = file.fail();
+ file.close();
+
+ file.open("out.csv", ios::out|ios::app);
+ if (file.fail() == true)
+ {
+ cerr << "File out.csv could not be opened successfully" << endl;
+ }
+ else
+ {
+ if (print_index == true)
+ {
+ file << "Tech node (nm), ";
+ file << "Capacity (bytes), ";
+ file << "Number of banks, ";
+ file << "Associativity, ";
+ file << "Output width (bits), ";
+ file << "Access time (ns), ";
+ file << "Random cycle time (ns), ";
+// file << "Multisubbank interleave cycle time (ns), ";
+
+// file << "Delay request network (ns), ";
+// file << "Delay inside mat (ns), ";
+// file << "Delay reply network (ns), ";
+// file << "Tag array access time (ns), ";
+// file << "Data array access time (ns), ";
+// file << "Refresh period (microsec), ";
+// file << "DRAM array availability (%), ";
+ file << "Dynamic search energy (nJ), ";
+ file << "Dynamic read energy (nJ), ";
+ file << "Dynamic write energy (nJ), ";
+// file << "Tag Dynamic read energy (nJ), ";
+// file << "Data Dynamic read energy (nJ), ";
+// file << "Dynamic read power (mW), ";
+ file << "Standby leakage per bank(mW), ";
+// file << "Leakage per bank with leak power management (mW), ";
+// file << "Leakage per bank with leak power management (mW), ";
+// file << "Refresh power as percentage of standby leakage, ";
+ file << "Area (mm2), ";
+ file << "Ndwl, ";
+ file << "Ndbl, ";
+ file << "Nspd, ";
+ file << "Ndcm, ";
+ file << "Ndsam_level_1, ";
+ file << "Ndsam_level_2, ";
+ file << "Data arrary area efficiency %, ";
+ file << "Ntwl, ";
+ file << "Ntbl, ";
+ file << "Ntspd, ";
+ file << "Ntcm, ";
+ file << "Ntsam_level_1, ";
+ file << "Ntsam_level_2, ";
+ file << "Tag arrary area efficiency %, ";
+
+// file << "Resistance per unit micron (ohm-micron), ";
+// file << "Capacitance per unit micron (fF per micron), ";
+// file << "Unit-length wire delay (ps), ";
+// file << "FO4 delay (ps), ";
+// file << "delay route to bank (including crossb delay) (ps), ";
+// file << "Crossbar delay (ps), ";
+// file << "Dyn read energy per access from closed page (nJ), ";
+// file << "Dyn read energy per access from open page (nJ), ";
+// file << "Leak power of an subbank with page closed (mW), ";
+// file << "Leak power of a subbank with page open (mW), ";
+// file << "Leak power of request and reply networks (mW), ";
+// file << "Number of subbanks, ";
+// file << "Page size in bits, ";
+// file << "Activate power, ";
+// file << "Read power, ";
+// file << "Write power, ";
+// file << "Precharge power, ";
+// file << "tRCD, ";
+// file << "CAS latency, ";
+// file << "Precharge delay, ";
+// file << "Perc dyn energy bitlines, ";
+// file << "perc dyn energy wordlines, ";
+// file << "perc dyn energy outside mat, ";
+// file << "Area opt (perc), ";
+// file << "Delay opt (perc), ";
+// file << "Repeater opt (perc), ";
+// file << "Aspect ratio";
+ file << endl;
+ }
+ file << g_ip->F_sz_nm << ", ";
+ file << g_ip->cache_sz << ", ";
+ file << g_ip->nbanks << ", ";
+ file << g_ip->tag_assoc << ", ";
+ file << g_ip->out_w << ", ";
+ file << fin_res.access_time*1e+9 << ", ";
+ file << fin_res.cycle_time*1e+9 << ", ";
+// file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", ";
+// file << fin_res.data_array2->delay_request_network*1e+9 << ", ";
+// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", ";
+// file << fin_res.data_array2.delay_reply_network*1e+9 << ", ";
+
+// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
+// {
+// file << fin_res.tag_array2->access_time*1e+9 << ", ";
+// }
+// else
+// {
+// file << 0 << ", ";
+// }
+// file << fin_res.data_array2->access_time*1e+9 << ", ";
+// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", ";
+// file << fin_res.data_array2->dram_array_availability << ", ";
+ if (g_ip->fully_assoc || g_ip->pure_cam)
+ {
+ file << fin_res.power.searchOp.dynamic*1e+9 << ", ";
+ }
+ else
+ {
+ file << "N/A" << ", ";
+ }
+ file << fin_res.power.readOp.dynamic*1e+9 << ", ";
+ file << fin_res.power.writeOp.dynamic*1e+9 << ", ";
+// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
+// {
+// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", ";
+// }
+// else
+// {
+// file << "NA" << ", ";
+// }
+// file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", ";
+// if (g_ip->fully_assoc || g_ip->pure_cam)
+// {
+// file << fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", ";
+// }
+// else
+// {
+// file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << ", ";
+// }
+
+ file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", ";
+// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", ";
+// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", ";
+ file << fin_res.area*1e-6 << ", ";
+
+ file << fin_res.data_array2->Ndwl << ", ";
+ file << fin_res.data_array2->Ndbl << ", ";
+ file << fin_res.data_array2->Nspd << ", ";
+ file << fin_res.data_array2->deg_bl_muxing << ", ";
+ file << fin_res.data_array2->Ndsam_lev_1 << ", ";
+ file << fin_res.data_array2->Ndsam_lev_2 << ", ";
+ file << fin_res.data_array2->area_efficiency << ", ";
+ if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
+ {
+ file << fin_res.tag_array2->Ndwl << ", ";
+ file << fin_res.tag_array2->Ndbl << ", ";
+ file << fin_res.tag_array2->Nspd << ", ";
+ file << fin_res.tag_array2->deg_bl_muxing << ", ";
+ file << fin_res.tag_array2->Ndsam_lev_1 << ", ";
+ file << fin_res.tag_array2->Ndsam_lev_2 << ", ";
+ file << fin_res.tag_array2->area_efficiency << ", ";
+ }
+ else
+ {
+ file << "N/A" << ", ";
+ file << "N/A"<< ", ";
+ file << "N/A" << ", ";
+ file << "N/A" << ", ";
+ file << "N/A" << ", ";
+ file << "N/A" << ", ";
+ file << "N/A" << ", ";
+ }
+
+// file << g_tp.wire_inside_mat.R_per_um << ", ";
+// file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", ";
+// file << g_tp.unit_len_wire_del / 1e-12 << ", ";
+// file << g_tp.FO4 / 1e-12 << ", ";
+// file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", ";
+// file << fin_res.data_array.delay_crossbar / 1e-9 << ", ";
+// file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << ", ";
+// file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 << ", ";
+// file << fin_res.data_array.leak_power_subbank_closed_page / 1e-3 << ", ";
+// file << fin_res.data_array.leak_power_subbank_open_page / 1e-3 << ", ";
+// file << fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", ";
+// file << fin_res.data_array.number_subbanks << ", " ;
+// file << fin_res.data_array.page_size_in_bits << ", " ;
+// file << fin_res.data_array.activate_energy * 1e9 << ", " ;
+// file << fin_res.data_array.read_energy * 1e9 << ", " ;
+// file << fin_res.data_array.write_energy * 1e9 << ", " ;
+// file << fin_res.data_array.precharge_energy * 1e9 << ", " ;
+// file << fin_res.data_array.trcd * 1e9 << ", " ;
+// file << fin_res.data_array.cas_latency * 1e9 << ", " ;
+// file << fin_res.data_array.precharge_delay * 1e9 << ", " ;
+// file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width;
+ file<<endl;
+ }
+ file.close();
+}
+
+
+
+void output_UCA(uca_org_t *fr)
+{
+ // if (NUCA)
+ if (0) {
+ cout << "\n\n Detailed Bank Stats:\n";
+ cout << " Bank Size (bytes): %d\n" <<
+ (int) (g_ip->cache_sz);
+ }
+ else {
+ if (g_ip->data_arr_ram_cell_tech_type == 3) {
+ cout << "\n---------- CACTI version 6.5, Uniform Cache Access " <<
+ "Logic Process Based DRAM Model ----------\n";
+ }
+ else if (g_ip->data_arr_ram_cell_tech_type == 4) {
+ cout << "\n---------- CACTI version 6.5, Uniform" <<
+ "Cache Access Commodity DRAM Model ----------\n";
+ }
+ else {
+ cout << "\n---------- CACTI version 6.5, Uniform Cache Access "
+ "SRAM Model ----------\n";
+ }
+ cout << "\nCache Parameters:\n";
+ cout << " Total cache size (bytes): " <<
+ (int) (g_ip->cache_sz) << endl;
+ }
+
+ cout << " Number of banks: " << (int) g_ip->nbanks << endl;
+ if (g_ip->fully_assoc|| g_ip->pure_cam)
+ cout << " Associativity: fully associative\n";
+ else {
+ if (g_ip->tag_assoc == 1)
+ cout << " Associativity: direct mapped\n";
+ else
+ cout << " Associativity: " <<
+ g_ip->tag_assoc << endl;
+ }
+
+
+ cout << " Block size (bytes): " << g_ip->line_sz << endl;
+ cout << " Read/write Ports: " <<
+ g_ip->num_rw_ports << endl;
+ cout << " Read ports: " <<
+ g_ip->num_rd_ports << endl;
+ cout << " Write ports: " <<
+ g_ip->num_wr_ports << endl;
+ if (g_ip->fully_assoc|| g_ip->pure_cam)
+ cout << " search ports: " <<
+ g_ip->num_search_ports << endl;
+ cout << " Technology size (nm): " <<
+ g_ip->F_sz_nm << endl << endl;
+
+ cout << " Access time (ns): " << fr->access_time*1e9 << endl;
+ cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl;
+ if (g_ip->data_arr_ram_cell_tech_type >= 4) {
+ cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl;
+ cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl;
+ cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl;
+ cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl;
+ cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl;
+ cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl;
+ cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl;
+ cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl;
+ cout << " Refresh power (mW): " <<
+ fr->data_array2->refresh_power*1e3 << endl;
+ }
+ else {
+ if ((g_ip->fully_assoc|| g_ip->pure_cam))
+ {
+ cout << " Total dynamic associative search energy per access (nJ): " <<
+ fr->power.searchOp.dynamic*1e9 << endl;
+// cout << " Total dynamic read energy per access (nJ): " <<
+// fr->power.readOp.dynamic*1e9 << endl;
+// cout << " Total dynamic write energy per access (nJ): " <<
+// fr->power.writeOp.dynamic*1e9 << endl;
+ }
+// else
+// {
+ cout << " Total dynamic read energy per access (nJ): " <<
+ fr->power.readOp.dynamic*1e9 << endl;
+ cout << " Total dynamic write energy per access (nJ): " <<
+ fr->power.writeOp.dynamic*1e9 << endl;
+// }
+ cout << " Total leakage power of a bank"
+ " (mW): " << fr->power.readOp.leakage*1e3 << endl;
+ cout << " Total gate leakage power of a bank"
+ " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl;
+ }
+
+ if (g_ip->data_arr_ram_cell_tech_type ==3 || g_ip->data_arr_ram_cell_tech_type ==4)
+ {
+ }
+ cout << " Cache height x width (mm): " <<
+ fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl;
+
+
+ cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl;
+ cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl;
+ cout << " Best Nspd : " << fr->data_array2->Nspd << endl;
+ cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl;
+ cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl;
+ cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl;
+
+ if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
+ {
+ cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl;
+ cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl;
+ cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl;
+ cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl;
+ cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl;
+ cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl;
+ }
+
+ switch (fr->data_array2->wt) {
+ case (0):
+ cout << " Data array, H-tree wire type: Delay optimized global wires\n";
+ break;
+ case (1):
+ cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n";
+ break;
+ case (2):
+ cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n";
+ break;
+ case (3):
+ cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n";
+ break;
+ case (4):
+ cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n";
+ break;
+ case (5):
+ cout << " Data array, wire type: Low swing wires\n";
+ break;
+ default:
+ cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt <<endl;
+ exit(0);
+ }
+
+ if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) {
+ switch (fr->tag_array2->wt) {
+ case (0):
+ cout << " Tag array, H-tree wire type: Delay optimized global wires\n";
+ break;
+ case (1):
+ cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n";
+ break;
+ case (2):
+ cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n";
+ break;
+ case (3):
+ cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n";
+ break;
+ case (4):
+ cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n";
+ break;
+ case (5):
+ cout << " Tag array, wire type: Low swing wires\n";
+ break;
+ default:
+ cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt <<endl;
+ exit(-1);
+ }
+ }
+
+ if (g_ip->print_detail)
+ {
+ //if(g_ip->fully_assoc) return;
+
+ /* Delay stats */
+ /* data array stats */
+ cout << endl << "Time Components:" << endl << endl;
+
+ cout << " Data side (with Output driver) (ns): " <<
+ fr->data_array2->access_time/1e-9 << endl;
+
+ cout << "\tH-tree input delay (ns): " <<
+ fr->data_array2->delay_route_to_bank * 1e9 +
+ fr->data_array2->delay_input_htree * 1e9 << endl;
+
+ if (!(g_ip->pure_cam || g_ip->fully_assoc))
+ {
+ cout << "\tDecoder + wordline delay (ns): " <<
+ fr->data_array2->delay_row_predecode_driver_and_block * 1e9 +
+ fr->data_array2->delay_row_decoder * 1e9 << endl;
+ }
+ else
+ {
+ cout << "\tCAM search delay (ns): " <<
+ fr->data_array2->delay_matchlines * 1e9 << endl;
+ }
+
+ cout << "\tBitline delay (ns): " <<
+ fr->data_array2->delay_bitlines/1e-9 << endl;
+
+ cout << "\tSense Amplifier delay (ns): " <<
+ fr->data_array2->delay_sense_amp * 1e9 << endl;
+
+
+ cout << "\tH-tree output delay (ns): " <<
+ fr->data_array2->delay_subarray_output_driver * 1e9 +
+ fr->data_array2->delay_dout_htree * 1e9 << endl;
+
+ if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
+ {
+ /* tag array stats */
+ cout << endl << " Tag side (with Output driver) (ns): " <<
+ fr->tag_array2->access_time/1e-9 << endl;
+
+ cout << "\tH-tree input delay (ns): " <<
+ fr->tag_array2->delay_route_to_bank * 1e9 +
+ fr->tag_array2->delay_input_htree * 1e9 << endl;
+
+ cout << "\tDecoder + wordline delay (ns): " <<
+ fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
+ fr->tag_array2->delay_row_decoder * 1e9 << endl;
+
+ cout << "\tBitline delay (ns): " <<
+ fr->tag_array2->delay_bitlines/1e-9 << endl;
+
+ cout << "\tSense Amplifier delay (ns): " <<
+ fr->tag_array2->delay_sense_amp * 1e9 << endl;
+
+ cout << "\tComparator delay (ns): " <<
+ fr->tag_array2->delay_comparator * 1e9 << endl;
+
+ cout << "\tH-tree output delay (ns): " <<
+ fr->tag_array2->delay_subarray_output_driver * 1e9 +
+ fr->tag_array2->delay_dout_htree * 1e9 << endl;
+ }
+
+
+
+ /* Energy/Power stats */
+ cout << endl << endl << "Power Components:" << endl << endl;
+
+ if (!(g_ip->pure_cam || g_ip->fully_assoc))
+ {
+ cout << " Data array: Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal leakage read/write power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
+
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+
+ cout << "\tTotal leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.leakage +
+ fr->data_array2->power_data_output_htree.readOp.leakage +
+ fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl;
+
+ cout << "\tTotal gate leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.gate_leakage +
+ fr->data_array2->power_data_output_htree.readOp.gate_leakage +
+ fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl;
+
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+
+ cout << "\tBitlines precharge and equalization circuit (nJ): " <<
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+ }
+
+ else if (g_ip->pure_cam)
+ {
+
+ cout << " CAM array:"<<endl;
+ cout << " Total dynamic associative search energy/access (nJ): " <<
+ fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "match key and data transfer) (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic +
+ fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tSearchlines (nJ): " <<
+ fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tMatchlines (nJ): " <<
+ fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
+
+
+ cout <<endl<< " Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+
+ cout << endl <<" Total leakage power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
+ }
+ else
+ {
+ cout << " Fully associative array:"<<endl;
+ cout << " Total dynamic associative search energy/access (nJ): " <<
+ fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "match key and data transfer) (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic +
+ fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tSearchlines (nJ): " <<
+ fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tMatchlines (nJ): " <<
+ fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tData portion wordline (nJ): " <<
+ fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl;
+ cout << "\tData Bitlines (nJ): " <<
+ fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
+
+
+ cout <<endl<< " Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+
+ cout << endl <<" Total leakage power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
+ }
+
+
+ if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
+ {
+ cout << endl << " Tag array: Total dynamic read energy/access (nJ): " <<
+ fr->tag_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal leakage read/write power of a bank (mW): " <<
+ fr->tag_array2->power.readOp.leakage * 1e3 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->tag_array2->power_addr_input_htree.readOp.dynamic +
+ fr->tag_array2->power_data_output_htree.readOp.dynamic +
+ fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+
+ cout << "\tTotal leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
+ (fr->tag_array2->power_addr_input_htree.readOp.leakage +
+ fr->tag_array2->power_data_output_htree.readOp.leakage +
+ fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl;
+
+ cout << "\tTotal gate leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
+ (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage +
+ fr->tag_array2->power_data_output_htree.readOp.gate_leakage +
+ fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl;
+
+ cout << "\tOutput Htree inside a bank Energy (nJ): " <<
+ fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines precharge and equalization circuit (nJ): " <<
+ fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+ }
+
+ cout << endl << endl << "Area Components:" << endl << endl;
+ /* Data array area stats */
+ if (!(g_ip->pure_cam || g_ip->fully_assoc))
+ cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ else if (g_ip->pure_cam)
+ cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ else
+ cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ cout << "\tHeight (mm): " <<
+ fr->data_array2->all_banks_height*1e-3 << endl;
+ cout << "\tWidth (mm): " <<
+ fr->data_array2->all_banks_width*1e-3 << endl;
+ if (g_ip->print_detail) {
+ cout << "\tArea efficiency (Memory cell area/Total area) - " <<
+ fr->data_array2->area_efficiency << " %" << endl;
+ cout << "\t\tMAT Height (mm): " <<
+ fr->data_array2->mat_height*1e-3 << endl;
+ cout << "\t\tMAT Length (mm): " <<
+ fr->data_array2->mat_length*1e-3 << endl;
+ cout << "\t\tSubarray Height (mm): " <<
+ fr->data_array2->subarray_height*1e-3 << endl;
+ cout << "\t\tSubarray Length (mm): " <<
+ fr->data_array2->subarray_length*1e-3 << endl;
+ }
+
+ /* Tag array area stats */
+ if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
+ {
+ cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl;
+ cout << "\tHeight (mm): " <<
+ fr->tag_array2->all_banks_height*1e-3 << endl;
+ cout << "\tWidth (mm): " <<
+ fr->tag_array2->all_banks_width*1e-3 << endl;
+ if (g_ip->print_detail)
+ {
+ cout << "\tArea efficiency (Memory cell area/Total area) - " <<
+ fr->tag_array2->area_efficiency << " %" << endl;
+ cout << "\t\tMAT Height (mm): " <<
+ fr->tag_array2->mat_height*1e-3 << endl;
+ cout << "\t\tMAT Length (mm): " <<
+ fr->tag_array2->mat_length*1e-3 << endl;
+ cout << "\t\tSubarray Height (mm): " <<
+ fr->tag_array2->subarray_height*1e-3 << endl;
+ cout << "\t\tSubarray Length (mm): " <<
+ fr->tag_array2->subarray_length*1e-3 << endl;
+ }
+ }
+ Wire wpr;
+ wpr.print_wire();
+
+ //cout << "FO4 = " << g_tp.FO4 << endl;
+ }
+}
+
+//McPAT's plain interface, please keep !!!
+uca_org_t cacti_interface(InputParameter * const local_interface)
+{
+// g_ip = new InputParameter();
+ //g_ip->add_ecc_b_ = true;
+
+ uca_org_t fin_res;
+ fin_res.valid = false;
+
+ g_ip = local_interface;
+
+
+// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
+// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
+// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
+// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
+//
+// g_ip->ic_proj_type = interconnect_projection_type_in;
+// g_ip->wire_is_mat_type = wire_inside_mat_type_in;
+// g_ip->wire_os_mat_type = wire_outside_mat_type_in;
+// g_ip->burst_len = BURST_LENGTH_in;
+// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
+// g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
+//
+// g_ip->cache_sz = cache_size;
+// g_ip->line_sz = line_size;
+// g_ip->assoc = associativity;
+// g_ip->nbanks = banks;
+// g_ip->out_w = output_width;
+// g_ip->specific_tag = specific_tag;
+// if (tag_width == 0) {
+// g_ip->tag_w = 42;
+// }
+// else {
+// g_ip->tag_w = tag_width;
+// }
+//
+// g_ip->access_mode = access_mode;
+// g_ip->delay_wt = obj_func_delay;
+// g_ip->dynamic_power_wt = obj_func_dynamic_power;
+// g_ip->leakage_power_wt = obj_func_leakage_power;
+// g_ip->area_wt = obj_func_area;
+// g_ip->cycle_time_wt = obj_func_cycle_time;
+// g_ip->delay_dev = dev_func_delay;
+// g_ip->dynamic_power_dev = dev_func_dynamic_power;
+// g_ip->leakage_power_dev = dev_func_leakage_power;
+// g_ip->area_dev = dev_func_area;
+// g_ip->cycle_time_dev = dev_func_cycle_time;
+// g_ip->temp = temp;
+//
+// g_ip->F_sz_nm = tech_node;
+// g_ip->F_sz_um = tech_node / 1000;
+// g_ip->is_main_mem = (main_mem != 0) ? true : false;
+// g_ip->is_cache = (cache ==1) ? true : false;
+// g_ip->pure_ram = (cache ==0) ? true : false;
+// g_ip->pure_cam = (cache ==2) ? true : false;
+// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
+// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
+// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
+//
+// g_ip->num_rw_ports = rw_ports;
+// g_ip->num_rd_ports = excl_read_ports;
+// g_ip->num_wr_ports = excl_write_ports;
+// g_ip->num_se_rd_ports = single_ended_read_ports;
+// g_ip->num_search_ports = search_ports;
+//
+// g_ip->print_detail = 1;
+// g_ip->nuca = 0;
+// g_ip->is_cache=true;
+//
+// if (force_wiretype == 0)
+// {
+// g_ip->wt = Global;
+// g_ip->force_wiretype = false;
+// }
+// else
+// { g_ip->force_wiretype = true;
+// if (wiretype==10) {
+// g_ip->wt = Global_10;
+// }
+// if (wiretype==20) {
+// g_ip->wt = Global_20;
+// }
+// if (wiretype==30) {
+// g_ip->wt = Global_30;
+// }
+// if (wiretype==5) {
+// g_ip->wt = Global_5;
+// }
+// if (wiretype==0) {
+// g_ip->wt = Low_swing;
+// }
+// }
+// //g_ip->wt = Global_5;
+// if (force_config == 0)
+// {
+// g_ip->force_cache_config = false;
+// }
+// else
+// {
+// g_ip->force_cache_config = true;
+// g_ip->ndbl=ndbl;
+// g_ip->ndwl=ndwl;
+// g_ip->nspd=nspd;
+// g_ip->ndcm=ndcm;
+// g_ip->ndsam1=ndsam1;
+// g_ip->ndsam2=ndsam2;
+//
+//
+// }
+//
+// if (ecc==0){
+// g_ip->add_ecc_b_=false;
+// }
+// else
+// {
+// g_ip->add_ecc_b_=true;
+// }
+
+
+ g_ip->error_checking();
+
+
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+
+ solve(&fin_res);
+
+// g_ip->display_ip();
+// output_UCA(&fin_res);
+// output_data_csv(fin_res);
+
+ // delete (g_ip);
+
+ return fin_res;
+}
+
+//McPAT's plain interface, please keep !!!
+uca_org_t init_interface(InputParameter* const local_interface)
+{
+ // g_ip = new InputParameter();
+ //g_ip->add_ecc_b_ = true;
+
+ uca_org_t fin_res;
+ fin_res.valid = false;
+
+ g_ip = local_interface;
+
+
+// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
+// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
+// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
+// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
+//
+// g_ip->ic_proj_type = interconnect_projection_type_in;
+// g_ip->wire_is_mat_type = wire_inside_mat_type_in;
+// g_ip->wire_os_mat_type = wire_outside_mat_type_in;
+// g_ip->burst_len = BURST_LENGTH_in;
+// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
+// g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
+//
+// g_ip->cache_sz = cache_size;
+// g_ip->line_sz = line_size;
+// g_ip->assoc = associativity;
+// g_ip->nbanks = banks;
+// g_ip->out_w = output_width;
+// g_ip->specific_tag = specific_tag;
+// if (tag_width == 0) {
+// g_ip->tag_w = 42;
+// }
+// else {
+// g_ip->tag_w = tag_width;
+// }
+//
+// g_ip->access_mode = access_mode;
+// g_ip->delay_wt = obj_func_delay;
+// g_ip->dynamic_power_wt = obj_func_dynamic_power;
+// g_ip->leakage_power_wt = obj_func_leakage_power;
+// g_ip->area_wt = obj_func_area;
+// g_ip->cycle_time_wt = obj_func_cycle_time;
+// g_ip->delay_dev = dev_func_delay;
+// g_ip->dynamic_power_dev = dev_func_dynamic_power;
+// g_ip->leakage_power_dev = dev_func_leakage_power;
+// g_ip->area_dev = dev_func_area;
+// g_ip->cycle_time_dev = dev_func_cycle_time;
+// g_ip->temp = temp;
+//
+// g_ip->F_sz_nm = tech_node;
+// g_ip->F_sz_um = tech_node / 1000;
+// g_ip->is_main_mem = (main_mem != 0) ? true : false;
+// g_ip->is_cache = (cache ==1) ? true : false;
+// g_ip->pure_ram = (cache ==0) ? true : false;
+// g_ip->pure_cam = (cache ==2) ? true : false;
+// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
+// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
+// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
+//
+// g_ip->num_rw_ports = rw_ports;
+// g_ip->num_rd_ports = excl_read_ports;
+// g_ip->num_wr_ports = excl_write_ports;
+// g_ip->num_se_rd_ports = single_ended_read_ports;
+// g_ip->num_search_ports = search_ports;
+//
+// g_ip->print_detail = 1;
+// g_ip->nuca = 0;
+//
+// if (force_wiretype == 0)
+// {
+// g_ip->wt = Global;
+// g_ip->force_wiretype = false;
+// }
+// else
+// { g_ip->force_wiretype = true;
+// if (wiretype==10) {
+// g_ip->wt = Global_10;
+// }
+// if (wiretype==20) {
+// g_ip->wt = Global_20;
+// }
+// if (wiretype==30) {
+// g_ip->wt = Global_30;
+// }
+// if (wiretype==5) {
+// g_ip->wt = Global_5;
+// }
+// if (wiretype==0) {
+// g_ip->wt = Low_swing;
+// }
+// }
+// //g_ip->wt = Global_5;
+// if (force_config == 0)
+// {
+// g_ip->force_cache_config = false;
+// }
+// else
+// {
+// g_ip->force_cache_config = true;
+// g_ip->ndbl=ndbl;
+// g_ip->ndwl=ndwl;
+// g_ip->nspd=nspd;
+// g_ip->ndcm=ndcm;
+// g_ip->ndsam1=ndsam1;
+// g_ip->ndsam2=ndsam2;
+//
+//
+// }
+//
+// if (ecc==0){
+// g_ip->add_ecc_b_=false;
+// }
+// else
+// {
+// g_ip->add_ecc_b_=true;
+// }
+
+
+ g_ip->error_checking();
+
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+ //solve(&fin_res);
+ //g_ip->display_ip();
+
+ //solve(&fin_res);
+ //output_UCA(&fin_res);
+ //output_data_csv(fin_res);
+ // delete (g_ip);
+
+ return fin_res;
+}
+
+void reconfigure(InputParameter *local_interface, uca_org_t *fin_res)
+{
+ // Copy the InputParameter to global interface (g_ip) and do error checking.
+ g_ip = local_interface;
+ g_ip->error_checking();
+
+ // Initialize technology parameters
+ init_tech_params(g_ip->F_sz_um,false);
+
+ Wire winit; // Do not delete this line. It initializes wires.
+
+ // This corresponds to solve() in the initialization process.
+ update(fin_res);
+}
diff --git a/ext/mcpat/cacti/io.h b/ext/mcpat/cacti/io.h
new file mode 100644
index 000000000..b1c2565e0
--- /dev/null
+++ b/ext/mcpat/cacti/io.h
@@ -0,0 +1,44 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#ifndef __IO_H__
+#define __IO_H__
+
+
+#include "cacti_interface.h"
+#include "const.h"
+
+void output_data_csv(const uca_org_t & fin_res);
+void output_UCA(uca_org_t * fin_res);
+
+
+#endif
diff --git a/ext/mcpat/cacti/main.cc b/ext/mcpat/cacti/main.cc
new file mode 100644
index 000000000..d6e12be62
--- /dev/null
+++ b/ext/mcpat/cacti/main.cc
@@ -0,0 +1,191 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#include <iostream>
+
+#include "io.h"
+
+using namespace std;
+
+
+int main(int argc,char *argv[])
+{
+
+ uca_org_t result;
+ if (argc != 53 && argc != 55)
+ {
+ bool infile_specified = false;
+ string infile_name("");
+
+ for (int32_t i = 0; i < argc; i++)
+ {
+ if (argv[i] == string("-infile"))
+ {
+ infile_specified = true;
+ i++;
+ infile_name = argv[i];
+ }
+ }
+
+ if (infile_specified == false)
+ {
+ cerr << " Invalid arguments -- how to use CACTI:" << endl;
+ cerr << " 1) cacti -infile <input file name>" << endl;
+ cerr << " 2) cacti arg1 ... arg52 -- please refer to the README file" << endl;
+ cerr << " No. of arguments input - " << argc << endl;
+ exit(1);
+ }
+ else
+ {
+ result = cacti_interface(infile_name);
+ }
+ }
+ else if (argc == 53)
+ {
+ result = cacti_interface(atoi(argv[ 1]),
+ atoi(argv[ 2]),
+ atoi(argv[ 3]),
+ atoi(argv[ 4]),
+ atoi(argv[ 5]),
+ atoi(argv[ 6]),
+ atoi(argv[ 7]),
+ atoi(argv[ 8]),
+ atoi(argv[ 9]),
+ atof(argv[10]),
+ atoi(argv[11]),
+ atoi(argv[12]),
+ atoi(argv[13]),
+ atoi(argv[14]),
+ atoi(argv[15]),
+ atoi(argv[16]),
+ atoi(argv[17]),
+ atoi(argv[18]),
+ atoi(argv[19]),
+ atoi(argv[20]),
+ atoi(argv[21]),
+ atoi(argv[22]),
+ atoi(argv[23]),
+ atoi(argv[24]),
+ atoi(argv[25]),
+ atoi(argv[26]),
+ atoi(argv[27]),
+ atoi(argv[28]),
+ atoi(argv[29]),
+ atoi(argv[30]),
+ atoi(argv[31]),
+ atoi(argv[32]),
+ atoi(argv[33]),
+ atoi(argv[34]),
+ atoi(argv[35]),
+ atoi(argv[36]),
+ atoi(argv[37]),
+ atoi(argv[38]),
+ atoi(argv[39]),
+ atoi(argv[40]),
+ atoi(argv[41]),
+ atoi(argv[42]),
+ atoi(argv[43]),
+ atoi(argv[44]),
+ atoi(argv[45]),
+ atoi(argv[46]),
+ atoi(argv[47]),
+ atoi(argv[48]),
+ atoi(argv[49]),
+ atoi(argv[50]),
+ atoi(argv[51]),
+ atoi(argv[52]));
+ }
+ else
+ {
+ result = cacti_interface(atoi(argv[ 1]),
+ atoi(argv[ 2]),
+ atoi(argv[ 3]),
+ atoi(argv[ 4]),
+ atoi(argv[ 5]),
+ atoi(argv[ 6]),
+ atoi(argv[ 7]),
+ atoi(argv[ 8]),
+ atof(argv[ 9]),
+ atoi(argv[10]),
+ atoi(argv[11]),
+ atoi(argv[12]),
+ atoi(argv[13]),
+ atoi(argv[14]),
+ atoi(argv[15]),
+ atoi(argv[16]),
+ atoi(argv[17]),
+ atoi(argv[18]),
+ atoi(argv[19]),
+ atoi(argv[20]),
+ atoi(argv[21]),
+ atoi(argv[22]),
+ atoi(argv[23]),
+ atoi(argv[24]),
+ atoi(argv[25]),
+ atoi(argv[26]),
+ atoi(argv[27]),
+ atoi(argv[28]),
+ atoi(argv[29]),
+ atoi(argv[30]),
+ atoi(argv[31]),
+ atoi(argv[32]),
+ atoi(argv[33]),
+ atoi(argv[34]),
+ atoi(argv[35]),
+ atoi(argv[36]),
+ atoi(argv[37]),
+ atoi(argv[38]),
+ atoi(argv[39]),
+ atoi(argv[40]),
+ atoi(argv[41]),
+ atoi(argv[42]),
+ atoi(argv[43]),
+ atoi(argv[44]),
+ atoi(argv[45]),
+ atoi(argv[46]),
+ atoi(argv[47]),
+ atoi(argv[48]),
+ atoi(argv[49]),
+ atoi(argv[50]),
+ atoi(argv[51]),
+ atoi(argv[52]),
+ atoi(argv[53]),
+ atoi(argv[54]));
+ }
+
+ result.cleanup();
+// delete result.data_array2;
+// if (result.tag_array2!=NULL)
+// delete result.tag_array2;
+
+ return 0;
+}
+
diff --git a/ext/mcpat/cacti/makefile b/ext/mcpat/cacti/makefile
new file mode 100644
index 000000000..27286916a
--- /dev/null
+++ b/ext/mcpat/cacti/makefile
@@ -0,0 +1,28 @@
+TAR = cacti
+
+.PHONY: dbg opt depend clean clean_dbg clean_opt
+
+all: opt
+
+dbg: $(TAR).mk obj_dbg
+ @$(MAKE) TAG=dbg -C . -f $(TAR).mk
+
+opt: $(TAR).mk obj_opt
+ @$(MAKE) TAG=opt -C . -f $(TAR).mk
+
+obj_dbg:
+ mkdir $@
+
+obj_opt:
+ mkdir $@
+
+clean: clean_dbg clean_opt
+
+clean_dbg: obj_dbg
+ @$(MAKE) TAG=dbg -C . -f $(TAR).mk clean
+ rm -rf $<
+
+clean_opt: obj_opt
+ @$(MAKE) TAG=opt -C . -f $(TAR).mk clean
+ rm -rf $<
+
diff --git a/ext/mcpat/cacti/mat.cc b/ext/mcpat/cacti/mat.cc
new file mode 100755
index 000000000..ef98107c7
--- /dev/null
+++ b/ext/mcpat/cacti/mat.cc
@@ -0,0 +1,1748 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <cassert>
+
+#include "mat.h"
+
+Mat::Mat(const DynamicParameter & dyn_p)
+ :dp(dyn_p),
+ power_subarray_out_drv(),
+ delay_fa_tag(0), delay_cam(0),
+ delay_before_decoder(0), delay_bitline(0),
+ delay_wl_reset(0), delay_bl_restore(0),
+ delay_searchline(0), delay_matchchline(0),
+ delay_cam_sl_restore(0), delay_cam_ml_reset(0),
+ delay_fa_ram_wl(0),delay_hit_miss_reset(0),
+ delay_hit_miss(0),
+ subarray(dp, dp.fully_assoc),
+ power_bitline(), per_bitline_read_energy(0),
+ deg_bl_muxing(dp.deg_bl_muxing),
+ num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
+ delay_writeback(0),
+ cell(subarray.cell), cam_cell(subarray.cam_cell),
+ is_dram(dyn_p.is_dram),
+ pure_cam(dyn_p.pure_cam),
+ num_mats(dp.num_mats),
+ power_sa(), delay_sa(0),
+ leak_power_sense_amps_closed_page_state(0),
+ leak_power_sense_amps_open_page_state(0),
+ delay_subarray_out_drv(0),
+ delay_comparator(0), power_comparator(),
+ num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
+ num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
+ num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
+{
+ assert(num_subarrays_per_mat <= 4);
+ assert(num_subarrays_per_row <= 2);
+ is_fa = (dp.fully_assoc) ? true : false;
+ camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
+
+ if (is_fa || pure_cam)
+ num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
+
+ if (dp.use_inp_params == 1) {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ }
+ else {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+
+ }
+
+ double number_sa_subarray;
+
+ if (!is_fa && !pure_cam)
+ {
+ number_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ }
+ else if (is_fa && !pure_cam)
+ {
+ number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
+ }
+
+ else
+ {
+ number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
+ }
+
+ int num_dec_signals = subarray.num_rows;
+ double C_ld_bit_mux_dec_out = 0;
+ double C_ld_sa_mux_lev_1_dec_out = 0;
+ double C_ld_sa_mux_lev_2_dec_out = 0;
+ double R_wire_wl_drv_out;
+
+ if (!is_fa && !pure_cam)
+ {
+ R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+ }
+ else if (is_fa && !pure_cam)
+ {
+ R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+ }
+ else
+ {
+ R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
+ }
+
+ double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
+ double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
+
+ if (deg_bl_muxing > 1)
+ {
+ C_ld_bit_mux_dec_out =
+ (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
+ num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
+ }
+
+ if (dp.Ndsam_lev_1 > 1)
+ {
+ C_ld_sa_mux_lev_1_dec_out =
+ (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+ num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
+ }
+ if (dp.Ndsam_lev_2 > 1)
+ {
+ C_ld_sa_mux_lev_2_dec_out =
+ (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+ num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
+ }
+
+ if (num_subarrays_per_row >= 2)
+ {
+ // wire heads for both right and left side of a mat, so half the resistance
+ R_wire_bit_mux_dec_out /= 2.0;
+ R_wire_sa_mux_dec_out /= 2.0;
+ }
+
+
+ row_dec = new Decoder(
+ num_dec_signals,
+ false,
+ subarray.C_wl,
+ R_wire_wl_drv_out,
+ false/*is_fa*/,
+ is_dram,
+ true,
+ camFlag? cam_cell:cell);
+// if (is_fa && (!dp.is_tag))
+// {
+// row_dec->exist = true;
+// }
+ bit_mux_dec = new Decoder(
+ deg_bl_muxing,// This number is 1 for FA or CAM
+ false,
+ C_ld_bit_mux_dec_out,
+ R_wire_bit_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag? cam_cell:cell);
+ sa_mux_lev_1_dec = new Decoder(
+ dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
+ dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
+ C_ld_sa_mux_lev_1_dec_out,
+ R_wire_sa_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag? cam_cell:cell);
+ sa_mux_lev_2_dec = new Decoder(
+ dp.Ndsam_lev_2, // This number is 1 for FA or CAM
+ false,
+ C_ld_sa_mux_lev_2_dec_out,
+ R_wire_sa_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag? cam_cell:cell);
+
+ double C_wire_predec_blk_out;
+ double R_wire_predec_blk_out;
+
+ if (!is_fa && !pure_cam)
+ {
+
+ C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
+ R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
+
+ }
+ else //for pre-decode block's load is same for both FA and CAM
+ {
+ C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
+ R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
+ }
+
+
+ if (is_fa||pure_cam)
+ num_dec_signals += _log2(num_subarrays_per_mat);
+
+ PredecBlk * r_predec_blk1 = new PredecBlk(
+ num_dec_signals,
+ row_dec,
+ C_wire_predec_blk_out,
+ R_wire_predec_blk_out,
+ num_subarrays_per_mat,
+ is_dram,
+ true);
+ PredecBlk * r_predec_blk2 = new PredecBlk(
+ num_dec_signals,
+ row_dec,
+ C_wire_predec_blk_out,
+ R_wire_predec_blk_out,
+ num_subarrays_per_mat,
+ is_dram,
+ false);
+ PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
+ PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
+ PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
+ dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
+ dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
+
+ PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
+ PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
+ PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
+ PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
+ PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
+ PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
+ PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
+ PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
+ way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
+ dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
+
+ r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
+ b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
+ sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
+ sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
+
+ subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
+
+ double driver_c_gate_load;
+ double driver_c_wire_load;
+ double driver_r_wire_load;
+
+ if (is_fa || pure_cam)
+
+ { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
+ driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
+ cam_bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ if (!pure_cam)
+ {
+ //This is only used for fully asso not pure CAM
+ driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
+ bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+ }
+ }
+
+ else
+ {
+ driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
+ bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+ }
+ double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
+ double w_row_decoder = area_row_decoder / subarray.area.get_h();
+
+ double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
+ compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
+
+ double h_subarray_out_drv = subarray_out_wire->area.get_area() *
+ (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
+
+
+ h_subarray_out_drv *= (RWP + ERP + SCHP);
+
+ double h_comparators = 0.0;
+ double w_row_predecode_output_wires = 0.0;
+ double h_bit_mux_dec_out_wires = 0.0;
+ double h_senseamp_mux_dec_out_wires = 0.0;
+
+ if ((!is_fa)&&(dp.is_tag))
+ {
+ //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
+ h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
+ h_comparators *= (RWP + ERP);
+ }
+
+
+ int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
+ int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
+ w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+
+ double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
+ (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
+ h_subarray_out_drv + h_comparators);
+
+ double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
+
+ if (deg_bl_muxing > 1)
+ {
+ h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+ if (dp.Ndsam_lev_1 > 1)
+ {
+ h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+ if (dp.Ndsam_lev_2 > 1)
+ {
+ h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+
+ double h_addr_datain_wires;
+ if (!g_ip->ver_htree_wires_over_array)
+ {
+ h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
+ (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+ if (is_fa || pure_cam)
+ {
+ h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit
+ (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
+ (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
+ }
+ //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
+ //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
+ h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
+ h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
+ h_addr_datain_wires +
+ h_bit_mux_dec_out_wires +
+ h_senseamp_mux_dec_out_wires;
+
+ }
+
+ // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
+ double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
+ b_mux_predec_blk_drv1->area.get_area() +
+ sa_mux_lev_1_predec_blk_drv1->area.get_area() +
+ sa_mux_lev_2_predec_blk_drv1->area.get_area() +
+ way_sel_drv1->area.get_area() +
+ r_predec_blk_drv2->area.get_area() +
+ b_mux_predec_blk_drv2->area.get_area() +
+ sa_mux_lev_1_predec_blk_drv2->area.get_area() +
+ sa_mux_lev_2_predec_blk_drv2->area.get_area() +
+ r_predec_blk1->area.get_area() +
+ b_mux_predec_blk1->area.get_area() +
+ sa_mux_lev_1_predec_blk1->area.get_area() +
+ sa_mux_lev_2_predec_blk1->area.get_area() +
+ r_predec_blk2->area.get_area() +
+ b_mux_predec_blk2->area.get_area() +
+ sa_mux_lev_1_predec_blk2->area.get_area() +
+ sa_mux_lev_2_predec_blk2->area.get_area() +
+ bit_mux_dec->area.get_area() +
+ sa_mux_lev_1_dec->area.get_area() +
+ sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
+
+ double area_efficiency_mat;
+
+// if (!is_fa)
+// {
+ assert(num_subarrays_per_mat/num_subarrays_per_row>0);
+ area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
+ area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
+ area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
+ area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
+
+// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
+// cout<<"h_comparators"<<h_comparators<<endl;
+// cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl;
+// cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl;
+// cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl;
+// cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl;
+// cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
+// cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
+// cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
+// cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
+
+ assert(area.h>0);
+ assert(area.w>0);
+// }
+// else
+// {
+// area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
+// area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
+// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
+// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
+// }
+ }
+
+
+
+Mat::~Mat()
+{
+ delete row_dec;
+ delete bit_mux_dec;
+ delete sa_mux_lev_1_dec;
+ delete sa_mux_lev_2_dec;
+
+ delete r_predec->blk1;
+ delete r_predec->blk2;
+ delete b_mux_predec->blk1;
+ delete b_mux_predec->blk2;
+ delete sa_mux_lev_1_predec->blk1;
+ delete sa_mux_lev_1_predec->blk2;
+ delete sa_mux_lev_2_predec->blk1;
+ delete sa_mux_lev_2_predec->blk2;
+ delete dummy_way_sel_predec_blk1;
+ delete dummy_way_sel_predec_blk2;
+
+ delete r_predec->drv1;
+ delete r_predec->drv2;
+ delete b_mux_predec->drv1;
+ delete b_mux_predec->drv2;
+ delete sa_mux_lev_1_predec->drv1;
+ delete sa_mux_lev_1_predec->drv2;
+ delete sa_mux_lev_2_predec->drv1;
+ delete sa_mux_lev_2_predec->drv2;
+ delete way_sel_drv1;
+ delete dummy_way_sel_predec_blk_drv2;
+
+ delete r_predec;
+ delete b_mux_predec;
+ delete sa_mux_lev_1_predec;
+ delete sa_mux_lev_2_predec;
+
+ delete subarray_out_wire;
+ if (!pure_cam)
+ delete bl_precharge_eq_drv;
+
+ if (is_fa || pure_cam)
+ {
+ delete sl_precharge_eq_drv ;
+ delete sl_data_drv ;
+ delete cam_bl_precharge_eq_drv;
+ delete ml_precharge_drv;
+ delete ml_to_ram_wl_drv;
+ }
+}
+
+
+
+double Mat::compute_delays(double inrisetime)
+{
+ int k;
+ double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
+ double outrisetime_search, outrisetime, row_dec_outrisetime;
+ // delay calculation for tags of fully associative cache
+ if (is_fa || pure_cam)
+ {
+ //Compute search access time
+ outrisetime_search = compute_cam_delay(inrisetime);
+ if (is_fa)
+ {
+ bl_precharge_eq_drv->compute_delay(0);
+ k = ml_to_ram_wl_drv->number_gates - 1;
+ rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
+ C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
+ drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
+ C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
+ tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
+ delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+
+ R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+ r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
+ R_bl = subarray.num_rows * r_b_metal;
+ C_bl = subarray.C_bl;
+ delay_bl_restore = bl_precharge_eq_drv->delay +
+ log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+
+ outrisetime_search = compute_bitline_delay(outrisetime_search);
+ outrisetime_search = compute_sa_delay(outrisetime_search);
+ }
+ outrisetime_search = compute_subarray_out_drv(outrisetime_search);
+ subarray_out_wire->set_in_rise_time(outrisetime_search);
+ outrisetime_search = subarray_out_wire->signal_rise_time();
+ delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+
+ //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
+ outrisetime = r_predec->compute_delays(inrisetime);
+ row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+ outrisetime = b_mux_predec->compute_delays(inrisetime);
+ bit_mux_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+ sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+ sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+ if (pure_cam)
+ {
+ outrisetime = compute_bitline_delay(row_dec_outrisetime);
+ outrisetime = compute_sa_delay(outrisetime);
+ }
+ return outrisetime_search;
+ }
+ else
+ {
+ bl_precharge_eq_drv->compute_delay(0);
+ if (row_dec->exist == true)
+ {
+ int k = row_dec->num_gates - 1;
+ double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
+ // TODO: this 4*cell.h number must be revisited
+ double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
+ drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
+ double C_ld = row_dec->C_ld_dec_out;
+ double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
+ delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+ }
+ double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+ double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = subarray.num_rows * r_b_metal;
+ double C_bl = subarray.C_bl;
+
+ if (is_dram)
+ {
+ delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+ }
+ else
+ {
+ delay_bl_restore = bl_precharge_eq_drv->delay +
+ log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+ }
+ }
+
+
+
+ outrisetime = r_predec->compute_delays(inrisetime);
+ row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+ outrisetime = b_mux_predec->compute_delays(inrisetime);
+ bit_mux_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+ sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+ sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+ outrisetime = compute_bitline_delay(row_dec_outrisetime);
+ outrisetime = compute_sa_delay(outrisetime);
+ outrisetime = compute_subarray_out_drv(outrisetime);
+ subarray_out_wire->set_in_rise_time(outrisetime);
+ outrisetime = subarray_out_wire->signal_rise_time();
+
+ delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+ if (dp.is_tag == true && dp.fully_assoc == false)
+ {
+ compute_comparator_delay(0);
+ }
+
+ if (row_dec->exist == false)
+ {
+ delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
+ }
+ return outrisetime;
+}
+
+
+
+double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
+{
+
+ double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
+ compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry
+
+ if (deg_bl_muxing > 1)
+ {
+ height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
+ // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
+ }
+
+ height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
+
+ if (dp.Ndsam_lev_1 > 1)
+ {
+ height += compute_tr_width_after_folding(
+ g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
+ //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+ }
+
+ if (dp.Ndsam_lev_2 > 1)
+ {
+ height += compute_tr_width_after_folding(
+ g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
+ //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+
+ // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
+ height += 2 * compute_tr_width_after_folding(
+ pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+ height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+ }
+
+ // TODO: this should be uncommented...
+ /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
+ {
+ //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
+ double width_write_driver_write_mux = width_write_driver_or_write_mux();
+ double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
+ cell.w *
+ // deg_bl_muxing *
+ dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
+ height += height_write_driver_write_mux;
+ }*/
+
+ return height;
+}
+
+
+
+double Mat::compute_cam_delay(double inrisetime)
+{
+
+ double out_time_ramp, this_delay;
+ double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
+
+
+ double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
+ Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
+ Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
+ Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
+
+ double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
+ int Htagbits;
+
+ double driver_c_gate_load;
+ double driver_c_wire_load;
+ double driver_r_wire_load;
+ //double searchline_precharge_time;
+
+ double leak_power_cc_inverters_sram_cell = 0;
+ double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+ double leak_power_RD_port_sram_cell = 0;
+ double leak_power_SCHP_port_sram_cell = 0;
+ double leak_comparator_cam_cell =0;
+
+ double gate_leak_comparator_cam_cell = 0;
+ double gate_leak_power_cc_inverters_sram_cell = 0;
+ double gate_leak_power_RD_port_sram_cell = 0;
+ double gate_leak_power_SCHP_port_sram_cell = 0;
+
+ c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
+ c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
+ r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
+ r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
+
+ dynSearchEng = 0.0;
+ delay_matchchline = 0.0;
+ double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
+ bool linear_scaling = false;
+
+ if (linear_scaling)
+ {
+ Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+ Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+ Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+ Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+ Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+ Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
+ Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ W_hit_miss_n = Wdummyn;
+ W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
+ //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
+ }
+ else
+ {
+ Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+ Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+ Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+ Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+ Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+ Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
+ Wdummyn = g_tp.cam.cell_nmos_w;
+ Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
+ Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ W_hit_miss_n = Wdummyn;
+ W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
+ }
+
+ Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
+
+ /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
+ search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
+ From the driver(am and an) to the comparators in all the rows including the dummy row,
+ Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
+
+ //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
+ //Searchline precharge routes horizontally
+ driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
+
+ sl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
+ //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
+ driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
+ driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+ driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+ sl_data_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ sl_precharge_eq_drv->compute_delay(0);
+ double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
+ double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = (subarray.num_rows + 1) * r_b_metal;
+ double C_bl = subarray.C_bl_cam;
+ delay_cam_sl_restore = sl_precharge_eq_drv->delay
+ + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+ out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
+
+ //matchline ops delay
+ delay_matchchline += sl_data_drv->delay;
+
+ /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
+ //matchline delay, matchline power, matchline_reset for cycle time computation,
+
+ ////matchline precharge circuitry routes vertically
+ //There are two matchline precharge driver chains per subarray.
+ driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
+ driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+ driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+
+ ml_precharge_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ ml_precharge_drv->compute_delay(0);
+
+
+ rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
+ c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
+ + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
+
+ Cwire = c_matchline_metal * Htagbits;
+ Rwire = r_matchline_metal * Htagbits;
+ c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
+
+ double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
+ //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ double R_ml = Rwire;
+ double C_ml = Cwire + c_intrinsic;
+ delay_cam_ml_reset = ml_precharge_drv->delay
+ + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
+
+ //matchline ops delay
+ tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+ this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
+ delay_matchchline += this_delay;
+ out_time_ramp = this_delay / VTHFA3;
+
+ dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
+ * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
+
+ /* third stage, from the NAND2 gates to the drivers in the dummy row */
+ rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
+ c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
+ c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
+ out_time_ramp = this_delay / (1 - VTHFA4);
+ delay_matchchline += this_delay;
+
+ //only the dummy row has the extra inverter between NAND and NOR gates
+ dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
+
+ /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
+ rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
+ c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
+ Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
+ c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
+ tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+ this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
+ out_time_ramp = this_delay / VTHFA5;
+ delay_matchchline += this_delay;
+
+ dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+ /*final statge from the NOR gate to drive the wordline of the data portion */
+
+ //searchline data driver There are two matchline precharge driver chains per subarray.
+ driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
+ driver_c_wire_load = subarray.C_wl_ram;
+ driver_r_wire_load = subarray.R_wl_ram;
+
+ ml_to_ram_wl_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+
+
+ rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
+ c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
+ out_time_ramp = this_delay / (1-0.5);
+ delay_matchchline += this_delay;
+
+ out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
+
+ //c_gate_load energy is computed in ml_to_ram_wl_drv
+ dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+
+ /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
+ /*Precharge the hitting logic */
+ c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_searchline_metal * subarray.num_rows;
+ Rwire = r_searchline_metal * subarray.num_rows;
+ c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
+
+ rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
+ //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ double R_hit_miss = Rwire;
+ double C_hit_miss = Cwire + c_intrinsic;
+ delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
+ dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+ /*hitting logic evaluation */
+ c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_searchline_metal * subarray.num_rows;
+ Rwire = r_searchline_metal * subarray.num_rows;
+ c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
+
+ rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
+ tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+
+ delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
+
+ if (is_fa)
+ delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
+
+ dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+ /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
+
+ power_matchline.searchOp.dynamic = dynSearchEng;
+
+ //leakage in one subarray
+ double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
+ double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
+ double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
+ double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
+
+ leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
+ leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
+ leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
+ leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
+ leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
+
+ power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
+ leak_comparator_cam_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+ leak_power_RD_port_sram_cell * ERP +
+ leak_power_SCHP_port_sram_cell*SCHP;
+// power_matchline.searchOp.leakage += leak_comparator_cam_cell;
+ power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
+ power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
+ //In idle states, the hit/miss txs are closed (on) therefore no Isub
+ power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
+ // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
+
+ //in idle state, Ig_on only possibly exist in access transistors of read only ports
+ double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
+ double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
+ double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
+
+ gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
+ gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
+ gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
+ gate_leak_power_SCHP_port_sram_cell = 0;
+
+ //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
+
+ power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
+ power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
+ power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
+ power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
+ + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
+
+
+ return out_time_ramp;
+}
+
+
+double Mat::width_write_driver_or_write_mux()
+{
+ // calculate resistance of SRAM cell pull-up PMOS transistor
+ // cam and sram have same cell trasistor properties
+ double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
+ double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
+ double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
+ double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
+
+ return width_write_driver_nmos;
+}
+
+
+
+double Mat::compute_comparators_height(
+ int tagbits,
+ int number_ways_in_mat,
+ double subarray_mem_cell_area_width)
+{
+ double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
+ double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
+ return cumulative_area / subarray_mem_cell_area_width;
+}
+
+
+
+double Mat::compute_bitline_delay(double inrisetime)
+{
+ double V_b_pre, v_th_mem_cell, V_wl;
+ double tstep;
+ double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
+ double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
+ int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
+
+ double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = subarray.num_rows * R_b_metal;
+ double C_bl = subarray.C_bl;
+
+ // TODO: no leakage for DRAMs?
+ double leak_power_cc_inverters_sram_cell = 0;
+ double gate_leak_power_cc_inverters_sram_cell = 0;
+ double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+ double leak_power_RD_port_sram_cell = 0;
+ double gate_leak_power_RD_port_sram_cell = 0;
+
+ if (is_dram == true)
+ {
+ V_b_pre = g_tp.dram.Vbitpre;
+ v_th_mem_cell = g_tp.dram_acc.Vth;
+ V_wl = g_tp.vpp;
+ //The access transistor is not folded. So we just need to specify a threshold value for the
+ //folding width that is equal to or greater than Wmemcella.
+ R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
+ r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
+ }
+ else
+ { //SRAM
+ V_b_pre = g_tp.sram.Vbitpre;
+ v_th_mem_cell = g_tp.sram_cell.Vth;
+ V_wl = g_tp.sram_cell.Vdd;
+ R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
+ R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
+
+ //Leakage current of an SRAM cell
+ double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2?
+ double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true);
+ double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
+
+ leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
+ leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
+ leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
+
+
+ //in idle state, Ig_on only possibly exist in access transistors of read only ports
+ double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
+ double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
+
+ gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
+ gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
+ }
+
+
+ double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
+ double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
+ double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
+ double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
+ double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
+ double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
+
+ if (is_dram)
+ {
+ double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
+ tstep = 2.3 * fraction * r_dev *
+ (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
+ (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
+ delay_writeback = tstep;
+ dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
+ (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+ dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
+ (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
+ per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
+ (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
+ }
+ else
+ {
+ double tau;
+
+ if (deg_bl_muxing > 1)
+ {
+ tau = (R_cell_pull_down + R_cell_acc) *
+ (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
+ dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
+ subarray.num_cols * num_subarrays_per_mat*/;
+ dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
+ 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
+ dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
+ num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
+ //Write Ops are differential for SRAM
+ }
+ else
+ {
+ tau = (R_cell_pull_down + R_cell_acc) *
+ (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
+ R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
+ dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
+ 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+ dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
+ num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
+
+ }
+ tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
+ power_bitline.readOp.leakage =
+ leak_power_cc_inverters_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+ leak_power_RD_port_sram_cell * ERP;
+ power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
+ gate_leak_power_RD_port_sram_cell * ERP;
+
+ }
+
+// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
+// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
+// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
+// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
+
+
+ /* take input rise time into account */
+ double m = V_wl / inrisetime;
+ if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
+ {
+ delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
+ }
+ else
+ {
+ delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
+ }
+
+ bool is_fa = (dp.fully_assoc) ? true : false;
+
+ if (dp.is_tag == false || is_fa == false)
+ {
+ power_bitline.readOp.dynamic = dynRdEnergy;
+ power_bitline.writeOp.dynamic = dynWriteEnergy;
+ }
+
+ double outrisetime = 0;
+ return outrisetime;
+}
+
+
+
+double Mat::compute_sa_delay(double inrisetime)
+{
+ //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
+
+ //Bitline circuitry leakage.
+ double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
+ double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
+ double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
+ double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
+
+ double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
+ //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
+ double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
+ //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
+ // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
+ double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
+ leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+ leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+
+ // sense amplifier has to drive logic in "data out driver" and sense precharge load.
+ // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
+ //constant as well as the magnitude of input differential voltage.
+ double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
+ double tau = C_ld / g_tp.gm_sense_amp_latch;
+ delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
+ power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
+ num_subarrays_per_mat * num_act_mats_hor_dir*/;
+ power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
+
+ double outrisetime = 0;
+ return outrisetime;
+}
+
+
+
+double Mat::compute_subarray_out_drv(double inrisetime)
+{
+ double C_ld, rd, tf, this_delay;
+ double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
+
+ // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+ C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+ gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay/(1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
+ power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
+ // delay of signal through inverter-buffer to second level of sense-amp mux.
+ // internal delay of buffer
+ rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+ C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay/(1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
+
+ // inverter driving drain of pass transistor of second level of sense-amp mux.
+ rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+ C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay/(1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
+
+
+ // delay of signal through pass-transistor to input of subarray output driver.
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+ C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
+ //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
+ gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay/(1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
+ power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
+
+
+ return inrisetime;
+}
+
+
+
+double Mat::compute_comparator_delay(double inrisetime)
+{
+ int A = g_ip->tag_assoc;
+
+ int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
+ // a multiple of 4.
+
+ /* First Inverter */
+ double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
+ double tf = Req*Ceq;
+ double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
+ double nextinputtime = st1del/VTHCOMPINV;
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+
+ //For each degree of associativity
+ //there are 4 such quarter comparators
+ double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
+ double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
+ /* Second Inverter */
+ Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
+ tf = Req*Ceq;
+ double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
+ nextinputtime = st2del/(1.0-VTHCOMPINV);
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
+
+ /* Third Inverter */
+ Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
+ tf = Req*Ceq;
+ double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
+ nextinputtime = st3del/(VTHEVALINV);
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
+
+ /* Final Inverter (virtual ground driver) discharging compare part */
+ double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
+ double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
+ double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
+ drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
+ double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
+ drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
+ gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
+ power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
+
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
+
+ /* time to go to threshold of mux driver */
+ double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
+ /* take into account non-zero input rise time */
+ double m = g_tp.peri_global.Vdd/nextinputtime;
+ double Tcomparatorni;
+
+ if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
+ {
+ double a = m;
+ double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
+ double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
+ Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
+ }
+ else
+ {
+ Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
+ }
+ delay_comparator = Tcomparatorni+st1del+st2del+st3del;
+ power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
+ power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
+
+ return Tcomparatorni / (1.0 - VTHMUXNAND);;
+}
+
+
+
+void Mat::compute_power_energy()
+{
+ //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
+ //when search all subarrays and all mats are fully active
+ //when plain read/write only one subarray in a single mat is active.
+
+ // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
+ power.readOp.dynamic += r_predec->power.readOp.dynamic +
+ b_mux_predec->power.readOp.dynamic +
+ sa_mux_lev_1_predec->power.readOp.dynamic +
+ sa_mux_lev_2_predec->power.readOp.dynamic;
+
+ // add energy consumed in decoders
+ power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
+ if (!(is_fa||pure_cam))
+ power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
+
+ // add energy consumed in bitline prechagers, SAs, and bitlines
+ if (!(is_fa||pure_cam))
+ {
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+ power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+
+ //Add sense amps energy
+ num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
+
+ // add energy consumed in bitlines
+ //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
+ power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
+ power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
+ //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
+ //Add subarray output energy
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+ }
+
+ else if (is_fa)
+ {
+ //for plain read/write only one subarray in a mat is active
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
+ + cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+
+ //Add sense amps energy
+ num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
+ num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
+ power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
+ power_sa.readOp.dynamic *= num_sa_subarray;
+
+
+ // add energy consumed in bitlines
+ power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
+ power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
+ power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
+ power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
+
+ //Add subarray output energy
+ power_subarray_out_drv.searchOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+
+ //add energy consumed inside cam
+ power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+ power_searchline_precharge = sl_precharge_eq_drv->power;
+ power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_searchline = sl_data_drv->power;
+ power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
+ power_matchline_precharge = ml_precharge_drv->power;
+ power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
+ power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
+ power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
+
+ power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+ //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ }
+ else
+ {
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+ //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
+
+ //Add sense amps energy
+ num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
+ power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
+ power_sa.searchOp.dynamic = 0;
+
+ power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
+ power_bitline.searchOp.dynamic = 0;
+ power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
+
+ power_subarray_out_drv.searchOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+
+
+ ////add energy consumed inside cam
+ power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+ power_searchline_precharge = sl_precharge_eq_drv->power;
+ power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_searchline = sl_data_drv->power;
+ power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
+ power_matchline_precharge = ml_precharge_drv->power;
+ power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
+ power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
+ power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
+
+ power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+ //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ }
+
+
+
+ // calculate leakage power
+ if (!(is_fa || pure_cam))
+ {
+ int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+
+ power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
+
+ //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+ power.readOp.leakage += power_bitline.readOp.leakage +
+ power_bl_precharge_eq_drv.readOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+ //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+ power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
+ power.readOp.leakage += power_comparator.readOp.leakage;
+
+ //cout<<"leakage1"<<power.readOp.leakage<<endl;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+ power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
+ power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
+ power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
+
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ b_mux_predec->power.readOp.leakage +
+ sa_mux_lev_1_predec->power.readOp.leakage +
+ sa_mux_lev_2_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage +
+ power_bit_mux_decoders.readOp.leakage +
+ power_sa_mux_lev_1_decoders.readOp.leakage +
+ power_sa_mux_lev_2_decoders.readOp.leakage;
+ //cout<<"leakage2"<<power.readOp.leakage<<endl;
+
+ //++++Below is gate leakage
+ power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
+
+ //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+ power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+ //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+ power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
+ power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
+
+ //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+ power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
+ power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
+ power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
+
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ b_mux_predec->power.readOp.gate_leakage +
+ sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage +
+ power_bit_mux_decoders.readOp.gate_leakage +
+ power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ power_sa_mux_lev_2_decoders.readOp.gate_leakage;
+ }
+ else if (is_fa)
+ {
+ int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+
+ power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+
+ //cout<<"leakage3"<<power.readOp.leakage<<endl;
+
+
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.leakage += power_bitline.readOp.leakage +
+ power_bl_precharge_eq_drv.readOp.leakage +
+ power_bl_precharge_eq_drv.searchOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+
+ //cout<<"leakage4"<<power.readOp.leakage<<endl;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage;
+
+ //cout<<"leakage5"<<power.readOp.leakage<<endl;
+
+ //inside cam
+ power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+ power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
+ power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+
+ power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+
+// cout<<"leakage6"<<power.readOp.leakage<<endl;
+
+ //+++Below is gate leakage
+ power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+
+ //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
+
+
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.searchOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+
+ //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage;
+
+ //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+
+ //inside cam
+ power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+
+ power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+
+ }
+ else
+ {
+ int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+
+ //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+
+
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.leakage += //power_bitline.readOp.leakage +
+ //power_bl_precharge_eq_drv.readOp.leakage +
+ power_bl_precharge_eq_drv.searchOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage;
+
+ //inside cam
+ power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+ power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
+ power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+
+ power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+
+ //+++Below is gate leakage
+ power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+
+
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
+ //power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.searchOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage;
+
+ //inside cam
+ power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+
+ power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+ }
+}
+
diff --git a/ext/mcpat/cacti/mat.h b/ext/mcpat/cacti/mat.h
new file mode 100755
index 000000000..8d038be8b
--- /dev/null
+++ b/ext/mcpat/cacti/mat.h
@@ -0,0 +1,148 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __MAT_H__
+#define __MAT_H__
+
+#include "component.h"
+#include "decoder.h"
+#include "subarray.h"
+#include "wire.h"
+
+class Mat : public Component
+{
+ public:
+ Mat(const DynamicParameter & dyn_p);
+ ~Mat();
+ double compute_delays(double inrisetime); // return outrisetime
+ void compute_power_energy();
+
+ const DynamicParameter & dp;
+
+ // TODO: clean up pointers and powerDefs below
+ Decoder * row_dec;
+ Decoder * bit_mux_dec;
+ Decoder * sa_mux_lev_1_dec;
+ Decoder * sa_mux_lev_2_dec;
+ PredecBlk * dummy_way_sel_predec_blk1;
+ PredecBlk * dummy_way_sel_predec_blk2;
+ PredecBlkDrv * way_sel_drv1;
+ PredecBlkDrv * dummy_way_sel_predec_blk_drv2;
+
+ Predec * r_predec;
+ Predec * b_mux_predec;
+ Predec * sa_mux_lev_1_predec;
+ Predec * sa_mux_lev_2_predec;
+
+ Wire * subarray_out_wire;
+ Driver * bl_precharge_eq_drv;
+ Driver * cam_bl_precharge_eq_drv;//bitline pre-charge circuit is separated for CAM and RAM arrays.
+ Driver * ml_precharge_drv;//matchline prechange driver
+ Driver * sl_precharge_eq_drv;//searchline prechage driver
+ Driver * sl_data_drv;//search line data driver
+ Driver * ml_to_ram_wl_drv;//search line data driver
+
+
+ powerDef power_row_decoders;
+ powerDef power_bit_mux_decoders;
+ powerDef power_sa_mux_lev_1_decoders;
+ powerDef power_sa_mux_lev_2_decoders;
+ powerDef power_fa_cam; // TODO: leakage power is not computed yet
+ powerDef power_bl_precharge_eq_drv;
+ powerDef power_subarray_out_drv;
+ powerDef power_cam_all_active;
+ powerDef power_searchline_precharge;
+ powerDef power_matchline_precharge;
+ powerDef power_ml_to_ram_wl_drv;
+
+ double delay_fa_tag, delay_cam;
+ double delay_before_decoder;
+ double delay_bitline;
+ double delay_wl_reset;
+ double delay_bl_restore;
+
+ double delay_searchline;
+ double delay_matchchline;
+ double delay_cam_sl_restore;
+ double delay_cam_ml_reset;
+ double delay_fa_ram_wl;
+
+ double delay_hit_miss_reset;
+ double delay_hit_miss;
+
+ Subarray subarray;
+ powerDef power_bitline, power_searchline, power_matchline;
+ double per_bitline_read_energy;
+ int deg_bl_muxing;
+ int num_act_mats_hor_dir;
+ double delay_writeback;
+ Area cell,cam_cell;
+ bool is_dram,is_fa, pure_cam, camFlag;
+ int num_mats;
+ powerDef power_sa;
+ double delay_sa;
+ double leak_power_sense_amps_closed_page_state;
+ double leak_power_sense_amps_open_page_state;
+ double delay_subarray_out_drv;
+ double delay_subarray_out_drv_htree;
+ double delay_comparator;
+ powerDef power_comparator;
+ int num_do_b_mat;
+ int num_so_b_mat;
+ int num_sa_subarray;
+ int num_sa_subarray_search;
+ double C_bl;
+
+ uint32_t num_subarrays_per_mat; // the number of subarrays in a mat
+ uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat
+
+
+ private:
+ double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
+ double width_write_driver_or_write_mux();
+ double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w);
+ double compute_cam_delay(double inrisetime);
+ double compute_bitline_delay(double inrisetime);
+ double compute_sa_delay(double inrisetime);
+ double compute_subarray_out_drv(double inrisetime);
+ double compute_comparator_delay(double inrisetime);
+
+ int RWP;
+ int ERP;
+ int EWP;
+ int SCHP;
+};
+
+
+
+#endif
diff --git a/ext/mcpat/cacti/nuca.cc b/ext/mcpat/cacti/nuca.cc
new file mode 100644
index 000000000..2aabe843f
--- /dev/null
+++ b/ext/mcpat/cacti/nuca.cc
@@ -0,0 +1,612 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <cassert>
+
+#include "Ucache.h"
+#include "nuca.h"
+
+unsigned int MIN_BANKSIZE=65536;
+#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */
+#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */
+#define CONTR_2_BANK_LAT 0
+
+int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */];
+
+ Nuca::Nuca(
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
+ ):deviceType(dt)
+{
+ init_cont();
+}
+
+void
+Nuca::init_cont()
+{
+ FILE *cont;
+ char line[5000];
+ char jk[5000];
+ cont = fopen("contention.dat", "r");
+ if (!cont) {
+ cout << "contention.dat file is missing!\n";
+ exit(0);
+ }
+
+ for(int i=0; i<2; i++) {
+ for(int j=2; j<5; j++) {
+ for(int k=0; k<ROUTER_TYPES; k++) {
+ for(int l=0;l<7; l++) {
+ int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
+ assert(fscanf(cont, "%[^\n]\n", line) != EOF);
+ sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d",jk, &temp[0], &temp[1], &temp[2], &temp[3],
+ &temp[4], &temp[5], &temp[6], &temp[7]);
+ }
+ }
+ }
+ }
+ fclose(cont);
+}
+
+ void
+Nuca::print_cont_stats()
+{
+ for(int i=0; i<2; i++) {
+ for(int j=2; j<5; j++) {
+ for(int k=0; k<ROUTER_TYPES; k++) {
+ for(int l=0;l<7; l++) {
+ for(int m=0;l<7; l++) {
+ cout << cont_stats[i][j][k][l][m] << " ";
+ }
+ cout << endl;
+ }
+ }
+ }
+ }
+ cout << endl;
+}
+
+Nuca::~Nuca(){
+ for (int i = wt_min; i <= wt_max; i++) {
+ delete wire_vertical[i];
+ delete wire_horizontal[i];
+ }
+}
+
+/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
+ int
+Nuca::calc_cycles(double lat, double oper_freq)
+{
+ //TODO: convert latch delay to FO4 */
+ double cycle_time = (1.0/(oper_freq*1e9)); /*s*/
+ cycle_time -= LATCH_DELAY;
+ cycle_time -= FIXED_OVERHEAD;
+
+ return (int)ceil(lat/cycle_time);
+}
+
+
+nuca_org_t::~nuca_org_t() {
+ // if(h_wire) delete h_wire;
+ // if(v_wire) delete v_wire;
+ // if(router) delete router;
+}
+
+/*
+ * Version - 6.0
+ *
+ * Perform exhaustive search across different bank organizatons,
+ * router configurations, grid organizations, and wire models and
+ * find an optimal NUCA organization
+ * For different bank count values
+ * 1. Optimal bank organization is calculated
+ * 2. For each bank organization, find different NUCA organizations
+ * using various router configurations, grid organizations,
+ * and wire models.
+ * 3. NUCA model with the least cost is picked for
+ * this particular bank count
+ * Finally include contention statistics and find the optimal
+ * NUCA configuration
+ */
+ void
+Nuca::sim_nuca()
+{
+ /* temp variables */
+ int it, ro, wr;
+ int num_cyc;
+ unsigned int i, j, k;
+ unsigned int r, c;
+ int l2_c;
+ int bank_count = 0;
+ uca_org_t ures;
+ nuca_org_t *opt_n;
+ mem_array tag, data;
+ list<nuca_org_t *> nuca_list;
+ Router *router_s[ROUTER_TYPES];
+ router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
+ router_s[0]->print_router();
+ router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
+ router_s[1]->print_router();
+ router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
+ router_s[2]->print_router();
+
+ int core_in; // to store no. of cores
+
+ /* to search diff grid organizations */
+ double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
+ curr_acclat;
+ double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
+ avg_leakage_power;
+
+ double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
+ int opt_rows = 0;
+ int opt_columns = 0;
+ double opt_totno_hops = 0;
+ double opt_avg_hop = 0;
+ double opt_dyn_power = 0, opt_leakage_power = 0;
+ min_values_t minval;
+
+ int bank_start = 0;
+
+ int flit_width = 0;
+
+ /* vertical and horizontal hop latency values */
+ int ver_hop_lat, hor_hop_lat; /* in cycles */
+
+
+ /* no. of different bank sizes to consider */
+ int iterations;
+
+
+ g_ip->nuca_cache_sz = g_ip->cache_sz;
+ nuca_list.push_back(new nuca_org_t());
+
+ if (g_ip->cache_level == 0) l2_c = 1;
+ else l2_c = 0;
+
+ if (g_ip->cores <= 4) core_in = 2;
+ else if (g_ip->cores <= 8) core_in = 3;
+ else if (g_ip->cores <= 16) core_in = 4;
+ else {cout << "Number of cores should be <= 16!\n"; exit(0);}
+
+
+ // set the lower bound to an appropriate value. this depends on cache associativity
+ if (g_ip->assoc > 2) {
+ i = 2;
+ while (i != g_ip->assoc) {
+ MIN_BANKSIZE *= 2;
+ i *= 2;
+ }
+ }
+
+ iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE);
+
+ if (g_ip->force_wiretype)
+ {
+ if (g_ip->wt == Low_swing) {
+ wt_min = Low_swing;
+ wt_max = Low_swing;
+ }
+ else {
+ wt_min = Global;
+ wt_max = Low_swing-1;
+ }
+ }
+ else {
+ wt_min = Global;
+ wt_max = Low_swing;
+ }
+ if (g_ip->nuca_bank_count != 0) { // simulate just one bank
+ if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
+ g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
+ g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
+ fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n");
+ }
+ bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
+ iterations = bank_start+1;
+ g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count;
+ }
+ cout << "Simulating various NUCA configurations\n";
+ for (it=bank_start; it<iterations; it++) { /* different bank count values */
+ ures.tag_array2 = &tag;
+ ures.data_array2 = &data;
+ /*
+ * find the optimal bank organization
+ */
+ solve(&ures);
+// output_UCA(&ures);
+ bank_count = g_ip->nuca_cache_sz/g_ip->cache_sz;
+ cout << "====" << g_ip->cache_sz << "\n";
+
+ for (wr=wt_min; wr<=wt_max; wr++) {
+
+ for (ro=0; ro<ROUTER_TYPES; ro++)
+ {
+ flit_width = (int) router_s[ro]->flit_size; //initialize router
+ nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
+
+ /* calculate router and wire parameters */
+
+ double vlength = ures.cache_ht; /* length of the wire (u)*/
+ double hlength = ures.cache_len; // u
+
+ /* find delay, area, and power for wires */
+ wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
+ wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
+
+
+ hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay,
+ 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
+ ver_hop_lat = calc_cycles(wire_vertical[wr]->delay,
+ 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
+
+ /*
+ * assume a grid like topology and explore for optimal network
+ * configuration using different row and column count values.
+ */
+ for (c=1; c<=(unsigned int)bank_count; c++) {
+ while (bank_count%c != 0) c++;
+ r = bank_count/c;
+
+ /*
+ * to find the avg access latency of a NUCA cache, uncontended
+ * access time to each bank from the
+ * cache controller is calculated.
+ * avg latency =
+ * sum of the access latencies to individual banks)/bank
+ * count value.
+ */
+ totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
+ k = 1;
+ for (i=0; i<r; i++) {
+ for (j=0; j<c; j++) {
+ /*
+ * vertical hops including the
+ * first hop from the cache controller
+ */
+ curr_hop = i + 1;
+ curr_hop += j; /* horizontal hops */
+ totno_hhops += j;
+ totno_vhops += (i+1);
+ curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
+ j * hor_hop_lat);
+
+ tot_lat += curr_acclat;
+ totno_hops += curr_hop;
+ }
+ }
+ avg_lat = tot_lat/bank_count;
+ avg_hop = totno_hops/bank_count;
+ avg_hhop = totno_hhops/bank_count;
+ avg_vhop = totno_vhops/bank_count;
+
+ /* net access latency */
+ curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) +
+ calc_cycles(ures.access_time,
+ 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
+
+ /* avg access lat of nuca */
+ avg_dyn_power =
+ avg_hop *
+ (router_s[ro]->power.readOp.dynamic) + avg_hhop *
+ (wire_horizontal[wr]->power.readOp.dynamic) *
+ (g_ip->block_sz*8 + 64) + avg_vhop *
+ (wire_vertical[wr]->power.readOp.dynamic) *
+ (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic;
+
+ avg_leakage_power =
+ bank_count * router_s[ro]->power.readOp.leakage +
+ avg_hhop * (wire_horizontal[wr]->power.readOp.leakage*
+ wire_horizontal[wr]->delay) * flit_width +
+ avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
+ wire_horizontal[wr]->delay);
+
+ if (curr_acclat < opt_acclat) {
+ opt_acclat = curr_acclat;
+ opt_tot_lat = tot_lat;
+ opt_avg_lat = avg_lat;
+ opt_totno_hops = totno_hops;
+ opt_avg_hop = avg_hop;
+ opt_rows = r;
+ opt_columns = c;
+ opt_dyn_power = avg_dyn_power;
+ opt_leakage_power = avg_leakage_power;
+ }
+ totno_hops = 0;
+ tot_lat = 0;
+ totno_hhops = 0;
+ totno_vhops = 0;
+ }
+ nuca_list.back()->wire_pda.power.readOp.dynamic =
+ opt_avg_hop * flit_width *
+ (wire_horizontal[wr]->power.readOp.dynamic +
+ wire_vertical[wr]->power.readOp.dynamic);
+ nuca_list.back()->avg_hops = opt_avg_hop;
+ /* network delay/power */
+ nuca_list.back()->h_wire = wire_horizontal[wr];
+ nuca_list.back()->v_wire = wire_vertical[wr];
+ nuca_list.back()->router = router_s[ro];
+ /* bank delay/power */
+
+ nuca_list.back()->bank_pda.delay = ures.access_time;
+ nuca_list.back()->bank_pda.power = ures.power;
+ nuca_list.back()->bank_pda.area.h = ures.cache_ht;
+ nuca_list.back()->bank_pda.area.w = ures.cache_len;
+ nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
+
+ num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
+ 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/));
+ if(num_cyc%2 != 0) num_cyc++;
+ if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
+
+ if (it < 7) {
+ nuca_list.back()->nuca_pda.delay = opt_acclat +
+ cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
+ nuca_list.back()->contention =
+ cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
+ }
+ else {
+ nuca_list.back()->nuca_pda.delay = opt_acclat +
+ cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
+ nuca_list.back()->contention =
+ cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
+ }
+ nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
+ nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
+
+ /* array organization */
+ nuca_list.back()->bank_count = bank_count;
+ nuca_list.back()->rows = opt_rows;
+ nuca_list.back()->columns = opt_columns;
+ calculate_nuca_area (nuca_list.back());
+
+ minval.update_min_values(nuca_list.back());
+ nuca_list.push_back(new nuca_org_t());
+ opt_acclat = BIGNUM;
+
+ }
+ }
+ g_ip->cache_sz /= 2;
+ }
+
+ delete(nuca_list.back());
+ nuca_list.pop_back();
+ opt_n = find_optimal_nuca(&nuca_list, &minval);
+ print_nuca(opt_n);
+ g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count;
+
+ list<nuca_org_t *>::iterator niter;
+ for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter)
+ {
+ delete *niter;
+ }
+ nuca_list.clear();
+
+ for(int i=0; i < ROUTER_TYPES; i++)
+ {
+ delete router_s[i];
+ }
+ g_ip->display_ip();
+ // g_ip->force_cache_config = true;
+ // g_ip->ndwl = 8;
+ // g_ip->ndbl = 16;
+ // g_ip->nspd = 4;
+ // g_ip->ndcm = 1;
+ // g_ip->ndsam1 = 8;
+ // g_ip->ndsam2 = 32;
+
+}
+
+
+ void
+Nuca::print_nuca (nuca_org_t *fr)
+{
+ printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
+ "----------\n\n");
+ printf("Optimal number of banks - %d\n", fr->bank_count);
+ printf("Grid organization rows x columns - %d x %d\n",
+ fr->rows, fr->columns);
+ printf("Network frequency - %g GHz\n",
+ (1/fr->nuca_pda.cycle_time)*1e3);
+ printf("Cache dimension (mm x mm) - %g x %g\n",
+ fr->nuca_pda.area.h,
+ fr->nuca_pda.area.w);
+
+ fr->router->print_router();
+
+ printf("\n\nWire stats:\n");
+ if (fr->h_wire->wt == Global) {
+ printf("\tWire type - Full swing global wires with least "
+ "possible delay\n");
+ }
+ else if (fr->h_wire->wt == Global_5) {
+ printf("\tWire type - Full swing global wires with "
+ "5%% delay penalty\n");
+ }
+ else if (fr->h_wire->wt == Global_10) {
+ printf("\tWire type - Full swing global wires with "
+ "10%% delay penalty\n");
+ }
+ else if (fr->h_wire->wt == Global_20) {
+ printf("\tWire type - Full swing global wires with "
+ "20%% delay penalty\n");
+ }
+ else if (fr->h_wire->wt == Global_30) {
+ printf("\tWire type - Full swing global wires with "
+ "30%% delay penalty\n");
+ }
+ else if(fr->h_wire->wt == Low_swing) {
+ printf("\tWire type - Low swing wires\n");
+ }
+
+ printf("\tHorizontal link delay - %g (ns)\n",
+ fr->h_wire->delay*1e9);
+ printf("\tVertical link delay - %g (ns)\n",
+ fr->v_wire->delay*1e9);
+ printf("\tDelay/length - %g (ns/mm)\n",
+ fr->h_wire->delay*1e9/fr->bank_pda.area.w);
+ printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
+ "\t -leakage %g (nW)\n\n",
+ fr->h_wire->power.readOp.dynamic*1e9,
+ fr->h_wire->power.readOp.leakage*1e9);
+ printf("\tVertical link energy -dynamic/access %g (nJ)\n"
+ "\t -leakage %g (nW)\n\n",
+ fr->v_wire->power.readOp.dynamic*1e9,
+ fr->v_wire->power.readOp.leakage*1e9);
+ printf("\n\n");
+ fr->v_wire->print_wire();
+ printf("\n\nBank stats:\n");
+}
+
+
+ nuca_org_t *
+Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
+{
+ double cost = 0;
+ double min_cost = BIGNUM;
+ nuca_org_t *res = NULL;
+ float d, a, dp, lp, c;
+ int v;
+ dp = g_ip->dynamic_power_wt_nuca;
+ lp = g_ip->leakage_power_wt_nuca;
+ a = g_ip->area_wt_nuca;
+ d = g_ip->delay_wt_nuca;
+ c = g_ip->cycle_time_wt_nuca;
+
+ list<nuca_org_t *>::iterator niter;
+
+
+ for (niter = n->begin(); niter != n->end(); niter++) {
+ fprintf(stderr, "\n-----------------------------"
+ "---------------\n");
+
+
+ printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
+ "bank_dpower = %g \tleak = %g \tcycle = %g\n",
+ (*niter)->bank_count,
+ (*niter)->nuca_pda.delay,
+ (*niter)->nuca_pda.power.readOp.dynamic,
+ (*niter)->h_wire->wt,
+ (*niter)->bank_pda.power.readOp.dynamic,
+ (*niter)->nuca_pda.power.readOp.leakage,
+ (*niter)->nuca_pda.cycle_time);
+
+
+ if (g_ip->ed == 1) {
+ cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
+ ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ }
+ else if (g_ip->ed == 2) {
+ cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
+ ((*niter)->nuca_pda.delay/minval->min_delay)*
+ ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ }
+ else {
+ /*
+ * check whether the current organization
+ * meets the input deviation constraints
+ */
+ v = check_nuca_org((*niter), minval);
+ if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
+
+ if (v) {
+ cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) +
+ c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) +
+ dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) +
+ lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) +
+ a * ((*niter)->nuca_pda.area.get_area()/minval->min_area));
+ fprintf(stderr, "cost = %g\n", cost);
+
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ }
+ else {
+ niter = n->erase(niter);
+ if (niter !=n->begin())
+ niter --;
+ }
+ }
+ }
+ return res;
+}
+
+ int
+Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
+{
+ if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
+ g_ip->leakage_power_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
+ g_ip->cycle_time_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 >
+ g_ip->area_dev_nuca) {
+ return 0;
+ }
+ return 1;
+}
+
+ void
+Nuca::calculate_nuca_area (nuca_org_t *nuca)
+{
+ nuca->nuca_pda.area.h=
+ nuca->rows * ((nuca->h_wire->wire_width +
+ nuca->h_wire->wire_spacing)
+ * nuca->router->flit_size +
+ nuca->bank_pda.area.h);
+
+ nuca->nuca_pda.area.w =
+ nuca->columns * ((nuca->v_wire->wire_width +
+ nuca->v_wire->wire_spacing)
+ * nuca->router->flit_size +
+ nuca->bank_pda.area.w);
+}
+
diff --git a/ext/mcpat/cacti/nuca.h b/ext/mcpat/cacti/nuca.h
new file mode 100644
index 000000000..adfe32564
--- /dev/null
+++ b/ext/mcpat/cacti/nuca.h
@@ -0,0 +1,100 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#ifndef __NUCA_H__
+#define __NUCA_H__
+
+#include <iostream>
+
+#include "assert.h"
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "io.h"
+#include "mat.h"
+#include "parameter.h"
+#include "router.h"
+#include "wire.h"
+
+class nuca_org_t {
+ public:
+ ~nuca_org_t();
+// int size;
+ /* area, power, access time, and cycle time stats */
+ Component nuca_pda;
+ Component bank_pda;
+ Component wire_pda;
+ Wire *h_wire;
+ Wire *v_wire;
+ Router *router;
+ /* for particular network configuration
+ * calculated based on a cycle accurate
+ * simulation Ref: CACTI 6 - Tech report
+ */
+ double contention;
+
+ /* grid network stats */
+ double avg_hops;
+ int rows;
+ int columns;
+ int bank_count;
+};
+
+
+
+class Nuca : public Component
+{
+ public:
+ Nuca(
+ TechnologyParameter::DeviceType *dt);
+ void print_router();
+ ~Nuca();
+ void sim_nuca();
+ void init_cont();
+ int calc_cycles(double lat, double oper_freq);
+ void calculate_nuca_area (nuca_org_t *nuca);
+ int check_nuca_org (nuca_org_t *n, min_values_t *minval);
+ nuca_org_t * find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval);
+ void print_nuca(nuca_org_t *n);
+ void print_cont_stats();
+
+ private:
+
+ TechnologyParameter::DeviceType *deviceType;
+ int wt_min, wt_max;
+ Wire *wire_vertical[WIRE_TYPES],
+ *wire_horizontal[WIRE_TYPES];
+
+};
+
+
+#endif
diff --git a/ext/mcpat/cacti/parameter.cc b/ext/mcpat/cacti/parameter.cc
new file mode 100644
index 000000000..b71640c19
--- /dev/null
+++ b/ext/mcpat/cacti/parameter.cc
@@ -0,0 +1,713 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <iomanip>
+#include <iostream>
+#include <string>
+
+#include "area.h"
+#include "parameter.h"
+
+using namespace std;
+
+
+InputParameter * g_ip;
+TechnologyParameter g_tp;
+
+
+
+void TechnologyParameter::DeviceType::display(uint32_t indent)
+{
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
+ cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl;
+ cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
+ cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl;
+ cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl;
+ cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl;
+ cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl;
+ cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl;
+ cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl;
+ cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl;
+ cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl;
+ cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl;
+ cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl;
+ cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl;
+ cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl;
+ cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl;
+ cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
+}
+
+
+
+void TechnologyParameter::InterconnectType::display(uint32_t indent)
+{
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
+ cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
+ cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
+}
+
+void TechnologyParameter::ScalingFactor::display(uint32_t indent)
+{
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
+ cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
+}
+
+void TechnologyParameter::MemoryType::display(uint32_t indent)
+{
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
+ cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl;
+ cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl;
+ cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
+ cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
+ cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl;
+}
+
+
+
+void TechnologyParameter::display(uint32_t indent)
+{
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
+ cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl;
+ cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl;
+ cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl;
+ cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl;
+ cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl;
+ cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl;
+ cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl;
+ cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl;
+ cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl;
+ cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl;
+ cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl;
+ cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl;
+ cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl;
+ cout << endl;
+ cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl;
+ cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl;
+ cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl;
+ cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl;
+ cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl;
+ cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl;
+ cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl;
+ cout << endl;
+ cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl;
+ cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl;
+ cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
+ cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl;
+ cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl;
+ cout << endl;
+ cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl;
+ cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl;
+ cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl;
+ cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl;
+ cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
+ cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl;
+ cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl;
+
+ cout << endl;
+ cout << indent_str << "SRAM cell transistor: " << endl;
+ sram_cell.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM access transistor: " << endl;
+ dram_acc.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM wordline transistor: " << endl;
+ dram_wl.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "peripheral global transistor: " << endl;
+ peri_global.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire local" << endl;
+ wire_local.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire inside mat" << endl;
+ wire_inside_mat.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire outside mat" << endl;
+ wire_outside_mat.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "SRAM" << endl;
+ sram.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM" << endl;
+ dram.display(indent + 2);
+}
+
+
+DynamicParameter::DynamicParameter():
+ use_inp_params(0), cell(), is_valid(true)
+{
+}
+
+
+
+DynamicParameter::DynamicParameter(
+ bool is_tag_,
+ int pure_ram_,
+ int pure_cam_,
+ double Nspd_,
+ unsigned int Ndwl_,
+ unsigned int Ndbl_,
+ unsigned int Ndcm_,
+ unsigned int Ndsam_lev_1_,
+ unsigned int Ndsam_lev_2_,
+ bool is_main_mem_):
+ is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_),
+ Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
+ number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
+ is_main_mem(is_main_mem_), cell(), is_valid(false)
+{
+ ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
+
+ unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer
+ const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
+ fully_assoc = (g_ip->fully_assoc) ? true : false;
+
+ if (fully_assoc || pure_cam)
+ { // fully-assocative cache -- ref: CACTi 2.0 report
+ if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
+ Ndcm != 1 || //Ndcm is fixed to 1 for FA
+ Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
+ Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
+ Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
+ Ndbl < 2)
+ {
+ return;
+ }
+ }
+
+ if ((is_dram) && (!is_tag) && (Ndcm > 1))
+ {
+ return; // For a DRAM array, each bitline has its own sense-amp
+ }
+
+ // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
+ // at least two because an array is assumed to have at least one mat. And a mat
+ // is formed out of two horizontal subarrays and two vertical subarrays
+ if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1))
+ {
+ return;
+ }
+
+ //***********compute row, col of an subarray
+ if (!(fully_assoc || pure_cam))//Not fully_asso nor cam
+ {
+ // if data array, let tagbits = 0
+ if (is_tag)
+ {
+ if (g_ip->specific_tag)
+ {
+ tagbits = g_ip->tag_w;
+ }
+ else
+ {
+ tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
+ _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks);
+
+ }
+ tagbits = (((tagbits + 3) >> 2) << 2);
+
+ num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
+ g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
+ num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
+ //burst_length = 1;
+ }
+ else
+ {
+ num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
+ g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
+ num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
+ // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
+ }
+
+ if (num_r_subarray < MINSUBARRAYROWS) return;
+ if (num_r_subarray == 0) return;
+ if (num_r_subarray > MAXSUBARRAYROWS) return;
+ if (num_c_subarray < MINSUBARRAYCOLS) return;
+ if (num_c_subarray > MAXSUBARRAYCOLS) return;
+
+ }
+
+ else
+ {//either fully-asso or cam
+ if (pure_cam)
+ {
+ if (g_ip->specific_tag)
+ {
+ tagbits = int(ceil(g_ip->tag_w/8.0)*8);
+ }
+ else
+ {
+ tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8);
+// cout<<"Pure CAM needs tag width to be specified"<<endl;
+// exit(0);
+ }
+ //tagbits = (((tagbits + 3) >> 2) << 2);
+
+ tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries.
+ //tag_num_c_subarray = (int)(tagbits + EPSILON);
+ tag_num_c_subarray = tagbits;
+ if (tag_num_r_subarray == 0) return;
+ if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
+ num_r_subarray = tag_num_r_subarray;
+ }
+ else //fully associative
+ {
+ if (g_ip->specific_tag)
+ {
+ tagbits = g_ip->tag_w;
+ }
+ else
+ {
+ tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
+ }
+ tagbits = (((tagbits + 3) >> 2) << 2);
+
+ tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl));
+ tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
+ if (tag_num_r_subarray == 0) return;
+ if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
+
+ data_num_r_subarray = tag_num_r_subarray;
+ data_num_c_subarray = 8 * g_ip->block_sz;
+ if (data_num_r_subarray == 0) return;
+ if (data_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (data_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
+ num_r_subarray = tag_num_r_subarray;
+ }
+ }
+
+ num_subarrays = Ndwl * Ndbl;
+ //****************end of computation of row, col of an subarray
+
+ // calculate wire parameters
+ if (fully_assoc || pure_cam)
+ {
+ cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
+ + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
+ cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
+ + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
+
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports)
+ + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)
+ + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
+ }
+ else
+ {
+ if(is_tag)
+ {
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
+ g_ip->num_wr_ports);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
+ (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
+ wire_local.pitch * g_ip->num_se_rd_ports;
+ }
+ else
+ {
+ if (is_dram)
+ {
+ cell.h = g_tp.dram.b_h;
+ cell.w = g_tp.dram.b_w;
+ }
+ else
+ {
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
+ g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
+ (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
+ g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
+ }
+ }
+ }
+
+ double c_b_metal = cell.h * wire_local.C_per_um;
+ double C_bl;
+
+ if (!(fully_assoc || pure_cam))
+ {
+ if (is_dram)
+ {
+ deg_bl_muxing = 1;
+ if (ram_cell_tech_type == comm_dram)
+ {
+ C_bl = num_r_subarray * c_b_metal;
+ V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl);
+ if (V_b_sense < VBITSENSEMIN)
+ {
+ return;
+ }
+ V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
+ dram_refresh_period = 64e-3;
+ }
+ else
+ {
+ double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl);
+
+ if (V_b_sense < VBITSENSEMIN)
+ {
+ return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
+ }
+ V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
+ //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
+ //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
+ dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
+ }
+ }
+ else
+ { //SRAM
+ V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
+ deg_bl_muxing = Ndcm;
+ // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
+ // contacts in a physical layout
+ double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ dram_refresh_period = 0;
+ }
+ }
+ else
+ {
+ c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
+ V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
+ deg_bl_muxing = 1;//FA fix as 1
+ // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
+ // contacts in a physical layout
+ double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ dram_refresh_period = 0;
+ }
+
+
+ // do/di: data in/out, for fully associative they are the data width for normal read and write
+ // so/si: search data in/out, for fully associative they are the data width for the search ops
+ // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
+ // so/si needs broadcase while do/di do not
+
+ if (fully_assoc || pure_cam)
+ {
+ switch (Ndbl) {
+ case (0):
+ cout << " Invalid Ndbl \n"<<endl;
+ exit(0);
+ break;
+ case (1):
+ num_mats_h_dir = 1;//one subarray per mat
+ num_mats_v_dir = 1;
+ break;
+ case (2):
+ num_mats_h_dir = 1;//two subarrays per mat
+ num_mats_v_dir = 1;
+ break;
+ default:
+ num_mats_h_dir = int(floor(sqrt(Ndbl/4.0)));//4 subbarrys per mat
+ num_mats_v_dir = int(Ndbl/4.0 / num_mats_h_dir);
+ }
+ num_mats = num_mats_h_dir * num_mats_v_dir;
+
+ if (fully_assoc)
+ {
+ num_so_b_mat = data_num_c_subarray;
+ num_do_b_mat = data_num_c_subarray + tagbits;
+ }
+ else
+ {
+ num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
+ num_do_b_mat = tagbits;
+ }
+ }
+ else
+ {
+ num_mats_h_dir = MAX(Ndwl / 2, 1);
+ num_mats_v_dir = MAX(Ndbl / 2, 1);
+ num_mats = num_mats_h_dir * num_mats_v_dir;
+ num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
+ }
+
+ if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats)))
+ {
+ return;
+ }
+
+
+ int deg_sa_mux_l1_non_assoc;
+ //TODO:the i/o for subbank is not necessary and should be removed.
+ if (!(fully_assoc || pure_cam))
+ {
+ if (!is_tag)
+ {
+ if (is_main_mem == true)
+ {
+ num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ }
+ else
+ {
+ if (g_ip->fast_access == true)
+ {
+ num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ }
+ else
+ {
+
+ num_do_b_subbank = g_ip->out_w;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
+ if (deg_sa_mux_l1_non_assoc < 1)
+ {
+ return;
+ }
+
+ }
+ }
+ }
+ else
+ {
+ num_do_b_subbank = tagbits * g_ip->tag_assoc;
+ if (num_do_b_mat < tagbits)
+ {
+ return;
+ }
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
+ }
+ }
+ else
+ {
+ if (fully_assoc)
+ {
+ num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
+ num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
+ }
+ else
+ {
+ num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
+ num_do_b_subbank = tag_num_c_subarray;
+ }
+
+ deg_sa_mux_l1_non_assoc = 1;
+ }
+
+ deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
+
+ if (fully_assoc || pure_cam)
+ {
+ num_act_mats_hor_dir = 1;
+ num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
+ }
+ else
+ {
+ num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
+ if (num_act_mats_hor_dir == 0)
+ {
+ return;
+ }
+ }
+
+ //compute num_do_mat for tag
+ if (is_tag)
+ {
+ if (!(fully_assoc || pure_cam))
+ {
+ num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
+ num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
+ }
+ }
+
+ if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram))
+ {
+ if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits)
+ {
+ return;
+ }
+ }
+
+// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
+ if (is_tag == false && g_ip->is_main_mem == true &&
+ num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc))
+ {
+ return;
+ }
+
+ if (num_act_mats_hor_dir > num_mats_h_dir)
+ {
+ return;
+ }
+
+
+ //compute di for mat subbank and bank
+ if (!(fully_assoc ||pure_cam))
+ {
+ if(!is_tag)
+ {
+ if(g_ip->fast_access == true)
+ {
+ num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
+ }
+ else
+ {
+ num_di_b_mat = num_do_b_mat;
+ }
+ }
+ else
+ {
+ num_di_b_mat = tagbits;
+ }
+ }
+ else
+ {
+ if (fully_assoc)
+ {
+ num_di_b_mat = num_do_b_mat;
+ //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
+ //but inside the mat wire tracks need to be reserved for search data bus
+ num_si_b_mat = tagbits;
+ }
+ else
+ {
+ num_di_b_mat = tagbits;
+ num_si_b_mat = tagbits;//*num_subarrays/num_mats;
+ }
+
+ }
+
+ num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
+ num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
+
+ int num_addr_b_row_dec = _log2(num_r_subarray);
+ if ((fully_assoc ||pure_cam))
+ num_addr_b_row_dec +=_log2(num_subarrays/num_mats);
+ int number_subbanks = num_mats / num_act_mats_hor_dir;
+ number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
+
+ num_rw_ports = g_ip->num_rw_ports;
+ num_rd_ports = g_ip->num_rd_ports;
+ num_wr_ports = g_ip->num_wr_ports;
+ num_se_rd_ports = g_ip->num_se_rd_ports;
+ num_search_ports = g_ip->num_search_ports;
+
+ if (is_dram && is_main_mem)
+ {
+ number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
+ _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
+ }
+ else
+ {
+ number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
+ _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
+ }
+
+ if (!(fully_assoc ||pure_cam))
+ {
+ if (is_tag)
+ {
+ num_di_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = g_ip->data_assoc;
+ }
+ else
+ {
+ num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
+ num_do_b_bank_per_port = g_ip->out_w;
+ }
+ }
+ else
+ {
+ if (fully_assoc)
+ {
+ num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
+ num_si_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = g_ip->out_w + tagbits;
+ num_so_b_bank_per_port = g_ip->out_w;
+ }
+ else
+ {
+ num_di_b_bank_per_port = tagbits;
+ num_si_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = tagbits;
+ num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
+ }
+ }
+
+ if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access))
+ {
+ number_way_select_signals_mat = g_ip->data_assoc;
+ }
+
+ // add ECC adjustment to all data signals that traverse on H-trees.
+ if (g_ip->add_ecc_b_ == true)
+ {
+ num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
+ num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
+ num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
+ num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
+ num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
+ num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
+
+ num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
+ num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
+ num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
+ num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
+ num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
+ num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
+ }
+
+ is_valid = true;
+}
+
diff --git a/ext/mcpat/cacti/parameter.h b/ext/mcpat/cacti/parameter.h
new file mode 100644
index 000000000..9c827bbc8
--- /dev/null
+++ b/ext/mcpat/cacti/parameter.h
@@ -0,0 +1,367 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __PARAMETER_H__
+#define __PARAMETER_H__
+
+#include "area.h"
+#include "cacti_interface.h"
+#include "const.h"
+#include "io.h"
+
+// parameters which are functions of certain device technology
+class TechnologyParameter
+{
+ public:
+ class DeviceType
+ {
+ public:
+ double C_g_ideal;
+ double C_fringe;
+ double C_overlap;
+ double C_junc; // C_junc_area
+ double C_junc_sidewall;
+ double l_phy;
+ double l_elec;
+ double R_nch_on;
+ double R_pch_on;
+ double Vdd;
+ double Vth;
+ double I_on_n;
+ double I_on_p;
+ double I_off_n;
+ double I_off_p;
+ double I_g_on_n;
+ double I_g_on_p;
+ double C_ox;
+ double t_ox;
+ double n_to_p_eff_curr_drv_ratio;
+ double long_channel_leakage_reduction;
+
+ DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
+ C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
+ Vdd(0), Vth(0),
+ I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0),
+ C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0) { };
+ void reset()
+ {
+ C_g_ideal = 0;
+ C_fringe = 0;
+ C_overlap = 0;
+ C_junc = 0;
+ l_phy = 0;
+ l_elec = 0;
+ R_nch_on = 0;
+ R_pch_on = 0;
+ Vdd = 0;
+ Vth = 0;
+ I_on_n = 0;
+ I_on_p = 0;
+ I_off_n = 0;
+ I_off_p = 0;
+ I_g_on_n = 0;
+ I_g_on_p = 0;
+ C_ox = 0;
+ t_ox = 0;
+ n_to_p_eff_curr_drv_ratio = 0;
+ long_channel_leakage_reduction = 0;
+ }
+
+ void display(uint32_t indent = 0);
+ };
+ class InterconnectType
+ {
+ public:
+ double pitch;
+ double R_per_um;
+ double C_per_um;
+ double horiz_dielectric_constant;
+ double vert_dielectric_constant;
+ double aspect_ratio;
+ double miller_value;
+ double ild_thickness;
+
+ InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
+
+ void reset()
+ {
+ pitch = 0;
+ R_per_um = 0;
+ C_per_um = 0;
+ horiz_dielectric_constant = 0;
+ vert_dielectric_constant = 0;
+ aspect_ratio = 0;
+ miller_value = 0;
+ ild_thickness = 0;
+ }
+
+ void display(uint32_t indent = 0);
+ };
+ class MemoryType
+ {
+ public:
+ double b_w;
+ double b_h;
+ double cell_a_w;
+ double cell_pmos_w;
+ double cell_nmos_w;
+ double Vbitpre;
+
+ void reset()
+ {
+ b_w = 0;
+ b_h = 0;
+ cell_a_w = 0;
+ cell_pmos_w = 0;
+ cell_nmos_w = 0;
+ Vbitpre = 0;
+ }
+
+ void display(uint32_t indent = 0);
+ };
+
+ class ScalingFactor
+ {
+ public:
+ double logic_scaling_co_eff;
+ double core_tx_density;
+ double long_channel_leakage_reduction;
+
+ ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
+ long_channel_leakage_reduction(0) { };
+
+ void reset()
+ {
+ logic_scaling_co_eff= 0;
+ core_tx_density = 0;
+ long_channel_leakage_reduction= 0;
+ }
+
+ void display(uint32_t indent = 0);
+ };
+
+ double ram_wl_stitching_overhead_;
+ double min_w_nmos_;
+ double max_w_nmos_;
+ double max_w_nmos_dec;
+ double unit_len_wire_del;
+ double FO4;
+ double kinv;
+ double vpp;
+ double w_sense_en;
+ double w_sense_n;
+ double w_sense_p;
+ double sense_delay;
+ double sense_dy_power;
+ double w_iso;
+ double w_poly_contact;
+ double spacing_poly_to_poly;
+ double spacing_poly_to_contact;
+
+ double w_comp_inv_p1;
+ double w_comp_inv_p2;
+ double w_comp_inv_p3;
+ double w_comp_inv_n1;
+ double w_comp_inv_n2;
+ double w_comp_inv_n3;
+ double w_eval_inv_p;
+ double w_eval_inv_n;
+ double w_comp_n;
+ double w_comp_p;
+
+ double dram_cell_I_on;
+ double dram_cell_Vdd;
+ double dram_cell_I_off_worst_case_len_temp;
+ double dram_cell_C;
+ double gm_sense_amp_latch;
+
+ double w_nmos_b_mux;
+ double w_nmos_sa_mux;
+ double w_pmos_bl_precharge;
+ double w_pmos_bl_eq;
+ double MIN_GAP_BET_P_AND_N_DIFFS;
+ double MIN_GAP_BET_SAME_TYPE_DIFFS;
+ double HPOWERRAIL;
+ double cell_h_def;
+
+ double chip_layout_overhead;
+ double macro_layout_overhead;
+ double sckt_co_eff;
+
+ double fringe_cap;
+
+ uint64_t h_dec;
+
+ DeviceType sram_cell; // SRAM cell transistor
+ DeviceType dram_acc; // DRAM access transistor
+ DeviceType dram_wl; // DRAM wordline transistor
+ DeviceType peri_global; // peripheral global
+ DeviceType cam_cell; // SRAM cell transistor
+
+ InterconnectType wire_local;
+ InterconnectType wire_inside_mat;
+ InterconnectType wire_outside_mat;
+
+ ScalingFactor scaling_factor;
+
+ MemoryType sram;
+ MemoryType dram;
+ MemoryType cam;
+
+ void display(uint32_t indent = 0);
+
+ void reset()
+ {
+ dram_cell_Vdd = 0;
+ dram_cell_I_on = 0;
+ dram_cell_C = 0;
+ vpp = 0;
+
+ sense_delay = 0;
+ sense_dy_power = 0;
+ fringe_cap = 0;
+// horiz_dielectric_constant = 0;
+// vert_dielectric_constant = 0;
+// aspect_ratio = 0;
+// miller_value = 0;
+// ild_thickness = 0;
+
+ dram_cell_I_off_worst_case_len_temp = 0;
+
+ sram_cell.reset();
+ dram_acc.reset();
+ dram_wl.reset();
+ peri_global.reset();
+ cam_cell.reset();
+
+ scaling_factor.reset();
+
+ wire_local.reset();
+ wire_inside_mat.reset();
+ wire_outside_mat.reset();
+
+ sram.reset();
+ dram.reset();
+ cam.reset();
+
+ chip_layout_overhead = 0;
+ macro_layout_overhead = 0;
+ sckt_co_eff = 0;
+ }
+};
+
+
+
+class DynamicParameter
+{
+ public:
+ bool is_tag;
+ bool pure_ram;
+ bool pure_cam;
+ bool fully_assoc;
+ int tagbits;
+ int num_subarrays; // only for leakage computation -- the number of subarrays per bank
+ int num_mats; // only for leakage computation -- the number of mats per bank
+ double Nspd;
+ int Ndwl;
+ int Ndbl;
+ int Ndcm;
+ int deg_bl_muxing;
+ int deg_senseamp_muxing_non_associativity;
+ int Ndsam_lev_1;
+ int Ndsam_lev_2;
+ int number_addr_bits_mat; // per port
+ int number_subbanks_decode; // per_port
+ int num_di_b_bank_per_port;
+ int num_do_b_bank_per_port;
+ int num_di_b_mat;
+ int num_do_b_mat;
+ int num_di_b_subbank;
+ int num_do_b_subbank;
+
+ int num_si_b_mat;
+ int num_so_b_mat;
+ int num_si_b_subbank;
+ int num_so_b_subbank;
+ int num_si_b_bank_per_port;
+ int num_so_b_bank_per_port;
+
+ int number_way_select_signals_mat;
+ int num_act_mats_hor_dir;
+
+ int num_act_mats_hor_dir_sl;
+ bool is_dram;
+ double V_b_sense;
+ unsigned int num_r_subarray;
+ unsigned int num_c_subarray;
+ int tag_num_r_subarray;//sheng: fully associative cache tag and data must be computed together, data and tag must be separate
+ int tag_num_c_subarray;
+ int data_num_r_subarray;
+ int data_num_c_subarray;
+ int num_mats_h_dir;
+ int num_mats_v_dir;
+ uint32_t ram_cell_tech_type;
+ double dram_refresh_period;
+
+ DynamicParameter();
+ DynamicParameter(
+ bool is_tag_,
+ int pure_ram_,
+ int pure_cam_,
+ double Nspd_,
+ unsigned int Ndwl_,
+ unsigned int Ndbl_,
+ unsigned int Ndcm_,
+ unsigned int Ndsam_lev_1_,
+ unsigned int Ndsam_lev_2_,
+ bool is_main_mem_);
+
+ int use_inp_params;
+ unsigned int num_rw_ports;
+ unsigned int num_rd_ports;
+ unsigned int num_wr_ports;
+ unsigned int num_se_rd_ports; // number of single ended read ports
+ unsigned int num_search_ports;
+ unsigned int out_w;// == nr_bits_out
+ bool is_main_mem;
+ Area cell, cam_cell;//cell is the sram_cell in both nomal cache/ram and FA.
+ bool is_valid;
+};
+
+
+
+extern InputParameter * g_ip;
+extern TechnologyParameter g_tp;
+
+#endif
+
diff --git a/ext/mcpat/cacti/router.cc b/ext/mcpat/cacti/router.cc
new file mode 100644
index 000000000..06f170691
--- /dev/null
+++ b/ext/mcpat/cacti/router.cc
@@ -0,0 +1,311 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include "router.h"
+
+Router::Router(
+ double flit_size_,
+ double vc_buf, /* vc size = vc_buffer_size * flit_size */
+ double vc_c,
+ TechnologyParameter::DeviceType *dt,
+ double I_,
+ double O_,
+ double M_
+ ):flit_size(flit_size_),
+ deviceType(dt),
+ I(I_),
+ O(O_),
+ M(M_)
+{
+ vc_buffer_size = vc_buf;
+ vc_count = vc_c;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
+ double technology = g_ip->F_sz_um;
+
+ Vdd = dt->Vdd;
+
+ /*Crossbar parameters. Transmisson gate is employed for connector*/
+ NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/
+ PTtr = 20*technology*1e-6/2; /* pmos tr. length*/
+ wt = 15*technology*1e-6/2; /*track width*/
+ ht = 15*technology*1e-6/2; /*track height*/
+// I = 5; /*Number of crossbar input ports*/
+// O = 5; /*Number of crossbar output ports*/
+ NTi = 12.5*technology*1e-6/2;
+ PTi = 25*technology*1e-6/2;
+
+ NTid = 60*technology*1e-6/2; //m
+ PTid = 120*technology*1e-6/2; // m
+ NTod = 60*technology*1e-6/2; // m
+ PTod = 120*technology*1e-6/2; // m
+
+ calc_router_parameters();
+}
+
+Router::~Router(){}
+
+
+double //wire cap with triple spacing
+Router::Cw3(double length) {
+ Wire wc(g_ip->wt, length, 1, 3, 3);
+ return (wc.wire_cap(length));
+}
+
+/*Function to calculate the gate capacitance*/
+double
+Router::gate_cap(double w) {
+ return (double) gate_C (w*1e6 /*u*/, 0);
+}
+
+/*Function to calculate the diffusion capacitance*/
+double
+Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
+ double s /*number of stacking transistors*/) {
+ return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
+}
+
+
+/*crossbar related functions */
+
+// Model for simple transmission gate
+double
+Router::transmission_buf_inpcap() {
+ return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
+}
+
+double
+Router::transmission_buf_outcap() {
+ return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
+}
+
+double
+Router::transmission_buf_ctrcap() {
+ return gate_cap(NTtr)+gate_cap(PTtr);
+}
+
+double
+Router::crossbar_inpline() {
+ return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
+ gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
+}
+
+double
+Router::crossbar_outline() {
+ return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
+ gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
+}
+
+double
+Router::crossbar_ctrline() {
+ return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
+ diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
+ gate_cap(NTi) + gate_cap(PTi));
+}
+
+double
+Router::tr_crossbar_power() {
+ return (crossbar_inpline()*Vdd*Vdd*flit_size/2 +
+ crossbar_outline()*Vdd*Vdd*flit_size/2)*2;
+}
+
+void Router::buffer_stats()
+{
+ DynamicParameter dyn_p;
+ dyn_p.is_tag = false;
+ dyn_p.pure_cam = false;
+ dyn_p.fully_assoc = false;
+ dyn_p.pure_ram = true;
+ dyn_p.is_dram = false;
+ dyn_p.is_main_mem = false;
+ dyn_p.num_subarrays = 1;
+ dyn_p.num_mats = 1;
+ dyn_p.Ndbl = 1;
+ dyn_p.Ndwl = 1;
+ dyn_p.Nspd = 1;
+ dyn_p.deg_bl_muxing = 1;
+ dyn_p.deg_senseamp_muxing_non_associativity = 1;
+ dyn_p.Ndsam_lev_1 = 1;
+ dyn_p.Ndsam_lev_2 = 1;
+ dyn_p.Ndcm = 1;
+ dyn_p.number_addr_bits_mat = 8;
+ dyn_p.number_way_select_signals_mat = 1;
+ dyn_p.number_subbanks_decode = 0;
+ dyn_p.num_act_mats_hor_dir = 1;
+ dyn_p.V_b_sense = Vdd; // FIXME check power calc.
+ dyn_p.ram_cell_tech_type = 0;
+ dyn_p.num_r_subarray = (int) vc_buffer_size;
+ dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
+ dyn_p.num_mats_h_dir = 1;
+ dyn_p.num_mats_v_dir = 1;
+ dyn_p.num_do_b_subbank = (int)flit_size;
+ dyn_p.num_di_b_subbank = (int)flit_size;
+ dyn_p.num_do_b_mat = (int) flit_size;
+ dyn_p.num_di_b_mat = (int) flit_size;
+ dyn_p.num_do_b_mat = (int) flit_size;
+ dyn_p.num_di_b_mat = (int) flit_size;
+ dyn_p.num_do_b_bank_per_port = (int) flit_size;
+ dyn_p.num_di_b_bank_per_port = (int) flit_size;
+ dyn_p.out_w = (int) flit_size;
+
+ dyn_p.use_inp_params = 1;
+ dyn_p.num_wr_ports = (unsigned int) vc_count;
+ dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
+ dyn_p.num_rw_ports = 0;
+ dyn_p.num_se_rd_ports =0;
+ dyn_p.num_search_ports =0;
+
+
+
+ dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
+ dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
+ dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
+ (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
+ dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
+
+ Mat buff(dyn_p);
+ buff.compute_delays(0);
+ buff.compute_power_energy();
+ buffer.power.readOp = buff.power.readOp;
+ buffer.power.writeOp = buffer.power.readOp; //FIXME
+ buffer.area = buff.area;
+}
+
+
+
+ void
+Router::cb_stats ()
+{
+ if (1) {
+ Crossbar c_b(I, O, flit_size);
+ c_b.compute_power();
+ crossbar.delay = c_b.delay;
+ crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
+ crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
+ crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
+ crossbar.area = c_b.area;
+// c_b.print_crossbar();
+ }
+ else {
+ crossbar.power.readOp.dynamic = tr_crossbar_power();
+ crossbar.power.readOp.leakage = flit_size * I * O *
+ cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
+ crossbar.power.readOp.gate_leakage = flit_size * I * O *
+ cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
+ }
+}
+
+void
+Router::get_router_power()
+{
+ /* calculate buffer stats */
+ buffer_stats();
+
+ /* calculate cross-bar stats */
+ cb_stats();
+
+ /* calculate arbiter stats */
+ Arbiter vcarb(vc_count, flit_size, buffer.area.w);
+ Arbiter cbarb(I, flit_size, crossbar.area.w);
+ vcarb.compute_power();
+ cbarb.compute_power();
+ arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
+ cbarb.power.readOp.dynamic * O;
+ arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
+ cbarb.power.readOp.leakage * O;
+ arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
+ cbarb.power.readOp.gate_leakage * O;
+
+// arb_stats();
+ power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
+ crossbar.power.readOp.dynamic +
+ arbiter.power.readOp.dynamic)*MIN(I, O)*M;
+ double pppm_t[4] = {1,I,I,1};
+ power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
+
+}
+
+ void
+Router::get_router_delay ()
+{
+ FREQUENCY=5; // move this to config file --TODO
+ cycle_time = (1/(double)FREQUENCY)*1e3; //ps
+ delay = 4;
+ max_cyc = 17 * g_tp.FO4; //s
+ max_cyc *= 1e12; //ps
+ if (cycle_time < max_cyc) {
+ FREQUENCY = (1/max_cyc)*1e3; //GHz
+ }
+}
+
+ void
+Router::get_router_area()
+{
+ area.h = I*buffer.area.h;
+ area.w = buffer.area.w+crossbar.area.w;
+}
+
+ void
+Router::calc_router_parameters()
+{
+ /* calculate router frequency and pipeline cycles */
+ get_router_delay();
+
+ /* router power stats */
+ get_router_power();
+
+ /* area stats */
+ get_router_area();
+}
+
+ void
+Router::print_router()
+{
+ cout << "\n\nRouter stats:\n";
+ cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n";
+ cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n";
+ cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n";
+ cout << "\tNo. of Virtual channels - " << vc_count << "\n";
+ cout << "\tNo. of pipeline stages - " << delay << endl;
+ cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
+ cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n";
+ cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n";
+ cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n";
+ cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n";
+ cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n";
+ cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n";
+ cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n";
+ cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<<arbiter.power.readOp.dynamic * 1e9 <<" (nJ)\n";
+ cout << "\tArbiter leakage (VC arb + Crossbar arb) - "<<arbiter.power.readOp.leakage * 1e3 <<" (mW)\n";
+
+}
+
diff --git a/ext/mcpat/cacti/router.h b/ext/mcpat/cacti/router.h
new file mode 100644
index 000000000..72ef44939
--- /dev/null
+++ b/ext/mcpat/cacti/router.h
@@ -0,0 +1,115 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __ROUTER_H__
+#define __ROUTER_H__
+
+#include <assert.h>
+
+#include <iostream>
+
+#include "arbiter.h"
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "crossbar.h"
+#include "mat.h"
+#include "parameter.h"
+#include "wire.h"
+
+class Router : public Component
+{
+ public:
+ Router(
+ double flit_size_,
+ double vc_buf, /* vc size = vc_buffer_size * flit_size */
+ double vc_count,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global),
+ double I_ = 5,
+ double O_ = 5,
+ double M_ = 0.6);
+ ~Router();
+
+
+ void print_router();
+
+ Component arbiter, crossbar, buffer;
+
+ double cycle_time, max_cyc;
+ double flit_size;
+ double vc_count;
+ double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
+
+ private:
+ TechnologyParameter::DeviceType *deviceType;
+ double FREQUENCY; // move this to config file --TODO
+ double Cw3(double len);
+ double gate_cap(double w);
+ double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
+ enum Wire_type wtype;
+ enum Wire_placement wire_placement;
+ //corssbar
+ double NTtr, PTtr, wt, ht, I, O, NTi, PTi, NTid, PTid, NTod, PTod, TriS1, TriS2;
+ double M; //network load
+ double transmission_buf_inpcap();
+ double transmission_buf_outcap();
+ double transmission_buf_ctrcap();
+ double crossbar_inpline();
+ double crossbar_outline();
+ double crossbar_ctrline();
+ double tr_crossbar_power();
+ void cb_stats ();
+ double arb_power();
+ void arb_stats ();
+ double buffer_params();
+ void buffer_stats();
+
+
+ //arbiter
+
+ //buffer
+
+ //router params
+ double Vdd;
+
+ void calc_router_parameters();
+ void get_router_area();
+ void get_router_power();
+ void get_router_delay();
+
+ double min_w_pmos;
+
+
+};
+
+#endif
diff --git a/ext/mcpat/cacti/subarray.cc b/ext/mcpat/cacti/subarray.cc
new file mode 100755
index 000000000..7cbf7d990
--- /dev/null
+++ b/ext/mcpat/cacti/subarray.cc
@@ -0,0 +1,196 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+
+#include "subarray.h"
+
+Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
+ dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
+ num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
+ cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_)
+{
+ //num_cols=7;
+ //cout<<"num_cols ="<< num_cols <<endl;
+ if (!(is_fa || dp.pure_cam))
+ {
+ num_cols +=(g_ip->add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead
+ uint32_t ram_num_cells_wl_stitching =
+ (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
+ (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
+
+ area.h = cell.h * num_rows;
+
+ area.w = cell.w * num_cols +
+ ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
+ }
+ else //cam fa
+ {
+
+ //should not add dummy row here since the dummy row do not need decoder
+ if (is_fa)// fully associative cache
+ {
+ num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
+ num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
+ num_cols = num_cols_fa_cam + num_cols_fa_ram;
+ }
+ else
+ {
+ num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
+ num_cols_fa_ram = 0;
+ num_cols = num_cols_fa_cam;
+ }
+
+ area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
+ area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
+ + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_
+ + 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves
+ + 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM
+ }
+
+ assert(area.h>0);
+ assert(area.w>0);
+ compute_C();
+}
+
+
+
+Subarray::~Subarray()
+{
+}
+
+
+
+double Subarray::get_total_cell_area()
+{
+// return (is_fa==false? cell.get_area() * num_rows * num_cols
+// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
+// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
+// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
+
+ if (!(is_fa || dp.pure_cam))
+ return (cell.get_area() * num_rows * num_cols);
+ else if (is_fa)
+ { //for FA, this area includes the dummy cells in SRAM arrays.
+ //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
+ //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
+ return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
+ }
+ else
+ return (cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam );
+
+
+}
+
+
+
+void Subarray::compute_C()
+{
+ double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
+ double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
+ double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
+ double C_b_row_drain_C;
+
+ if (dp.is_dram)
+ {
+ C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
+
+ if (dp.ram_cell_tech_type == comm_dram)
+ {
+ C_bl = num_rows * C_b_metal;
+ }
+ else
+ {
+ C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
+ C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
+ }
+ }
+ else
+ {
+ if (!(is_fa ||dp.pure_cam))
+ {
+ C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
+ c_w_metal) * num_cols;
+ C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
+ C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
+ }
+ else
+ {
+ //Following is wordline not matchline
+ //CAM portion
+ c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
+ r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 +
+ c_w_metal) * num_cols_fa_cam;
+ R_wl_cam = (r_w_metal) * num_cols_fa_cam;
+
+ if (!dp.pure_cam)
+ {
+ //RAM portion
+ c_w_metal = cell.w * g_tp.wire_local.C_per_um;
+ r_w_metal = cell.w * g_tp.wire_local.R_per_um;
+ C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
+ c_w_metal) * num_cols_fa_ram;
+ R_wl_ram = (r_w_metal) * num_cols_fa_ram;
+ }
+ else
+ {
+ C_wl_ram = R_wl_ram =0;
+ }
+ C_wl = C_wl_cam + C_wl_ram;
+ C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um;
+
+ R_wl = R_wl_cam + R_wl_ram;
+ R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um;
+
+ //there are two ways to write to a FA,
+ //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
+ //2) using separate wordline for read/write and search in RAM.
+ //We are using the second approach.
+
+ //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
+ C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
+ C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
+ C_bl_cam = (num_rows+1) * (C_b_row_drain_C + C_b_metal);
+ //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
+ C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
+ C_bl = (num_rows +1) * (C_b_row_drain_C + C_b_metal);
+
+ }
+ }
+}
+
+
diff --git a/ext/mcpat/cacti/subarray.h b/ext/mcpat/cacti/subarray.h
new file mode 100755
index 000000000..5fb062420
--- /dev/null
+++ b/ext/mcpat/cacti/subarray.h
@@ -0,0 +1,70 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __SUBARRAY_H__
+#define __SUBARRAY_H__
+
+#include "area.h"
+#include "component.h"
+#include "parameter.h"
+
+using namespace std;
+
+
+class Subarray : public Component
+{
+ public:
+ Subarray(const DynamicParameter & dp, bool is_fa_);
+ ~Subarray();
+
+ const DynamicParameter & dp;
+ double get_total_cell_area();
+ unsigned int num_rows;
+ unsigned int num_cols;
+ int32_t num_cols_fa_cam;
+ int32_t num_cols_fa_ram;
+ Area cell, cam_cell;
+
+ bool is_fa;
+ double C_wl, C_wl_cam, C_wl_ram;
+ double R_wl, R_wl_cam, R_wl_ram;
+ double C_bl, C_bl_cam;
+ private:
+
+ void compute_C(); // compute bitline and wordline capacitance
+};
+
+
+
+#endif
+
diff --git a/ext/mcpat/cacti/technology.cc b/ext/mcpat/cacti/technology.cc
new file mode 100644
index 000000000..a40c6eb44
--- /dev/null
+++ b/ext/mcpat/cacti/technology.cc
@@ -0,0 +1,2921 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#include "basic_circuit.h"
+
+#include "parameter.h"
+
+double wire_resistance(double resistivity, double wire_width, double wire_thickness,
+ double barrier_thickness, double dishing_thickness, double alpha_scatter)
+{
+ double resistance;
+ resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness));
+ return(resistance);
+}
+
+double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing,
+ double ild_thickness, double miller_value, double horiz_dielectric_constant,
+ double vert_dielectric_constant, double fringe_cap)
+{
+ double vertical_cap, sidewall_cap, total_cap;
+ vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness;
+ sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing;
+ total_cap = vertical_cap + sidewall_cap + fringe_cap;
+ return(total_cap);
+}
+
+
+void init_tech_params(double technology, bool is_tag)
+{
+ int iter, tech, tech_lo, tech_hi;
+ double curr_alpha, curr_vpp;
+ double wire_width, wire_thickness, wire_spacing,
+ fringe_cap, pmos_to_nmos_sizing_r;
+// double aspect_ratio,ild_thickness, miller_value = 1.5, horiz_dielectric_constant, vert_dielectric_constant;
+ double barrier_thickness, dishing_thickness, alpha_scatter;
+ double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell;
+
+ uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
+ uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type;
+
+ technology = technology * 1000.0; // in the unit of nm
+
+ // initialize parameters
+ g_tp.reset();
+ double gmp_to_gmn_multiplier_periph_global = 0;
+
+ double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram,
+ curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram,
+ curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram,
+ curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp;
+ double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data
+ curr_asp_ratio_cell_cam;
+ double SENSE_AMP_D, SENSE_AMP_P; // J
+ double area_cell_dram = 0;
+ double asp_ratio_cell_dram = 0;
+ double area_cell_sram = 0;
+ double asp_ratio_cell_sram = 0;
+ double area_cell_cam = 0;
+ double asp_ratio_cell_cam = 0;
+ double mobility_eff_periph_global = 0;
+ double Vdsat_periph_global = 0;
+ double nmos_effective_resistance_multiplier;
+ double width_dram_access_transistor;
+
+ double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date
+ double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn
+ double curr_chip_layout_overhead = 0;
+ double curr_macro_layout_overhead = 0;
+ double curr_sckt_co_eff = 0;
+
+ if (technology < 181 && technology > 179)
+ {
+ tech_lo = 180;
+ tech_hi = 180;
+ }
+ else if (technology < 91 && technology > 89)
+ {
+ tech_lo = 90;
+ tech_hi = 90;
+ }
+ else if (technology < 66 && technology > 64)
+ {
+ tech_lo = 65;
+ tech_hi = 65;
+ }
+ else if (technology < 46 && technology > 44)
+ {
+ tech_lo = 45;
+ tech_hi = 45;
+ }
+ else if (technology < 33 && technology > 31)
+ {
+ tech_lo = 32;
+ tech_hi = 32;
+ }
+ else if (technology < 23 && technology > 21)
+ {
+ tech_lo = 22;
+ tech_hi = 22;
+ if (ram_cell_tech_type == 3 )
+ {
+ cout<<"current version does not support eDRAM technologies at 22nm"<<endl;
+ exit(0);
+ }
+ }
+// else if (technology < 17 && technology > 15)
+// {
+// tech_lo = 16;
+// tech_hi = 16;
+// }
+ else if (technology < 180 && technology > 90)
+ {
+ tech_lo = 180;
+ tech_hi = 90;
+ }
+ else if (technology < 90 && technology > 65)
+ {
+ tech_lo = 90;
+ tech_hi = 65;
+ }
+ else if (technology < 65 && technology > 45)
+ {
+ tech_lo = 65;
+ tech_hi = 45;
+ }
+ else if (technology < 45 && technology > 32)
+ {
+ tech_lo = 45;
+ tech_hi = 32;
+ }
+ else if (technology < 32 && technology > 22)
+ {
+ tech_lo = 32;
+ tech_hi = 22;
+ }
+// else if (technology < 22 && technology > 16)
+// {
+// tech_lo = 22;
+// tech_hi = 16;
+// }
+ else
+ {
+ cout<<"Invalid technology nodes"<<endl;
+ exit(0);
+ }
+
+ double vdd[NUMBER_TECH_FLAVORS];
+ double Lphy[NUMBER_TECH_FLAVORS];
+ double Lelec[NUMBER_TECH_FLAVORS];
+ double t_ox[NUMBER_TECH_FLAVORS];
+ double v_th[NUMBER_TECH_FLAVORS];
+ double c_ox[NUMBER_TECH_FLAVORS];
+ double mobility_eff[NUMBER_TECH_FLAVORS];
+ double Vdsat[NUMBER_TECH_FLAVORS];
+ double c_g_ideal[NUMBER_TECH_FLAVORS];
+ double c_fringe[NUMBER_TECH_FLAVORS];
+ double c_junc[NUMBER_TECH_FLAVORS];
+ double I_on_n[NUMBER_TECH_FLAVORS];
+ double I_on_p[NUMBER_TECH_FLAVORS];
+ double Rnchannelon[NUMBER_TECH_FLAVORS];
+ double Rpchannelon[NUMBER_TECH_FLAVORS];
+ double n_to_p_eff_curr_drv_ratio[NUMBER_TECH_FLAVORS];
+ double I_off_n[NUMBER_TECH_FLAVORS][101];
+ double I_g_on_n[NUMBER_TECH_FLAVORS][101];
+ //double I_off_p[NUMBER_TECH_FLAVORS][101];
+ double gmp_to_gmn_multiplier[NUMBER_TECH_FLAVORS];
+ //double curr_sckt_co_eff[NUMBER_TECH_FLAVORS];
+ double long_channel_leakage_reduction[NUMBER_TECH_FLAVORS];
+
+ for (iter = 0; iter <= 1; ++iter)
+ {
+ // linear interpolation
+ if (iter == 0)
+ {
+ tech = tech_lo;
+ if (tech_lo == tech_hi)
+ {
+ curr_alpha = 1;
+ }
+ else
+ {
+ curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
+ }
+ }
+ else
+ {
+ tech = tech_hi;
+ if (tech_lo == tech_hi)
+ {
+ break;
+ }
+ else
+ {
+ curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
+ }
+ }
+
+ if (tech == 180)
+ {
+ //180nm technology-node. Corresponds to year 1999 in ITRS
+ //Only HP transistor was of interest that 180nm since leakage power was not a big issue. Performance was the king
+ //MASTAR does not contain data for 0.18um process. The following parameters are projected based on ITRS 2000 update and IBM 0.18 Cu Spice input
+ bool Aggre_proj = false;
+ SENSE_AMP_D = .28e-9; // s
+ SENSE_AMP_P = 14.7e-15; // J
+ vdd[0] = 1.5;
+ Lphy[0] = 0.12;//Lphy is the physical gate-length. micron
+ Lelec[0] = 0.10;//Lelec is the electrical gate-length. micron
+ t_ox[0] = 1.2e-3*(Aggre_proj? 1.9/1.2:2);//micron
+ v_th[0] = Aggre_proj? 0.36 : 0.4407;//V
+ c_ox[0] = 1.79e-14*(Aggre_proj? 1.9/1.2:2);//F/micron2
+ mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 0.128*2; //V
+ c_g_ideal[0] = (Aggre_proj? 1.9/1.2:2)*6.64e-16;//F/micron
+ c_fringe[0] = (Aggre_proj? 1.9/1.2:2)*0.08e-15;//F/micron
+ c_junc[0] = (Aggre_proj? 1.9/1.2:2)*1e-15;//F/micron2
+ I_on_n[0] = 750e-6;//A/micron
+ I_on_p[0] = 350e-6;//A/micron
+ //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
+ nmos_effective_resistance_multiplier = 1.54;
+ n_to_p_eff_curr_drv_ratio[0] = 2.45;
+ gmp_to_gmn_multiplier[0] = 1.22;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1;
+ I_off_n[0][0] = 7e-10;//A/micron
+ I_off_n[0][10] = 8.26e-10;
+ I_off_n[0][20] = 9.74e-10;
+ I_off_n[0][30] = 1.15e-9;
+ I_off_n[0][40] = 1.35e-9;
+ I_off_n[0][50] = 1.60e-9;
+ I_off_n[0][60] = 1.88e-9;
+ I_off_n[0][70] = 2.29e-9;
+ I_off_n[0][80] = 2.70e-9;
+ I_off_n[0][90] = 3.19e-9;
+ I_off_n[0][100] = 3.76e-9;
+
+ I_g_on_n[0][0] = 1.65e-10;//A/micron
+ I_g_on_n[0][10] = 1.65e-10;
+ I_g_on_n[0][20] = 1.65e-10;
+ I_g_on_n[0][30] = 1.65e-10;
+ I_g_on_n[0][40] = 1.65e-10;
+ I_g_on_n[0][50] = 1.65e-10;
+ I_g_on_n[0][60] = 1.65e-10;
+ I_g_on_n[0][70] = 1.65e-10;
+ I_g_on_n[0][80] = 1.65e-10;
+ I_g_on_n[0][90] = 1.65e-10;
+ I_g_on_n[0][100] = 1.65e-10;
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
+ curr_asp_ratio_cell_cam = 2.92;//2.5
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm
+ curr_core_tx_density = 1.25*0.7*0.7*0.4;
+ curr_sckt_co_eff = 1.11;
+ curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb
+
+ }
+
+ if (tech == 90)
+ {
+ SENSE_AMP_D = .28e-9; // s
+ SENSE_AMP_P = 14.7e-15; // J
+ //90nm technology-node. Corresponds to year 2004 in ITRS
+ //ITRS HP device type
+ vdd[0] = 1.2;
+ Lphy[0] = 0.037;//Lphy is the physical gate-length. micron
+ Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron
+ t_ox[0] = 1.2e-3;//micron
+ v_th[0] = 0.23707;//V
+ c_ox[0] = 1.79e-14;//F/micron2
+ mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 0.128; //V
+ c_g_ideal[0] = 6.64e-16;//F/micron
+ c_fringe[0] = 0.08e-15;//F/micron
+ c_junc[0] = 1e-15;//F/micron2
+ I_on_n[0] = 1076.9e-6;//A/micron
+ I_on_p[0] = 712.6e-6;//A/micron
+ //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
+ nmos_effective_resistance_multiplier = 1.54;
+ n_to_p_eff_curr_drv_ratio[0] = 2.45;
+ gmp_to_gmn_multiplier[0] = 1.22;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1;
+ I_off_n[0][0] = 3.24e-8;//A/micron
+ I_off_n[0][10] = 4.01e-8;
+ I_off_n[0][20] = 4.90e-8;
+ I_off_n[0][30] = 5.92e-8;
+ I_off_n[0][40] = 7.08e-8;
+ I_off_n[0][50] = 8.38e-8;
+ I_off_n[0][60] = 9.82e-8;
+ I_off_n[0][70] = 1.14e-7;
+ I_off_n[0][80] = 1.29e-7;
+ I_off_n[0][90] = 1.43e-7;
+ I_off_n[0][100] = 1.54e-7;
+
+ I_g_on_n[0][0] = 1.65e-8;//A/micron
+ I_g_on_n[0][10] = 1.65e-8;
+ I_g_on_n[0][20] = 1.65e-8;
+ I_g_on_n[0][30] = 1.65e-8;
+ I_g_on_n[0][40] = 1.65e-8;
+ I_g_on_n[0][50] = 1.65e-8;
+ I_g_on_n[0][60] = 1.65e-8;
+ I_g_on_n[0][70] = 1.65e-8;
+ I_g_on_n[0][80] = 1.65e-8;
+ I_g_on_n[0][90] = 1.65e-8;
+ I_g_on_n[0][100] = 1.65e-8;
+
+ //ITRS LSTP device type
+ vdd[1] = 1.3;
+ Lphy[1] = 0.075;
+ Lelec[1] = 0.0486;
+ t_ox[1] = 2.2e-3;
+ v_th[1] = 0.48203;
+ c_ox[1] = 1.22e-14;
+ mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 0.373;
+ c_g_ideal[1] = 9.15e-16;
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 503.6e-6;
+ I_on_p[1] = 235.1e-6;
+ nmos_effective_resistance_multiplier = 1.92;
+ n_to_p_eff_curr_drv_ratio[1] = 2.44;
+ gmp_to_gmn_multiplier[1] =0.88;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1;
+ I_off_n[1][0] = 2.81e-12;
+ I_off_n[1][10] = 4.76e-12;
+ I_off_n[1][20] = 7.82e-12;
+ I_off_n[1][30] = 1.25e-11;
+ I_off_n[1][40] = 1.94e-11;
+ I_off_n[1][50] = 2.94e-11;
+ I_off_n[1][60] = 4.36e-11;
+ I_off_n[1][70] = 6.32e-11;
+ I_off_n[1][80] = 8.95e-11;
+ I_off_n[1][90] = 1.25e-10;
+ I_off_n[1][100] = 1.7e-10;
+
+ I_g_on_n[1][0] = 3.87e-11;//A/micron
+ I_g_on_n[1][10] = 3.87e-11;
+ I_g_on_n[1][20] = 3.87e-11;
+ I_g_on_n[1][30] = 3.87e-11;
+ I_g_on_n[1][40] = 3.87e-11;
+ I_g_on_n[1][50] = 3.87e-11;
+ I_g_on_n[1][60] = 3.87e-11;
+ I_g_on_n[1][70] = 3.87e-11;
+ I_g_on_n[1][80] = 3.87e-11;
+ I_g_on_n[1][90] = 3.87e-11;
+ I_g_on_n[1][100] = 3.87e-11;
+
+ //ITRS LOP device type
+ vdd[2] = 0.9;
+ Lphy[2] = 0.053;
+ Lelec[2] = 0.0354;
+ t_ox[2] = 1.5e-3;
+ v_th[2] = 0.30764;
+ c_ox[2] = 1.59e-14;
+ mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 0.113;
+ c_g_ideal[2] = 8.45e-16;
+ c_fringe[2] = 0.08e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 386.6e-6;
+ I_on_p[2] = 209.7e-6;
+ nmos_effective_resistance_multiplier = 1.77;
+ n_to_p_eff_curr_drv_ratio[2] = 2.54;
+ gmp_to_gmn_multiplier[2] = 0.98;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1;
+ I_off_n[2][0] = 2.14e-9;
+ I_off_n[2][10] = 2.9e-9;
+ I_off_n[2][20] = 3.87e-9;
+ I_off_n[2][30] = 5.07e-9;
+ I_off_n[2][40] = 6.54e-9;
+ I_off_n[2][50] = 8.27e-8;
+ I_off_n[2][60] = 1.02e-7;
+ I_off_n[2][70] = 1.20e-7;
+ I_off_n[2][80] = 1.36e-8;
+ I_off_n[2][90] = 1.52e-8;
+ I_off_n[2][100] = 1.73e-8;
+
+ I_g_on_n[2][0] = 4.31e-8;//A/micron
+ I_g_on_n[2][10] = 4.31e-8;
+ I_g_on_n[2][20] = 4.31e-8;
+ I_g_on_n[2][30] = 4.31e-8;
+ I_g_on_n[2][40] = 4.31e-8;
+ I_g_on_n[2][50] = 4.31e-8;
+ I_g_on_n[2][60] = 4.31e-8;
+ I_g_on_n[2][70] = 4.31e-8;
+ I_g_on_n[2][80] = 4.31e-8;
+ I_g_on_n[2][90] = 4.31e-8;
+ I_g_on_n[2][100] = 4.31e-8;
+
+ if (ram_cell_tech_type == lp_dram)
+ {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.2;
+ Lphy[3] = 0.12;
+ Lelec[3] = 0.0756;
+ curr_v_th_dram_access_transistor = 0.4545;
+ width_dram_access_transistor = 0.14;
+ curr_I_on_dram_cell = 45e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 0.168;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.6;
+ t_ox[3] = 2.2e-3;
+ v_th[3] = 0.4545;
+ c_ox[3] = 1.22e-14;
+ mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.3;
+ c_g_ideal[3] = 1.47e-15;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 321.6e-6;
+ I_on_p[3] = 203.3e-6;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.42e-11;
+ I_off_n[3][10] = 2.25e-11;
+ I_off_n[3][20] = 3.46e-11;
+ I_off_n[3][30] = 5.18e-11;
+ I_off_n[3][40] = 7.58e-11;
+ I_off_n[3][50] = 1.08e-10;
+ I_off_n[3][60] = 1.51e-10;
+ I_off_n[3][70] = 2.02e-10;
+ I_off_n[3][80] = 2.57e-10;
+ I_off_n[3][90] = 3.14e-10;
+ I_off_n[3][100] = 3.85e-10;
+ }
+ else if (ram_cell_tech_type == comm_dram)
+ {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.6;
+ Lphy[3] = 0.09;
+ Lelec[3] = 0.0576;
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.09;
+ curr_I_on_dram_cell = 20e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6*0.09*0.09;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 3.7;
+ t_ox[3] = 5.5e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 5.65e-15;
+ mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.32;
+ c_g_ideal[3] = 5.08e-16;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 1094.3e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.62;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 5.80e-15;
+ I_off_n[3][10] = 1.21e-14;
+ I_off_n[3][20] = 2.42e-14;
+ I_off_n[3][30] = 4.65e-14;
+ I_off_n[3][40] = 8.60e-14;
+ I_off_n[3][50] = 1.54e-13;
+ I_off_n[3][60] = 2.66e-13;
+ I_off_n[3][70] = 4.45e-13;
+ I_off_n[3][80] = 7.17e-13;
+ I_off_n[3][90] = 1.11e-12;
+ I_off_n[3][100] = 1.67e-12;
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
+ curr_asp_ratio_cell_cam = 2.92;//2.5
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 1;
+ curr_core_tx_density = 1.25*0.7*0.7;
+ curr_sckt_co_eff = 1.1539;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+
+
+ }
+
+ if (tech == 65)
+ { //65nm technology-node. Corresponds to year 2007 in ITRS
+ //ITRS HP device type
+ SENSE_AMP_D = .2e-9; // s
+ SENSE_AMP_P = 5.7e-15; // J
+ vdd[0] = 1.1;
+ Lphy[0] = 0.025;
+ Lelec[0] = 0.019;
+ t_ox[0] = 1.1e-3;
+ v_th[0] = .19491;
+ c_ox[0] = 1.88e-14;
+ mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[0] = 7.71e-2;
+ c_g_ideal[0] = 4.69e-16;
+ c_fringe[0] = 0.077e-15;
+ c_junc[0] = 1e-15;
+ I_on_n[0] = 1197.2e-6;
+ I_on_p[0] = 870.8e-6;
+ nmos_effective_resistance_multiplier = 1.50;
+ n_to_p_eff_curr_drv_ratio[0] = 2.41;
+ gmp_to_gmn_multiplier[0] = 1.38;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
+ long_channel_leakage_reduction[0] = 1/3.74;
+ //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first
+ //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74.
+ I_off_n[0][0] = 1.96e-7;
+ I_off_n[0][10] = 2.29e-7;
+ I_off_n[0][20] = 2.66e-7;
+ I_off_n[0][30] = 3.05e-7;
+ I_off_n[0][40] = 3.49e-7;
+ I_off_n[0][50] = 3.95e-7;
+ I_off_n[0][60] = 4.45e-7;
+ I_off_n[0][70] = 4.97e-7;
+ I_off_n[0][80] = 5.48e-7;
+ I_off_n[0][90] = 5.94e-7;
+ I_off_n[0][100] = 6.3e-7;
+ I_g_on_n[0][0] = 4.09e-8;//A/micron
+ I_g_on_n[0][10] = 4.09e-8;
+ I_g_on_n[0][20] = 4.09e-8;
+ I_g_on_n[0][30] = 4.09e-8;
+ I_g_on_n[0][40] = 4.09e-8;
+ I_g_on_n[0][50] = 4.09e-8;
+ I_g_on_n[0][60] = 4.09e-8;
+ I_g_on_n[0][70] = 4.09e-8;
+ I_g_on_n[0][80] = 4.09e-8;
+ I_g_on_n[0][90] = 4.09e-8;
+ I_g_on_n[0][100] = 4.09e-8;
+
+ //ITRS LSTP device type
+ vdd[1] = 1.2;
+ Lphy[1] = 0.045;
+ Lelec[1] = 0.0298;
+ t_ox[1] = 1.9e-3;
+ v_th[1] = 0.52354;
+ c_ox[1] = 1.36e-14;
+ mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 0.128;
+ c_g_ideal[1] = 6.14e-16;
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 519.2e-6;
+ I_on_p[1] = 266e-6;
+ nmos_effective_resistance_multiplier = 1.96;
+ n_to_p_eff_curr_drv_ratio[1] = 2.23;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1/2.82;
+ I_off_n[1][0] = 9.12e-12;
+ I_off_n[1][10] = 1.49e-11;
+ I_off_n[1][20] = 2.36e-11;
+ I_off_n[1][30] = 3.64e-11;
+ I_off_n[1][40] = 5.48e-11;
+ I_off_n[1][50] = 8.05e-11;
+ I_off_n[1][60] = 1.15e-10;
+ I_off_n[1][70] = 1.59e-10;
+ I_off_n[1][80] = 2.1e-10;
+ I_off_n[1][90] = 2.62e-10;
+ I_off_n[1][100] = 3.21e-10;
+
+ I_g_on_n[1][0] = 1.09e-10;//A/micron
+ I_g_on_n[1][10] = 1.09e-10;
+ I_g_on_n[1][20] = 1.09e-10;
+ I_g_on_n[1][30] = 1.09e-10;
+ I_g_on_n[1][40] = 1.09e-10;
+ I_g_on_n[1][50] = 1.09e-10;
+ I_g_on_n[1][60] = 1.09e-10;
+ I_g_on_n[1][70] = 1.09e-10;
+ I_g_on_n[1][80] = 1.09e-10;
+ I_g_on_n[1][90] = 1.09e-10;
+ I_g_on_n[1][100] = 1.09e-10;
+
+ //ITRS LOP device type
+ vdd[2] = 0.8;
+ Lphy[2] = 0.032;
+ Lelec[2] = 0.0216;
+ t_ox[2] = 1.2e-3;
+ v_th[2] = 0.28512;
+ c_ox[2] = 1.87e-14;
+ mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 0.292;
+ c_g_ideal[2] = 6e-16;
+ c_fringe[2] = 0.08e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 573.1e-6;
+ I_on_p[2] = 340.6e-6;
+ nmos_effective_resistance_multiplier = 1.82;
+ n_to_p_eff_curr_drv_ratio[2] = 2.28;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1/2.05;
+ I_off_n[2][0] = 4.9e-9;
+ I_off_n[2][10] = 6.49e-9;
+ I_off_n[2][20] = 8.45e-9;
+ I_off_n[2][30] = 1.08e-8;
+ I_off_n[2][40] = 1.37e-8;
+ I_off_n[2][50] = 1.71e-8;
+ I_off_n[2][60] = 2.09e-8;
+ I_off_n[2][70] = 2.48e-8;
+ I_off_n[2][80] = 2.84e-8;
+ I_off_n[2][90] = 3.13e-8;
+ I_off_n[2][100] = 3.42e-8;
+
+ I_g_on_n[2][0] = 9.61e-9;//A/micron
+ I_g_on_n[2][10] = 9.61e-9;
+ I_g_on_n[2][20] = 9.61e-9;
+ I_g_on_n[2][30] = 9.61e-9;
+ I_g_on_n[2][40] = 9.61e-9;
+ I_g_on_n[2][50] = 9.61e-9;
+ I_g_on_n[2][60] = 9.61e-9;
+ I_g_on_n[2][70] = 9.61e-9;
+ I_g_on_n[2][80] = 9.61e-9;
+ I_g_on_n[2][90] = 9.61e-9;
+ I_g_on_n[2][100] = 9.61e-9;
+
+ if (ram_cell_tech_type == lp_dram)
+ {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.2;
+ Lphy[3] = 0.12;
+ Lelec[3] = 0.0756;
+ curr_v_th_dram_access_transistor = 0.43806;
+ width_dram_access_transistor = 0.09;
+ curr_I_on_dram_cell = 36e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 0.11;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.6;
+ t_ox[3] = 2.2e-3;
+ v_th[3] = 0.43806;
+ c_ox[3] = 1.22e-14;
+ mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.43806;
+ c_g_ideal[3] = 1.46e-15;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15 ;
+ I_on_n[3] = 399.8e-6;
+ I_on_p[3] = 243.4e-6;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 2.23e-11;
+ I_off_n[3][10] = 3.46e-11;
+ I_off_n[3][20] = 5.24e-11;
+ I_off_n[3][30] = 7.75e-11;
+ I_off_n[3][40] = 1.12e-10;
+ I_off_n[3][50] = 1.58e-10;
+ I_off_n[3][60] = 2.18e-10;
+ I_off_n[3][70] = 2.88e-10;
+ I_off_n[3][80] = 3.63e-10;
+ I_off_n[3][90] = 4.41e-10;
+ I_off_n[3][100] = 5.36e-10;
+ }
+ else if (ram_cell_tech_type == comm_dram)
+ {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.3;
+ Lphy[3] = 0.065;
+ Lelec[3] = 0.0426;
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.065;
+ curr_I_on_dram_cell = 20e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6*0.065*0.065;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 3.3;
+ t_ox[3] = 5e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 6.16e-15;
+ mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.385;
+ c_g_ideal[3] = 4e-16;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15 ;
+ I_on_n[3] = 1031e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.69;
+ n_to_p_eff_curr_drv_ratio[3] = 2.39;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.80e-14;
+ I_off_n[3][10] = 3.64e-14;
+ I_off_n[3][20] = 7.03e-14;
+ I_off_n[3][30] = 1.31e-13;
+ I_off_n[3][40] = 2.35e-13;
+ I_off_n[3][50] = 4.09e-13;
+ I_off_n[3][60] = 6.89e-13;
+ I_off_n[3][70] = 1.13e-12;
+ I_off_n[3][80] = 1.78e-12;
+ I_off_n[3][90] = 2.71e-12;
+ I_off_n[3][100] = 3.99e-12;
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor
+ curr_core_tx_density = 1.25*0.7;
+ curr_sckt_co_eff = 1.1359;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
+
+ if (tech == 45)
+ { //45nm technology-node. Corresponds to year 2010 in ITRS
+ //ITRS HP device type
+ SENSE_AMP_D = .04e-9; // s
+ SENSE_AMP_P = 2.7e-15; // J
+ vdd[0] = 1.0;
+ Lphy[0] = 0.018;
+ Lelec[0] = 0.01345;
+ t_ox[0] = 0.65e-3;
+ v_th[0] = .18035;
+ c_ox[0] = 3.77e-14;
+ mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[0] = 9.38E-2;
+ c_g_ideal[0] = 6.78e-16;
+ c_fringe[0] = 0.05e-15;
+ c_junc[0] = 1e-15;
+ I_on_n[0] = 2046.6e-6;
+ //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of
+ //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm
+ I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI
+ nmos_effective_resistance_multiplier = 1.51;
+ n_to_p_eff_curr_drv_ratio[0] = 2.41;
+ gmp_to_gmn_multiplier[0] = 1.38;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
+ long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74
+ I_off_n[0][0] = 2.8e-7;
+ I_off_n[0][10] = 3.28e-7;
+ I_off_n[0][20] = 3.81e-7;
+ I_off_n[0][30] = 4.39e-7;
+ I_off_n[0][40] = 5.02e-7;
+ I_off_n[0][50] = 5.69e-7;
+ I_off_n[0][60] = 6.42e-7;
+ I_off_n[0][70] = 7.2e-7;
+ I_off_n[0][80] = 8.03e-7;
+ I_off_n[0][90] = 8.91e-7;
+ I_off_n[0][100] = 9.84e-7;
+
+ I_g_on_n[0][0] = 3.59e-8;//A/micron
+ I_g_on_n[0][10] = 3.59e-8;
+ I_g_on_n[0][20] = 3.59e-8;
+ I_g_on_n[0][30] = 3.59e-8;
+ I_g_on_n[0][40] = 3.59e-8;
+ I_g_on_n[0][50] = 3.59e-8;
+ I_g_on_n[0][60] = 3.59e-8;
+ I_g_on_n[0][70] = 3.59e-8;
+ I_g_on_n[0][80] = 3.59e-8;
+ I_g_on_n[0][90] = 3.59e-8;
+ I_g_on_n[0][100] = 3.59e-8;
+
+ //ITRS LSTP device type
+ vdd[1] = 1.1;
+ Lphy[1] = 0.028;
+ Lelec[1] = 0.0212;
+ t_ox[1] = 1.4e-3;
+ v_th[1] = 0.50245;
+ c_ox[1] = 2.01e-14;
+ mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 9.12e-2;
+ c_g_ideal[1] = 5.18e-16;
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 666.2e-6;
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2.23;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1/2.08;
+ I_off_n[1][0] = 1.01e-11;
+ I_off_n[1][10] = 1.65e-11;
+ I_off_n[1][20] = 2.62e-11;
+ I_off_n[1][30] = 4.06e-11;
+ I_off_n[1][40] = 6.12e-11;
+ I_off_n[1][50] = 9.02e-11;
+ I_off_n[1][60] = 1.3e-10;
+ I_off_n[1][70] = 1.83e-10;
+ I_off_n[1][80] = 2.51e-10;
+ I_off_n[1][90] = 3.29e-10;
+ I_off_n[1][100] = 4.1e-10;
+
+ I_g_on_n[1][0] = 9.47e-12;//A/micron
+ I_g_on_n[1][10] = 9.47e-12;
+ I_g_on_n[1][20] = 9.47e-12;
+ I_g_on_n[1][30] = 9.47e-12;
+ I_g_on_n[1][40] = 9.47e-12;
+ I_g_on_n[1][50] = 9.47e-12;
+ I_g_on_n[1][60] = 9.47e-12;
+ I_g_on_n[1][70] = 9.47e-12;
+ I_g_on_n[1][80] = 9.47e-12;
+ I_g_on_n[1][90] = 9.47e-12;
+ I_g_on_n[1][100] = 9.47e-12;
+
+ //ITRS LOP device type
+ vdd[2] = 0.7;
+ Lphy[2] = 0.022;
+ Lelec[2] = 0.016;
+ t_ox[2] = 0.9e-3;
+ v_th[2] = 0.22599;
+ c_ox[2] = 2.82e-14;//F/micron2
+ mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 5.71e-2;
+ c_g_ideal[2] = 6.2e-16;
+ c_fringe[2] = 0.073e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 748.9e-6;
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.76;
+ n_to_p_eff_curr_drv_ratio[2] = 2.28;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1/1.92;
+ I_off_n[2][0] = 4.03e-9;
+ I_off_n[2][10] = 5.02e-9;
+ I_off_n[2][20] = 6.18e-9;
+ I_off_n[2][30] = 7.51e-9;
+ I_off_n[2][40] = 9.04e-9;
+ I_off_n[2][50] = 1.08e-8;
+ I_off_n[2][60] = 1.27e-8;
+ I_off_n[2][70] = 1.47e-8;
+ I_off_n[2][80] = 1.66e-8;
+ I_off_n[2][90] = 1.84e-8;
+ I_off_n[2][100] = 2.03e-8;
+
+ I_g_on_n[2][0] = 3.24e-8;//A/micron
+ I_g_on_n[2][10] = 4.01e-8;
+ I_g_on_n[2][20] = 4.90e-8;
+ I_g_on_n[2][30] = 5.92e-8;
+ I_g_on_n[2][40] = 7.08e-8;
+ I_g_on_n[2][50] = 8.38e-8;
+ I_g_on_n[2][60] = 9.82e-8;
+ I_g_on_n[2][70] = 1.14e-7;
+ I_g_on_n[2][80] = 1.29e-7;
+ I_g_on_n[2][90] = 1.43e-7;
+ I_g_on_n[2][100] = 1.54e-7;
+
+ if (ram_cell_tech_type == lp_dram)
+ {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.1;
+ Lphy[3] = 0.078;
+ Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 0.44559;
+ width_dram_access_transistor = 0.079;
+ curr_I_on_dram_cell = 36e-6;//A
+ curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.5;
+ t_ox[3] = 2.1e-3;
+ v_th[3] = 0.44559;
+ c_ox[3] = 1.41e-14;
+ mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.181;
+ c_g_ideal[3] = 1.10e-15;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 456e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 2.54e-11;
+ I_off_n[3][10] = 3.94e-11;
+ I_off_n[3][20] = 5.95e-11;
+ I_off_n[3][30] = 8.79e-11;
+ I_off_n[3][40] = 1.27e-10;
+ I_off_n[3][50] = 1.79e-10;
+ I_off_n[3][60] = 2.47e-10;
+ I_off_n[3][70] = 3.31e-10;
+ I_off_n[3][80] = 4.26e-10;
+ I_off_n[3][90] = 5.27e-10;
+ I_off_n[3][100] = 6.46e-10;
+ }
+ else if (ram_cell_tech_type == comm_dram)
+ {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.1;
+ Lphy[3] = 0.045;
+ Lelec[3] = 0.0298;
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.045;
+ curr_I_on_dram_cell = 20e-6;//A
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6*0.045*0.045;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 2.7;
+ t_ox[3] = 4e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 7.98e-15;
+ mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.147;
+ c_g_ideal[3] = 3.59e-16;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 999.4e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.69;
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.31e-14;
+ I_off_n[3][10] = 2.68e-14;
+ I_off_n[3][20] = 5.25e-14;
+ I_off_n[3][30] = 9.88e-14;
+ I_off_n[3][40] = 1.79e-13;
+ I_off_n[3][50] = 3.15e-13;
+ I_off_n[3][60] = 5.36e-13;
+ I_off_n[3][70] = 8.86e-13;
+ I_off_n[3][80] = 1.42e-12;
+ I_off_n[3][90] = 2.20e-12;
+ I_off_n[3][100] = 3.29e-12;
+ }
+
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7*0.7;
+ curr_core_tx_density = 1.25;
+ curr_sckt_co_eff = 1.1387;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
+
+ if (tech == 32)
+ {
+ SENSE_AMP_D = .03e-9; // s
+ SENSE_AMP_P = 2.16e-15; // J
+ //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm
+ //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for
+ //HP and LSTP.
+ vdd[0] = 0.9;
+ Lphy[0] = 0.013;
+ Lelec[0] = 0.01013;
+ t_ox[0] = 0.5e-3;
+ v_th[0] = 0.21835;
+ c_ox[0] = 4.11e-14;
+ mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[0] = 5.09E-2;
+ c_g_ideal[0] = 5.34e-16;
+ c_fringe[0] = 0.04e-15;
+ c_junc[0] = 1e-15;
+ I_on_n[0] = 2211.7e-6;
+ I_on_p[0] = I_on_n[0] / 2;
+ nmos_effective_resistance_multiplier = 1.49;
+ n_to_p_eff_curr_drv_ratio[0] = 2.41;
+ gmp_to_gmn_multiplier[0] = 1.38;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1/3.706;
+ //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%),
+ //whichever comes first
+ I_off_n[0][0] = 1.52e-7;
+ I_off_n[0][10] = 1.55e-7;
+ I_off_n[0][20] = 1.59e-7;
+ I_off_n[0][30] = 1.68e-7;
+ I_off_n[0][40] = 1.90e-7;
+ I_off_n[0][50] = 2.69e-7;
+ I_off_n[0][60] = 5.32e-7;
+ I_off_n[0][70] = 1.02e-6;
+ I_off_n[0][80] = 1.62e-6;
+ I_off_n[0][90] = 2.73e-6;
+ I_off_n[0][100] = 6.1e-6;
+
+ I_g_on_n[0][0] = 6.55e-8;//A/micron
+ I_g_on_n[0][10] = 6.55e-8;
+ I_g_on_n[0][20] = 6.55e-8;
+ I_g_on_n[0][30] = 6.55e-8;
+ I_g_on_n[0][40] = 6.55e-8;
+ I_g_on_n[0][50] = 6.55e-8;
+ I_g_on_n[0][60] = 6.55e-8;
+ I_g_on_n[0][70] = 6.55e-8;
+ I_g_on_n[0][80] = 6.55e-8;
+ I_g_on_n[0][90] = 6.55e-8;
+ I_g_on_n[0][100] = 6.55e-8;
+
+// 32 DG
+// I_g_on_n[0][0] = 2.71e-9;//A/micron
+// I_g_on_n[0][10] = 2.71e-9;
+// I_g_on_n[0][20] = 2.71e-9;
+// I_g_on_n[0][30] = 2.71e-9;
+// I_g_on_n[0][40] = 2.71e-9;
+// I_g_on_n[0][50] = 2.71e-9;
+// I_g_on_n[0][60] = 2.71e-9;
+// I_g_on_n[0][70] = 2.71e-9;
+// I_g_on_n[0][80] = 2.71e-9;
+// I_g_on_n[0][90] = 2.71e-9;
+// I_g_on_n[0][100] = 2.71e-9;
+
+ //LSTP device type
+ vdd[1] = 1;
+ Lphy[1] = 0.020;
+ Lelec[1] = 0.0173;
+ t_ox[1] = 1.2e-3;
+ v_th[1] = 0.513;
+ c_ox[1] = 2.29e-14;
+ mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 8.64e-2;
+ c_g_ideal[1] = 4.58e-16;
+ c_fringe[1] = 0.053e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 683.6e-6;
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2.23;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1/1.93;
+ I_off_n[1][0] = 2.06e-11;
+ I_off_n[1][10] = 3.30e-11;
+ I_off_n[1][20] = 5.15e-11;
+ I_off_n[1][30] = 7.83e-11;
+ I_off_n[1][40] = 1.16e-10;
+ I_off_n[1][50] = 1.69e-10;
+ I_off_n[1][60] = 2.40e-10;
+ I_off_n[1][70] = 3.34e-10;
+ I_off_n[1][80] = 4.54e-10;
+ I_off_n[1][90] = 5.96e-10;
+ I_off_n[1][100] = 7.44e-10;
+
+ I_g_on_n[1][0] = 3.73e-11;//A/micron
+ I_g_on_n[1][10] = 3.73e-11;
+ I_g_on_n[1][20] = 3.73e-11;
+ I_g_on_n[1][30] = 3.73e-11;
+ I_g_on_n[1][40] = 3.73e-11;
+ I_g_on_n[1][50] = 3.73e-11;
+ I_g_on_n[1][60] = 3.73e-11;
+ I_g_on_n[1][70] = 3.73e-11;
+ I_g_on_n[1][80] = 3.73e-11;
+ I_g_on_n[1][90] = 3.73e-11;
+ I_g_on_n[1][100] = 3.73e-11;
+
+
+ //LOP device type
+ vdd[2] = 0.6;
+ Lphy[2] = 0.016;
+ Lelec[2] = 0.01232;
+ t_ox[2] = 0.9e-3;
+ v_th[2] = 0.24227;
+ c_ox[2] = 2.84e-14;
+ mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 4.64e-2;
+ c_g_ideal[2] = 4.54e-16;
+ c_fringe[2] = 0.057e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 827.8e-6;
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.73;
+ n_to_p_eff_curr_drv_ratio[2] = 2.28;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1/1.89;
+ I_off_n[2][0] = 5.94e-8;
+ I_off_n[2][10] = 7.23e-8;
+ I_off_n[2][20] = 8.7e-8;
+ I_off_n[2][30] = 1.04e-7;
+ I_off_n[2][40] = 1.22e-7;
+ I_off_n[2][50] = 1.43e-7;
+ I_off_n[2][60] = 1.65e-7;
+ I_off_n[2][70] = 1.90e-7;
+ I_off_n[2][80] = 2.15e-7;
+ I_off_n[2][90] = 2.39e-7;
+ I_off_n[2][100] = 2.63e-7;
+
+ I_g_on_n[2][0] = 2.93e-9;//A/micron
+ I_g_on_n[2][10] = 2.93e-9;
+ I_g_on_n[2][20] = 2.93e-9;
+ I_g_on_n[2][30] = 2.93e-9;
+ I_g_on_n[2][40] = 2.93e-9;
+ I_g_on_n[2][50] = 2.93e-9;
+ I_g_on_n[2][60] = 2.93e-9;
+ I_g_on_n[2][70] = 2.93e-9;
+ I_g_on_n[2][80] = 2.93e-9;
+ I_g_on_n[2][90] = 2.93e-9;
+ I_g_on_n[2][100] = 2.93e-9;
+
+ if (ram_cell_tech_type == lp_dram)
+ {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.0;
+ Lphy[3] = 0.056;
+ Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 0.44129;
+ width_dram_access_transistor = 0.056;
+ curr_I_on_dram_cell = 36e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.5;
+ t_ox[3] = 2e-3;
+ v_th[3] = 0.44467;
+ c_ox[3] = 1.48e-14;
+ mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.174;
+ c_g_ideal[3] = 7.45e-16;
+ c_fringe[3] = 0.053e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 1055.4e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 3.57e-11;
+ I_off_n[3][10] = 5.51e-11;
+ I_off_n[3][20] = 8.27e-11;
+ I_off_n[3][30] = 1.21e-10;
+ I_off_n[3][40] = 1.74e-10;
+ I_off_n[3][50] = 2.45e-10;
+ I_off_n[3][60] = 3.38e-10;
+ I_off_n[3][70] = 4.53e-10;
+ I_off_n[3][80] = 5.87e-10;
+ I_off_n[3][90] = 7.29e-10;
+ I_off_n[3][100] = 8.87e-10;
+ }
+ else if (ram_cell_tech_type == comm_dram)
+ {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.0;
+ Lphy[3] = 0.032;
+ Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.032;
+ curr_I_on_dram_cell = 20e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6*0.032*0.032;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 2.6;
+ t_ox[3] = 4e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 7.99e-15;
+ mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.129;
+ c_g_ideal[3] = 2.56e-16;
+ c_fringe[3] = 0.053e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 1024.5e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.69;
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 3.63e-14;
+ I_off_n[3][10] = 7.18e-14;
+ I_off_n[3][20] = 1.36e-13;
+ I_off_n[3][30] = 2.49e-13;
+ I_off_n[3][40] = 4.41e-13;
+ I_off_n[3][50] = 7.55e-13;
+ I_off_n[3][60] = 1.26e-12;
+ I_off_n[3][70] = 2.03e-12;
+ I_off_n[3][80] = 3.19e-12;
+ I_off_n[3][90] = 4.87e-12;
+ I_off_n[3][100] = 7.16e-12;
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7*0.7*0.7;
+ curr_core_tx_density = 1.25/0.7;
+ curr_sckt_co_eff = 1.1111;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
+
+ if(tech == 22){
+ SENSE_AMP_D = .03e-9; // s
+ SENSE_AMP_P = 2.16e-15; // J
+ //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm
+ //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP.
+ //22 nm HP
+ vdd[0] = 0.8;
+ Lphy[0] = 0.009;//Lphy is the physical gate-length.
+ Lelec[0] = 0.00468;//Lelec is the electrical gate-length.
+ t_ox[0] = 0.55e-3;//micron
+ v_th[0] = 0.1395;//V
+ c_ox[0] = 3.63e-14;//F/micron2
+ mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 2.33e-2; //V/micron
+ c_g_ideal[0] = 3.27e-16;//F/micron
+ c_fringe[0] = 0.06e-15;//F/micron
+ c_junc[0] = 0;//F/micron2
+ I_on_n[0] = 2626.4e-6;//A/micron
+ I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
+ nmos_effective_resistance_multiplier = 1.45;
+ n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
+ //"Dynamic" tab of Device workspace.
+ gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1/3.274;
+ I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there.
+ I_off_n[0][10] = 1.55e-7/1.5*1.2;
+ I_off_n[0][20] = 1.59e-7/1.5*1.2;
+ I_off_n[0][30] = 1.68e-7/1.5*1.2;
+ I_off_n[0][40] = 1.90e-7/1.5*1.2;
+ I_off_n[0][50] = 2.69e-7/1.5*1.2;
+ I_off_n[0][60] = 5.32e-7/1.5*1.2;
+ I_off_n[0][70] = 1.02e-6/1.5*1.2;
+ I_off_n[0][80] = 1.62e-6/1.5*1.2;
+ I_off_n[0][90] = 2.73e-6/1.5*1.2;
+ I_off_n[0][100] = 6.1e-6/1.5*1.2;
+ //for 22nm DG HP
+ I_g_on_n[0][0] = 1.81e-9;//A/micron
+ I_g_on_n[0][10] = 1.81e-9;
+ I_g_on_n[0][20] = 1.81e-9;
+ I_g_on_n[0][30] = 1.81e-9;
+ I_g_on_n[0][40] = 1.81e-9;
+ I_g_on_n[0][50] = 1.81e-9;
+ I_g_on_n[0][60] = 1.81e-9;
+ I_g_on_n[0][70] = 1.81e-9;
+ I_g_on_n[0][80] = 1.81e-9;
+ I_g_on_n[0][90] = 1.81e-9;
+ I_g_on_n[0][100] = 1.81e-9;
+
+ //22 nm LSTP DG
+ vdd[1] = 0.8;
+ Lphy[1] = 0.014;
+ Lelec[1] = 0.008;//Lelec is the electrical gate-length.
+ t_ox[1] = 1.1e-3;//micron
+ v_th[1] = 0.40126;//V
+ c_ox[1] = 2.30e-14;//F/micron2
+ mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[1] = 6.64e-2; //V/micron
+ c_g_ideal[1] = 3.22e-16;//F/micron
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 0;//F/micron2
+ I_on_n[1] = 727.6e-6;//A/micron
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
+ long_channel_leakage_reduction[1] = 1/1.89;
+ I_off_n[1][0] = 2.43e-11;
+ I_off_n[1][10] = 4.85e-11;
+ I_off_n[1][20] = 9.68e-11;
+ I_off_n[1][30] = 1.94e-10;
+ I_off_n[1][40] = 3.87e-10;
+ I_off_n[1][50] = 7.73e-10;
+ I_off_n[1][60] = 3.55e-10;
+ I_off_n[1][70] = 3.09e-9;
+ I_off_n[1][80] = 6.19e-9;
+ I_off_n[1][90] = 1.24e-8;
+ I_off_n[1][100]= 2.48e-8;
+
+ I_g_on_n[1][0] = 4.51e-10;//A/micron
+ I_g_on_n[1][10] = 4.51e-10;
+ I_g_on_n[1][20] = 4.51e-10;
+ I_g_on_n[1][30] = 4.51e-10;
+ I_g_on_n[1][40] = 4.51e-10;
+ I_g_on_n[1][50] = 4.51e-10;
+ I_g_on_n[1][60] = 4.51e-10;
+ I_g_on_n[1][70] = 4.51e-10;
+ I_g_on_n[1][80] = 4.51e-10;
+ I_g_on_n[1][90] = 4.51e-10;
+ I_g_on_n[1][100] = 4.51e-10;
+
+ //22 nm LOP
+ vdd[2] = 0.6;
+ Lphy[2] = 0.011;
+ Lelec[2] = 0.00604;//Lelec is the electrical gate-length.
+ t_ox[2] = 0.8e-3;//micron
+ v_th[2] = 0.2315;//V
+ c_ox[2] = 2.87e-14;//F/micron2
+ mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[2] = 1.81e-2; //V/micron
+ c_g_ideal[2] = 3.16e-16;//F/micron
+ c_fringe[2] = 0.08e-15;
+ c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab
+ I_on_n[2] = 916.1e-6;//A/micron
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.73;
+ n_to_p_eff_curr_drv_ratio[2] = 2;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron
+ long_channel_leakage_reduction[2] = 1/2.38;
+
+ I_off_n[2][0] = 1.31e-8;
+ I_off_n[2][10] = 2.60e-8;
+ I_off_n[2][20] = 5.14e-8;
+ I_off_n[2][30] = 1.02e-7;
+ I_off_n[2][40] = 2.02e-7;
+ I_off_n[2][50] = 3.99e-7;
+ I_off_n[2][60] = 7.91e-7;
+ I_off_n[2][70] = 1.09e-6;
+ I_off_n[2][80] = 2.09e-6;
+ I_off_n[2][90] = 4.04e-6;
+ I_off_n[2][100]= 4.48e-6;
+
+ I_g_on_n[2][0] = 2.74e-9;//A/micron
+ I_g_on_n[2][10] = 2.74e-9;
+ I_g_on_n[2][20] = 2.74e-9;
+ I_g_on_n[2][30] = 2.74e-9;
+ I_g_on_n[2][40] = 2.74e-9;
+ I_g_on_n[2][50] = 2.74e-9;
+ I_g_on_n[2][60] = 2.74e-9;
+ I_g_on_n[2][70] = 2.74e-9;
+ I_g_on_n[2][80] = 2.74e-9;
+ I_g_on_n[2][90] = 2.74e-9;
+ I_g_on_n[2][100] = 2.74e-9;
+
+
+
+ if (ram_cell_tech_type == 3)
+ {}
+ else if (ram_cell_tech_type == 4)
+ {
+ //22 nm commodity DRAM cell access transistor technology parameters.
+ //parameters
+ curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
+ //2005 ITRS, the value was about twice the value in 2007 ITRS
+ Lphy[3] = 0.022;//micron
+ Lelec[3] = 0.0181;//micron.
+ curr_v_th_dram_access_transistor = 1;//V
+ width_dram_access_transistor = 0.022;//micron
+ curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always
+ //kept constant. In reality this could perhaps be lower
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6*0.022*0.022;//micron2.
+ curr_asp_ratio_cell_dram = 0.667;
+ curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
+ //kept constant.
+
+ //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
+ curr_vpp = 2.3;//vpp. V
+ t_ox[3] = 3.5e-3;//micron
+ v_th[3] = 1.0;//V
+ c_ox[3] = 9.06e-15;//F/micron2
+ mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs
+ Vdsat[3] = 0.0972; //V/micron
+ c_g_ideal[3] = 1.99e-16;//F/micron
+ c_fringe[3] = 0.053e-15;//F/micron
+ c_junc[3] = 1e-15;//F/micron2
+ I_on_n[3] = 910.5e-6;//A/micron
+ I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used.
+ nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm.
+ //
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.1e-13; //A/micron
+ I_off_n[3][10] = 2.11e-13;
+ I_off_n[3][20] = 3.88e-13;
+ I_off_n[3][30] = 6.9e-13;
+ I_off_n[3][40] = 1.19e-12;
+ I_off_n[3][50] = 1.98e-12;
+ I_off_n[3][60] = 3.22e-12;
+ I_off_n[3][70] = 5.09e-12;
+ I_off_n[3][80] = 7.85e-12;
+ I_off_n[3][90] = 1.18e-11;
+ I_off_n[3][100] = 1.72e-11;
+
+ }
+ else
+ {
+ //some error handler
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7;
+ curr_core_tx_density = 1.25/0.7/0.7;
+ curr_sckt_co_eff = 1.1296;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
+
+ if(tech == 16){
+ //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm
+ //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP.
+ //16 nm HP
+ vdd[0] = 0.7;
+ Lphy[0] = 0.006;//Lphy is the physical gate-length.
+ Lelec[0] = 0.00315;//Lelec is the electrical gate-length.
+ t_ox[0] = 0.5e-3;//micron
+ v_th[0] = 0.1489;//V
+ c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR
+ mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet
+ c_g_ideal[0] = 2.30e-16;//F/micron
+ c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3
+ c_junc[0] = 0;//F/micron2 MASTAR result dynamic
+ I_on_n[0] = 2768.4e-6;//A/micron
+ I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
+ nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current.
+ n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
+ //"Dynamic" tab of Device workspace.
+ gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1/2.655;
+ I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07;
+ I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07;
+ I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07;
+ I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07;
+ I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07;
+ I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07;
+ I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07;
+ I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07;
+ I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07;
+ I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07;
+ I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07;
+ //for 16nm DG HP
+ I_g_on_n[0][0] = 1.07e-9;//A/micron
+ I_g_on_n[0][10] = 1.07e-9;
+ I_g_on_n[0][20] = 1.07e-9;
+ I_g_on_n[0][30] = 1.07e-9;
+ I_g_on_n[0][40] = 1.07e-9;
+ I_g_on_n[0][50] = 1.07e-9;
+ I_g_on_n[0][60] = 1.07e-9;
+ I_g_on_n[0][70] = 1.07e-9;
+ I_g_on_n[0][80] = 1.07e-9;
+ I_g_on_n[0][90] = 1.07e-9;
+ I_g_on_n[0][100] = 1.07e-9;
+
+// //16 nm LSTP DG
+// vdd[1] = 0.8;
+// Lphy[1] = 0.014;
+// Lelec[1] = 0.008;//Lelec is the electrical gate-length.
+// t_ox[1] = 1.1e-3;//micron
+// v_th[1] = 0.40126;//V
+// c_ox[1] = 2.30e-14;//F/micron2
+// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+// Vdsat[1] = 6.64e-2; //V/micron
+// c_g_ideal[1] = 3.22e-16;//F/micron
+// c_fringe[1] = 0.008e-15;
+// c_junc[1] = 0;//F/micron2
+// I_on_n[1] = 727.6e-6;//A/micron
+// I_on_p[1] = I_on_n[1] / 2;
+// nmos_effective_resistance_multiplier = 1.99;
+// n_to_p_eff_curr_drv_ratio[1] = 2;
+// gmp_to_gmn_multiplier[1] = 0.99;
+// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
+// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
+// I_off_n[1][0] = 2.43e-11;
+// I_off_n[1][10] = 4.85e-11;
+// I_off_n[1][20] = 9.68e-11;
+// I_off_n[1][30] = 1.94e-10;
+// I_off_n[1][40] = 3.87e-10;
+// I_off_n[1][50] = 7.73e-10;
+// I_off_n[1][60] = 3.55e-10;
+// I_off_n[1][70] = 3.09e-9;
+// I_off_n[1][80] = 6.19e-9;
+// I_off_n[1][90] = 1.24e-8;
+// I_off_n[1][100]= 2.48e-8;
+//
+// // for 22nm LSTP HP
+// I_g_on_n[1][0] = 4.51e-10;//A/micron
+// I_g_on_n[1][10] = 4.51e-10;
+// I_g_on_n[1][20] = 4.51e-10;
+// I_g_on_n[1][30] = 4.51e-10;
+// I_g_on_n[1][40] = 4.51e-10;
+// I_g_on_n[1][50] = 4.51e-10;
+// I_g_on_n[1][60] = 4.51e-10;
+// I_g_on_n[1][70] = 4.51e-10;
+// I_g_on_n[1][80] = 4.51e-10;
+// I_g_on_n[1][90] = 4.51e-10;
+// I_g_on_n[1][100] = 4.51e-10;
+
+
+ if (ram_cell_tech_type == 3)
+ {}
+ else if (ram_cell_tech_type == 4)
+ {
+ //22 nm commodity DRAM cell access transistor technology parameters.
+ //parameters
+ curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
+ //2005 ITRS, the value was about twice the value in 2007 ITRS
+ Lphy[3] = 0.022;//micron
+ Lelec[3] = 0.0181;//micron.
+ curr_v_th_dram_access_transistor = 1;//V
+ width_dram_access_transistor = 0.022;//micron
+ curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always
+ //kept constant. In reality this could perhaps be lower
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6*0.022*0.022;//micron2.
+ curr_asp_ratio_cell_dram = 0.667;
+ curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
+ //kept constant.
+
+ //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
+ curr_vpp = 2.3;//vpp. V
+ t_ox[3] = 3.5e-3;//micron
+ v_th[3] = 1.0;//V
+ c_ox[3] = 9.06e-15;//F/micron2
+ mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs
+ Vdsat[3] = 0.0972; //V/micron
+ c_g_ideal[3] = 1.99e-16;//F/micron
+ c_fringe[3] = 0.053e-15;//F/micron
+ c_junc[3] = 1e-15;//F/micron2
+ I_on_n[3] = 910.5e-6;//A/micron
+ I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used.
+ nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm.
+ //
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.1e-13; //A/micron
+ I_off_n[3][10] = 2.11e-13;
+ I_off_n[3][20] = 3.88e-13;
+ I_off_n[3][30] = 6.9e-13;
+ I_off_n[3][40] = 1.19e-12;
+ I_off_n[3][50] = 1.98e-12;
+ I_off_n[3][60] = 3.22e-12;
+ I_off_n[3][70] = 5.09e-12;
+ I_off_n[3][80] = 7.85e-12;
+ I_off_n[3][90] = 1.18e-11;
+ I_off_n[3][100] = 1.72e-11;
+
+ }
+ else
+ {
+ //some error handler
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7;
+ curr_core_tx_density = 1.25/0.7/0.7/0.7;
+ curr_sckt_co_eff = 1.1296;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
+
+
+ g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type];
+ g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type];
+ g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type];
+ g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type];
+ g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type];
+ g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type];
+ g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type];
+ g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type];
+ g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type];
+ g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type];
+ g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type];
+ g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type];
+ g_tp.peri_global.n_to_p_eff_curr_drv_ratio
+ += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type];
+ g_tp.peri_global.long_channel_leakage_reduction
+ += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type];
+ g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
+ gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type];
+
+ g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
+ g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
+ g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
+ g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
+ g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
+ g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
+ g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
+ g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
+ g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
+ g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
+ g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
+ g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
+ g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
+ g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+
+ g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell;
+ g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor;
+ g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
+ g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell;
+ g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp;
+ g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
+ g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell;
+ g_tp.vpp += curr_alpha * curr_vpp;
+ g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
+ g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
+ g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor];
+ g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor];
+ g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor];
+ g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor];
+ g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
+ g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
+
+ g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
+ g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
+ g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
+ g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
+ g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
+ g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
+ g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
+ g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
+ g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
+ g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
+ g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
+ g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
+ g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
+ g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+
+ g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram;
+ g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram;
+ g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram;
+ area_cell_dram += curr_alpha * curr_area_cell_dram;
+ asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram;
+
+ g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram;
+ g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram;
+ g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram;
+ area_cell_sram += curr_alpha * curr_area_cell_sram;
+ asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram;
+
+ g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng
+ g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam;
+ g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam;
+ area_cell_cam += curr_alpha * curr_area_cell_cam;
+ asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam;
+
+ //Sense amplifier latch Gm calculation
+ mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type];
+ Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type];
+
+ //Empirical undifferetiated core/FU coefficient
+ g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff;
+ g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density;
+ g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead;
+ g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead;
+ g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff;
+ }
+
+
+ //Currently we are not modeling the resistance/capacitance of poly anywhere.
+ //Continuous function (or date have been processed) does not need linear interpolation
+ g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process
+ g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process
+ g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um;
+ g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um;
+ g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um;
+ g_tp.cell_h_def = 50 * g_ip->F_sz_um;
+ g_tp.w_poly_contact = g_ip->F_sz_um;
+ g_tp.spacing_poly_to_contact = g_ip->F_sz_um;
+ g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um;
+ g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um;
+
+ g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2;
+ g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um;
+ g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process
+ g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process
+ g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process
+ g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process
+ g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_;
+ g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_;
+
+ if (ram_cell_tech_type == comm_dram)
+ {
+ g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um;
+ g_tp.h_dec = 8; // in the unit of memory cell height
+ }
+ else
+ {
+ g_tp.max_w_nmos_dec = g_tp.max_w_nmos_;
+ g_tp.h_dec = 4; // in the unit of memory cell height
+ }
+
+ g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal;
+ g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal;
+ g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal;
+
+ g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal;
+ g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n;
+ //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p;
+
+ g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal;
+
+ double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global;
+ double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch;
+ g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch;
+
+ g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram));
+ g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w;
+ g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram));
+ g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w;
+ g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng
+ g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w;
+
+ g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd;
+ g_tp.sram.Vbitpre = vdd[ram_cell_tech_type];
+ g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng
+ pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+ g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
+ g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
+
+
+ double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES];
+
+ for (iter=0; iter<=1; ++iter)
+ {
+ // linear interpolation
+ if (iter == 0)
+ {
+ tech = tech_lo;
+ if (tech_lo == tech_hi)
+ {
+ curr_alpha = 1;
+ }
+ else
+ {
+ curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
+ }
+ }
+ else
+ {
+ tech = tech_hi;
+ if (tech_lo == tech_hi)
+ {
+ break;
+ }
+ else
+ {
+ curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
+ }
+ }
+
+ if (tech == 180)
+ {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
+ aspect_ratio[0][0] = 2.0;
+ wire_width = wire_pitch[0][0] / 2; //micron
+ wire_thickness = aspect_ratio[0][0] * wire_width;//micron
+ wire_spacing = wire_pitch[0][0] - wire_width;//micron
+ barrier_thickness = 0.017;//micron
+ dishing_thickness = 0;//micron
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
+ ild_thickness[0][0] = 0.75;//micron
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.709;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
+ vert_dielectric_constant[0][0],
+ fringe_cap);//F/micron.
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.4;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.75;//micron
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.709;
+ vert_dielectric_constant[0][1] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.2;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 1.5;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.709;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0]= 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.017;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.75;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 3.038;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
+ vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.75;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 3.038;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
+ vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 1.98;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 3.038;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.18;
+ wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18);
+ wire_r_per_micron[1][3] = 12 / 0.18;
+ }
+ else if (tech == 90)
+ {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
+ aspect_ratio[0][0] = 2.4;
+ wire_width = wire_pitch[0][0] / 2; //micron
+ wire_thickness = aspect_ratio[0][0] * wire_width;//micron
+ wire_spacing = wire_pitch[0][0] - wire_width;//micron
+ barrier_thickness = 0.01;//micron
+ dishing_thickness = 0;//micron
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
+ ild_thickness[0][0] = 0.48;//micron
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.709;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
+ vert_dielectric_constant[0][0],
+ fringe_cap);//F/micron.
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.4;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.48;//micron
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.709;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.7;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.96;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.709;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.008;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.48;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 3.038;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
+ vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.48;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 3.038;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
+ vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 1.1;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 3.038;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.09;
+ wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09);
+ wire_r_per_micron[1][3] = 12 / 0.09;
+ }
+ else if (tech == 65)
+ {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 2.7;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.405;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.303;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.7;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.405;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.303;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.8;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.81;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.303;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.006;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.405;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.734;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.405;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.734;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.77;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.734;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.065;
+ wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065);
+ wire_r_per_micron[1][3] = 12 / 0.065;
+ }
+ else if (tech == 45)
+ {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.315;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.958;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.315;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.958;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.63;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.958;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.004;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.315;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.46;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.315;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.46;
+ vert_dielectric_constant[1][1] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.55;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.46;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.045;
+ wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045);
+ wire_r_per_micron[1][3] = 12 / 0.045;
+ }
+ else if (tech == 32)
+ {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.21;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.664;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.21;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.664;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.42;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.664;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.003;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.21;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.214;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ aspect_ratio[1][1] = 2.0;
+ wire_width = wire_pitch[1][1] / 2;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.21;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.214;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.385;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.214;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.032;//micron
+ wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron
+ wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron
+ }
+ else if (tech == 22)
+ {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.15;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.414;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.15;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.414;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.3;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.414;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+// //*************************
+// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
+// aspect_ratio = 3.0;
+// wire_width = wire_pitch[0][4] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[0][4] - wire_width;
+// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.3;
+// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+//
+// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
+// aspect_ratio = 3.0;
+// wire_width = wire_pitch[0][5] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[0][5] - wire_width;
+// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.3;
+// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+//
+// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
+// aspect_ratio = 3.0;
+// wire_width = wire_pitch[0][6] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[0][6] - wire_width;
+// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.3;
+// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+ //*************************
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.003;
+ dishing_thickness = 0;
+ alpha_scatter = 1.05;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.15;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.104;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.15;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.104;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.275;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.104;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.022;//micron
+ wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron
+ wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron
+
+ //******************
+// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
+// aspect_ratio = 2.2;
+// wire_width = wire_pitch[1][4] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[1][4] - wire_width;
+// dishing_thickness = 0.1 * wire_thickness;
+// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.275;
+// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+//
+// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
+// aspect_ratio = 2.2;
+// wire_width = wire_pitch[1][5] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[1][5] - wire_width;
+// dishing_thickness = 0.1 * wire_thickness;
+// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.275;
+// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+//
+// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
+// aspect_ratio = 2.2;
+// wire_width = wire_pitch[1][6] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[1][6] - wire_width;
+// dishing_thickness = 0.1 * wire_thickness;
+// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.275;
+// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+ }
+
+ else if (tech == 16)
+ {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.108;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.202;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
+ aspect_ratio[0][1] = 3.0;
+ wire_width = wire_pitch[0][1] / 2;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.108;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.202;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.216;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.202;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+// //*************************
+// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
+// aspect_ratio = 3.0;
+// wire_width = wire_pitch[0][4] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[0][4] - wire_width;
+// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.3;
+// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+//
+// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
+// aspect_ratio = 3.0;
+// wire_width = wire_pitch[0][5] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[0][5] - wire_width;
+// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.3;
+// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+//
+// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
+// aspect_ratio = 3.0;
+// wire_width = wire_pitch[0][6] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[0][6] - wire_width;
+// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.3;
+// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+ //*************************
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.002;
+ dishing_thickness = 0;
+ alpha_scatter = 1.05;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.108;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 1.998;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.108;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 1.998;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.198;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 1.998;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.016;//micron
+ wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron
+ wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron
+
+ //******************
+// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
+// aspect_ratio = 2.2;
+// wire_width = wire_pitch[1][4] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[1][4] - wire_width;
+// dishing_thickness = 0.1 * wire_thickness;
+// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.275;
+// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+//
+// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
+// aspect_ratio = 2.2;
+// wire_width = wire_pitch[1][5] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[1][5] - wire_width;
+// dishing_thickness = 0.1 * wire_thickness;
+// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.275;
+// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+//
+// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
+// aspect_ratio = 2.2;
+// wire_width = wire_pitch[1][6] / 2;
+// wire_thickness = aspect_ratio * wire_width;
+// wire_spacing = wire_pitch[1][6] - wire_width;
+// dishing_thickness = 0.1 * wire_thickness;
+// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
+// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+// ild_thickness = 0.275;
+// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
+// fringe_cap);
+ }
+ g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
+ g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
+ g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
+ g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
+ g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
+ g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
+ g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
+ g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
+
+ g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+
+ g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+
+ g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2;
+
+ g_tp.sense_delay += curr_alpha *SENSE_AMP_D;
+ g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P;
+// g_tp.horiz_dielectric_constant += horiz_dielectric_constant;
+// g_tp.vert_dielectric_constant += vert_dielectric_constant;
+// g_tp.aspect_ratio += aspect_ratio;
+// g_tp.miller_value += miller_value;
+// g_tp.ild_thickness += ild_thickness;
+
+ }
+ g_tp.fringe_cap = fringe_cap;
+
+ double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ double p_to_n_sizing_r = pmos_to_nmos_sz_ratio();
+ double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0);
+ double tf = rd * c_load;
+ g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE);
+ double KLOAD = 1;
+ c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0));
+ tf = rd * c_load;
+ g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE);
+}
+
diff --git a/ext/mcpat/cacti/uca.cc b/ext/mcpat/cacti/uca.cc
new file mode 100755
index 000000000..568cd9e44
--- /dev/null
+++ b/ext/mcpat/cacti/uca.cc
@@ -0,0 +1,426 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include <cmath>
+#include <iostream>
+
+#include "uca.h"
+
+UCA::UCA(const DynamicParameter & dyn_p)
+ :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0)
+{
+ int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2));
+ int num_banks_hor_dir = nbanks/num_banks_ver_dir;
+
+ if (dp.use_inp_params)
+ {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ }
+ else
+ {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+ }
+
+ num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
+ num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
+ num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
+ num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
+ num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
+
+ if (!dp.fully_assoc && !dp.pure_cam)
+ {
+
+ if (g_ip->fast_access && dp.is_tag == false)
+ {
+ num_do_b_bank *= g_ip->data_assoc;
+ }
+
+ htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
+ htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
+ htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
+ }
+
+ else
+ {
+
+ htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
+ htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
+ htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
+ htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
+ htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
+ }
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+
+ area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
+// cout<<"area cell"<<area_all_dataramcells<<endl;
+// cout<<area.get_area()<<endl;
+ // delay calculation
+ double inrisetime = 0.0;
+ compute_delays(inrisetime);
+ compute_power_energy();
+}
+
+
+
+UCA::~UCA()
+{
+ delete htree_in_add;
+ delete htree_in_data;
+ delete htree_out_data;
+}
+
+
+
+double UCA::compute_delays(double inrisetime)
+{
+ double outrisetime = bank.compute_delays(inrisetime);
+
+ double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
+ double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
+ delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
+ bank.mat.sa_mux_lev_1_predec->delay +
+ bank.mat.sa_mux_lev_1_dec->delay;
+ delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
+ bank.mat.sa_mux_lev_2_predec->delay +
+ bank.mat.sa_mux_lev_2_dec->delay;
+ double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
+
+ delay_before_subarray_output_driver =
+ MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
+ delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
+ MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
+ delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
+ delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
+ bank.htree_out_data->delay + htree_out_data->delay;
+ access_time = bank.mat.delay_comparator;
+
+ double ram_delay_inside_mat;
+ if (dp.fully_assoc)
+ {
+ //delay of FA contains both CAM tag and RAM data
+ { //delay of CAM
+ ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
+ access_time = htree_in_add->delay + bank.htree_in_add->delay;
+ //delay of fully-associative data array
+ access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
+ }
+ }
+ else
+ {
+ access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
+ }
+
+ if (dp.is_main_mem)
+ {
+ double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
+ double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
+ delay_from_subarray_out_drv_to_out;
+ access_time = t_rcd + cas_latency;
+ }
+
+ double temp;
+
+ if (!dp.fully_assoc)
+ {
+ temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
+ if (dp.is_dram)
+ {
+ temp += bank.mat.delay_writeback; // temp stores random cycle time
+ }
+
+
+ temp = MAX(temp, bank.mat.r_predec->delay);
+ temp = MAX(temp, bank.mat.b_mux_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
+ }
+ else
+ {
+ ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
+ temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
+ + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
+
+ temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
+ temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
+ }
+
+ // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
+ if (g_ip->rpters_in_htree == false)
+ {
+ temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
+ }
+ cycle_time = temp;
+
+ double delay_req_network = max_delay_before_row_decoder;
+ double delay_rep_network = delay_from_subarray_out_drv_to_out;
+ multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
+
+ if (dp.is_main_mem)
+ {
+ multisubbank_interleave_cycle_time = htree_in_add->delay;
+ precharge_delay = htree_in_add->delay +
+ bank.htree_in_add->delay + bank.mat.delay_writeback +
+ bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
+ cycle_time = access_time + precharge_delay;
+ }
+ else
+ {
+ precharge_delay = 0;
+ }
+
+ double dram_array_availability = 0;
+ if (dp.is_dram)
+ {
+ dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
+ }
+
+ return outrisetime;
+}
+
+
+
+// note: currently, power numbers are for a bank of an array
+void UCA::compute_power_energy()
+{
+ bank.compute_power_energy();
+ power = bank.power;
+
+ power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
+ power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
+ if (dp.fully_assoc || dp.pure_cam)
+ power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
+
+ power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
+ htree_in_data->power.readOp.leakage +
+ htree_out_data->power.readOp.leakage;
+
+ power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
+ htree_in_data->power.readOp.gate_leakage +
+ htree_out_data->power.readOp.gate_leakage;
+ if (dp.fully_assoc || dp.pure_cam)
+ {
+ power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
+ power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
+ }
+
+ power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
+ power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
+ power.readOp.leakage += power_routing_to_bank.readOp.leakage;
+ power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
+
+ // calculate total write energy per access
+ power.writeOp.dynamic = power.readOp.dynamic
+ - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
+ - power_routing_to_bank.readOp.dynamic
+ + power_routing_to_bank.writeOp.dynamic
+ + bank.htree_in_data->power.readOp.dynamic
+ - bank.htree_out_data->power.readOp.dynamic;
+
+ if (dp.is_dram == false)
+ {
+ power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ }
+
+ dyn_read_energy_from_closed_page = power.readOp.dynamic;
+ dyn_read_energy_from_open_page = power.readOp.dynamic -
+ (bank.mat.r_predec->power.readOp.dynamic +
+ bank.mat.power_row_decoders.readOp.dynamic +
+ bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
+ bank.mat.power_sa.readOp.dynamic +
+ bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
+
+ dyn_read_energy_remaining_words_in_burst =
+ (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
+ ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
+ bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
+ bank.htree_out_data->power.readOp.dynamic +
+ power_routing_to_bank.readOp.dynamic);
+ dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
+ dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
+
+ activate_energy = htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
+ (bank.mat.r_predec->power.readOp.dynamic +
+ bank.mat.power_row_decoders.readOp.dynamic +
+ bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
+ read_energy = (htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
+ (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
+ bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
+ bank.htree_out_data->power.readOp.dynamic +
+ htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
+ write_energy = (htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
+ htree_in_data->power.readOp.dynamic +
+ bank.htree_in_data->power.readOp.dynamic +
+ (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
+ precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
+ bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_closed_page =
+ (bank.mat.r_predec->power.readOp.leakage +
+ bank.mat.b_mux_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
+ bank.mat.power_row_decoders.readOp.leakage +
+ bank.mat.power_bit_mux_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
+ bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_closed_page +=
+ (bank.mat.r_predec->power.readOp.gate_leakage +
+ bank.mat.b_mux_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ bank.mat.power_row_decoders.readOp.gate_leakage +
+ bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
+ //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_open_page =
+ (bank.mat.r_predec->power.readOp.leakage +
+ bank.mat.b_mux_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
+ bank.mat.power_row_decoders.readOp.leakage +
+ bank.mat.power_bit_mux_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
+ bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_open_page +=
+ (bank.mat.r_predec->power.readOp.gate_leakage +
+ bank.mat.b_mux_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ bank.mat.power_row_decoders.readOp.gate_leakage +
+ bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
+ //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_request_and_reply_networks =
+ power_routing_to_bank.readOp.leakage +
+ bank.htree_in_add->power.readOp.leakage +
+ bank.htree_in_data->power.readOp.leakage +
+ bank.htree_out_data->power.readOp.leakage;
+
+ leak_power_request_and_reply_networks +=
+ power_routing_to_bank.readOp.gate_leakage +
+ bank.htree_in_add->power.readOp.gate_leakage +
+ bank.htree_in_data->power.readOp.gate_leakage +
+ bank.htree_out_data->power.readOp.gate_leakage;
+
+ if (dp.fully_assoc || dp.pure_cam)
+ {
+ leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
+ leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
+ }
+
+
+ if (dp.is_dram)
+ { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
+ refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
+ bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
+ refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
+ refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
+ refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ refresh_power /= dp.dram_refresh_period;
+ }
+
+
+ if (dp.is_tag == false)
+ {
+ power.readOp.dynamic = dyn_read_energy_from_closed_page;
+ power.writeOp.dynamic = dyn_read_energy_from_closed_page
+ - dyn_read_energy_remaining_words_in_burst
+ - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
+ + (power_routing_to_bank.writeOp.dynamic -
+ power_routing_to_bank.readOp.dynamic -
+ bank.htree_out_data->power.readOp.dynamic +
+ bank.htree_in_data->power.readOp.dynamic) *
+ (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
+
+ if (dp.is_dram == false)
+ {
+ power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ }
+ }
+
+ // if DRAM, add refresh power to total leakage
+ if (dp.is_dram)
+ {
+ power.readOp.leakage += refresh_power;
+ }
+
+ // TODO: below should be avoided.
+ /*if (dp.is_main_mem)
+ {
+ power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
+ }*/
+
+ assert(power.readOp.dynamic > 0);
+ assert(power.writeOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
+}
+
diff --git a/ext/mcpat/cacti/uca.h b/ext/mcpat/cacti/uca.h
new file mode 100755
index 000000000..fdab14fc7
--- /dev/null
+++ b/ext/mcpat/cacti/uca.h
@@ -0,0 +1,95 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __UCA_H__
+#define __UCA_H__
+
+#include "area.h"
+#include "bank.h"
+#include "component.h"
+#include "htree2.h"
+#include "parameter.h"
+
+class UCA : public Component
+{
+ public:
+ UCA(const DynamicParameter & dyn_p);
+ ~UCA();
+ double compute_delays(double inrisetime); // returns outrisetime
+ void compute_power_energy();
+
+ DynamicParameter dp;
+ Bank bank;
+
+ Htree2 * htree_in_add;
+ Htree2 * htree_in_data;
+ Htree2 * htree_out_data;
+ Htree2 * htree_in_search;
+ Htree2 * htree_out_search;
+
+ powerDef power_routing_to_bank;
+
+ uint32_t nbanks;
+
+ int num_addr_b_bank;
+ int num_di_b_bank;
+ int num_do_b_bank;
+ int num_si_b_bank;
+ int num_so_b_bank;
+ int RWP, ERP, EWP,SCHP;
+ double area_all_dataramcells;
+
+ double dyn_read_energy_from_closed_page;
+ double dyn_read_energy_from_open_page;
+ double dyn_read_energy_remaining_words_in_burst;
+
+ double refresh_power; // only for DRAM
+ double activate_energy;
+ double read_energy;
+ double write_energy;
+ double precharge_energy;
+ double leak_power_subbank_closed_page;
+ double leak_power_subbank_open_page;
+ double leak_power_request_and_reply_networks;
+
+ double delay_array_to_sa_mux_lev_1_decoder;
+ double delay_array_to_sa_mux_lev_2_decoder;
+ double delay_before_subarray_output_driver;
+ double delay_from_subarray_out_drv_to_out;
+ double access_time;
+ double precharge_delay;
+ double multisubbank_interleave_cycle_time;
+};
+
+#endif
+
diff --git a/ext/mcpat/cacti/wire.cc b/ext/mcpat/cacti/wire.cc
new file mode 100644
index 000000000..742000c85
--- /dev/null
+++ b/ext/mcpat/cacti/wire.cc
@@ -0,0 +1,832 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#include "wire.h"
+#include "cmath"
+// use this constructor to calculate wire stats
+Wire::Wire(
+ enum Wire_type wire_model,
+ double wl,
+ int n,
+ double w_s,
+ double s_s,
+ enum Wire_placement wp,
+ double resistivity,
+ TechnologyParameter::DeviceType *dt
+ ):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s),
+ resistivity(resistivity), deviceType(dt)
+{
+ wire_placement = wp;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
+ in_rise_time = 0;
+ out_rise_time = 0;
+ if (initialized != 1) {
+ cout << "Wire not initialized. Initializing it with default values\n";
+ Wire winit;
+ }
+ calculate_wire_stats();
+ // change everything back to seconds, microns, and Joules
+ repeater_spacing *= 1e6;
+ wire_length *= 1e6;
+ wire_width *= 1e6;
+ wire_spacing *= 1e6;
+ assert(wire_length > 0);
+ assert(power.readOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
+ assert(power.readOp.gate_leakage > 0);
+}
+
+ // the following values are for peripheral global technology
+ // specified in the input config file
+ Component Wire::global;
+ Component Wire::global_5;
+ Component Wire::global_10;
+ Component Wire::global_20;
+ Component Wire::global_30;
+ Component Wire::low_swing;
+
+ int Wire::initialized;
+ double Wire::wire_width_init;
+ double Wire::wire_spacing_init;
+
+
+Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, TechnologyParameter::DeviceType *dt)
+{
+ w_scale = w_s;
+ s_scale = s_s;
+ deviceType = dt;
+ wire_placement = wp;
+ resistivity = resis;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ in_rise_time = 0;
+ out_rise_time = 0;
+
+ switch (wire_placement)
+ {
+ case outside_mat: wire_width = g_tp.wire_outside_mat.pitch; break;
+ case inside_mat : wire_width = g_tp.wire_inside_mat.pitch; break;
+ default: wire_width = g_tp.wire_local.pitch; break;
+ }
+
+ wire_spacing = wire_width;
+
+ wire_width *= (w_scale * 1e-6/2) /* (m) */;
+ wire_spacing *= (s_scale * 1e-6/2) /* (m) */;
+
+ initialized = 1;
+ init_wire();
+ wire_width_init = wire_width;
+ wire_spacing_init = wire_spacing;
+
+ assert(power.readOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
+ assert(power.readOp.gate_leakage > 0);
+}
+
+
+
+Wire::~Wire()
+{
+}
+
+
+
+void
+Wire::calculate_wire_stats()
+{
+
+ if (wire_placement == outside_mat) {
+ wire_width = g_tp.wire_outside_mat.pitch;
+ }
+ else if (wire_placement == inside_mat) {
+ wire_width = g_tp.wire_inside_mat.pitch;
+ }
+ else {
+ wire_width = g_tp.wire_local.pitch;
+ }
+
+ wire_spacing = wire_width;
+
+ wire_width *= (w_scale * 1e-6/2) /* (m) */;
+ wire_spacing *= (s_scale * 1e-6/2) /* (m) */;
+
+
+ if (wt != Low_swing) {
+
+ // delay_optimal_wire();
+
+ if (wt == Global) {
+ delay = global.delay * wire_length;
+ power.readOp.dynamic = global.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global.area.w;
+ repeater_size = global.area.h;
+ area.set_area((wire_length/repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
+ }
+ else if (wt == Global_5) {
+ delay = global_5.delay * wire_length;
+ power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_5.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_5.area.w;
+ repeater_size = global_5.area.h;
+ area.set_area((wire_length/repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
+ }
+ else if (wt == Global_10) {
+ delay = global_10.delay * wire_length;
+ power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_10.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_10.area.w;
+ repeater_size = global_10.area.h;
+ area.set_area((wire_length/repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
+ }
+ else if (wt == Global_20) {
+ delay = global_20.delay * wire_length;
+ power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_20.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_20.area.w;
+ repeater_size = global_20.area.h;
+ area.set_area((wire_length/repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
+ }
+ else if (wt == Global_30) {
+ delay = global_30.delay * wire_length;
+ power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_30.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_30.area.w;
+ repeater_size = global_30.area.h;
+ area.set_area((wire_length/repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
+ }
+ out_rise_time = delay*repeater_spacing/deviceType->Vth;
+ }
+ else if (wt == Low_swing) {
+ low_swing_model ();
+ repeater_spacing = wire_length;
+ repeater_size = 1;
+ }
+ else {
+ assert(0);
+ }
+}
+
+
+
+/*
+ * The fall time of an input signal to the first stage of a circuit is
+ * assumed to be same as the fall time of the output signal of two
+ * inverters connected in series (refer: CACTI 1 Technical report,
+ * section 6.1.3)
+ */
+ double
+Wire::signal_fall_time ()
+{
+
+ /* rise time of inverter 1's output */
+ double rt;
+ /* fall time of inverter 2's output */
+ double ft;
+ double timeconst;
+
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(min_w_pmos, PCH, 1);
+ rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
+ return ft;
+}
+
+
+
+double Wire::signal_rise_time ()
+{
+
+ /* rise time of inverter 1's output */
+ double ft;
+ /* fall time of inverter 2's output */
+ double rt;
+ double timeconst;
+
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(min_w_pmos, PCH, 1);
+ ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
+ return ft; //sec
+}
+
+
+
+/* Wire resistance and capacitance calculations
+ * wire width
+ *
+ * /__/
+ * | |
+ * | | height = ASPECT_RATIO*wire width (ASPECT_RATIO = 2.2, ref: ITRS)
+ * |__|/
+ *
+ * spacing between wires in same level = wire width
+ * spacing between wires in adjacent levels = wire width---this is incorrect,
+ * according to R.Ho's paper and thesis. ILD != wire width
+ *
+ */
+
+double Wire::wire_cap (double len /* in m */, bool call_from_outside)
+{
+ //TODO: this should be consistent with the wire_res in technology file
+ double sidewall, adj, tot_cap;
+ double wire_height;
+ double epsilon0 = 8.8542e-12;
+ double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness;
+
+ switch (wire_placement)
+ {
+ case outside_mat:
+ {
+ aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant;
+ miller_value = g_tp.wire_outside_mat.miller_value;
+ ild_thickness = g_tp.wire_outside_mat.ild_thickness;
+ break;
+ }
+ case inside_mat :
+ {
+ aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant;
+ miller_value = g_tp.wire_inside_mat.miller_value;
+ ild_thickness = g_tp.wire_inside_mat.ild_thickness;
+ break;
+ }
+ default:
+ {
+ aspect_ratio = g_tp.wire_local.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant;
+ miller_value = g_tp.wire_local.miller_value;
+ ild_thickness = g_tp.wire_local.ild_thickness;
+ break;
+ }
+ }
+
+ if (call_from_outside)
+ {
+ wire_width *= 1e-6;
+ wire_spacing *= 1e-6;
+ }
+ wire_height = wire_width/w_scale*aspect_ratio;
+ /*
+ * assuming height does not change. wire_width = width_original*w_scale
+ * So wire_height does not change as wire width increases
+ */
+
+// capacitance between wires in the same level
+// sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
+// * epsilon0;
+
+ sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
+ * epsilon0;
+
+
+ // capacitance between wires in adjacent levels
+ //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0;
+ //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
+
+ adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
+ //Change ild_thickness from micron to M
+
+ //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m
+ tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m
+
+ if (call_from_outside)
+ {
+ wire_width *= 1e6;
+ wire_spacing *= 1e6;
+ }
+ return (tot_cap*len); // (F)
+}
+
+
+ double
+Wire::wire_res (double len /*(in m)*/)
+{
+
+ double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0;
+ //TODO: this should be consistent with the wire_res in technology file
+ //The whole computation should be consistent with the wire_res in technology.cc too!
+
+ switch (wire_placement)
+ {
+ case outside_mat:
+ {
+ aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
+ break;
+ }
+ case inside_mat :
+ {
+ aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
+ break;
+ }
+ default:
+ {
+ aspect_ratio = g_tp.wire_local.aspect_ratio;
+ break;
+ }
+ }
+ return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)*
+ (wire_width-2*barrier_thickness)));
+}
+
+/*
+ * Calculates the delay, power and area of the transmitter circuit.
+ *
+ * The transmitter delay is the sum of nand gate delay, inverter delay
+ * low swing nmos delay, and the wire delay
+ * (ref: Technical report 6)
+ */
+ void
+Wire::low_swing_model()
+{
+ double len = wire_length;
+ double beta = pmos_to_nmos_sz_ratio();
+
+
+ double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time;
+
+ /* Final nmos low swing driver size calculation:
+ * Try to size the driver such that the delay
+ * is less than 8FO4.
+ * If the driver size is greater than
+ * the max allowable size, assume max size for the driver.
+ * In either case, recalculate the delay using
+ * the final driver size assuming slow input with
+ * finite rise time instead of ideal step input
+ *
+ * (ref: Technical report 6)
+ */
+ double cwire = wire_cap(len); /* load capacitance */
+ double rwire = wire_res(len);
+
+#define RES_ADJ (8.6) // Increase in resistance due to low driving vol.
+
+ double driver_res = (-8*g_tp.FO4/(log(0.5) * cwire))/RES_ADJ;
+ double nsize = R_to_w(driver_res, NCH);
+
+ nsize = MIN(nsize, g_tp.max_w_nmos_);
+ nsize = MAX(nsize, g_tp.min_w_nmos_);
+
+ if(rwire*cwire > 8*g_tp.FO4)
+ {
+ nsize = g_tp.max_w_nmos_;
+ }
+
+ // size the inverter appropriately to minimize the transmitter delay
+ // Note - In order to minimize leakage, we are not adding a set of inverters to
+ // bring down delay. Instead, we are sizing the single gate
+ // based on the logical effort.
+ double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0)
+ + gate_C(2*min_w_pmos, 0)));
+ double req_cin = ((2+beta/1+beta)*gate_C(nsize, 0))/st_eff;
+ double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0));
+ inv_size = MAX(inv_size, 1);
+
+ /* nand gate delay */
+ double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1));
+ double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(inv_size*g_tp.min_w_nmos_, 0) +
+ gate_C(inv_size*min_w_pmos, 0);
+
+ double timeconst = res_eq * cap_eq;
+
+ delay = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
+ deviceType->Vth/deviceType->Vdd, RISE);
+ double temp_power = cap_eq*deviceType->Vdd*deviceType->Vdd;
+
+ inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */
+
+ /* Inverter delay:
+ * The load capacitance of this inv depends on
+ * the gate capacitance of the final stage nmos
+ * transistor which in turn depends on nsize
+ */
+ res_eq = tr_R_on(inv_size*min_w_pmos, PCH, 1);
+ cap_eq = drain_C_(inv_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(inv_size*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(nsize, 0);
+ timeconst = res_eq * cap_eq;
+
+ delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
+ deviceType->Vth/deviceType->Vdd, FALL);
+ temp_power += cap_eq*deviceType->Vdd*deviceType->Vdd;
+
+
+ transmitter.delay = delay;
+ transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. model*/
+ transmitter.power.readOp.leakage = deviceType->Vdd *
+ (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
+ 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
+
+ transmitter.power.readOp.gate_leakage = deviceType->Vdd *
+ (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
+ 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
+
+ inputrise = delay / deviceType->Vth;
+
+ /* nmos delay + wire delay */
+ cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2 +
+ nsense * sense_amp_input_cap(); //+receiver cap
+ /*
+ * NOTE: nmos is used as both pull up and pull down transistor
+ * in the transmitter. This is because for low voltage swing, drive
+ * resistance of nmos is less than pmos
+ * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency)
+ */
+ timeconst = (tr_R_on(nsize, NCH, 1)*RES_ADJ) * (cwire +
+ drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2) +
+ rwire*cwire/2 +
+ (tr_R_on(nsize, NCH, 1)*RES_ADJ + rwire) *
+ nsense * sense_amp_input_cap();
+
+ /*
+ * since we are pre-equalizing and overdriving the low
+ * swing wires, the net time constant is less
+ * than the actual value
+ */
+ delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0);
+#define VOL_SWING .1
+ temp_power += cap_eq*VOL_SWING*.400; /* .4v is the over drive voltage */
+ temp_power *= 2; /* differential wire */
+
+ l_wire.delay = delay - transmitter.delay;
+ l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic;
+ l_wire.power.readOp.leakage = deviceType->Vdd*
+ (4* cmos_Isub_leakage(nsize, 0, 1, nmos));
+
+ l_wire.power.readOp.gate_leakage = deviceType->Vdd*
+ (4* cmos_Ig_leakage(nsize, 0, 1, nmos));
+
+ //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
+ // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth;
+
+ delay += g_tp.sense_delay;
+
+ sense_amp.delay = g_tp.sense_delay;
+ out_rise_time = g_tp.sense_delay/(deviceType->Vth);
+ sense_amp.power.readOp.dynamic = g_tp.sense_dy_power;
+ sense_amp.power.readOp.leakage = 0; //FIXME
+ sense_amp.power.readOp.gate_leakage = 0;
+
+ power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic;
+ power.readOp.leakage = transmitter.power.readOp.leakage +
+ l_wire.power.readOp.leakage +
+ sense_amp.power.readOp.leakage;
+ power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage +
+ l_wire.power.readOp.gate_leakage +
+ sense_amp.power.readOp.gate_leakage;
+}
+
+ double
+Wire::sense_amp_input_cap()
+{
+ return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def);
+}
+
+
+void Wire::delay_optimal_wire ()
+{
+ double len = wire_length;
+ //double min_wire_width = wire_width; //m
+ double beta = pmos_to_nmos_sz_ratio();
+ double switching = 0; // switching energy
+ double short_ckt = 0; // short-circuit energy
+ double tc = 0; // time constant
+ // input cap of min sized driver
+ double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0);
+
+ // output parasitic capacitance of
+ // the min. sized driver
+ double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
+ // drive resistance
+ double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
+ tr_R_on(min_w_pmos, PCH, 1))/2;
+ double wr = wire_res(len); //ohm
+
+ // wire cap /m
+ double wc = wire_cap(len);
+
+ // size the repeater such that the delay of the wire is minimum
+ double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel
+
+ // calc the optimum spacing between the repeaters (m)
+
+ repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap)/
+ ((wr/len)*(wc/len)));
+ repeater_size = repeater_scaling;
+
+ switching = (repeater_scaling * (input_cap + out_cap) +
+ repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
+
+ tc = out_res * (input_cap + out_cap) +
+ out_res * wc/len * repeater_spacing/repeater_scaling +
+ wr/len * repeater_spacing * input_cap * repeater_scaling +
+ 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing;
+
+ delay = 0.693 * tc * len/repeater_spacing;
+
+#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */
+ short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
+ repeater_scaling * tc;
+
+ area.set_area((len/repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_scaling,
+ g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def));
+ power.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt));
+ power.readOp.leakage = ((len/repeater_spacing)*
+ deviceType->Vdd*
+ cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
+ power.readOp.gate_leakage = ((len/repeater_spacing)*
+ deviceType->Vdd*
+ cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
+}
+
+
+
+// calculate power/delay values for wires with suboptimal repeater sizing/spacing
+void
+Wire::init_wire(){
+ wire_length = 1;
+ delay_optimal_wire();
+ double sp, si;
+ powerDef pow;
+ si = repeater_size;
+ sp = repeater_spacing;
+ sp *= 1e6; // in microns
+
+ double i, j, del;
+ repeated_wire.push_back(Component());
+ for (j=sp; j < 4*sp; j+=100) {
+ for (i = si; i > 1; i--) {
+ pow = wire_model(j*1e-6, i, &del);
+ if (j == sp && i == si) {
+ global.delay = del;
+ global.power = pow;
+ global.area.h = si;
+ global.area.w = sp*1e-6; // m
+ }
+// cout << "Repeater size - "<< i <<
+// " Repeater spacing - " << j <<
+// " Delay - " << del <<
+// " PowerD - " << pow.readOp.dynamic <<
+// " PowerL - " << pow.readOp.leakage <<endl;
+ repeated_wire.back().delay = del;
+ repeated_wire.back().power.readOp = pow.readOp;
+ repeated_wire.back().area.w = j*1e-6; //m
+ repeated_wire.back().area.h = i;
+ repeated_wire.push_back(Component());
+
+ }
+ }
+ repeated_wire.pop_back();
+ update_fullswing();
+ Wire *l_wire = new Wire(Low_swing, 0.001/* 1 mm*/, 1);
+ low_swing.delay = l_wire->delay;
+ low_swing.power = l_wire->power;
+ delete l_wire;
+}
+
+
+
+void Wire::update_fullswing()
+{
+
+ list<Component>::iterator citer;
+ double del[4];
+ del[3] = this->global.delay + this->global.delay*.3;
+ del[2] = global.delay + global.delay*.2;
+ del[1] = global.delay + global.delay*.1;
+ del[0] = global.delay + global.delay*.05;
+ double threshold;
+ double ncost;
+ double cost;
+ int i = 4;
+ while (i>0) {
+ threshold = del[i-1];
+ cost = BIGNUM;
+ for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++)
+ {
+ if (citer->delay > threshold) {
+ citer = repeated_wire.erase(citer);
+ citer --;
+ }
+ else {
+ ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic +
+ citer->power.readOp.leakage/global.power.readOp.leakage;
+ if(ncost < cost)
+ {
+ cost = ncost;
+ if (i == 4) {
+ global_30.delay = citer->delay;
+ global_30.power = citer->power;
+ global_30.area = citer->area;
+ }
+ else if (i==3) {
+ global_20.delay = citer->delay;
+ global_20.power = citer->power;
+ global_20.area = citer->area;
+ }
+ else if(i==2) {
+ global_10.delay = citer->delay;
+ global_10.power = citer->power;
+ global_10.area = citer->area;
+ }
+ else if(i==1) {
+ global_5.delay = citer->delay;
+ global_5.power = citer->power;
+ global_5.area = citer->area;
+ }
+ }
+ }
+ }
+ i--;
+ }
+}
+
+
+
+powerDef Wire::wire_model (double space, double size, double *delay)
+{
+ powerDef ptemp;
+ double len = 1;
+ //double min_wire_width = wire_width; //m
+ double beta = pmos_to_nmos_sz_ratio();
+ // switching energy
+ double switching = 0;
+ // short-circuit energy
+ double short_ckt = 0;
+ // time constant
+ double tc = 0;
+ // input cap of min sized driver
+ double input_cap = gate_C (g_tp.min_w_nmos_ +
+ min_w_pmos, 0);
+
+ // output parasitic capacitance of
+ // the min. sized driver
+ double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
+ // drive resistance
+ double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
+ tr_R_on(min_w_pmos, PCH, 1))/2;
+ double wr = wire_res(len); //ohm
+
+ // wire cap /m
+ double wc = wire_cap(len);
+
+ repeater_spacing = space;
+ repeater_size = size;
+
+ switching = (repeater_size * (input_cap + out_cap) +
+ repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
+
+ tc = out_res * (input_cap + out_cap) +
+ out_res * wc/len * repeater_spacing/repeater_size +
+ wr/len * repeater_spacing * out_cap * repeater_size +
+ 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing;
+
+ *delay = 0.693 * tc * len/repeater_spacing;
+
+#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */
+ short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
+ repeater_size * tc;
+
+ ptemp.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt));
+ ptemp.readOp.leakage = ((len/repeater_spacing)*
+ deviceType->Vdd*
+ cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
+
+ ptemp.readOp.gate_leakage = ((len/repeater_spacing)*
+ deviceType->Vdd*
+ cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
+
+ return ptemp;
+}
+
+void
+Wire::print_wire()
+{
+
+ cout << "\nWire Properties:\n\n";
+ cout << " Delay Optimal\n\tRepeater size - "<< global.area.h <<
+ " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global.delay*1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global.power.readOp.dynamic *1e6<< " (nJ/mm)"
+ " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n";
+ cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
+ cout <<endl;
+
+ cout << " 5% Overhead\n\tRepeater size - "<< global_5.area.h <<
+ " \n\tRepeater spacing - " << global_5.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_5.delay *1e6<< " (ns/mm)"
+ " \n\tPowerD - " << global_5.power.readOp.dynamic *1e6<< " (nJ/mm)"
+ " \n\tPowerL - " << global_5.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_5.power.readOp.gate_leakage << " (mW/mm)\n";
+ cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
+ cout <<endl;
+ cout << " 10% Overhead\n\tRepeater size - "<< global_10.area.h <<
+ " \n\tRepeater spacing - " << global_10.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_10.delay *1e6<< " (ns/mm)"
+ " \n\tPowerD - " << global_10.power.readOp.dynamic *1e6<< " (nJ/mm)"
+ " \n\tPowerL - " << global_10.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_10.power.readOp.gate_leakage << " (mW/mm)\n";
+ cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
+ cout <<endl;
+ cout << " 20% Overhead\n\tRepeater size - "<< global_20.area.h <<
+ " \n\tRepeater spacing - " << global_20.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_20.delay *1e6<< " (ns/mm)"
+ " \n\tPowerD - " << global_20.power.readOp.dynamic *1e6<< " (nJ/mm)"
+ " \n\tPowerL - " << global_20.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_20.power.readOp.gate_leakage << " (mW/mm)\n";
+ cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
+ cout <<endl;
+ cout << " 30% Overhead\n\tRepeater size - "<< global_30.area.h <<
+ " \n\tRepeater spacing - " << global_30.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_30.delay *1e6<< " (ns/mm)"
+ " \n\tPowerD - " << global_30.power.readOp.dynamic *1e6<< " (nJ/mm)"
+ " \n\tPowerL - " << global_30.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_30.power.readOp.gate_leakage << " (mW/mm)\n";
+ cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
+ cout <<endl;
+ cout << " Low-swing wire (1 mm) - Note: Unlike repeated wires, \n\tdelay and power "
+ "values of low-swing wires do not\n\thave a linear relationship with length." <<
+ " \n\tdelay - " << low_swing.delay *1e9<< " (ns)"
+ " \n\tpowerD - " << low_swing.power.readOp.dynamic *1e9<< " (nJ)"
+ " \n\tPowerL - " << low_swing.power.readOp.leakage << " (mW)"
+ " \n\tPowerLgate - " << low_swing.power.readOp.gate_leakage << " (mW)\n";
+ cout << "\tWire width - " <<wire_width_init * 2 /* differential */<< " microns\n";
+ cout << "\tWire spacing - " <<wire_spacing_init * 2 /* differential */<< " microns\n";
+ cout <<endl;
+ cout <<endl;
+
+}
+
diff --git a/ext/mcpat/cacti/wire.h b/ext/mcpat/cacti/wire.h
new file mode 100644
index 000000000..51d55afff
--- /dev/null
+++ b/ext/mcpat/cacti/wire.h
@@ -0,0 +1,124 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __WIRE_H__
+#define __WIRE_H__
+
+#include <iostream>
+#include <list>
+
+#include "assert.h"
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "parameter.h"
+
+class Wire : public Component
+{
+ public:
+ Wire(enum Wire_type wire_model, double len /* in u*/,
+ int nsense = 1/* no. of sense amps connected to the low-swing wire */,
+ double width_scaling = 1,
+ double spacing_scaling = 1,
+ enum Wire_placement wire_placement = outside_mat,
+ double resistivity = CU_RESISTIVITY,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ ~Wire();
+
+ Wire( double width_scaling = 1,
+ double spacing_scaling = 1,
+ enum Wire_placement wire_placement = outside_mat,
+ double resistivity = CU_RESISTIVITY,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
+ ); // should be used only once for initializing static members
+ void init_wire();
+
+ void calculate_wire_stats();
+ void delay_optimal_wire();
+ double wire_cap(double len, bool call_from_outside=false);
+ double wire_res(double len);
+ void low_swing_model();
+ double signal_fall_time();
+ double signal_rise_time();
+ double sense_amp_input_cap();
+
+ enum Wire_type wt;
+ double wire_spacing;
+ double wire_width;
+ enum Wire_placement wire_placement;
+ double repeater_size;
+ double repeater_spacing;
+ double wire_length;
+ double in_rise_time, out_rise_time;
+
+ void set_in_rise_time(double rt)
+ {
+ in_rise_time = rt;
+ }
+ static Component global;
+ static Component global_5;
+ static Component global_10;
+ static Component global_20;
+ static Component global_30;
+ static Component low_swing;
+ static double wire_width_init;
+ static double wire_spacing_init;
+ void print_wire();
+
+ private:
+
+ int nsense; // no. of sense amps connected to a low-swing wire if it
+ // is broadcasting data to multiple destinations
+ // width and spacing scaling factor can be used
+ // to model low level wires or special
+ // fat wires
+ double w_scale, s_scale;
+ double resistivity;
+ powerDef wire_model (double space, double size, double *delay);
+ list <Component> repeated_wire;
+ void update_fullswing();
+ static int initialized;
+
+
+ //low-swing
+ Component transmitter;
+ Component l_wire;
+ Component sense_amp;
+
+ double min_w_pmos;
+
+ TechnologyParameter::DeviceType *deviceType;
+
+};
+
+#endif