summaryrefslogtreecommitdiff
path: root/ext/mcpat/cacti/Ucache.cc
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mcpat/cacti/Ucache.cc')
-rw-r--r--ext/mcpat/cacti/Ucache.cc1404
1 files changed, 711 insertions, 693 deletions
diff --git a/ext/mcpat/cacti/Ucache.cc b/ext/mcpat/cacti/Ucache.cc
index f3e1227df..ada9c5aa1 100644
--- a/ext/mcpat/cacti/Ucache.cc
+++ b/ext/mcpat/cacti/Ucache.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -54,176 +55,163 @@ using namespace std;
const uint32_t nthreads = NTHREADS;
-void min_values_t::update_min_values(const min_values_t * val)
-{
- min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
- min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
- min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
- min_area = (min_area > val->min_area) ? val->min_area : min_area;
- min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
+void min_values_t::update_min_values(const min_values_t * val) {
+ min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
+ min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
+ min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
+ min_area = (min_area > val->min_area) ? val->min_area : min_area;
+ min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
}
-void min_values_t::update_min_values(const uca_org_t & res)
-{
- min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
- min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
- min_area = (min_area > res.area) ? res.area : min_area;
- min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
+void min_values_t::update_min_values(const uca_org_t & res) {
+ min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
+ min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
+ min_area = (min_area > res.area) ? res.area : min_area;
+ min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
}
-void min_values_t::update_min_values(const nuca_org_t * res)
-{
- min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
- min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
- min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
- min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
+void min_values_t::update_min_values(const nuca_org_t * res) {
+ min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
+ min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
+ min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
+ min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
}
-void min_values_t::update_min_values(const mem_array * res)
-{
- min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
- min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
- min_area = (min_area > res->area) ? res->area : min_area;
- min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
+void min_values_t::update_min_values(const mem_array * res) {
+ min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
+ min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
+ min_area = (min_area > res->area) ? res->area : min_area;
+ min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
}
-void * calc_time_mt_wrapper(void * void_obj)
-{
- calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
- uint32_t tid = calc_obj->tid;
- list<mem_array *> & data_arr = calc_obj->data_arr;
- list<mem_array *> & tag_arr = calc_obj->tag_arr;
- bool is_tag = calc_obj->is_tag;
- bool pure_ram = calc_obj->pure_ram;
- bool pure_cam = calc_obj->pure_cam;
- bool is_main_mem = calc_obj->is_main_mem;
- double Nspd_min = calc_obj->Nspd_min;
- min_values_t * data_res = calc_obj->data_res;
- min_values_t * tag_res = calc_obj->tag_res;
-
- data_arr.clear();
- data_arr.push_back(new mem_array);
- tag_arr.clear();
- tag_arr.push_back(new mem_array);
-
- uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
- uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
- uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
- uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
-
-
- bool is_valid_partition;
- int wt_min, wt_max;
-
- if (g_ip->force_wiretype) {
- if (g_ip->wt == 0) {
- wt_min = Low_swing;
- wt_max = Low_swing;
- }
- else {
- wt_min = Global;
- wt_max = Low_swing-1;
- }
- }
- else {
- wt_min = Global;
- wt_max = Low_swing;
- }
+void * calc_time_mt_wrapper(void * void_obj) {
+ calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
+ uint32_t tid = calc_obj->tid;
+ list<mem_array *> & data_arr = calc_obj->data_arr;
+ list<mem_array *> & tag_arr = calc_obj->tag_arr;
+ bool is_tag = calc_obj->is_tag;
+ bool pure_ram = calc_obj->pure_ram;
+ bool pure_cam = calc_obj->pure_cam;
+ bool is_main_mem = calc_obj->is_main_mem;
+ double Nspd_min = calc_obj->Nspd_min;
+ min_values_t * data_res = calc_obj->data_res;
+ min_values_t * tag_res = calc_obj->tag_res;
- for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
- {
- for (int wr = wt_min; wr <= wt_max; wr++)
- {
- for (uint32_t iter = tid; iter < niter; iter += nthreads)
- {
- // reconstruct Ndwl, Ndbl, Ndcm
- unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
- unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter);
- unsigned int Ndcm = 1 << (iter % Ndcm_niter);
- for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
- {
- for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
- {
- //for debuging
- if (g_ip->force_cache_config && is_tag == false)
- {
- wr = g_ip->wt;
- Ndwl = g_ip->ndwl;
- Ndbl = g_ip->ndbl;
- Ndcm = g_ip->ndcm;
- if(g_ip->nspd != 0) {
- Nspd = g_ip->nspd;
- }
- if(g_ip->ndsam1 != 0) {
- Ndsam_lev_1 = g_ip->ndsam1;
- Ndsam_lev_2 = g_ip->ndsam2;
- }
- }
-
- if (is_tag == true)
- {
- is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
- Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
- tag_arr.back(), 0, NULL, NULL,
- is_main_mem);
- }
- // If it's a fully-associative cache, the data array partition parameters are identical to that of
- // the tag array, so compute data array partition properties also here.
- if (is_tag == false || g_ip->fully_assoc)
- {
- is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
- Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
- data_arr.back(), 0, NULL, NULL,
- is_main_mem);
- }
-
- if (is_valid_partition)
- {
- if (is_tag == true)
- {
- tag_arr.back()->wt = (enum Wire_type) wr;
- tag_res->update_min_values(tag_arr.back());
- tag_arr.push_back(new mem_array);
- }
- if (is_tag == false || g_ip->fully_assoc)
- {
- data_arr.back()->wt = (enum Wire_type) wr;
- data_res->update_min_values(data_arr.back());
- data_arr.push_back(new mem_array);
- }
- }
+ data_arr.clear();
+ data_arr.push_back(new mem_array);
+ tag_arr.clear();
+ tag_arr.push_back(new mem_array);
+
+ uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
+ uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
+
+
+ bool is_valid_partition;
+ int wt_min, wt_max;
+
+ if (g_ip->force_wiretype) {
+ if (g_ip->wt == 0) {
+ wt_min = Low_swing;
+ wt_max = Low_swing;
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing - 1;
+ }
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing;
+ }
- if (g_ip->force_cache_config && is_tag == false)
- {
- wr = wt_max;
- iter = niter;
- if(g_ip->nspd != 0) {
- Nspd = MAXDATASPD;
- }
- if (g_ip->ndsam1 != 0) {
- Ndsam_lev_1 = MAX_COL_MUX+1;
- Ndsam_lev_2 = MAX_COL_MUX+1;
+ for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) {
+ for (int wr = wt_min; wr <= wt_max; wr++) {
+ for (uint32_t iter = tid; iter < niter; iter += nthreads) {
+ // reconstruct Ndwl, Ndbl, Ndcm
+ unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
+ unsigned int Ndbl = 1 << ((iter / (Ndcm_niter)) % Ndbl_niter);
+ unsigned int Ndcm = 1 << (iter % Ndcm_niter);
+ for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX;
+ Ndsam_lev_1 *= 2) {
+ for (unsigned int Ndsam_lev_2 = 1;
+ Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) {
+ //for debuging
+ if (g_ip->force_cache_config && is_tag == false) {
+ wr = g_ip->wt;
+ Ndwl = g_ip->ndwl;
+ Ndbl = g_ip->ndbl;
+ Ndcm = g_ip->ndcm;
+ if (g_ip->nspd != 0) {
+ Nspd = g_ip->nspd;
+ }
+ if (g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = g_ip->ndsam1;
+ Ndsam_lev_2 = g_ip->ndsam2;
+ }
+ }
+
+ if (is_tag == true) {
+ is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ tag_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+ // If it's a fully-associative cache, the data array partition parameters are identical to that of
+ // the tag array, so compute data array partition properties also here.
+ if (is_tag == false || g_ip->fully_assoc) {
+ is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ data_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+
+ if (is_valid_partition) {
+ if (is_tag == true) {
+ tag_arr.back()->wt = (enum Wire_type) wr;
+ tag_res->update_min_values(tag_arr.back());
+ tag_arr.push_back(new mem_array);
+ }
+ if (is_tag == false || g_ip->fully_assoc) {
+ data_arr.back()->wt = (enum Wire_type) wr;
+ data_res->update_min_values(data_arr.back());
+ data_arr.push_back(new mem_array);
+ }
+ }
+
+ if (g_ip->force_cache_config && is_tag == false) {
+ wr = wt_max;
+ iter = niter;
+ if (g_ip->nspd != 0) {
+ Nspd = MAXDATASPD;
+ }
+ if (g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = MAX_COL_MUX + 1;
+ Ndsam_lev_2 = MAX_COL_MUX + 1;
+ }
+ }
+ }
}
}
- }
}
- }
}
- }
- delete data_arr.back();
- delete tag_arr.back();
- data_arr.pop_back();
- tag_arr.pop_back();
+ delete data_arr.back();
+ delete tag_arr.back();
+ data_arr.pop_back();
+ tag_arr.pop_back();
- pthread_exit(NULL);
+#ifndef DEBUG
+ pthread_exit(NULL);
+#else
+ return NULL;
+#endif
}
@@ -242,423 +230,448 @@ bool calculate_time(
int flag_results_populate,
results_mem_array *ptr_results,
uca_org_t *ptr_fin_res,
- bool is_main_mem)
-{
- DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
+ bool is_main_mem) {
+ DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
- if (dyn_p.is_valid == false)
- {
- return false;
- }
+ if (dyn_p.is_valid == false) {
+ return false;
+ }
- UCA * uca = new UCA(dyn_p);
+ UCA * uca = new UCA(dyn_p);
- if (flag_results_populate)
- { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables
- }
- else
- {
- int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
- int num_mats = uca->bank.dp.num_mats;
- bool is_fa = uca->bank.dp.fully_assoc;
- bool pure_cam = uca->bank.dp.pure_cam;
+ //For the final solution, populate the ptr_results data structure
+ //-- TODO: copy only necessary variables
+ if (flag_results_populate) {
+ } else {
+ int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
+ int num_mats = uca->bank.dp.num_mats;
+ bool is_fa = uca->bank.dp.fully_assoc;
+ bool pure_cam = uca->bank.dp.pure_cam;
ptr_array->Ndwl = Ndwl;
- ptr_array->Ndbl = Ndbl;
- ptr_array->Nspd = Nspd;
- ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
- ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
- ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
- ptr_array->access_time = uca->access_time;
- ptr_array->cycle_time = uca->cycle_time;
- ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
- ptr_array->area_ram_cells = uca->area_all_dataramcells;
- ptr_array->area = uca->area.get_area();
- ptr_array->height = uca->area.h;
- ptr_array->width = uca->area.w;
- ptr_array->mat_height = uca->bank.mat.area.h;
- ptr_array->mat_length = uca->bank.mat.area.w;
- ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
- ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
- ptr_array->power = uca->power;
- ptr_array->delay_senseamp_mux_decoder =
- MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
- uca->delay_array_to_sa_mux_lev_2_decoder);
- ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver;
- ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;
-
- ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
- ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
- ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
- ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
- ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
- ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
- ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
- ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
- ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
- ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
-
- ptr_array->all_banks_height = uca->area.h;
- ptr_array->all_banks_width = uca->area.w;
- ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());
-
- ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
- ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
- ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
-// cout<<"power_data_input_htree"<<uca->bank.htree_in_data->power.readOp.leakage<<endl;
- ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
-// cout<<"power_data_output_htree"<<uca->bank.htree_out_data->power.readOp.leakage<<endl;
- ptr_array->power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
- ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
- ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
- ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
- ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power;
- ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
- ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
- ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
- ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bitlines = uca->bank.mat.power_bitline;
- ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_sense_amps = uca->bank.mat.power_sa;
- ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
- ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
- ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_comparators = uca->bank.mat.power_comparator;
- ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
-
-// cout << " num of mats: " << dyn_p.num_mats << endl;
- if (is_fa || pure_cam)
- {
- ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
-// cout<<"power_htree_in_search"<<uca->bank.htree_in_search->power.readOp.leakage<<endl;
- ptr_array->power_htree_out_search = uca->bank.htree_out_search->power;
-// cout<<"power_htree_out_search"<<uca->bank.htree_out_search->power.readOp.leakage<<endl;
- ptr_array->power_searchline = uca->bank.mat.power_searchline;
-// cout<<"power_searchlineh"<<uca->bank.mat.power_searchline.readOp.leakage<<endl;
- ptr_array->power_searchline.searchOp.dynamic *= num_mats;
- ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
- ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
- ptr_array->power_matchlines = uca->bank.mat.power_matchline;
- ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
- ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
- ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
- ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
-// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.power_matchline.searchOp.leakage<<endl;
- }
-
- ptr_array->activate_energy = uca->activate_energy;
- ptr_array->read_energy = uca->read_energy;
- ptr_array->write_energy = uca->write_energy;
- ptr_array->precharge_energy = uca->precharge_energy;
- ptr_array->refresh_power = uca->refresh_power;
- ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
- ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
- ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;
-
- ptr_array->precharge_delay = uca->precharge_delay;
-
-
-// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.<<endl;
-//
-// if (!(is_fa || pure_cam))
-// {
-// cout << " num of cols: " << dyn_p.num_c_subarray << endl;
-// }
-// else if (is_fa)
-// {
-// cout << " num of cols: " << dyn_p.tag_num_c_subarray+ dyn_p.data_num_c_subarray<< endl;
-// } else
-// cout << " num of cols: " << dyn_p.tag_num_c_subarray<< endl;
-// cout << uca->bank.mat.subarray.get_total_cell_area()<<endl;
- }
+ ptr_array->Ndbl = Ndbl;
+ ptr_array->Nspd = Nspd;
+ ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
+ ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
+ ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
+ ptr_array->access_time = uca->access_time;
+ ptr_array->cycle_time = uca->cycle_time;
+ ptr_array->multisubbank_interleave_cycle_time =
+ uca->multisubbank_interleave_cycle_time;
+ ptr_array->area_ram_cells = uca->area_all_dataramcells;
+ ptr_array->area = uca->area.get_area();
+ ptr_array->height = uca->area.h;
+ ptr_array->width = uca->area.w;
+ ptr_array->mat_height = uca->bank.mat.area.h;
+ ptr_array->mat_length = uca->bank.mat.area.w;
+ ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
+ ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
+ ptr_array->power = uca->power;
+ ptr_array->delay_senseamp_mux_decoder =
+ MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
+ uca->delay_array_to_sa_mux_lev_2_decoder);
+ ptr_array->delay_before_subarray_output_driver =
+ uca->delay_before_subarray_output_driver;
+ ptr_array->delay_from_subarray_output_driver_to_output =
+ uca->delay_from_subarray_out_drv_to_out;
+
+ ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
+ ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
+ ptr_array->delay_row_predecode_driver_and_block =
+ uca->bank.mat.r_predec->delay;
+ ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
+ ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
+ ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
+ ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
+ ptr_array->delay_subarray_output_driver =
+ uca->bank.mat.delay_subarray_out_drv_htree;
+ ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
+ ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
+
+ ptr_array->all_banks_height = uca->area.h;
+ ptr_array->all_banks_width = uca->area.w;
+ ptr_array->area_efficiency = uca->area_all_dataramcells * 100 /
+ (uca->area.get_area());
+
+ ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
+ ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
+ ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
+ ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
+
+ ptr_array->power_row_predecoder_drivers =
+ uca->bank.mat.r_predec->driver_power;
+ ptr_array->power_row_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_row_predecoder_blocks =
+ uca->bank.mat.r_predec->block_power;
+ ptr_array->power_row_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
+ ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_drivers =
+ uca->bank.mat.b_mux_predec->driver_power;
+ ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_blocks =
+ uca->bank.mat.b_mux_predec->block_power;
+ ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
+ ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers =
+ uca->bank.mat.sa_mux_lev_1_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks =
+ uca->bank.mat.sa_mux_lev_1_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_decoders =
+ uca->bank.mat.power_sa_mux_lev_1_decoders;
+ ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers =
+ uca->bank.mat.sa_mux_lev_2_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks =
+ uca->bank.mat.sa_mux_lev_2_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_decoders =
+ uca->bank.mat.power_sa_mux_lev_2_decoders;
+ ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bitlines = uca->bank.mat.power_bitline;
+ ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_sense_amps = uca->bank.mat.power_sa;
+ ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_prechg_eq_drivers =
+ uca->bank.mat.power_bl_precharge_eq_drv;
+ ptr_array->power_prechg_eq_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_output_drivers_at_subarray =
+ uca->bank.mat.power_subarray_out_drv;
+ ptr_array->power_output_drivers_at_subarray.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_comparators = uca->bank.mat.power_comparator;
+ ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ if (is_fa || pure_cam) {
+ ptr_array->power_htree_in_search =
+ uca->bank.htree_in_search->power;
+ ptr_array->power_htree_out_search =
+ uca->bank.htree_out_search->power;
+ ptr_array->power_searchline = uca->bank.mat.power_searchline;
+ ptr_array->power_searchline.searchOp.dynamic *= num_mats;
+ ptr_array->power_searchline_precharge =
+ uca->bank.mat.power_searchline_precharge;
+ ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchlines = uca->bank.mat.power_matchline;
+ ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_precharge =
+ uca->bank.mat.power_matchline_precharge;
+ ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_to_wordline_drv =
+ uca->bank.mat.power_ml_to_ram_wl_drv;
+ }
+
+ ptr_array->activate_energy = uca->activate_energy;
+ ptr_array->read_energy = uca->read_energy;
+ ptr_array->write_energy = uca->write_energy;
+ ptr_array->precharge_energy = uca->precharge_energy;
+ ptr_array->refresh_power = uca->refresh_power;
+ ptr_array->leak_power_subbank_closed_page =
+ uca->leak_power_subbank_closed_page;
+ ptr_array->leak_power_subbank_open_page =
+ uca->leak_power_subbank_open_page;
+ ptr_array->leak_power_request_and_reply_networks =
+ uca->leak_power_request_and_reply_networks;
+
+ ptr_array->precharge_delay = uca->precharge_delay;
+ }
- delete uca;
- return true;
+ delete uca;
+ return true;
}
-bool check_uca_org(uca_org_t & u, min_values_t *minval)
-{
- if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
- return false;
- }
- if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev) {
- return false;
- }
- if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev) {
- return false;
- }
- if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev) {
- return false;
- }
- if (((u.area - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev) {
- return false;
- }
- return true;
+bool check_uca_org(uca_org_t & u, min_values_t *minval) {
+ if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
+ g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage) /
+ minval->min_leakage) * 100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area) / minval->min_area)*100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
}
-bool check_mem_org(mem_array & u, const min_values_t *minval)
-{
- if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
- return false;
- }
- if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev) {
- return false;
- }
- if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev) {
- return false;
- }
- if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev) {
- return false;
- }
- if (((u.area - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev) {
- return false;
- }
- return true;
+bool check_mem_org(mem_array & u, const min_values_t *minval) {
+ if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
+ g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage) /
+ minval->min_leakage) * 100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area) / minval->min_area) * 100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
}
-void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & ulist)
-{
- double cost = 0;
- double min_cost = BIGNUM;
- float d, a, dp, lp, c;
-
- dp = g_ip->dynamic_power_wt;
- lp = g_ip->leakage_power_wt;
- a = g_ip->area_wt;
- d = g_ip->delay_wt;
- c = g_ip->cycle_time_wt;
+void find_optimal_uca(uca_org_t *res, min_values_t * minval,
+ list<uca_org_t> & ulist) {
+ double cost = 0;
+ double min_cost = BIGNUM;
+ float d, a, dp, lp, c;
- if (ulist.empty() == true)
- {
- cout << "ERROR: no valid cache organizations found" << endl;
- exit(0);
- }
+ dp = g_ip->dynamic_power_wt;
+ lp = g_ip->leakage_power_wt;
+ a = g_ip->area_wt;
+ d = g_ip->delay_wt;
+ c = g_ip->cycle_time_wt;
- for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
- {
- if (g_ip->ed == 1)
- {
- cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost)
- {
- min_cost = cost;
- *res = (*(niter));
- }
- }
- else if (g_ip->ed == 2)
- {
- cost = ((niter)->access_time/minval->min_delay)*
- ((niter)->access_time/minval->min_delay)*
- ((niter)->power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost)
- {
- min_cost = cost;
- *res = (*(niter));
- }
+ if (ulist.empty() == true) {
+ cout << "ERROR: no valid cache organizations found" << endl;
+ exit(0);
}
- else
- {
- /*
- * check whether the current organization
- * meets the input deviation constraints
- */
- bool v = check_uca_org(*niter, minval);
- //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
-
- if (v)
- {
- cost = (d * ((niter)->access_time/minval->min_delay) +
- c * ((niter)->cycle_time/minval->min_cyc) +
- dp * ((niter)->power.readOp.dynamic/minval->min_dyn) +
- lp * ((niter)->power.readOp.leakage/minval->min_leakage) +
- a * ((niter)->area/minval->min_area));
- //fprintf(stderr, "cost = %g\n", cost);
-
- if (min_cost > cost) {
- min_cost = cost;
- *res = (*(niter));
- niter = ulist.erase(niter);
- if (niter!=ulist.begin())
- niter--;
+
+ for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end();
+ niter++) {
+ if (g_ip->ed == 1) {
+ cost = ((niter)->access_time / minval->min_delay) *
+ ((niter)->power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ } else if (g_ip->ed == 2) {
+ cost = ((niter)->access_time / minval->min_delay) *
+ ((niter)->access_time / minval->min_delay) *
+ ((niter)->power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ } else {
+ /*
+ * check whether the current organization
+ * meets the input deviation constraints
+ */
+ bool v = check_uca_org(*niter, minval);
+
+ if (v) {
+ cost = (d * ((niter)->access_time / minval->min_delay) +
+ c * ((niter)->cycle_time / minval->min_cyc) +
+ dp * ((niter)->power.readOp.dynamic / minval->min_dyn) +
+ lp *
+ ((niter)->power.readOp.leakage / minval->min_leakage) +
+ a * ((niter)->area / minval->min_area));
+
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ niter = ulist.erase(niter);
+ if (niter != ulist.begin())
+ niter--;
+ }
+ } else {
+ niter = ulist.erase(niter);
+ if (niter != ulist.begin())
+ niter--;
+ }
}
- }
- else {
- niter = ulist.erase(niter);
- if (niter!=ulist.begin())
- niter--;
- }
}
- }
- if (min_cost == BIGNUM)
- {
- cout << "ERROR: no cache organizations met optimization criteria" << endl;
- exit(0);
- }
+ if (min_cost == BIGNUM) {
+ cout << "ERROR: no cache organizations met optimization criteria"
+ << endl;
+ exit(0);
+ }
}
-void filter_tag_arr(const min_values_t * min, list<mem_array *> & list)
-{
- double cost = BIGNUM;
- double cur_cost;
- double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt;
- mem_array * res = NULL;
+void filter_tag_arr(const min_values_t * min, list<mem_array *> & list) {
+ double cost = BIGNUM;
+ double cur_cost;
+ double wt_delay = g_ip->delay_wt;
+ double wt_dyn = g_ip->dynamic_power_wt;
+ double wt_leakage = g_ip->leakage_power_wt;
+ double wt_cyc = g_ip->cycle_time_wt;
+ double wt_area = g_ip->area_wt;
+ mem_array * res = NULL;
- if (list.empty() == true)
- {
- cout << "ERROR: no valid tag organizations found" << endl;
- exit(1);
- }
+ if (list.empty() == true) {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(1);
+ }
- while (list.empty() != true)
- {
- bool v = check_mem_org(*list.back(), min);
- if (v)
- {
- cur_cost = wt_delay * (list.back()->access_time/min->min_delay) +
- wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) +
- wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) +
- wt_area * (list.back()->area/min->min_area) +
- wt_cyc * (list.back()->cycle_time/min->min_cyc);
- }
- else
- {
- cur_cost = BIGNUM;
- }
- if (cur_cost < cost)
- {
- if (res != NULL)
- {
- delete res;
- }
- cost = cur_cost;
- res = list.back();
+ while (list.empty() != true) {
+ bool v = check_mem_org(*list.back(), min);
+ if (v) {
+ cur_cost = wt_delay * (list.back()->access_time / min->min_delay) +
+ wt_dyn * (list.back()->power.readOp.dynamic /
+ min->min_dyn) +
+ wt_leakage * (list.back()->power.readOp.leakage /
+ min->min_leakage) +
+ wt_area * (list.back()->area / min->min_area) +
+ wt_cyc * (list.back()->cycle_time / min->min_cyc);
+ } else {
+ cur_cost = BIGNUM;
+ }
+ if (cur_cost < cost) {
+ if (res != NULL) {
+ delete res;
+ }
+ cost = cur_cost;
+ res = list.back();
+ } else {
+ delete list.back();
+ }
+ list.pop_back();
}
- else
- {
- delete list.back();
+ if (!res) {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(0);
}
- list.pop_back();
- }
- if(!res)
- {
- cout << "ERROR: no valid tag organizations found" << endl;
- exit(0);
- }
- list.push_back(res);
+ list.push_back(res);
}
-void filter_data_arr(list<mem_array *> & curr_list)
-{
- if (curr_list.empty() == true)
- {
- cout << "ERROR: no valid data array organizations found" << endl;
- exit(1);
- }
+void filter_data_arr(list<mem_array *> & curr_list) {
+ if (curr_list.empty() == true) {
+ cout << "ERROR: no valid data array organizations found" << endl;
+ exit(1);
+ }
- list<mem_array *>::iterator iter;
+ list<mem_array *>::iterator iter;
- for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
- {
- mem_array * m = *iter;
+ for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) {
+ mem_array * m = *iter;
- if (m == NULL) exit(1);
+ if (m == NULL) exit(1);
- if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) &&
- ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5))
- {
- delete m;
- iter = curr_list.erase(iter);
- iter --;
+ if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay >
+ 0.5) &&
+ ((m->power.readOp.dynamic - m->arr_min->min_dyn) /
+ m->arr_min->min_dyn > 0.5)) {
+ delete m;
+ iter = curr_list.erase(iter);
+ iter --;
+ }
}
- }
}
@@ -675,210 +688,199 @@ void filter_data_arr(list<mem_array *> & curr_list)
* above results
* 4. Cache model with least cost is picked from sol_list
*/
-void solve(uca_org_t *fin_res)
-{
- bool is_dram = false;
- int pure_ram = g_ip->pure_ram;
- bool pure_cam = g_ip->pure_cam;
-
- init_tech_params(g_ip->F_sz_um, false);
-
-
- list<mem_array *> tag_arr (0);
- list<mem_array *> data_arr(0);
- list<mem_array *>::iterator miter;
- list<uca_org_t> sol_list(1, uca_org_t());
-
- fin_res->tag_array.access_time = 0;
- fin_res->tag_array.Ndwl = 0;
- fin_res->tag_array.Ndbl = 0;
- fin_res->tag_array.Nspd = 0;
- fin_res->tag_array.deg_bl_muxing = 0;
- fin_res->tag_array.Ndsam_lev_1 = 0;
- fin_res->tag_array.Ndsam_lev_2 = 0;
-
-
- // distribute calculate_time() execution to multiple threads
- calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
- pthread_t threads[nthreads];
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].tid = t;
- calc_array[t].pure_ram = pure_ram;
- calc_array[t].pure_cam = pure_cam;
- calc_array[t].data_res = new min_values_t();
- calc_array[t].tag_res = new min_values_t();
- }
-
- bool is_tag;
- uint32_t ram_cell_tech_type;
-
- // If it's a cache, first calculate the area, delay and power for all tag array partitions.
- if (!(pure_ram||pure_cam||g_ip->fully_assoc))
- { //cache
- is_tag = true;
- ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
- is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
- init_tech_params(g_ip->F_sz_um, is_tag);
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].is_tag = is_tag;
- calc_array[t].is_main_mem = false;
- calc_array[t].Nspd_min = 0.125;
- pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+void solve(uca_org_t *fin_res) {
+ bool is_dram = false;
+ int pure_ram = g_ip->pure_ram;
+ bool pure_cam = g_ip->pure_cam;
+
+ init_tech_params(g_ip->F_sz_um, false);
+
+
+ list<mem_array *> tag_arr (0);
+ list<mem_array *> data_arr(0);
+ list<mem_array *>::iterator miter;
+ list<uca_org_t> sol_list(1, uca_org_t());
+
+ fin_res->tag_array.access_time = 0;
+ fin_res->tag_array.Ndwl = 0;
+ fin_res->tag_array.Ndbl = 0;
+ fin_res->tag_array.Nspd = 0;
+ fin_res->tag_array.deg_bl_muxing = 0;
+ fin_res->tag_array.Ndsam_lev_1 = 0;
+ fin_res->tag_array.Ndsam_lev_2 = 0;
+
+
+ // distribute calculate_time() execution to multiple threads
+ calc_time_mt_wrapper_struct * calc_array =
+ new calc_time_mt_wrapper_struct[nthreads];
+ pthread_t threads[nthreads];
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].tid = t;
+ calc_array[t].pure_ram = pure_ram;
+ calc_array[t].pure_cam = pure_cam;
+ calc_array[t].data_res = new min_values_t();
+ calc_array[t].tag_res = new min_values_t();
}
- for (uint32_t t = 0; t < nthreads; t++)
- {
- pthread_join(threads[t], NULL);
- }
+ bool is_tag;
+ uint32_t ram_cell_tech_type;
+
+ // If it's a cache, first calculate the area, delay and power for all tag array partitions.
+ if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { //cache
+ is_tag = true;
+ ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) ||
+ (ram_cell_tech_type == comm_dram));
+ init_tech_params(g_ip->F_sz_um, is_tag);
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = false;
+ calc_array[t].Nspd_min = 0.125;
+#ifndef DEBUG
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
+ (void *)(&(calc_array[t])));
+#else
+ calc_time_mt_wrapper((void *)(&(calc_array[t])));
+#endif
+ }
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].data_arr.sort(mem_array::lt);
- data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- calc_array[t].tag_arr.sort(mem_array::lt);
- tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
+#ifndef DEBUG
+ for (uint32_t t = 0; t < nthreads; t++) {
+ pthread_join(threads[t], NULL);
+ }
+#endif
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
+ calc_array[t].tag_arr.sort(mem_array::lt);
+ tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
+ }
}
- }
- // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
-// if (!g_ip->fully_assoc)
-// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion
+ // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
+ // in the new cacti, cam, fully_associative cache are processed as single array in the data portion
is_tag = false;
ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
init_tech_params(g_ip->F_sz_um, is_tag);
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].is_tag = is_tag;
- calc_array[t].is_main_mem = g_ip->is_main_mem;
- if (!(pure_cam||g_ip->fully_assoc))
- {
- calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8);
- }
- else
- {
- calc_array[t].Nspd_min = 1;
- }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = g_ip->is_main_mem;
+ if (!(pure_cam || g_ip->fully_assoc)) {
+ calc_array[t].Nspd_min = (double)(g_ip->out_w) /
+ (double)(g_ip->block_sz * 8);
+ } else {
+ calc_array[t].Nspd_min = 1;
+ }
- pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+#ifndef DEBUG
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
+ (void *)(&(calc_array[t])));
+#else
+ calc_time_mt_wrapper((void *)(&(calc_array[t])));
+#endif
}
- for (uint32_t t = 0; t < nthreads; t++)
- {
- pthread_join(threads[t], NULL);
+#ifndef DEBUG
+ for (uint32_t t = 0; t < nthreads; t++) {
+ pthread_join(threads[t], NULL);
}
+#endif
data_arr.clear();
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].data_arr.sort(mem_array::lt);
- data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- }
-// }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- min_values_t * d_min = new min_values_t();
- min_values_t * t_min = new min_values_t();
- min_values_t * cache_min = new min_values_t();
+ }
- for (uint32_t t = 0; t < nthreads; t++)
- {
- d_min->update_min_values(calc_array[t].data_res);
- t_min->update_min_values(calc_array[t].tag_res);
- }
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- (*miter)->arr_min = d_min;
- }
+ min_values_t * d_min = new min_values_t();
+ min_values_t * t_min = new min_values_t();
+ min_values_t * cache_min = new min_values_t();
- //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n";
- filter_data_arr(data_arr);
- if(!(pure_ram||pure_cam||g_ip->fully_assoc))
- {
- filter_tag_arr(t_min, tag_arr);
- }
- //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n";
+ for (uint32_t t = 0; t < nthreads; t++) {
+ d_min->update_min_values(calc_array[t].data_res);
+ t_min->update_min_values(calc_array[t].tag_res);
+ }
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ (*miter)->arr_min = d_min;
+ }
- if (pure_ram||pure_cam||g_ip->fully_assoc)
- {
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- uca_org_t & curr_org = sol_list.back();
- curr_org.tag_array2 = NULL;
- curr_org.data_array2 = (*miter);
+ filter_data_arr(data_arr);
+ if (!(pure_ram || pure_cam || g_ip->fully_assoc)) {
+ filter_tag_arr(t_min, tag_arr);
+ }
- curr_org.find_delay();
- curr_org.find_energy();
- curr_org.find_area();
- curr_org.find_cyc();
+ if (pure_ram || pure_cam || g_ip->fully_assoc) {
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = NULL;
+ curr_org.data_array2 = (*miter);
- //update min values for the entire cache
- cache_min->update_min_values(curr_org);
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
- sol_list.push_back(uca_org_t());
- }
- }
- else
- {
- while (tag_arr.empty() != true)
- {
- mem_array * arr_temp = (tag_arr.back());
- //delete tag_arr.back();
- tag_arr.pop_back();
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- uca_org_t & curr_org = sol_list.back();
- curr_org.tag_array2 = arr_temp;
- curr_org.data_array2 = (*miter);
+ sol_list.push_back(uca_org_t());
+ }
+ } else {
+ while (tag_arr.empty() != true) {
+ mem_array * arr_temp = (tag_arr.back());
+ tag_arr.pop_back();
- curr_org.find_delay();
- curr_org.find_energy();
- curr_org.find_area();
- curr_org.find_cyc();
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = arr_temp;
+ curr_org.data_array2 = (*miter);
- //update min values for the entire cache
- cache_min->update_min_values(curr_org);
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
- sol_list.push_back(uca_org_t());
- }
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
+
+ sol_list.push_back(uca_org_t());
+ }
+ }
}
- }
- sol_list.pop_back();
+ sol_list.pop_back();
- find_optimal_uca(fin_res, cache_min, sol_list);
+ find_optimal_uca(fin_res, cache_min, sol_list);
- sol_list.clear();
+ sol_list.clear();
- for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
- {
- if (*miter != fin_res->data_array2)
- {
- delete *miter;
+ for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) {
+ if (*miter != fin_res->data_array2) {
+ delete *miter;
+ }
}
- }
- data_arr.clear();
+ data_arr.clear();
- for (uint32_t t = 0; t < nthreads; t++)
- {
- delete calc_array[t].data_res;
- delete calc_array[t].tag_res;
- }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ delete calc_array[t].data_res;
+ delete calc_array[t].tag_res;
+ }
- delete [] calc_array;
- delete cache_min;
- delete d_min;
- delete t_min;
+ delete [] calc_array;
+ delete cache_min;
+ delete d_min;
+ delete t_min;
}
void update(uca_org_t *fin_res)
@@ -886,7 +888,14 @@ void update(uca_org_t *fin_res)
if(fin_res->tag_array2)
{
init_tech_params(g_ip->F_sz_um,true);
- DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam,
+ fin_res->tag_array2->Nspd,
+ fin_res->tag_array2->Ndwl,
+ fin_res->tag_array2->Ndbl,
+ fin_res->tag_array2->Ndcm,
+ fin_res->tag_array2->Ndsam_lev_1,
+ fin_res->tag_array2->Ndsam_lev_2,
+ g_ip->is_main_mem);
if(tag_arr_dyn_p.is_valid)
{
UCA * tag_arr = new UCA(tag_arr_dyn_p);
@@ -894,12 +903,20 @@ void update(uca_org_t *fin_res)
}
else
{
- cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback"
+ << endl;
exit(1);
}
}
init_tech_params(g_ip->F_sz_um,false);
- DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam,
+ fin_res->data_array2->Nspd,
+ fin_res->data_array2->Ndwl,
+ fin_res->data_array2->Ndbl,
+ fin_res->data_array2->Ndcm,
+ fin_res->data_array2->Ndsam_lev_1,
+ fin_res->data_array2->Ndsam_lev_2,
+ g_ip->is_main_mem);
if(data_arr_dyn_p.is_valid)
{
UCA * data_arr = new UCA(data_arr_dyn_p);
@@ -907,7 +924,8 @@ void update(uca_org_t *fin_res)
}
else
{
- cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback"
+ << endl;
exit(1);
}