summary | refs | log | tree | commit | diff
path: root/ext/mcpat/cacti/nuca.cc
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mcpat/cacti/nuca.cc')
-rw-r--r-- ext/mcpat/cacti/nuca.cc 1007
1 files changed, 503 insertions, 504 deletions
diff --git a/ext/mcpat/cacti/nuca.cc b/ext/mcpat/cacti/nuca.cc
index 2aabe843f..e0b4dcdaf 100644
--- a/ext/mcpat/cacti/nuca.cc
+++ b/ext/mcpat/cacti/nuca.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,89 +37,86 @@
#include "Ucache.h"
#include "nuca.h"
-unsigned int MIN_BANKSIZE=65536;
+/* Lower bound used by Nuca::sim_nuca() when deriving how many bank sizes to
+ * explore (cache_sz / MIN_BANKSIZE). sim_nuca() doubles this global in place
+ * for associativities above 2. Presumably in bytes -- TODO confirm. */
+unsigned int MIN_BANKSIZE = 65536;
#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */
#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */
+/* Constant term added to every controller-to-bank access latency computed in
+ * Nuca::sim_nuca(); currently zero. */
#define CONTR_2_BANK_LAT 0
+/* Contention table: filled from contention.dat by Nuca::init_cont() and read
+ * by Nuca::sim_nuca(). Only core indices 2..4 are populated by init_cont(). */
int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */];
- Nuca::Nuca(
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
- ):deviceType(dt)
-{
- init_cont();
+/*
+ * Build the NUCA model.
+ *
+ * dt: device type stored in deviceType (defaults to the global peripheral
+ *     device parameters).
+ *
+ * The wire-type range and the per-wire-type Wire pointers are put into a
+ * known state before anything else: the destructor walks
+ * [wt_min, wt_max] and deletes both wire arrays, so they must not hold
+ * indeterminate values if sim_nuca() is never called. sim_nuca() narrows or
+ * re-sets the range and fills the arrays itself. Finally the contention
+ * table is loaded from contention.dat.
+ */
+Nuca::Nuca(
+    TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
+): deviceType(dt) {
+    wt_min = Global;
+    wt_max = Low_swing;
+    for (int i = wt_min; i <= wt_max; i++) {
+        wire_vertical[i] = NULL;    // delete on NULL is a no-op in ~Nuca()
+        wire_horizontal[i] = NULL;
+    }
+    init_cont();
 }
+/*
+ * Load the global contention table cont_stats from "contention.dat" in the
+ * current working directory.
+ *
+ * One line of the file is consumed per (cache level, core index, router type,
+ * bank index) combination, in that nesting order; each line is parsed as
+ * "<label>: v0 v1 ... v7" and the eight integers fill the innermost
+ * (cycle time) dimension. Only core indices 2..4 are populated.
+ *
+ * If the file cannot be opened a message is printed and the process exits.
+ * NOTE(review): the exit status is 0 even though this is a failure; kept
+ * unchanged because callers/scripts may rely on it.
+ */
 void
-Nuca::init_cont()
-{
- FILE *cont;
- char line[5000];
- char jk[5000];
- cont = fopen("contention.dat", "r");
- if (!cont) {
- cout << "contention.dat file is missing!\n";
- exit(0);
- }
-
- for(int i=0; i<2; i++) {
- for(int j=2; j<5; j++) {
- for(int k=0; k<ROUTER_TYPES; k++) {
- for(int l=0;l<7; l++) {
- int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
- assert(fscanf(cont, "%[^\n]\n", line) != EOF);
- sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d",jk, &temp[0], &temp[1], &temp[2], &temp[3],
- &temp[4], &temp[5], &temp[6], &temp[7]);
+Nuca::init_cont() {
+    FILE *cont;
+    char line[5000];
+    char jk[5000];
+    cont = fopen("contention.dat", "r");
+    if (!cont) {
+        cout << "contention.dat file is missing!\n";
+        exit(0);
+    }
+
+    for (int i = 0; i < 2; i++) {
+        for (int j = 2; j < 5; j++) {
+            for (int k = 0; k < ROUTER_TYPES; k++) {
+                for (int l = 0; l < 7; l++) {
+                    int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
+                    // The read must happen outside assert(): with NDEBUG
+                    // defined the whole assert expression is discarded and
+                    // the file would never be read. The field width keeps
+                    // an over-long line from overflowing line[] (and, as jk
+                    // is the same size, jk[] in the sscanf below).
+                    const int got = fscanf(cont, "%4999[^\n]\n", line);
+                    assert(got != EOF);
+                    (void)got;  // silence unused warning in NDEBUG builds
+                    sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk,
+                           &temp[0], &temp[1], &temp[2], &temp[3],
+                           &temp[4], &temp[5], &temp[6], &temp[7]);
+                }
+            }
         }
- }
     }
- }
- fclose(cont);
+    fclose(cont);
 }
- void
-Nuca::print_cont_stats()
-{
- for(int i=0; i<2; i++) {
- for(int j=2; j<5; j++) {
- for(int k=0; k<ROUTER_TYPES; k++) {
- for(int l=0;l<7; l++) {
- for(int m=0;l<7; l++) {
- cout << cont_stats[i][j][k][l][m] << " ";
- }
- cout << endl;
+/*
+ * Dump the populated part of the contention table cont_stats to stdout:
+ * one output line per (cache level, core index, router type, bank index)
+ * holding the eight cycle-time entries, followed by a final blank line.
+ * Core indices 2..4 are printed, matching what init_cont() loads.
+ */
+void
+Nuca::print_cont_stats() {
+    for (int i = 0; i < 2; i++) {
+        for (int j = 2; j < 5; j++) {
+            for (int k = 0; k < ROUTER_TYPES; k++) {
+                for (int l = 0; l < 7; l++) {
+                    // Walk the innermost dimension with its own index and
+                    // bound (8 entries); the loop previously tested and
+                    // incremented l, so only column 0 was ever printed.
+                    for (int m = 0; m < 8; m++) {
+                        cout << cont_stats[i][j][k][l][m] << " ";
+                    }
+                    cout << endl;
+                }
+            }
         }
- }
     }
- }
- cout << endl;
+    cout << endl;
 }
-Nuca::~Nuca(){
- for (int i = wt_min; i <= wt_max; i++) {
- delete wire_vertical[i];
- delete wire_horizontal[i];
- }
+/*
+ * Release the Wire objects held in wire_vertical[] and wire_horizontal[]
+ * for every wire type in the inclusive range [wt_min, wt_max].
+ */
+Nuca::~Nuca() {
+    int wt = wt_min;
+    while (wt <= wt_max) {
+        delete wire_vertical[wt];
+        delete wire_horizontal[wt];
+        ++wt;
+    }
 }
/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
- int
-Nuca::calc_cycles(double lat, double oper_freq)
-{
- //TODO: convert latch delay to FO4 */
- double cycle_time = (1.0/(oper_freq*1e9)); /*s*/
- cycle_time -= LATCH_DELAY;
- cycle_time -= FIXED_OVERHEAD;
-
- return (int)ceil(lat/cycle_time);
+int
+Nuca::calc_cycles(double lat, double oper_freq) {
+    // TODO: express the latch delay in FO4 units
+    /*
+     * Usable part of one clock period in s: the period at oper_freq (GHz)
+     * minus the latch delay and the fixed skew/jitter overhead.
+     */
+    const double usable_period =
+        (1.0 / (oper_freq * 1e9)) - LATCH_DELAY - FIXED_OVERHEAD;
+
+    /* round the latency up to a whole number of cycles */
+    return (int)ceil(lat / usable_period);
 }
+/*
+ * Intentionally empty destructor. In this file h_wire and v_wire are set in
+ * Nuca::sim_nuca() to entries of Nuca::wire_horizontal[] / wire_vertical[]
+ * (deleted by ~Nuca()), and router to an entry of sim_nuca()'s local
+ * router_s[] (deleted at the end of sim_nuca()), so deleting them here
+ * would free them twice. The disabled deletes are kept for reference.
+ */
nuca_org_t::~nuca_org_t() {
- // if(h_wire) delete h_wire;
- // if(v_wire) delete v_wire;
- // if(router) delete router;
+ // if(h_wire) delete h_wire;
+ // if(v_wire) delete v_wire;
+ // if(router) delete router;
}
/*
@@ -137,476 +135,477 @@ nuca_org_t::~nuca_org_t() {
* Finally include contention statistics and find the optimal
* NUCA configuration
*/
- void
-Nuca::sim_nuca()
-{
- /* temp variables */
- int it, ro, wr;
- int num_cyc;
- unsigned int i, j, k;
- unsigned int r, c;
- int l2_c;
- int bank_count = 0;
- uca_org_t ures;
- nuca_org_t *opt_n;
- mem_array tag, data;
- list<nuca_org_t *> nuca_list;
- Router *router_s[ROUTER_TYPES];
- router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
- router_s[0]->print_router();
- router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
- router_s[1]->print_router();
- router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
- router_s[2]->print_router();
-
- int core_in; // to store no. of cores
-
- /* to search diff grid organizations */
- double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
- curr_acclat;
- double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
- avg_leakage_power;
-
- double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
- int opt_rows = 0;
- int opt_columns = 0;
- double opt_totno_hops = 0;
- double opt_avg_hop = 0;
- double opt_dyn_power = 0, opt_leakage_power = 0;
- min_values_t minval;
-
- int bank_start = 0;
-
- int flit_width = 0;
-
- /* vertical and horizontal hop latency values */
- int ver_hop_lat, hor_hop_lat; /* in cycles */
-
-
- /* no. of different bank sizes to consider */
- int iterations;
-
-
- g_ip->nuca_cache_sz = g_ip->cache_sz;
- nuca_list.push_back(new nuca_org_t());
-
- if (g_ip->cache_level == 0) l2_c = 1;
- else l2_c = 0;
-
- if (g_ip->cores <= 4) core_in = 2;
- else if (g_ip->cores <= 8) core_in = 3;
- else if (g_ip->cores <= 16) core_in = 4;
- else {cout << "Number of cores should be <= 16!\n"; exit(0);}
-
-
- // set the lower bound to an appropriate value. this depends on cache associativity
- if (g_ip->assoc > 2) {
- i = 2;
- while (i != g_ip->assoc) {
- MIN_BANKSIZE *= 2;
- i *= 2;
- }
- }
-
- iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE);
-
- if (g_ip->force_wiretype)
- {
- if (g_ip->wt == Low_swing) {
- wt_min = Low_swing;
- wt_max = Low_swing;
- }
+/*
+ * Explore NUCA organizations and report the best one.
+ *
+ * For every bank count considered (loop variable it), every wire type in
+ * [wt_min, wt_max] and every router type, the bank itself is solved with
+ * solve(), all rows x columns grids whose product equals the bank count are
+ * searched for the lowest uncontended access latency, the contention
+ * cycles from cont_stats are added, and the candidate is appended to a
+ * local list. find_optimal_nuca() then picks the winner, which is printed
+ * with print_nuca(); g_ip->cache_sz is left at the winning bank size.
+ *
+ * Side effects visible in this function: g_ip->nuca_cache_sz and
+ * g_ip->cache_sz are modified, the global MIN_BANKSIZE is doubled in place
+ * for associativities above 2 (so repeated calls compound), and
+ * wire_vertical[] / wire_horizontal[] are overwritten with fresh Wire
+ * objects on every router iteration.
+ * NOTE(review): the overwritten Wire objects are not freed here because
+ * candidates in the list still point at them; only the last pair per wire
+ * type is released by ~Nuca().
+ */
+void
+Nuca::sim_nuca() {
+    /* temp variables */
+    int it, ro, wr;
+    int num_cyc;
+    unsigned int i, j, k;
+    unsigned int r, c;
+    int l2_c;
+    int bank_count = 0;
+    uca_org_t ures;
+    nuca_org_t *opt_n;
+    mem_array tag, data;
+    list<nuca_org_t *> nuca_list;
+    Router *router_s[ROUTER_TYPES];
+    router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
+    router_s[0]->print_router();
+    router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
+    router_s[1]->print_router();
+    router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
+    router_s[2]->print_router();
+
+    int core_in; // to store no. of cores
+
+    /* to search diff grid organizations */
+    double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
+           curr_acclat;
+    double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
+           avg_leakage_power;
+
+    double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
+    int opt_rows = 0;
+    int opt_columns = 0;
+    double opt_totno_hops = 0;
+    double opt_avg_hop = 0;
+    double opt_dyn_power = 0, opt_leakage_power = 0;
+    min_values_t minval;
+
+    int bank_start = 0;
+
+    int flit_width = 0;
+
+    /* vertical and horizontal hop latency values */
+    int ver_hop_lat, hor_hop_lat; /* in cycles */
+
+
+    /* no. of different bank sizes to consider */
+    int iterations;
+
+
+    g_ip->nuca_cache_sz = g_ip->cache_sz;
+    nuca_list.push_back(new nuca_org_t());
+
+    /* first index into cont_stats: 1 when cache_level is 0, else 0 */
+    if (g_ip->cache_level == 0) l2_c = 1;
+    else l2_c = 0;
+
+    /* second index into cont_stats, bucketed by core count */
+    if (g_ip->cores <= 4) core_in = 2;
+    else if (g_ip->cores <= 8) core_in = 3;
+    else if (g_ip->cores <= 16) core_in = 4;
 else {
- wt_min = Global;
- wt_max = Low_swing-1;
+        cout << "Number of cores should be <= 16!\n";
+        exit(0);
 }
- }
- else {
- wt_min = Global;
- wt_max = Low_swing;
- }
- if (g_ip->nuca_bank_count != 0) { // simulate just one bank
- if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
- g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
- g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
- fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n");
- }
- bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
- iterations = bank_start+1;
- g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count;
- }
- cout << "Simulating various NUCA configurations\n";
- for (it=bank_start; it<iterations; it++) { /* different bank count values */
- ures.tag_array2 = &tag;
- ures.data_array2 = &data;
- /*
- * find the optimal bank organization
- */
- solve(&ures);
-// output_UCA(&ures);
- bank_count = g_ip->nuca_cache_sz/g_ip->cache_sz;
- cout << "====" << g_ip->cache_sz << "\n";
-
- for (wr=wt_min; wr<=wt_max; wr++) {
-
- for (ro=0; ro<ROUTER_TYPES; ro++)
- {
- flit_width = (int) router_s[ro]->flit_size; //initialize router
- nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
-
- /* calculate router and wire parameters */
-
- double vlength = ures.cache_ht; /* length of the wire (u)*/
- double hlength = ures.cache_len; // u
- /* find delay, area, and power for wires */
- wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
- wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
+    // set the lower bound to an appropriate value. this depends on cache associativity
+    if (g_ip->assoc > 2) {
+        i = 2;
+        // '<' rather than '!=': with '!=' an associativity that is not a
+        // power of two is never hit exactly and the loop would not end.
+        while (i < g_ip->assoc) {
+            MIN_BANKSIZE *= 2;
+            i *= 2;
+        }
+    }
- hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
- ver_hop_lat = calc_cycles(wire_vertical[wr]->delay,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
+    iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE);
+    if (g_ip->force_wiretype) {
+        if (g_ip->wt == Low_swing) {
+            wt_min = Low_swing;
+            wt_max = Low_swing;
+        } else {
+            wt_min = Global;
+            wt_max = Low_swing - 1;
+        }
+    } else {
+        wt_min = Global;
+        wt_max = Low_swing;
+    }
+    if (g_ip->nuca_bank_count != 0) { // simulate just one bank
+        if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
+            g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
+            g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
+            // Adjacent literals are concatenated into ONE format string;
+            // a comma between them would pass the second half as an
+            // unused argument and truncate the message.
+            fprintf(stderr, "Incorrect bank count value! Please fix the "
+                    "value in cache.cfg\n");
+        }
+        bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
+        iterations = bank_start + 1;
+        g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count;
+    }
+    cout << "Simulating various NUCA configurations\n";
+    for (it = bank_start; it < iterations; it++) {
+        /* different bank count values */
+        ures.tag_array2 = &tag;
+        ures.data_array2 = &data;
         /*
- * assume a grid like topology and explore for optimal network
- * configuration using different row and column count values.
+         * find the optimal bank organization
          */
- for (c=1; c<=(unsigned int)bank_count; c++) {
- while (bank_count%c != 0) c++;
- r = bank_count/c;
-
- /*
- * to find the avg access latency of a NUCA cache, uncontended
- * access time to each bank from the
- * cache controller is calculated.
- * avg latency =
- * sum of the access latencies to individual banks)/bank
- * count value.
- */
- totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
- k = 1;
- for (i=0; i<r; i++) {
- for (j=0; j<c; j++) {
- /*
- * vertical hops including the
- * first hop from the cache controller
- */
- curr_hop = i + 1;
- curr_hop += j; /* horizontal hops */
- totno_hhops += j;
- totno_vhops += (i+1);
- curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
- j * hor_hop_lat);
-
- tot_lat += curr_acclat;
- totno_hops += curr_hop;
+        solve(&ures);
+        // output_UCA(&ures);
+        bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz;
+        cout << "====" << g_ip->cache_sz << "\n";
+
+        for (wr = wt_min; wr <= wt_max; wr++) {
+
+            for (ro = 0; ro < ROUTER_TYPES; ro++) {
+                flit_width = (int) router_s[ro]->flit_size; //initialize router
+                nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
+
+                /* calculate router and wire parameters */
+
+                double vlength = ures.cache_ht; /* length of the wire (u)*/
+                double hlength = ures.cache_len; // u
+
+                /* find delay, area, and power for wires */
+                wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
+                wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
+
+
+                hor_hop_lat =
+                    calc_cycles(wire_horizontal[wr]->delay,
+                                1 /(nuca_list.back()->nuca_pda.cycle_time *
+                                    .001));
+                ver_hop_lat =
+                    calc_cycles(wire_vertical[wr]->delay,
+                                1 / (nuca_list.back()->nuca_pda.cycle_time *
+                                     .001));
+
+                /*
+                 * assume a grid like topology and explore for optimal network
+                 * configuration using different row and column count values.
+                 */
+                for (c = 1; c <= (unsigned int)bank_count; c++) {
+                    /* skip column counts that do not divide the bank count */
+                    while (bank_count % c != 0) c++;
+                    r = bank_count / c;
+
+                    /*
+                     * to find the avg access latency of a NUCA cache, uncontended
+                     * access time to each bank from the
+                     * cache controller is calculated.
+                     * avg latency =
+                     * sum of the access latencies to individual banks)/bank
+                     * count value.
+                     */
+                    totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
+                    k = 1;
+                    for (i = 0; i < r; i++) {
+                        for (j = 0; j < c; j++) {
+                            /*
+                             * vertical hops including the
+                             * first hop from the cache controller
+                             */
+                            curr_hop = i + 1;
+                            curr_hop += j; /* horizontal hops */
+                            totno_hhops += j;
+                            totno_vhops += (i + 1);
+                            curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
+                                           j * hor_hop_lat);
+
+                            tot_lat += curr_acclat;
+                            totno_hops += curr_hop;
+                        }
+                    }
+                    avg_lat = tot_lat / bank_count;
+                    avg_hop = totno_hops / bank_count;
+                    avg_hhop = totno_hhops / bank_count;
+                    avg_vhop = totno_vhops / bank_count;
+
+                    /* net access latency */
+                    curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay *
+                                                     avg_hop) +
+                        calc_cycles(ures.access_time,
+                                    1 /
+                                    (nuca_list.back()->nuca_pda.cycle_time *
+                                     .001));
+
+                    /* avg access lat of nuca */
+                    avg_dyn_power =
+                        avg_hop *
+                        (router_s[ro]->power.readOp.dynamic) + avg_hhop *
+                        (wire_horizontal[wr]->power.readOp.dynamic) *
+                        (g_ip->block_sz * 8 + 64) + avg_vhop *
+                        (wire_vertical[wr]->power.readOp.dynamic) *
+                        (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic;
+
+                    // NOTE(review): the vertical term below multiplies by
+                    // wire_horizontal[wr]->delay and, unlike the horizontal
+                    // term, not by flit_width. This looks like a copy/paste
+                    // slip but is left unchanged -- confirm against the
+                    // intended leakage model.
+                    avg_leakage_power =
+                        bank_count * router_s[ro]->power.readOp.leakage +
+                        avg_hhop * (wire_horizontal[wr]->power.readOp.leakage *
+                                    wire_horizontal[wr]->delay) * flit_width +
+                        avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
+                                    wire_horizontal[wr]->delay);
+
+                    if (curr_acclat < opt_acclat) {
+                        opt_acclat = curr_acclat;
+                        opt_tot_lat = tot_lat;
+                        opt_avg_lat = avg_lat;
+                        opt_totno_hops = totno_hops;
+                        opt_avg_hop = avg_hop;
+                        opt_rows = r;
+                        opt_columns = c;
+                        opt_dyn_power = avg_dyn_power;
+                        opt_leakage_power = avg_leakage_power;
+                    }
+                    totno_hops = 0;
+                    tot_lat = 0;
+                    totno_hhops = 0;
+                    totno_vhops = 0;
+                }
+                nuca_list.back()->wire_pda.power.readOp.dynamic =
+                    opt_avg_hop * flit_width *
+                    (wire_horizontal[wr]->power.readOp.dynamic +
+                     wire_vertical[wr]->power.readOp.dynamic);
+                nuca_list.back()->avg_hops = opt_avg_hop;
+                /* network delay/power */
+                nuca_list.back()->h_wire = wire_horizontal[wr];
+                nuca_list.back()->v_wire = wire_vertical[wr];
+                nuca_list.back()->router = router_s[ro];
+                /* bank delay/power */
+
+                nuca_list.back()->bank_pda.delay = ures.access_time;
+                nuca_list.back()->bank_pda.power = ures.power;
+                nuca_list.back()->bank_pda.area.h = ures.cache_ht;
+                nuca_list.back()->bank_pda.area.w = ures.cache_len;
+                nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
+
+                num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
+                                      1 /
+                                      (nuca_list.back()->nuca_pda.cycle_time *
+                                       .001/*GHz*/));
+                if (num_cyc % 2 != 0) num_cyc++;
+                if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
+
+                // cont_stats has 7 bank rows (indices 0..6). Bank counts
+                // beyond the table reuse the last row; index 7 would read
+                // past the end of that dimension.
+                const int bank_idx = (it < 7) ? it : 6;
+                const int cont =
+                    cont_stats[l2_c][core_in][ro][bank_idx][num_cyc/2-1];
+                nuca_list.back()->nuca_pda.delay = opt_acclat + cont;
+                nuca_list.back()->contention = cont;
+                nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
+                nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
+
+                /* array organization */
+                nuca_list.back()->bank_count = bank_count;
+                nuca_list.back()->rows = opt_rows;
+                nuca_list.back()->columns = opt_columns;
+                calculate_nuca_area (nuca_list.back());
+
+                minval.update_min_values(nuca_list.back());
+                /* start a fresh candidate and reset the per-candidate optimum */
+                nuca_list.push_back(new nuca_org_t());
+                opt_acclat = BIGNUM;
+
             }
- }
- avg_lat = tot_lat/bank_count;
- avg_hop = totno_hops/bank_count;
- avg_hhop = totno_hhops/bank_count;
- avg_vhop = totno_vhops/bank_count;
-
- /* net access latency */
- curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) +
- calc_cycles(ures.access_time,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
-
- /* avg access lat of nuca */
- avg_dyn_power =
- avg_hop *
- (router_s[ro]->power.readOp.dynamic) + avg_hhop *
- (wire_horizontal[wr]->power.readOp.dynamic) *
- (g_ip->block_sz*8 + 64) + avg_vhop *
- (wire_vertical[wr]->power.readOp.dynamic) *
- (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic;
-
- avg_leakage_power =
- bank_count * router_s[ro]->power.readOp.leakage +
- avg_hhop * (wire_horizontal[wr]->power.readOp.leakage*
- wire_horizontal[wr]->delay) * flit_width +
- avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
- wire_horizontal[wr]->delay);
-
- if (curr_acclat < opt_acclat) {
- opt_acclat = curr_acclat;
- opt_tot_lat = tot_lat;
- opt_avg_lat = avg_lat;
- opt_totno_hops = totno_hops;
- opt_avg_hop = avg_hop;
- opt_rows = r;
- opt_columns = c;
- opt_dyn_power = avg_dyn_power;
- opt_leakage_power = avg_leakage_power;
- }
- totno_hops = 0;
- tot_lat = 0;
- totno_hhops = 0;
- totno_vhops = 0;
         }
- nuca_list.back()->wire_pda.power.readOp.dynamic =
- opt_avg_hop * flit_width *
- (wire_horizontal[wr]->power.readOp.dynamic +
- wire_vertical[wr]->power.readOp.dynamic);
- nuca_list.back()->avg_hops = opt_avg_hop;
- /* network delay/power */
- nuca_list.back()->h_wire = wire_horizontal[wr];
- nuca_list.back()->v_wire = wire_vertical[wr];
- nuca_list.back()->router = router_s[ro];
- /* bank delay/power */
-
- nuca_list.back()->bank_pda.delay = ures.access_time;
- nuca_list.back()->bank_pda.power = ures.power;
- nuca_list.back()->bank_pda.area.h = ures.cache_ht;
- nuca_list.back()->bank_pda.area.w = ures.cache_len;
- nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
-
- num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/));
- if(num_cyc%2 != 0) num_cyc++;
- if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
-
- if (it < 7) {
- nuca_list.back()->nuca_pda.delay = opt_acclat +
- cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
- nuca_list.back()->contention =
- cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
- }
- else {
- nuca_list.back()->nuca_pda.delay = opt_acclat +
- cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
- nuca_list.back()->contention =
- cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
- }
- nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
- nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
+        /* next iteration: twice as many banks, each half the size */
+        g_ip->cache_sz /= 2;
+    }
- /* array organization */
- nuca_list.back()->bank_count = bank_count;
- nuca_list.back()->rows = opt_rows;
- nuca_list.back()->columns = opt_columns;
- calculate_nuca_area (nuca_list.back());
+    /* drop the empty placeholder pushed after the last candidate */
+    delete(nuca_list.back());
+    nuca_list.pop_back();
+    opt_n = find_optimal_nuca(&nuca_list, &minval);
+    // find_optimal_nuca() returns NULL when no candidate is selected
+    // (e.g. every one violates the deviation constraints); fail with a
+    // message instead of dereferencing a null pointer below.
+    if (opt_n == NULL) {
+        fprintf(stderr, "No NUCA organization satisfies the given "
+                "constraints!\n");
+        exit(1);
+    }
+    print_nuca(opt_n);
+    g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count;
- minval.update_min_values(nuca_list.back());
- nuca_list.push_back(new nuca_org_t());
- opt_acclat = BIGNUM;
+    list<nuca_org_t *>::iterator niter;
+    for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) {
+        delete *niter;
+    }
+    nuca_list.clear();
- }
+    for (int i = 0; i < ROUTER_TYPES; i++) {
+        delete router_s[i];
     }
- g_ip->cache_sz /= 2;
- }
-
- delete(nuca_list.back());
- nuca_list.pop_back();
- opt_n = find_optimal_nuca(&nuca_list, &minval);
- print_nuca(opt_n);
- g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count;
-
- list<nuca_org_t *>::iterator niter;
- for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter)
- {
- delete *niter;
- }
- nuca_list.clear();
-
- for(int i=0; i < ROUTER_TYPES; i++)
- {
- delete router_s[i];
- }
- g_ip->display_ip();
- // g_ip->force_cache_config = true;
- // g_ip->ndwl = 8;
- // g_ip->ndbl = 16;
- // g_ip->nspd = 4;
- // g_ip->ndcm = 1;
- // g_ip->ndsam1 = 8;
- // g_ip->ndsam2 = 32;
+    g_ip->display_ip();
+    // g_ip->force_cache_config = true;
+    // g_ip->ndwl = 8;
+    // g_ip->ndbl = 16;
+    // g_ip->nspd = 4;
+    // g_ip->ndcm = 1;
+    // g_ip->ndsam1 = 8;
+    // g_ip->ndsam2 = 32;
 }
- void
-Nuca::print_nuca (nuca_org_t *fr)
-{
- printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
- "----------\n\n");
- printf("Optimal number of banks - %d\n", fr->bank_count);
- printf("Grid organization rows x columns - %d x %d\n",
- fr->rows, fr->columns);
- printf("Network frequency - %g GHz\n",
- (1/fr->nuca_pda.cycle_time)*1e3);
- printf("Cache dimension (mm x mm) - %g x %g\n",
- fr->nuca_pda.area.h,
- fr->nuca_pda.area.w);
-
- fr->router->print_router();
-
- printf("\n\nWire stats:\n");
- if (fr->h_wire->wt == Global) {
- printf("\tWire type - Full swing global wires with least "
- "possible delay\n");
- }
- else if (fr->h_wire->wt == Global_5) {
- printf("\tWire type - Full swing global wires with "
- "5%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_10) {
- printf("\tWire type - Full swing global wires with "
- "10%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_20) {
- printf("\tWire type - Full swing global wires with "
- "20%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_30) {
- printf("\tWire type - Full swing global wires with "
- "30%% delay penalty\n");
- }
- else if(fr->h_wire->wt == Low_swing) {
- printf("\tWire type - Low swing wires\n");
- }
-
- printf("\tHorizontal link delay - %g (ns)\n",
- fr->h_wire->delay*1e9);
- printf("\tVertical link delay - %g (ns)\n",
- fr->v_wire->delay*1e9);
- printf("\tDelay/length - %g (ns/mm)\n",
- fr->h_wire->delay*1e9/fr->bank_pda.area.w);
- printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
- "\t -leakage %g (nW)\n\n",
- fr->h_wire->power.readOp.dynamic*1e9,
- fr->h_wire->power.readOp.leakage*1e9);
- printf("\tVertical link energy -dynamic/access %g (nJ)\n"
- "\t -leakage %g (nW)\n\n",
- fr->v_wire->power.readOp.dynamic*1e9,
- fr->v_wire->power.readOp.leakage*1e9);
- printf("\n\n");
- fr->v_wire->print_wire();
- printf("\n\nBank stats:\n");
+/*
+ * Print a human-readable report for one NUCA organization to stdout:
+ * bank count and grid shape, network frequency, overall dimensions, the
+ * router report, the wire type with per-link delay and energy figures, the
+ * vertical wire's own report, and finally the "Bank stats" heading.
+ *
+ * fr: organization to report; its router, h_wire and v_wire are
+ *     dereferenced, so all three must be set.
+ */
+void
+Nuca::print_nuca (nuca_org_t *fr) {
+    Wire *hw = fr->h_wire;
+    Wire *vw = fr->v_wire;
+
+    printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
+           "----------\n\n");
+    printf("Optimal number of banks - %d\n", fr->bank_count);
+    printf("Grid organization rows x columns - %d x %d\n",
+           fr->rows, fr->columns);
+    printf("Network frequency - %g GHz\n",
+           (1 / fr->nuca_pda.cycle_time)*1e3);
+    printf("Cache dimension (mm x mm) - %g x %g\n",
+           fr->nuca_pda.area.h,
+           fr->nuca_pda.area.w);
+
+    fr->router->print_router();
+
+    printf("\n\nWire stats:\n");
+    /* wire types not listed here print no "Wire type" line */
+    switch (hw->wt) {
+      case Global:
+        printf("\tWire type - Full swing global wires with least "
+               "possible delay\n");
+        break;
+      case Global_5:
+        printf("\tWire type - Full swing global wires with "
+               "5%% delay penalty\n");
+        break;
+      case Global_10:
+        printf("\tWire type - Full swing global wires with "
+               "10%% delay penalty\n");
+        break;
+      case Global_20:
+        printf("\tWire type - Full swing global wires with "
+               "20%% delay penalty\n");
+        break;
+      case Global_30:
+        printf("\tWire type - Full swing global wires with "
+               "30%% delay penalty\n");
+        break;
+      case Low_swing:
+        printf("\tWire type - Low swing wires\n");
+        break;
+      default:
+        break;
+    }
+
+    /* link delays are scaled by 1e9 for the (ns) columns */
+    printf("\tHorizontal link delay - %g (ns)\n",
+           hw->delay*1e9);
+    printf("\tVertical link delay - %g (ns)\n",
+           vw->delay*1e9);
+    printf("\tDelay/length - %g (ns/mm)\n",
+           hw->delay*1e9 / fr->bank_pda.area.w);
+    printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
+           "\t -leakage %g (nW)\n\n",
+           hw->power.readOp.dynamic*1e9,
+           hw->power.readOp.leakage*1e9);
+    printf("\tVertical link energy -dynamic/access %g (nJ)\n"
+           "\t -leakage %g (nW)\n\n",
+           vw->power.readOp.dynamic*1e9,
+           vw->power.readOp.leakage*1e9);
+    printf("\n\n");
+    vw->print_wire();
+    printf("\n\nBank stats:\n");
 }
- nuca_org_t *
-Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
-{
- double cost = 0;
- double min_cost = BIGNUM;
- nuca_org_t *res = NULL;
- float d, a, dp, lp, c;
- int v;
- dp = g_ip->dynamic_power_wt_nuca;
- lp = g_ip->leakage_power_wt_nuca;
- a = g_ip->area_wt_nuca;
- d = g_ip->delay_wt_nuca;
- c = g_ip->cycle_time_wt_nuca;
-
- list<nuca_org_t *>::iterator niter;
-
-
- for (niter = n->begin(); niter != n->end(); niter++) {
- fprintf(stderr, "\n-----------------------------"
- "---------------\n");
-
-
- printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
- "bank_dpower = %g \tleak = %g \tcycle = %g\n",
- (*niter)->bank_count,
- (*niter)->nuca_pda.delay,
- (*niter)->nuca_pda.power.readOp.dynamic,
- (*niter)->h_wire->wt,
- (*niter)->bank_pda.power.readOp.dynamic,
- (*niter)->nuca_pda.power.readOp.leakage,
- (*niter)->nuca_pda.cycle_time);
-
-
- if (g_ip->ed == 1) {
- cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
- }
- }
- else if (g_ip->ed == 2) {
- cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
- }
- }
- else {
- /*
- * check whether the current organization
- * meets the input deviation constraints
- */
- v = check_nuca_org((*niter), minval);
- if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
-
- if (v) {
- cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) +
- c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) +
- dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) +
- lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) +
- a * ((*niter)->nuca_pda.area.get_area()/minval->min_area));
- fprintf(stderr, "cost = %g\n", cost);
-
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
+/*
+ * Pick the lowest-cost organization from the candidate list.
+ *
+ * n:      candidate list; in the weighted mode (g_ip->ed neither 1 nor 2)
+ *         candidates rejected by check_nuca_org() are erased from it.
+ * minval: per-metric minima used to normalize each cost term; min_leakage
+ *         is bumped from 0 to 0.1 in the weighted mode.
+ *
+ * Cost is delay x dynamic power when g_ip->ed == 1, delay^2 x dynamic
+ * power when g_ip->ed == 2, and otherwise a weighted sum of normalized
+ * delay, cycle time, dynamic power, leakage and area using the *_wt_nuca
+ * weights. A stats line is printed for every candidate visited.
+ *
+ * Returns the cheapest candidate, or NULL if none was selected (empty
+ * list, or every candidate rejected).
+ * NOTE(review): erased candidates are removed from the list but not
+ * deleted here -- confirm who owns them.
+ */
+nuca_org_t *
+Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) {
+    double cost = 0;
+    double min_cost = BIGNUM;
+    nuca_org_t *res = NULL;
+    float d, a, dp, lp, c;
+    int v;
+    dp = g_ip->dynamic_power_wt_nuca;
+    lp = g_ip->leakage_power_wt_nuca;
+    a = g_ip->area_wt_nuca;
+    d = g_ip->delay_wt_nuca;
+    c = g_ip->cycle_time_wt_nuca;
+
+    list<nuca_org_t *>::iterator niter;
+
+
+    // The iterator is advanced explicitly: erase() already returns the
+    // element after the erased one, so stepping again would skip a
+    // candidate (or walk past end() once the list is empty).
+    for (niter = n->begin(); niter != n->end(); ) {
+        fprintf(stderr, "\n-----------------------------"
+                "---------------\n");
+
+
+        printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
+               "bank_dpower = %g \tleak = %g \tcycle = %g\n",
+               (*niter)->bank_count,
+               (*niter)->nuca_pda.delay,
+               (*niter)->nuca_pda.power.readOp.dynamic,
+               (*niter)->h_wire->wt,
+               (*niter)->bank_pda.power.readOp.dynamic,
+               (*niter)->nuca_pda.power.readOp.leakage,
+               (*niter)->nuca_pda.cycle_time);
+
+
+        if (g_ip->ed == 1) {
+            cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
+                ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
+            if (min_cost > cost) {
+                min_cost = cost;
+                res = ((*niter));
+            }
+        } else if (g_ip->ed == 2) {
+            cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
+                ((*niter)->nuca_pda.delay / minval->min_delay) *
+                ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
+            if (min_cost > cost) {
+                min_cost = cost;
+                res = ((*niter));
+            }
+        } else {
+            // Applied before the constraint check, which divides by
+            // min_leakage as well.
+            if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
+
+            /*
+             * check whether the current organization
+             * meets the input deviation constraints
+             */
+            v = check_nuca_org((*niter), minval);
+
+            if (!v) {
+                niter = n->erase(niter);
+                continue;
+            }
+
+            cost = (d * ((*niter)->nuca_pda.delay / minval->min_delay) +
+                    c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) +
+                    dp * ((*niter)->nuca_pda.power.readOp.dynamic /
+                          minval->min_dyn) +
+                    lp * ((*niter)->nuca_pda.power.readOp.leakage /
+                          minval->min_leakage) +
+                    a * ((*niter)->nuca_pda.area.get_area() /
+                         minval->min_area));
+            fprintf(stderr, "cost = %g\n", cost);
+
+            if (min_cost > cost) {
+                min_cost = cost;
+                res = ((*niter));
+            }
         }
- }
- else {
- niter = n->erase(niter);
- if (niter !=n->begin())
- niter --;
- }
+        ++niter;
     }
- }
- return res;
+    return res;
 }
- int
-Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
-{
- if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev_nuca) {
- return 0;
- }
- return 1;
+/*
+ * Test one organization against the user's deviation limits.
+ *
+ * Each metric's excess over the recorded minimum is expressed as a
+ * percentage of that minimum and compared with the matching *_dev_nuca
+ * limit, in the order: delay, dynamic power, leakage power, cycle time,
+ * area. Evaluation stops at the first limit that is exceeded.
+ *
+ * Returns 1 when every metric is within its limit, 0 otherwise.
+ */
+int
+Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) {
+    const bool out_of_bounds =
+        (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) >
+         g_ip->delay_dev_nuca) ||
+        (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) /
+          minval->min_dyn)*100 >
+         g_ip->dynamic_power_dev_nuca) ||
+        (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) /
+          minval->min_leakage)*100 >
+         g_ip->leakage_power_dev_nuca) ||
+        (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
+         g_ip->cycle_time_dev_nuca) ||
+        (((n->nuca_pda.area.get_area() - minval->min_area) /
+          minval->min_area) * 100 >
+         g_ip->area_dev_nuca);
+
+    return out_of_bounds ? 0 : 1;
 }
- void
-Nuca::calculate_nuca_area (nuca_org_t *nuca)
-{
- nuca->nuca_pda.area.h=
- nuca->rows * ((nuca->h_wire->wire_width +
- nuca->h_wire->wire_spacing)
- * nuca->router->flit_size +
- nuca->bank_pda.area.h);
-
- nuca->nuca_pda.area.w =
- nuca->columns * ((nuca->v_wire->wire_width +
- nuca->v_wire->wire_spacing)
- * nuca->router->flit_size +
- nuca->bank_pda.area.w);
+/*
+ * Fill in nuca->nuca_pda.area for a grid of banks.
+ *
+ * Height is rows x (bank height + link track), width is columns x
+ * (bank width + link track), where a link track is
+ * (wire_width + wire_spacing) x router flit_size, taken from h_wire for
+ * the height and from v_wire for the width.
+ */
+void
+Nuca::calculate_nuca_area (nuca_org_t *nuca) {
+    const Wire *hw = nuca->h_wire;
+    const Wire *vw = nuca->v_wire;
+    const Router *rt = nuca->router;
+
+    nuca->nuca_pda.area.h =
+        nuca->rows * ((hw->wire_width + hw->wire_spacing) * rt->flit_size +
+                      nuca->bank_pda.area.h);
+
+    nuca->nuca_pda.area.w =
+        nuca->columns * ((vw->wire_width + vw->wire_spacing) * rt->flit_size +
+                         nuca->bank_pda.area.w);
 }