diff options
Diffstat (limited to 'ext/mcpat/cacti/crossbar.cc')
-rw-r--r-- | ext/mcpat/cacti/crossbar.cc | 220 |
1 files changed, 119 insertions, 101 deletions
diff --git a/ext/mcpat/cacti/crossbar.cc b/ext/mcpat/cacti/crossbar.cc index a3d8532d5..ef2a373d6 100644 --- a/ext/mcpat/cacti/crossbar.cc +++ b/ext/mcpat/cacti/crossbar.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,123 +40,140 @@ Crossbar::Crossbar( double n_out_, double flit_size_, TechnologyParameter::DeviceType *dt - ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) -{ - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - Vdd = dt->Vdd; - CB_ADJ = 1; +): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) { + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + Vdd = dt->Vdd; + CB_ADJ = 1; } -Crossbar::~Crossbar(){} +Crossbar::~Crossbar() {} -double Crossbar::output_buffer() -{ +double Crossbar::output_buffer() { - //Wire winit(4, 4); - double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; - Wire w1(g_ip->wt, l_eff); - //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; - double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ); - double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; - // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor - TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); - TriS2 = s1; //driver transistor + //Wire winit(4, 4); + double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; + Wire w1(g_ip->wt, l_eff); + //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; + double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ? + l_eff * ADJ / w1.repeater_spacing : ADJ); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor + TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); + TriS2 = s1; //driver transistor - if (TriS1 < 1) - TriS1 = 1; + if (TriS1 < 1) + TriS1 = 1; - double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) + - gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0); + double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) + + gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0); // input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + // drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + // gate_C(TriS2*g_tp.min_w_nmos_, 0)+ // drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + // drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + // gate_C(TriS2*min_w_pmos, 0); - tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + - gate_C(TriS2*g_tp.min_w_nmos_, 0)+ - drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + - drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(TriS2*min_w_pmos, 0); - double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); - double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0); - - tri_inp_cap = input_cap; - tri_out_cap = output_cap; - tri_ctr_cap = ctr_cap; - return input_cap + output_cap + ctr_cap; + tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + + gate_C(TriS2 * g_tp.min_w_nmos_, 0) + + drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(TriS2 * min_w_pmos, 0); + double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1, + g_tp.cell_h_def) + + drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); + double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0); + + tri_inp_cap = input_cap; + tri_out_cap = output_cap; + tri_ctr_cap = ctr_cap; + return input_cap + output_cap + ctr_cap; } -void Crossbar::compute_power() -{ - - Wire winit(4, 4); - double tri_cap = output_buffer(); - assert(tri_cap > 0); - //area of a tristate logic - double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def); - g_area *= 2; // to model area of output transistors - g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def); - g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def); - double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def); - // effective no. of tristate buffers that need to be laid side by side - int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch)); - double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out); - Wire w1(g_ip->wt, wire_len); - - area.w = wire_len; - area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ; - Wire w2(g_ip->wt, area.h); - - double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp); - if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb; - - if (aspect_ratio_cb < ASPECT_THRESHOLD) { - if (n_out > 2 && n_inp > 2) { - CB_ADJ+=0.2; - //cout << "CB ADJ " << CB_ADJ << endl; - if (CB_ADJ < 4) { - this->compute_power(); - } +void Crossbar::compute_power() { + + Wire winit(4, 4); + double tri_cap = output_buffer(); + assert(tri_cap > 0); + //area of a tristate logic + double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_, + TriS2 * min_w_pmos, g_tp.cell_h_def); + g_area *= 2; // to model area of output transistors + g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_, + TriS1 * min_w_pmos, g_tp.cell_h_def); + g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_, + TriS1 * 2 * min_w_pmos, g_tp.cell_h_def); + double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def); + // effective no. of tristate buffers that need to be laid side by side + int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch)); + double wire_len = MAX(width * ntri * n_out, + flit_size * g_tp.wire_outside_mat.pitch * n_out); + Wire w1(g_ip->wt, wire_len); + + area.w = wire_len; + area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ; + Wire w2(g_ip->wt, area.h); + + double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp); + if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb; + + if (aspect_ratio_cb < ASPECT_THRESHOLD) { + if (n_out > 2 && n_inp > 2) { + CB_ADJ += 0.2; + //cout << "CB ADJ " << CB_ADJ << endl; + if (CB_ADJ < 4) { + this->compute_power(); + } + } } - } - - - - power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size; - power.readOp.leakage = n_inp * n_out * flit_size * ( - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ - w1.power.readOp.leakage + w2.power.readOp.leakage); - power.readOp.gate_leakage = n_inp * n_out * flit_size * ( - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ - w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); - - // delay calculation - double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; - Wire wdriver(g_ip->wt, l_eff); - double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1); - double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap; - delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); - - Wire wreset(); + + + + power.readOp.dynamic = + (w1.power.readOp.dynamic + w2.power.readOp.dynamic + + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + + tri_int_cap) * Vdd * Vdd) * flit_size; + power.readOp.leakage = n_inp * n_out * flit_size * ( + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, + 1, inv) * Vdd + + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nand) * Vdd + + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nor) * Vdd + + w1.power.readOp.leakage + w2.power.readOp.leakage); + power.readOp.gate_leakage = n_inp * n_out * flit_size * ( + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, + 1, inv) * Vdd + + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nand) * Vdd + + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nor) * Vdd + + w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); + + // delay calculation + double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; + Wire wdriver(g_ip->wt, l_eff); + double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) + + tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1); + double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out * + tri_inp_cap + n_inp * tri_out_cap; + delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth / + deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE); + + Wire wreset(); } -void Crossbar::print_crossbar() -{ - cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; - cout << "Flit size : " << flit_size << " bits" << endl; - cout << "Width : " << area.w << " u" << endl; - cout << "Height : " << area.h << " u" << endl; - cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl; - cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; - cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl; - cout << "Crossbar Delay : " << delay*1e12 << " ps\n"; +void Crossbar::print_crossbar() { + cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; + cout << "Flit size : " << flit_size << " bits" << endl; + cout << "Width : " << area.w << " u" << endl; + cout << "Height : " << area.h << " u" << endl; + cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * + MIN(n_inp, n_out) << " (nJ)" << endl; + cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" + << endl; + cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 + << " (mW)" << endl; + cout << "Crossbar Delay : " << delay*1e12 << " ps\n"; } |