ext: add McPAT source

this patch adds the source for mcpat, a power, area, and timing modeling framework.
author: Anthony Gutierrez <atgutier@umich.edu> 2014-04-01 12:44:30 -0400
committer: Anthony Gutierrez <atgutier@umich.edu> 2014-04-01 12:44:30 -0400
commit: e553a7bfa7f0eb47b78632cd63e6e1e814025c9a (patch)
tree: f69a8e3e0ed55b95bf276b6f857793b9ef7b6490 /ext/mcpat/cacti/basic_circuit.cc
parent: 8d665ee166bf5476bb9b73a0016843ff9953c266 (diff)
download: gem5-e553a7bfa7f0eb47b78632cd63e6e1e814025c9a.tar.xz
1 files changed, 829 insertions, 0 deletions
diff --git a/ext/mcpat/cacti/basic_circuit.cc b/ext/mcpat/cacti/basic_circuit.cc
new file mode 100644
index 000000000..6efd5dd27
--- /dev/null
+++ b/ext/mcpat/cacti/basic_circuit.cc
@@ -0,0 +1,829 @@
+/*****************************************************************************
+ *                                McPAT/CACTI
+ *                      SOFTWARE LICENSE AGREEMENT
+ *            Copyright 2012 Hewlett-Packard Development Company, L.P.
+ *                          All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+
+#include "basic_circuit.h"
+#include "parameter.h"
+
+uint32_t _log2(uint64_t num)
+{
+  uint32_t log2 = 0;
+
+  if (num == 0)
+  {
+    std::cerr << "log0?" << std::endl;
+    exit(1);
+  }
+
+  while (num > 1)
+  {
+    num = (num >> 1);
+    log2++;
+  }
+
+  return log2;
+}
+
+
+bool is_pow2(int64_t val)
+{
+  if (val <= 0)
+  {
+    return false;
+  }
+  else if (val == 1)
+  {
+    return true;
+  }
+  else
+  {
+    return (_log2(val) != _log2(val-1));
+  }
+}
+
+
+int powers (int base, int n)
+{
+  int i, p;
+
+  p = 1;
+  for (i = 1; i <= n; ++i)
+    p *= base;
+  return p;
+}
+
+/*----------------------------------------------------------------------*/
+
+double logtwo (double x)
+{
+  assert(x > 0);
+  return ((double) (log (x) / log (2.0)));
+}
+
+/*----------------------------------------------------------------------*/
+
+
+double gate_C(
+    double width,
+    double wirelength,
+    bool   _is_dram,
+    bool   _is_cell,
+    bool   _is_wl_tr)
+{
+  const TechnologyParameter::DeviceType * dt;
+
+  if (_is_dram && _is_cell)
+  {
+    dt = &g_tp.dram_acc;   //DRAM cell access transistor
+  }
+  else if (_is_dram && _is_wl_tr)
+  {
+    dt = &g_tp.dram_wl;    //DRAM wordline transistor
+  }
+  else if (!_is_dram && _is_cell)
+  {
+    dt = &g_tp.sram_cell;  // SRAM cell access transistor
+  }
+  else
+  {
+    dt = &g_tp.peri_global;
+  }
+
+  return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+}
+
+
+// returns gate capacitance in Farads
+// actually this function is the same as gate_C() now
+double gate_C_pass(
+    double width,       // gate width in um (length is Lphy_periph_global)
+    double wirelength,  // poly wire length going to gate in lambda
+    bool   _is_dram,
+    bool   _is_cell,
+    bool   _is_wl_tr)
+{
+  // v5.0
+  const TechnologyParameter::DeviceType * dt;
+
+  if ((_is_dram) && (_is_cell))
+  {
+    dt = &g_tp.dram_acc;   //DRAM cell access transistor
+  }
+  else if ((_is_dram) && (_is_wl_tr))
+  {
+    dt = &g_tp.dram_wl;    //DRAM wordline transistor
+  }
+  else if ((!_is_dram) && _is_cell)
+  {
+    dt = &g_tp.sram_cell;  // SRAM cell access transistor
+  }
+  else
+  {
+    dt = &g_tp.peri_global;
+  }
+
+  return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+}
+
+
+
+double drain_C_(
+    double width,
+    int nchannel,
+    int stack,
+    int next_arg_thresh_folding_width_or_height_cell,
+    double fold_dimension,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr)
+{
+  double w_folded_tr;
+  const  TechnologyParameter::DeviceType * dt;
+
+  if ((_is_dram) && (_is_cell))
+  {
+    dt = &g_tp.dram_acc;   // DRAM cell access transistor
+  }
+  else if ((_is_dram) && (_is_wl_tr))
+  {
+    dt = &g_tp.dram_wl;    // DRAM wordline transistor
+  }
+  else if ((!_is_dram) && _is_cell)
+  {
+    dt = &g_tp.sram_cell;  // SRAM cell access transistor
+  }
+  else
+  {
+    dt = &g_tp.peri_global;
+  }
+
+  double c_junc_area = dt->C_junc;
+  double c_junc_sidewall = dt->C_junc_sidewall;
+  double c_fringe    = 2*dt->C_fringe;
+  double c_overlap   = 2*dt->C_overlap;
+  double drain_C_metal_connecting_folded_tr = 0;
+
+  // determine the width of the transistor after folding (if it is getting folded)
+  if (next_arg_thresh_folding_width_or_height_cell == 0)
+  { // interpret fold_dimension as the the folding width threshold
+    // i.e. the value of transistor width above which the transistor gets folded
+    w_folded_tr = fold_dimension;
+  }
+  else
+  { // interpret fold_dimension as the height of the cell that this transistor is part of.
+    double h_tr_region  = fold_dimension - 2 * g_tp.HPOWERRAIL;
+    // TODO : w_folded_tr must come from Component::compute_gate_area()
+    double ratio_p_to_n = 2.0 / (2.0 + 1.0);
+    if (nchannel)
+    {
+      w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+    }
+    else
+    {
+      w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+    }
+  }
+  int num_folded_tr = (int) (ceil(width / w_folded_tr));
+
+  if (num_folded_tr < 2)
+  {
+    w_folded_tr = width;
+  }
+
+  double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +  // only for drain
+                         (stack - 1) * g_tp.spacing_poly_to_poly;
+  double drain_h_for_sidewall = w_folded_tr;
+  double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
+  if (num_folded_tr > 1)
+  {
+    total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
+                     (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
+
+    if (num_folded_tr%2 == 0)
+    {
+      drain_h_for_sidewall = 0;
+    }
+    total_drain_height_for_cap_wrt_gate *= num_folded_tr;
+    drain_C_metal_connecting_folded_tr   = g_tp.wire_local.C_per_um * total_drain_w;
+  }
+
+  double drain_C_area     = c_junc_area * total_drain_w * w_folded_tr;
+  double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
+  double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
+
+  return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
+}
+
+
+double tr_R_on(
+    double width,
+    int nchannel,
+    int stack,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr)
+{
+  const TechnologyParameter::DeviceType * dt;
+
+  if ((_is_dram) && (_is_cell))
+  {
+    dt = &g_tp.dram_acc;   //DRAM cell access transistor
+  }
+  else if ((_is_dram) && (_is_wl_tr))
+  {
+    dt = &g_tp.dram_wl;    //DRAM wordline transistor
+  }
+  else if ((!_is_dram) && _is_cell)
+  {
+    dt = &g_tp.sram_cell;  // SRAM cell access transistor
+  }
+  else
+  {
+    dt = &g_tp.peri_global;
+  }
+
+  double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+  return (stack * restrans / width);
+}
+
+
+/* This routine operates in reverse: given a resistance, it finds
+ * the transistor width that would have this R.  It is used in the
+ * data wordline to estimate the wordline driver size. */
+
+// returns width in um
+double R_to_w(
+    double res,
+    int   nchannel,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr)
+{
+  const TechnologyParameter::DeviceType * dt;
+
+  if ((_is_dram) && (_is_cell))
+  {
+    dt = &g_tp.dram_acc;   //DRAM cell access transistor
+  }
+  else if ((_is_dram) && (_is_wl_tr))
+  {
+    dt = &g_tp.dram_wl;    //DRAM wordline transistor
+  }
+  else if ((!_is_dram) && (_is_cell))
+  {
+    dt = &g_tp.sram_cell;  // SRAM cell access transistor
+  }
+  else
+  {
+    dt = &g_tp.peri_global;
+  }
+
+  double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+  return (restrans / res);
+}
+
+
+double pmos_to_nmos_sz_ratio(
+    bool _is_dram,
+    bool _is_wl_tr)
+{
+  double p_to_n_sizing_ratio;
+  if ((_is_dram) && (_is_wl_tr))
+  { //DRAM wordline transistor
+    p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
+  }
+  else
+  { //DRAM or SRAM all other transistors
+    p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
+  }
+  return p_to_n_sizing_ratio;
+}
+
+
+// "Timing Models for MOS Circuits" by Mark Horowitz, 1984
+double horowitz(
+    double inputramptime, // input rise time
+    double tf,            // time constant of gate
+    double vs1,           // threshold voltage
+    double vs2,           // threshold voltage
+    int    rise)          // whether input rises or fall
+{
+  if (inputramptime == 0 && vs1 == vs2)
+  {
+    return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
+  }
+  double a, b, td;
+
+  a = inputramptime / tf;
+  if (rise == RISE)
+  {
+    b = 0.5;
+    td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2));
+  }
+  else
+  {
+    b = 0.4;
+    td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2));
+  }
+  return (td);
+}
+
+double cmos_Ileak(
+    double nWidth,
+    double pWidth,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr)
+{
+  TechnologyParameter::DeviceType * dt;
+
+  if ((!_is_dram)&&(_is_cell))
+  { //SRAM cell access transistor
+    dt = &(g_tp.sram_cell);
+  }
+  else if ((_is_dram)&&(_is_wl_tr))
+  { //DRAM wordline transistor
+    dt = &(g_tp.dram_wl);
+  }
+  else
+  { //DRAM or SRAM all other transistors
+    dt = &(g_tp.peri_global);
+  }
+  return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
+}
+
+
+double simplified_nmos_leakage(
+    double nwidth,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr)
+{
+  TechnologyParameter::DeviceType * dt;
+
+  if ((!_is_dram)&&(_is_cell))
+  { //SRAM cell access transistor
+    dt = &(g_tp.sram_cell);
+  }
+  else if ((_is_dram)&&(_is_wl_tr))
+  { //DRAM wordline transistor
+    dt = &(g_tp.dram_wl);
+  }
+  else
+  { //DRAM or SRAM all other transistors
+    dt = &(g_tp.peri_global);
+  }
+  return nwidth * dt->I_off_n;
+}
+
+int factorial(int n, int m)
+{
+        int fa = m, i;
+        for (i=m+1; i<=n; i++)
+                fa *=i;
+        return fa;
+}
+
+int combination(int n, int m)
+{
+  int ret;
+  ret = factorial(n, m+1) / factorial(n - m);
+  return ret;
+}
+
+double simplified_pmos_leakage(
+    double pwidth,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr)
+{
+  TechnologyParameter::DeviceType * dt;
+
+  if ((!_is_dram)&&(_is_cell))
+  { //SRAM cell access transistor
+    dt = &(g_tp.sram_cell);
+  }
+  else if ((_is_dram)&&(_is_wl_tr))
+  { //DRAM wordline transistor
+    dt = &(g_tp.dram_wl);
+  }
+  else
+  { //DRAM or SRAM all other transistors
+    dt = &(g_tp.peri_global);
+  }
+  return pwidth * dt->I_off_p;
+}
+
+double cmos_Ig_n(
+    double nWidth,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr)
+{
+  TechnologyParameter::DeviceType * dt;
+
+  if ((!_is_dram)&&(_is_cell))
+  { //SRAM cell access transistor
+    dt = &(g_tp.sram_cell);
+  }
+  else if ((_is_dram)&&(_is_wl_tr))
+  { //DRAM wordline transistor
+    dt = &(g_tp.dram_wl);
+  }
+  else
+  { //DRAM or SRAM all other transistors
+    dt = &(g_tp.peri_global);
+  }
+  return nWidth*dt->I_g_on_n;
+}
+
+double cmos_Ig_p(
+    double pWidth,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr)
+{
+  TechnologyParameter::DeviceType * dt;
+
+  if ((!_is_dram)&&(_is_cell))
+  { //SRAM cell access transistor
+    dt = &(g_tp.sram_cell);
+  }
+  else if ((_is_dram)&&(_is_wl_tr))
+  { //DRAM wordline transistor
+    dt = &(g_tp.dram_wl);
+  }
+  else
+  { //DRAM or SRAM all other transistors
+    dt = &(g_tp.peri_global);
+  }
+  return pWidth*dt->I_g_on_p;
+}
+
+double cmos_Isub_leakage(
+    double nWidth,
+    double pWidth,
+    int    fanin,
+    enum Gate_type g_type,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr,
+    enum Half_net_topology topo)
+{
+        assert (fanin>=1);
+        double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
+        double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
+    double Isub=0;
+    int    num_states;
+    int    num_off_tx;
+
+    num_states = int(pow(2.0, fanin));
+
+    switch (g_type)
+    {
+    case nmos:
+        if (fanin==1)
+        {
+                Isub = nmos_leak/num_states;
+        }
+        else
+        {
+                if (topo==parallel)
+                {
+                        Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
+                }
+                else
+                {
+                        for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
+                        {
+                                //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+                                Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+                        }
+                        Isub /=num_states;
+                }
+
+        }
+        break;
+    case pmos:
+        if (fanin==1)
+        {
+                Isub = pmos_leak/num_states;
+        }
+        else
+        {
+                if (topo==parallel)
+                {
+                        Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
+                }
+                else
+                {
+                        for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
+                        {
+                                //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+                                Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+                        }
+                        Isub /=num_states;
+                }
+
+        }
+        break;
+    case inv:
+        Isub = (nmos_leak + pmos_leak)/2;
+        break;
+    case nand:
+        Isub += fanin*pmos_leak;//the pullup network
+        for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network
+        {
+                //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+            Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+        }
+        Isub /=num_states;
+        break;
+    case nor:
+        for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network
+        {
+                //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+                Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+        }
+        Isub += fanin*nmos_leak;//the pulldown network
+        Isub /=num_states;
+        break;
+    case tri:
+        Isub += (nmos_leak + pmos_leak)/2;//enabled
+        Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power
+        Isub /=2;
+        break;
+    case tg:
+        Isub = (nmos_leak + pmos_leak)/2;
+        break;
+    default:
+        assert(0);
+        break;
+          }
+
+    return Isub;
+}
+
+
+double cmos_Ig_leakage(
+    double nWidth,
+    double pWidth,
+    int    fanin,
+    enum Gate_type g_type,
+    bool _is_dram,
+    bool _is_cell,
+    bool _is_wl_tr,
+    enum Half_net_topology topo)
+{
+        assert (fanin>=1);
+                double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
+                double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
+            double Ig_on=0;
+            int    num_states;
+            int    num_on_tx;
+
+            num_states = int(pow(2.0, fanin));
+
+            switch (g_type)
+            {
+            case nmos:
+                if (fanin==1)
+                {
+                        Ig_on = nmos_leak/num_states;
+                }
+                else
+                {
+                        if (topo==parallel)
+                        {
+                        for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
+                        {
+                                Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+                        }
+                        }
+                        else
+                        {
+                                Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+                            //num_on_tx is the number of on tx
+                                for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
+                                {
+                                        Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
+                                }
+                                Ig_on /=num_states;
+                        }
+                }
+                break;
+            case pmos:
+                if (fanin==1)
+                {
+                        Ig_on = pmos_leak/num_states;
+                }
+                else
+                {
+                        if (topo==parallel)
+                    {
+                  for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
+                  {
+                          Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+                  }
+                    }
+                    else
+                    {
+                          Ig_on += pmos_leak * fanin;//pull down network when all TXs are on.
+                      //num_on_tx is the number of on tx
+                          for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
+                          {
+                                  Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
+                          }
+                          Ig_on /=num_states;
+                    }
+                }
+                break;
+
+            case inv:
+                Ig_on = (nmos_leak + pmos_leak)/2;
+                break;
+            case nand:
+                //pull up network
+                for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
+                {
+                        Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+                }
+
+                //pull down network
+                Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+                //num_on_tx is the number of on tx
+                for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
+                {
+                        Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
+                }
+                Ig_on /=num_states;
+                break;
+            case nor:
+                // num_on_tx is the number of on tx in pull up network
+                Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
+                for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)
+                {
+                        Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;
+
+                }
+                //pull down network
+                for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
+                {
+                        Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+                }
+                Ig_on /=num_states;
+                break;
+            case tri:
+                Ig_on += (2*nmos_leak + 2*pmos_leak)/2;//enabled
+                Ig_on += (nmos_leak + pmos_leak)/2; //disabled upper bound of leakage power
+                Ig_on /=2;
+                break;
+            case tg:
+                Ig_on = (nmos_leak + pmos_leak)/2;
+                break;
+            default:
+                assert(0);
+                break;
+                  }
+
+            return Ig_on;
+}
+
+double shortcircuit_simple(
+    double vt,
+    double velocity_index,
+    double c_in,
+    double c_out,
+    double w_nmos,
+    double w_pmos,
+    double i_on_n,
+    double i_on_p,
+    double i_on_n_in,
+    double i_on_p_in,
+    double vdd)
+{
+
+        double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
+        double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
+
+        fo_n	= i_on_n/i_on_n_in;
+        fo_p	= i_on_p/i_on_p_in;
+        fanout	= c_out/c_in;
+        beta_ratio = i_on_p/i_on_n;
+        vt_to_vdd_ratio = vt/vdd;
+
+        //p_short_circuit_discharge_low 	= 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+        p_short_circuit_discharge_low 	= 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+        p_short_circuit_charge_low 		= 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
+//	double t1, t2, t3, t4, t5;
+//	t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
+//	t2=pow(velocity_index,2.0);
+//	t3=pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio);
+//	t4=t1/t2/t3;
+//	cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
+
+        p_short_circuit_discharge_high 	= pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_p/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+        p_short_circuit_charge_high 	= pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_n/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+
+//	t1=pow(((vdd-vt)-vt_to_vdd_ratio),1.5);
+//	t2=pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+//	t3=t1/t2;
+//	cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
+//	p_short_circuit_discharge = 1.0/(1.0/p_short_circuit_discharge_low + 1.0/p_short_circuit_discharge_high);
+//	p_short_circuit_charge = 1/(1/p_short_circuit_charge_low + 1/p_short_circuit_charge_high); //harmmoic mean cannot be applied simple formulas.
+
+        p_short_circuit_discharge = p_short_circuit_discharge_low;
+        p_short_circuit_charge = p_short_circuit_charge_low;
+        p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge)/2;
+
+  return (p_short_circuit);
+}
+
+double shortcircuit(
+    double vt,
+    double velocity_index,
+    double c_in,
+    double c_out,
+    double w_nmos,
+    double w_pmos,
+    double i_on_n,
+    double i_on_p,
+    double i_on_n_in,
+    double i_on_p_in,
+    double vdd)
+{
+
+        double p_short_circuit=0, p_short_circuit_discharge;//, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
+        double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
+        double f_alpha, k_v, e, g_v_alpha, h_v_alpha;
+
+        fo_n		= i_on_n/i_on_n_in;
+        fo_p		= i_on_p/i_on_p_in;
+        fanout		= 1;
+        beta_ratio 	= i_on_p/i_on_n;
+        vt_to_vdd_ratio = vt/vdd;
+        e 			= 	2.71828;
+        f_alpha		=	1/(velocity_index+2) -velocity_index/(2*(velocity_index+3)) +velocity_index/(velocity_index+4)*(velocity_index/2-1);
+        k_v			=	0.9/0.8+(vdd-vt)/0.8*log(10*(vdd-vt)/e);
+        g_v_alpha	=	(velocity_index + 1)*pow((1-velocity_index),velocity_index)*pow((1-velocity_index),velocity_index/2)/f_alpha/pow((1-velocity_index-velocity_index),(velocity_index/2+velocity_index+2));
+        h_v_alpha	=   pow(2, velocity_index)*(velocity_index+1)*pow((1-velocity_index),velocity_index)/pow((1-velocity_index-velocity_index),(velocity_index+1));
+
+        //p_short_circuit_discharge_low 	= 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+//	p_short_circuit_discharge_low 	= 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+//	p_short_circuit_charge_low 		= 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
+//	double t1, t2, t3, t4, t5;
+//	t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
+//	t2=pow(velocity_index,2.0);
+//	t3=pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio);
+//	t4=t1/t2/t3;
+//
+//	cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
+//
+//
+//	p_short_circuit_discharge_high 	= pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_p/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+//	p_short_circuit_charge_high 	= pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_n/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+//
+//	p_short_circuit_discharge = 1.0/(1.0/p_short_circuit_discharge_low + 1.0/p_short_circuit_discharge_high);
+//	p_short_circuit_charge = 1/(1/p_short_circuit_charge_low + 1/p_short_circuit_charge_high);
+//
+//	p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge)/2;
+//
+//	p_short_circuit = p_short_circuit_discharge;
+
+        p_short_circuit_discharge = k_v*vdd*vdd*c_in*fo_p*fo_p/((vdd-vt)*g_v_alpha*fanout*beta_ratio/2/k_v + h_v_alpha*fo_p);
+  return (p_short_circuit);
+}
author	Anthony Gutierrez <atgutier@umich.edu>	2014-04-01 12:44:30 -0400
committer	Anthony Gutierrez <atgutier@umich.edu>	2014-04-01 12:44:30 -0400
commit	e553a7bfa7f0eb47b78632cd63e6e1e814025c9a (patch)
tree	f69a8e3e0ed55b95bf276b6f857793b9ef7b6490 /ext/mcpat/cacti/basic_circuit.cc
parent	8d665ee166bf5476bb9b73a0016843ff9953c266 (diff)
download	gem5-e553a7bfa7f0eb47b78632cd63e6e1e814025c9a.tar.xz