1 files changed, 434 insertions, 340 deletions
diff --git a/ext/mcpat/iocontrollers.cc b/ext/mcpat/iocontrollers.cc
index 70b0f2dcb..4a175d841 100644
--- a/ext/mcpat/iocontrollers.cc
+++ b/ext/mcpat/iocontrollers.cc
@@ -2,6 +2,7 @@
  *                                McPAT
  *                      SOFTWARE LICENSE AGREEMENT
  *            Copyright 2012 Hewlett-Packard Development Company, L.P.
+ *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
  *                          All Rights Reserved
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  ***************************************************************************/
 #include <algorithm>
@@ -34,14 +35,12 @@
 #include <iostream>
 #include <string>
 
-#include "XML_Parse.h"
 #include "basic_circuit.h"
-#include "basic_components.h"
+#include "common.h"
 #include "const.h"
 #include "io.h"
 #include "iocontrollers.h"
 #include "logic.h"
-#include "parameter.h"
 
 /*
 SUN Niagara 2 I/O power analysis:
@@ -69,378 +68,473 @@ Further, if assuming I/O logic power is about 50% of I/Os then Total energy of F
  *
  */
 
-NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_)
-:XML(XML_interface),
- interface_ip(*interface_ip_)
- {
-          local_result = init_interface(&interface_ip);
-
-          double frontend_area, phy_area, mac_area, SerDer_area;
-      double frontend_dyn, mac_dyn, SerDer_dyn;
-      double frontend_gates, mac_gates, SerDer_gates;
-          double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
-          double NMOS_sizing, PMOS_sizing;
-
-          set_niu_param();
-
-          if (niup.type == 0) //high performance NIU
-          {
-                  //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm.
-                  mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  //Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
-                  //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS
-                  frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
-                  //SerDer is very hard to scale
-                  SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
-                  phy_area = frontend_area + SerDer_area;
-                  //total area
-                  area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
-                  //Power
-                  //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
-                  mac_dyn      = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
-                  //Cadence ChipEstimate using 65nm soft IP;
-                  frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
-                  //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
-                  //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
-                  SerDer_dyn   = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
-                  SerDer_dyn   /= niup.clockRate;//covert to energy per clock cycle of whole NIU
-
-                  //Cadence ChipEstimate using 65nm
-                  mac_gates       = 111700;
-                  frontend_gates  = 320000;
-                  SerDer_gates    = 200000;
-                  NMOS_sizing 	  = 5*g_tp.min_w_nmos_;
-                  PMOS_sizing	  = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-
-
-          }
-          else
-          {//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect
-                  // ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not
-                  // a simple summation of all IPs. Ignore this effect
-                  mac_area      = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  frontend_area = 0.1  * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer
-                  SerDer_area   = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique"
-                  //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology
-                  //total area
-                  area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
-                  //Power
-                  //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
-                  mac_dyn      = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
-                  //Cadence ChipEstimate using 65nm soft IP;
-                  frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
-                  //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
-                  SerDer_dyn   = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
-                  SerDer_dyn   /= niup.clockRate;//covert to energy per clock cycle of whole NIU
-
-                  mac_gates       = 111700;
-                  frontend_gates  = 52000;
-                  SerDer_gates    = 199260;
-
-                  NMOS_sizing 	  = g_tp.min_w_nmos_;
-                  PMOS_sizing	  = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-
-          }
-
-          power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
-          power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
-          double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
-          power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
-          power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- }
-
-void NIUController::computeEnergy(bool is_tdp)
-{
-        if (is_tdp)
-    {
+NIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_)
+    : McPATComponent(_xml_data, interface_ip_) {
+    name = "NIU";
+    set_niu_param();
+}
 
+void NIUController::computeArea() {
+    double mac_area;
+    double frontend_area;
+    double SerDer_area;
+
+    if (niup.type == 0) { //high performance NIU
+        //Area estimation based on average of die photo from Niagara 2 and
+        //Cadence ChipEstimate using 65nm.
+        mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) *
+            (interface_ip.F_sz_um / 0.065);
+        //Area estimation based on average of die photo from Niagara 2, ISSCC
+        //"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
+        //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface
+        //With Robust VCO Tuning Technique" Frontend is PCS
+        frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 *
+            (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065);
+        //Area estimation based on average of die photo from Niagara 2 and
+        //Cadence ChipEstimate hard IP @65nm.
+        //SerDer is very hard to scale
+        SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um /
+                                       0.065);//* (interface_ip.F_sz_um/0.065);
+    } else {
+        //Low power implementations are mostly from Cadence ChipEstimator;
+        //Ignore the multiple IP effect
+        // ---When there are multiple IP (same kind or not) selected, Cadence
+        //ChipEstimator results are not a simple summation of all IPs.
+        //Ignore this effect
+        mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) *
+            (interface_ip.F_sz_um / 0.065);
+        frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) *
+            (interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer
+        SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) *
+            (interface_ip.F_sz_um/0.065);
+        //Compare 130nm implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet
+        //Transceiver and XAUI Interface With Robust VCO Tuning Technique"
+        //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can
+        //scale perfectly with the technology
+    }
 
-                power	= power_t;
-        power.readOp.dynamic *= niup.duty_cycle;
+    //total area
+    output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6;
+ }
 
+void NIUController::computeEnergy() {
+    double mac_dyn;
+    double frontend_dyn;
+    double SerDer_dyn;
+    double frontend_gates;
+    double mac_gates;
+    double SerDer_gates;
+    double NMOS_sizing;
+    double PMOS_sizing;
+    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+
+    if (niup.type == 0) { //high performance NIU
+        //Power
+        //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
+        //E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
+        //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
+        mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
+            1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
+        //Cadence ChipEstimate using 65nm soft IP;
+        frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 *
+            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+        //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
+        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
+        SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) *
+            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
+
+        //Cadence ChipEstimate using 65nm
+        mac_gates = 111700;
+        frontend_gates = 320000;
+        SerDer_gates = 200000;
+        NMOS_sizing = 5 * g_tp.min_w_nmos_;
+        PMOS_sizing	= 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+    } else {
+        //Power
+        //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
+        //E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
+        //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
+        mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd
+            / 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
+        //Cadence ChipEstimate using 65nm soft IP;
+        frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 *
+            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+        //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
+        SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) *
+            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
+
+        mac_gates = 111700;
+        frontend_gates = 52000;
+        SerDer_gates = 199260;
+        NMOS_sizing = g_tp.min_w_nmos_;
+        PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
     }
-    else
-    {
-        rt_power = power_t;
-        rt_power.readOp.dynamic *= niup.perc_load;
-    }
+
+    //convert SerDer power to energy per clock cycle of whole NIU
+    SerDer_dyn /= niup.clockRate;
+
+    power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
+    power.readOp.leakage = (mac_gates + frontend_gates + SerDer_gates) *
+        cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+        g_tp.peri_global.Vdd;//unit W (MAC + frontend + SerDer gates)
+    double long_channel_device_reduction =
+        longer_channel_device_reduction(Uncore_device);
+    power.readOp.longer_channel_leakage =
+        power.readOp.leakage * long_channel_device_reduction;
+    power.readOp.gate_leakage = (mac_gates + frontend_gates + SerDer_gates) *
+        cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+        g_tp.peri_global.Vdd;//unit W (MAC + frontend + SerDer gates)
+
+    // Output power; subthreshold term uses long-channel value when enabled
+    output_data.subthreshold_leakage_power =
+        longer_channel_device ? power.readOp.longer_channel_leakage :
+        power.readOp.leakage;
+    output_data.gate_leakage_power = power.readOp.gate_leakage;
+    output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle;
+    output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load;
 }
 
-void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
-        string indent_str(indent, ' ');
-        string indent_str_next(indent+2, ' ');
-        bool long_channel = XML->sys.longer_channel_device;
-
-        if (is_tdp)
-        {
-                cout << "NIU:" << endl;
-                cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
-                cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate  << " W" << endl;
-                cout << indent_str<< "Subthreshold Leakage = "
-                        << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
-                //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
-                cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
-                cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl;
-                cout<<endl;
-        }
-        else
-        {
+void NIUController::set_niu_param() {
+    int num_children = xml_data->nChildNode("param");
+    int i;
+    for (i = 0; i < num_children; i++) {
+        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+        XMLCSTR node_name = paramNode->getAttribute("name");
+        XMLCSTR value = paramNode->getAttribute("value");
 
-        }
+        if (!node_name)
+            warnMissingParamName(paramNode->getAttribute("id"));
 
-}
+        ASSIGN_FP_IF("niu_clockRate", niup.clockRate);
+        ASSIGN_INT_IF("num_units", niup.num_units);
+        ASSIGN_INT_IF("type", niup.type);
 
-void NIUController::set_niu_param()
-{
-          niup.clockRate       = XML->sys.niu.clockrate;
-          niup.clockRate       *= 1e6;
-          niup.num_units       = XML->sys.niu.number_units;
-          niup.duty_cycle      = XML->sys.niu.duty_cycle;
-          niup.perc_load       = XML->sys.niu.total_load_perc;
-          niup.type            = XML->sys.niu.type;
-//	  niup.executionTime   = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
-}
+        else {
+            warnUnrecognizedParam(node_name);
+        }
+    }
 
-PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_)
-:XML(XML_interface),
- interface_ip(*interface_ip_)
- {
-          local_result = init_interface(&interface_ip);
-          double frontend_area, phy_area, ctrl_area, SerDer_area;
-      double ctrl_dyn, frontend_dyn, SerDer_dyn;
-      double ctrl_gates,frontend_gates, SerDer_gates;
-          double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
-          double NMOS_sizing, PMOS_sizing;
-
-          /* Assuming PCIe is bit-slice based architecture
-           * This is the reason for /8 in both area and power calculation
-           * to get per lane numbers
-           */
-
-          set_pcie_param();
-          if (pciep.type == 0) //high performance NIU
-          {
-                  //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm.
-                  ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
-                  frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
-                  //SerDer is very hard to scale
-                  SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
-                  phy_area = frontend_area + SerDer_area;
-                  //total area
-                  //Power
-                  //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
-                  ctrl_dyn      = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
-                  //	  //Cadence ChipEstimate using 65nm soft IP;
-                  //	  frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
-                  //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
-                  SerDer_dyn   = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
-                  SerDer_dyn   /= pciep.clockRate;//covert to energy per clock cycle
-
-                  //power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels;
-                  //Cadence ChipEstimate using 65nm
-                  ctrl_gates       = 900000/8*pciep.num_channels;
-                  //	  frontend_gates   = 120000/8;
-                  //	  SerDer_gates     = 200000/8;
-                  NMOS_sizing 	  = 5*g_tp.min_w_nmos_;
-                  PMOS_sizing	  = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-          }
-          else
-          {
-                  ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
-          SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  //total area
-                  //Power
-                  //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
-                  ctrl_dyn      = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
-                  //	  //Cadence ChipEstimate using 65nm soft IP;
-                  //	  frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
-                  //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
-                  SerDer_dyn   = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
-                  SerDer_dyn   /= pciep.clockRate;//covert to energy per clock cycle
-
-                  //Cadence ChipEstimate using 65nm
-                  ctrl_gates       = 200000/8*pciep.num_channels;
-                  //	  frontend_gates   = 120000/8;
-                  SerDer_gates     = 200000/8*pciep.num_channels;
-                  NMOS_sizing 	  = g_tp.min_w_nmos_;
-                  PMOS_sizing	  = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-
-          }
-          area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6);
-          power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels;
-          power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
-          double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
-          power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
-          power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- }
+    // Change from MHz to Hz
+    niup.clockRate *= 1e6;
 
-void PCIeController::computeEnergy(bool is_tdp)
-{
-        if (is_tdp)
-    {
+    num_children = xml_data->nChildNode("stat");
+    for (i = 0; i < num_children; i++) {
+        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+        XMLCSTR node_name = statNode->getAttribute("name");
+        XMLCSTR value = statNode->getAttribute("value");
 
+        if (!node_name)
+            warnMissingStatName(statNode->getAttribute("id"));
 
-                power	= power_t;
-        power.readOp.dynamic *= pciep.duty_cycle;
+        ASSIGN_FP_IF("duty_cycle", nius.duty_cycle);
+        ASSIGN_FP_IF("perc_load", nius.perc_load);
 
-    }
-    else
-    {
-        rt_power = power_t;
-        rt_power.readOp.dynamic *= pciep.perc_load;
+        else {
+            warnUnrecognizedStat(node_name);
+        }
     }
 }
 
-void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
-        string indent_str(indent, ' ');
-        string indent_str_next(indent+2, ' ');
-        bool long_channel = XML->sys.longer_channel_device;
-
-        if (is_tdp)
-        {
-                cout << "PCIe:" << endl;
-                cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
-                cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate  << " W" << endl;
-                cout << indent_str<< "Subthreshold Leakage = "
-                        << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
-                //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
-                cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
-                cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl;
-                cout<<endl;
-        }
-        else
-        {
+PCIeController::PCIeController(XMLNode* _xml_data,
+                               InputParameter* interface_ip_)
+    : McPATComponent(_xml_data, interface_ip_) {
+    name = "PCIe";
+    set_pcie_param();
+}
 
-        }
+void PCIeController::computeArea() {
+    double ctrl_area;
+    double SerDer_area;
+
+    /* Assuming PCIe is bit-slice based architecture
+     * This is the reason for /8 in both area and power calculation
+     * to get per lane numbers
+     */
+
+    if (pciep.type == 0) { //high performance PCIe
+        //Area estimation based on average of die photo from Niagara 2 and
+        //Cadence ChipEstimate @ 65nm.
+        ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) *
+            (interface_ip.F_sz_um / 0.065);
+        //Area estimation based on average of die photo from Niagara 2 and
+        //Cadence ChipEstimate hard IP @65nm.
+        //SerDer is very hard to scale
+        SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um /
+                                       0.065);//* (interface_ip.F_sz_um/0.065);
+    } else {
+        ctrl_area = 0.412 * (interface_ip.F_sz_um / 0.065) *
+            (interface_ip.F_sz_um / 0.065);
+        //Area estimation based on average of die photo from Niagara 2, and
+        //Cadence ChipEstimate @ 65nm.
+        SerDer_area = 0.36 * (interface_ip.F_sz_um / 0.065) *
+            (interface_ip.F_sz_um / 0.065);
+    }
 
+    // Total area
+    output_data.area = ((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 *
+                        pciep.num_channels) * 1e6;
 }
 
-void PCIeController::set_pcie_param()
-{
-          pciep.clockRate       = XML->sys.pcie.clockrate;
-          pciep.clockRate       *= 1e6;
-          pciep.num_units       = XML->sys.pcie.number_units;
-          pciep.num_channels    = XML->sys.pcie.num_channels;
-          pciep.duty_cycle      = XML->sys.pcie.duty_cycle;
-          pciep.perc_load       = XML->sys.pcie.total_load_perc;
-          pciep.type            = XML->sys.pcie.type;
-          pciep.withPHY         = XML->sys.pcie.withPHY;
-//	  pciep.executionTime   = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
+void PCIeController::computeEnergy() {
+    double ctrl_dyn;
+    double SerDer_dyn;
+    double ctrl_gates;
+    double SerDer_gates = 0;
+    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+    double NMOS_sizing;
+    double PMOS_sizing;
+
+    /* Assuming PCIe is bit-slice based architecture
+     * This is the reason for /8 in both area and power calculation
+     * to get per lane numbers
+     */
+
+    if (pciep.type == 0) { //high performance PCIe
+        //Power
+        //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
+        ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
+            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+        //	  //Cadence ChipEstimate using 65nm soft IP;
+        //	  frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
+        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
+        //PCIe 2.0 max per lane speed is 4Gb/s
+        SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) *
+            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
+
+        //Cadence ChipEstimate using 65nm
+        ctrl_gates = 900000 / 8 * pciep.num_channels;
+        //	  frontend_gates   = 120000/8;
+        //	  SerDer_gates     = 200000/8;
+        NMOS_sizing = 5 * g_tp.min_w_nmos_;
+        PMOS_sizing	= 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+    } else {
+        //Power
+        //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
+        ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
+            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+        //	  //Cadence ChipEstimate using 65nm soft IP;
+        //	  frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
+        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
+        //PCIe 2.0 max per lane speed is 4Gb/s
+        SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) *
+            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
+
+        //Cadence ChipEstimate using 65nm
+        ctrl_gates = 200000 / 8 * pciep.num_channels;
+        //	  frontend_gates   = 120000/8;
+        SerDer_gates = 200000 / 8 * pciep.num_channels;
+        NMOS_sizing = g_tp.min_w_nmos_;
+        PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
 
+    }
+
+    //convert to energy per clock cycle
+    SerDer_dyn /= pciep.clockRate;
+
+    power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) *
+        pciep.num_channels;
+    power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) *
+        cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+        g_tp.peri_global.Vdd;//unit W
+    double long_channel_device_reduction =
+        longer_channel_device_reduction(Uncore_device);
+    power.readOp.longer_channel_leakage =
+        power.readOp.leakage * long_channel_device_reduction;
+    power.readOp.gate_leakage = (ctrl_gates +
+                                 (pciep.withPHY ? SerDer_gates : 0)) *
+        cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+        g_tp.peri_global.Vdd;//unit W
+
+    // Output power
+    output_data.subthreshold_leakage_power =
+        longer_channel_device ? power.readOp.longer_channel_leakage :
+        power.readOp.leakage;
+    output_data.gate_leakage_power = power.readOp.gate_leakage;
+    output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle;
+    output_data.runtime_dynamic_energy =
+        power.readOp.dynamic * pcies.perc_load;
 }
 
-FlashController::FlashController(ParseXML *XML_interface,InputParameter* interface_ip_)
-:XML(XML_interface),
- interface_ip(*interface_ip_)
- {
-          local_result = init_interface(&interface_ip);
-          double frontend_area, phy_area, ctrl_area, SerDer_area;
-      double ctrl_dyn, frontend_dyn, SerDer_dyn;
-      double ctrl_gates,frontend_gates, SerDer_gates;
-          double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
-          double NMOS_sizing, PMOS_sizing;
-
-          /* Assuming PCIe is bit-slice based architecture
-           * This is the reason for /8 in both area and power calculation
-           * to get per lane numbers
-           */
-
-          set_fc_param();
-          if (fcp.type == 0) //high performance NIU
-          {
-                  cout<<"Current McPAT does not support high performance flash contorller since even low power designs are enough for maintain throughput"<<endl;
-                  exit(0);
-                  NMOS_sizing 	  = 5*g_tp.min_w_nmos_;
-                  PMOS_sizing	  = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-          }
-          else
-          {
-                  ctrl_area   = 0.243 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-                  //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL from CAST
-          SerDer_area = 0.36/8 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
-          //based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it support 8x lanes with each lane
-          //speed up to 250MB/s (PCIe1.1x) This is already saturate the 200MB/s of the flash controller core above.
-                  ctrl_gates      = 129267;
-                  SerDer_gates    = 200000/8;
-                  NMOS_sizing 	  = g_tp.min_w_nmos_;
-                  PMOS_sizing	  = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-
-                  //Power
-                  //Cadence ChipEstimate using 65nm the controller 125mW for every 200MB/s This is power not energy!
-                  ctrl_dyn      = 0.125*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
-                  //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
-                  SerDer_dyn   = 0.01*1.6*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
-                  //max  Per controller speed is 1.6Gb/s (200MB/s)
-          }
-          double number_channel = 1+(fcp.num_channels-1)*0.2;
-          area.set_area((ctrl_area + (fcp.withPHY? SerDer_area:0))*1e6*number_channel);
-          power_t.readOp.dynamic = (ctrl_dyn + (fcp.withPHY? SerDer_dyn:0))*number_channel;
-          power_t.readOp.leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
-          double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
-          power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
-          power_t.readOp.gate_leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- }
+void PCIeController::set_pcie_param() { // fills pciep/pcies from xml_data
+    int num_children = xml_data->nChildNode("param");
+    int i; // passed by address to getChildNodePtr -- confirm it is not advanced
+    for (i = 0; i < num_children; i++) {
+        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+        XMLCSTR node_name = paramNode->getAttribute("name");
+        XMLCSTR value = paramNode->getAttribute("value");
+
+        if (!node_name) // NOTE(review): falls through after warning -- confirm
+            warnMissingParamName(paramNode->getAttribute("id"));
+
+        ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate);
+        ASSIGN_INT_IF("num_units", pciep.num_units);
+        ASSIGN_INT_IF("num_channels", pciep.num_channels);
+        ASSIGN_INT_IF("type", pciep.type);
+        ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool);
+
+        else { // presumably binds to the last ASSIGN_*_IF -- TODO confirm
+            warnUnrecognizedParam(node_name);
+        }
+    }
 
-void FlashController::computeEnergy(bool is_tdp)
-{
-        if (is_tdp)
-    {
+    // Change from MHz to Hz
+    pciep.clockRate *= 1e6;
 
+    num_children = xml_data->nChildNode("stat"); // second pass: "stat" nodes
+    for (i = 0; i < num_children; i++) {
+        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+        XMLCSTR node_name = statNode->getAttribute("name");
+        XMLCSTR value = statNode->getAttribute("value");
 
-                power	= power_t;
-        power.readOp.dynamic *= fcp.duty_cycle;
+        if (!node_name) // NOTE(review): falls through after warning -- confirm
+            warnMissingStatName(statNode->getAttribute("id"));
 
-    }
-    else
-    {
-        rt_power = power_t;
-        rt_power.readOp.dynamic *= fcp.perc_load;
+        ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle);
+        ASSIGN_FP_IF("perc_load", pcies.perc_load);
+
+        else { // presumably binds to the last ASSIGN_*_IF -- TODO confirm
+            warnUnrecognizedStat(node_name);
+        }
     }
 }
 
-void FlashController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
-        string indent_str(indent, ' ');
-        string indent_str_next(indent+2, ' ');
-        bool long_channel = XML->sys.longer_channel_device;
-
-        if (is_tdp)
-        {
-                cout << "Flash Controller:" << endl;
-                cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
-                cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;//no multiply of clock since this is power already
-                cout << indent_str<< "Subthreshold Leakage = "
-                        << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
-                //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
-                cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
-                cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
-                cout<<endl;
-        }
-        else
-        {
+FlashController::FlashController(XMLNode* _xml_data,
+                                 InputParameter* interface_ip_)
+    : McPATComponent(_xml_data, interface_ip_) { // base gets both pointers
+    name = "Flash Controller"; // presumably the display label -- confirm
+    set_fc_param(); // reads fcp/fcs settings from the XML node
+}
 
-        }
+// Estimates the flash controller area (plus the SerDes PHY when
+// fcp.withPHY is set) from 65nm reference figures scaled by the squared
+// feature-size ratio, and stores it in output_data.area.
+void FlashController::computeArea() {
+    if (fcp.type == 0) { //high performance flash controller
+        cout << "Current McPAT does not support high performance flash "
+             << "controller since even low power designs are enough for "
+             << "maintain throughput" <<endl;
+        // Unsupported configuration is an error: exit with failure status.
+        exit(EXIT_FAILURE);
+    }
+
+    const double size_ratio = interface_ip.F_sz_um / 0.065;
+    const double ctrl_area = 0.243 * size_ratio * size_ratio;
+    //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL
+    //from CAST
+    /* Assuming Flash is bit-slice based architecture
+     * This is the reason for /8 in both area and power calculation
+     * to get per lane numbers
+     */
+    const double SerDer_area = 0.36 / 8 * size_ratio * size_ratio;
+
+    // Each channel beyond the first adds 20% of a single controller.
+    const double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
+    output_data.area = (ctrl_area + (fcp.withPHY ? SerDer_area : 0)) *
+        1e6 * number_channel;
+}
 
+// Computes dynamic, subthreshold-leakage and gate-leakage power for the
+// flash controller (plus the SerDes PHY when fcp.withPHY is set) and
+// publishes the results in output_data. Exits on controller type 0.
+void FlashController::computeEnergy() {
+    const double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+    /* Assuming Flash is bit-slice based architecture
+     * This is the reason for /8 in both area and power calculation
+     * to get per lane numbers
+     */
+
+    if (fcp.type == 0) { //high performance flash controller
+        cout << "Current McPAT does not support high performance flash "
+             << "controller since even low power designs are enough for "
+             << "maintain throughput" <<endl;
+        // Unsupported configuration is an error: exit with a failure status
+        // so that calling scripts can detect it. exit() does not return,
+        // so no transistor sizing is needed on this path.
+        exit(EXIT_FAILURE);
+    }
+
+    //based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it
+    //support 8x lanes with each lane speed up to 250MB/s (PCIe1.1x).
+    //This is already saturate the 200MB/s of the flash controller core
+    //above.
+    const double ctrl_gates = 129267;
+    const double SerDer_gates = 200000 / 8;
+    const double NMOS_sizing = g_tp.min_w_nmos_;
+    const double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+
+    //Power
+    //Cadence ChipEstimate using 65nm the controller 125mW for every
+    //200MB/s This is power not energy!
+    const double ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 *
+        g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+    //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
+    //max  Per controller speed is 1.6Gb/s (200MB/s)
+    const double SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) *
+        g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
+
+    // Each channel beyond the first adds 20% of a single controller.
+    const double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
+    // Gate count shared by both leakage estimates below.
+    const double total_gates =
+        (ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel;
+
+    power.readOp.dynamic = (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) *
+        number_channel;
+    power.readOp.leakage = total_gates *
+        cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+        g_tp.peri_global.Vdd;//unit W
+    power.readOp.longer_channel_leakage =
+        power.readOp.leakage * longer_channel_device_reduction(Uncore_device);
+    power.readOp.gate_leakage = total_gates *
+        cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+        g_tp.peri_global.Vdd;//unit W
+
+    // Output power
+    output_data.subthreshold_leakage_power =
+        longer_channel_device ? power.readOp.longer_channel_leakage :
+        power.readOp.leakage;
+    output_data.gate_leakage_power = power.readOp.gate_leakage;
+    output_data.peak_dynamic_power = power.readOp.dynamic * fcs.duty_cycle;
+    // NOTE(review): readOp.dynamic is described above as power, not energy,
+    // yet it feeds a field named runtime_dynamic_energy -- confirm units.
+    output_data.runtime_dynamic_energy = power.readOp.dynamic * fcs.perc_load;
 }
 
 void FlashController::set_fc_param()
 {
-//	  fcp.clockRate       = XML->sys.flashc.mc_clock;
-//	  fcp.clockRate       *= 1e6;
-          fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate;
-          fcp.num_channels    = ceil(fcp.peakDataTransferRate/200);
-          fcp.num_mcs         = XML->sys.flashc.number_mcs;
-          fcp.duty_cycle      = XML->sys.flashc.duty_cycle;
-          fcp.perc_load       = XML->sys.flashc.total_load_perc;
-          fcp.type            = XML->sys.flashc.type;
-          fcp.withPHY         = XML->sys.flashc.withPHY;
-//	  flashcp.executionTime   = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
+    int num_children = xml_data->nChildNode("param"); // first pass: "param" nodes
+    int i; // passed by address to getChildNodePtr -- confirm it is not advanced
+    for (i = 0; i < num_children; i++) {
+        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+        XMLCSTR node_name = paramNode->getAttribute("name");
+        XMLCSTR value = paramNode->getAttribute("value");
+
+        if (!node_name) // NOTE(review): falls through after warning -- confirm
+            warnMissingParamName(paramNode->getAttribute("id"));
+
+        ASSIGN_INT_IF("num_channels", fcp.num_channels);
+        ASSIGN_INT_IF("type", fcp.type);
+        ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool);
+
+        else { // presumably binds to the last ASSIGN_*_IF -- TODO confirm
+            warnUnrecognizedParam(node_name);
+        }
+    }
+
+    num_children = xml_data->nChildNode("stat"); // second pass: "stat" nodes
+    for (i = 0; i < num_children; i++) {
+        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+        XMLCSTR node_name = statNode->getAttribute("name");
+        XMLCSTR value = statNode->getAttribute("value");
+
+        if (!node_name) // NOTE(review): falls through after warning -- confirm
+            warnMissingStatName(statNode->getAttribute("id"));
 
+        ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle);
+        ASSIGN_FP_IF("perc_load", fcs.perc_load);
+
+        else { // presumably binds to the last ASSIGN_*_IF -- TODO confirm
+            warnUnrecognizedStat(node_name);
+        }
+    }
 }