diff options
Diffstat (limited to 'ext/mcpat/memoryctrl.cc')
-rw-r--r-- | ext/mcpat/memoryctrl.cc | 1125 |
1 files changed, 501 insertions, 624 deletions
diff --git a/ext/mcpat/memoryctrl.cc b/ext/mcpat/memoryctrl.cc index ae3bc75ec..dec24512e 100644 --- a/ext/mcpat/memoryctrl.cc +++ b/ext/mcpat/memoryctrl.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,18 +26,19 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ + #include <algorithm> #include <cassert> #include <cmath> #include <iostream> #include <string> -#include "XML_Parse.h" #include "basic_circuit.h" #include "basic_components.h" +#include "common.h" #include "const.h" #include "io.h" #include "logic.h" @@ -69,668 +71,543 @@ * */ -MCBackend::MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) -:l_ip(*interface_ip_), - mc_type(mc_type_), - mcp(mcp_) -{ - - local_result = init_interface(&l_ip); - compute(); - +MCBackend::MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_) + : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) { + name = "Transaction Engine"; + local_result = init_interface(&l_ip, name); + + // Set up stats for the power calculations + tdp_stats.reset(); + tdp_stats.readAc.access = 0.5 * mcp.num_channels * mcp.clockRate; + tdp_stats.writeAc.access = 0.5 * mcp.num_channels * mcp.clockRate; + rtp_stats.reset(); + rtp_stats.readAc.access = mcs.reads; + rtp_stats.writeAc.access = mcs.writes; } +void MCBackend::computeArea() { + // The area is in nm^2 + if (mcp.mc_type == MC) { + if (mcp.type == 0) { + output_data.area = (2.7927 * log(mcp.peak_transfer_rate * 2) - + 19.862) / 2.0 * mcp.dataBusWidth / 128.0 * + (l_ip.F_sz_um / 0.09) * mcp.num_channels; + } else { + output_data.area = 0.15 * mcp.dataBusWidth / 72.0 * + (l_ip.F_sz_um / 0.065) * (l_ip.F_sz_um / 0.065) * + mcp.num_channels; + } + } else { + //skip old model + cout << "Unknown memory controllers" << endl; + exit(0); + //area based on Cadence ChipEstimator for 8bit bus + output_data.area = 0.243 * mcp.dataBusWidth / 8; + } +} -void MCBackend::compute() -{ - //double max_row_addr_width = 20.0;//Current address 12~18bits - double C_MCB, mc_power, backend_dyn, backend_gates;//, refresh_period,refresh_freq;//Equivalent per bit Cap for backend, - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - if (mc_type == MC) - { - if (mcp.type == 0) - { - //area = (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09); - area.set_area((2.7927*log(mcp.peakDataTransferRate*2)-19.862)/2.0*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6);//um^2 - //assuming the approximately same scaling factor as seen in processors. - //C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode processor which has a very basic mc on chip. - //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process. - //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et - mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend - C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; - power_t.readOp.dynamic = C_MCB*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(mcp.dataBusWidth/*+mcp.addressBusWidth*/);//per access energy in memory controller - power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W +void MCBackend::computeEnergy() { + double C_MCB, mc_power; + double backend_dyn; + double backend_gates; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + double NMOS_sizing = g_tp.min_w_nmos_; + double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + double area_um2 = output_data.area * 1e6; + + if (mcp.mc_type == MC) { + if (mcp.type == 0) { + //assuming the approximately same scaling factor as seen in processors. + //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process. + //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et + mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend + C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; + //per access energy in memory controller + power.readOp.dynamic = C_MCB * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (mcp.dataBusWidth/*+mcp.addressBusWidth*/); + power.readOp.leakage = area_um2 / 2 * + (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * + g_tp.peri_global.Vdd;//unit W + power.readOp.gate_leakage = area_um2 / 2 * + (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * + g_tp.peri_global.Vdd;//unit W + } else { + //Average on DDR2/3 protocol controller and DDRC 1600/800A in + //Cadence ChipEstimate + backend_dyn = 0.9e-9 / 800e6 * mcp.clockRate / 12800 * + mcp.peak_transfer_rate* mcp.dataBusWidth / 72.0 * + g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 1.1 * + (l_ip.F_sz_nm/65.0); + //Scaling to technology and DIMM feature. The base IP support + //DDR3-1600(PC3 12800) + //5000 is from Cadence ChipEstimator + backend_gates = 50000 * mcp.dataBusWidth / 64.0; + + power.readOp.dynamic = backend_dyn; + power.readOp.leakage = (backend_gates) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + power.readOp.gate_leakage = (backend_gates) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W } - else - { NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - area.set_area(0.15*mcp.dataBusWidth/72.0*(l_ip.F_sz_um/0.065)* (l_ip.F_sz_um/0.065)*mcp.num_channels*1e6);//um^2 - backend_dyn = 0.9e-9/800e6*mcp.clockRate/12800*mcp.peakDataTransferRate*mcp.dataBusWidth/72.0*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(l_ip.F_sz_nm/65.0);//Average on DDR2/3 protocol controller and DDRC 1600/800A in Cadence ChipEstimate - //Scaling to technology and DIMM feature. The base IP support DDR3-1600(PC3 12800) - backend_gates = 50000*mcp.dataBusWidth/64.0;//5000 is from Cadence ChipEstimator - - power_t.readOp.dynamic = backend_dyn; - power_t.readOp.leakage = (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W + } else { + //skip old model + cout<<"Unknown memory controllers"<<endl;exit(0); + //mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend + C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; + power.readOp.leakage = area_um2 / 2 * + (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * + g_tp.peri_global.Vdd;//unit W + power.readOp.gate_leakage = area_um2 / 2 * + (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * + g_tp.peri_global.Vdd;//unit W + power.readOp.dynamic *= 1.2; + power.readOp.leakage *= 1.2; + power.readOp.gate_leakage *= 1.2; + //flash controller has about 20% more backend power since BCH ECC in + //flash is complex and power hungry + } + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power.readOp.longer_channel_leakage = power.readOp.leakage * + long_channel_device_reduction; + + // Output leakage power calculations + output_data.subthreshold_leakage_power = + longer_channel_device ? power.readOp.longer_channel_leakage : + power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + + // Peak dynamic power calculation + output_data.peak_dynamic_power = power.readOp.dynamic * + (tdp_stats.readAc.access + tdp_stats.writeAc.access); + + // Runtime dynamic energy calculation + output_data.runtime_dynamic_energy = + power.readOp.dynamic * + (rtp_stats.readAc.access + rtp_stats.writeAc.access) * + mcp.llcBlockSize * BITS_PER_BYTE / mcp.dataBusWidth + + // Original McPAT code: Assume 10% of peak power is consumed by routine + // job including memory refreshing and scrubbing + power.readOp.dynamic * 0.1 * execution_time; +} - } - } - else - {//skip old model - cout<<"Unknown memory controllers"<<endl;exit(0); - area.set_area(0.243*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus - //mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend - C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; - power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.dynamic *= 1.2; - power_t.readOp.leakage *= 1.2; - power_t.readOp.gate_leakage *= 1.2; - //flash controller has about 20% more backend power since BCH ECC in flash is complex and power hungry - } - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; +MCPHY::MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_) + : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) { + name = "Physical Interface (PHY)"; + local_result = init_interface(&l_ip, name); + + // Set up stats for the power calculations + // TODO: Figure out why TDP stats aren't used + tdp_stats.reset(); + tdp_stats.readAc.access = 0.5 * mcp.num_channels; + tdp_stats.writeAc.access = 0.5 * mcp.num_channels; + rtp_stats.reset(); + rtp_stats.readAc.access = mcs.reads; + rtp_stats.writeAc.access = mcs.writes; } -void MCBackend::computeEnergy(bool is_tdp) -{ - //backend uses internal data buswidth - if (is_tdp) - { - //init stats for Peak - stats_t.readAc.access = 0.5*mcp.num_channels; - stats_t.writeAc.access = 0.5*mcp.num_channels; - tdp_stats = stats_t; +void MCPHY::computeArea() { + if (mcp.mc_type == MC) { + if (mcp.type == 0) { + //Based on die photos from Niagara 1 and 2. + //TODO merge this into undifferentiated core.PHY only achieves + //square root of the ideal scaling. + output_data.area = (6.4323 * log(mcp.peak_transfer_rate * 2) - + 48.134) * mcp.dataBusWidth / 128.0 * + (l_ip.F_sz_um / 0.09) * mcp.num_channels / 2;//TODO:/2 + } else { + //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm + //for upto DDR3 2133 (PC3 17066) + double non_IO_percentage = 0.2; + output_data.area = 1.3 * non_IO_percentage / 2133.0e6 * + mcp.clockRate / 17066 * mcp.peak_transfer_rate * + mcp.dataBusWidth / 16.0 * (l_ip.F_sz_um / 0.040)* + (l_ip.F_sz_um / 0.040) * mcp.num_channels;//um^2 } - else - { - //init stats for runtime power (RTP) - stats_t.readAc.access = mcp.reads; - stats_t.writeAc.access = mcp.writes; - tdp_stats = stats_t; - } - if (is_tdp) - { - power = power_t; - power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access)*power_t.readOp.dynamic; - - } - else - { - rt_power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access)*mcp.llcBlockSize*8.0/mcp.dataBusWidth*power_t.readOp.dynamic; - rt_power = rt_power + power_t*pppm_lkg; - rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; - //Assume 10% of peak power is consumed by routine job including memory refreshing and scrubbing + } else { + //area based on Cadence ChipEstimator for 8bit bus + output_data.area = 0.4e6 / 2 * mcp.dataBusWidth / 8 / 1e6; } } - -MCPHY::MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) -:l_ip(*interface_ip_), - mc_type(mc_type_), - mcp(mcp_) -{ - - local_result = init_interface(&l_ip); - compute(); -} - -void MCPHY::compute() -{ - //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio() ; - /* - * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006; - * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s - */ - double power_per_gb_per_s, phy_dyn,phy_gates, NMOS_sizing, PMOS_sizing; - - if (mc_type == MC) - { - if (mcp.type == 0) - { - power_per_gb_per_s = mcp.LVDS? 0.01:0.04; - //Based on die photos from Niagara 1 and 2. - //TODO merge this into undifferentiated core.PHY only achieves square root of the ideal scaling. - //area = (6.4323*log(peakDataTransferRate)-34.76)*memDataWidth/128.0*(l_ip.F_sz_um/0.09); - area.set_area((6.4323*log(mcp.peakDataTransferRate*2)-48.134)*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6/2);//TODO:/2 - //This is from curve fitting based on Niagara 1 and 2's PHY die photo. - //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down - //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits. - power_t.readOp.dynamic = power_per_gb_per_s*sqrt(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; - power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - - } - else - { - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm for upto DDR3 2133 (PC3 17066) - double non_IO_percentage = 0.2; - area.set_area(1.3*non_IO_percentage/2133.0e6*mcp.clockRate/17066*mcp.peakDataTransferRate*mcp.dataBusWidth/16.0*(l_ip.F_sz_um/0.040)* (l_ip.F_sz_um/0.040)*mcp.num_channels*1e6);//um^2 - phy_gates = 200000*mcp.dataBusWidth/64.0; - power_per_gb_per_s = 0.01; - //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down - power_t.readOp.dynamic = power_per_gb_per_s*(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; - power_t.readOp.leakage = (mcp.withPHY? phy_gates:0)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = (mcp.withPHY? phy_gates:0)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - } - - } - else - { - area.set_area(0.4e6/2*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus - } +void MCPHY::computeEnergy() { + //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + /* + * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006; + * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s + */ + double power_per_gb_per_s, phy_dyn,phy_gates; + double NMOS_sizing = g_tp.min_w_nmos_; + double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + double area_um2 = output_data.area * 1e6; + + if (mcp.mc_type == MC) { + if (mcp.type == 0) { + power_per_gb_per_s = mcp.LVDS ? 0.01 : 0.04; + //This is from curve fitting based on Niagara 1 and 2's PHY die photo. + //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down + //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits. + power.readOp.dynamic = power_per_gb_per_s * + sqrt(l_ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / 1.2 * + g_tp.peri_global.Vdd / 1.2; + power.readOp.leakage = area_um2 / 2 * + (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * + g_tp.peri_global.Vdd;//unit W + power.readOp.gate_leakage = area_um2 / 2 * + (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * + g_tp.peri_global.Vdd;//unit W + } else { + phy_gates = 200000 * mcp.dataBusWidth / 64.0; + power_per_gb_per_s = 0.01; + //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down + power.readOp.dynamic = power_per_gb_per_s * (l_ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; + power.readOp.leakage = (mcp.withPHY ? phy_gates : 0) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + power.readOp.gate_leakage = (mcp.withPHY ? phy_gates : 0) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + } + } // double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface // power_t.readOp.dynamic *= phy_factor; // power_t.readOp.leakage *= phy_factor; // power_t.readOp.gate_leakage *= phy_factor; - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + + // Leakage power calculations + output_data.subthreshold_leakage_power = + longer_channel_device ? power.readOp.longer_channel_leakage : + power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + + // Peak dynamic power calculation + double data_transfer_unit = (mcp.mc_type == MC)? 72:16;/*DIMM data width*/ + output_data.peak_dynamic_power = power.readOp.dynamic * + (mcp.peak_transfer_rate * BITS_PER_BYTE / 1e3) * mcp.dataBusWidth / + data_transfer_unit * mcp.num_channels / mcp.clockRate; + + // Runtime dynamic energy calculation + output_data.runtime_dynamic_energy = + power.readOp.dynamic * + (rtp_stats.readAc.access + rtp_stats.writeAc.access) * + mcp.llcBlockSize * BITS_PER_BYTE / 1e9 + + // Original McPAT code: Assume 10% of peak power is consumed by routine + // job including memory refreshing and scrubbing + power.readOp.dynamic * 0.1 * execution_time; } +MCFrontEnd::MCFrontEnd(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_) + : McPATComponent(_xml_data), frontendBuffer(NULL), readBuffer(NULL), + writeBuffer(NULL), MC_arb(NULL), interface_ip(*interface_ip_), + mcp(mcp_), mcs(mcs_) { + int tag, data; + bool is_default = true;//indication for default setup + + /* MC frontend engine channels share the same engines but logically partitioned + * For all hardware inside MC. different channels do not share resources. + * TODO: add docodeing/mux stage to steer memory requests to different channels. + */ + + name = "Front End"; + + // Memory Request Reorder Buffer + tag = mcp.addressbus_width + EXTRA_TAG_BITS + mcp.opcodeW; + data = int(ceil((physical_address_width + mcp.opcodeW) / BITS_PER_BYTE)); + + interface_ip.cache_sz = data * mcp.req_window_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = mcp.reorder_buffer_assoc; + interface_ip.nbanks = mcp.reorder_buffer_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = mcp.num_channels; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + frontendBuffer = new CacheArray(xml_data, &interface_ip, "Reorder Buffer", + Uncore_device, mcp.clockRate); + children.push_back(frontendBuffer); + + frontendBuffer->tdp_stats.reset(); + frontendBuffer->tdp_stats.readAc.access = + frontendBuffer->l_ip.num_search_ports + + frontendBuffer->l_ip.num_wr_ports; + frontendBuffer->tdp_stats.writeAc.access = + frontendBuffer->l_ip.num_search_ports; + frontendBuffer->tdp_stats.searchAc.access = + frontendBuffer->l_ip.num_wr_ports; + frontendBuffer->rtp_stats.reset(); + // TODO: These stats assume that access power is calculated per buffer + // bit, which requires the stats to take into account the number of + // bits for each buffer slot. This should be revised... + //For each channel, each memory word need to check the address data to + //achieve best scheduling results. + //and this need to be done on all physical DIMMs in each logical memory + //DIMM *mcp.dataBusWidth/72 + frontendBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72; + frontendBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72; + frontendBuffer->rtp_stats.searchAc.access = + frontendBuffer->rtp_stats.readAc.access + + frontendBuffer->rtp_stats.writeAc.access; + + // Read Buffers + //Support key words first operation + data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); + + interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = mcp.read_buffer_assoc; + interface_ip.nbanks = mcp.read_buffer_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = mcp.read_buffer_tag_width > 0; + interface_ip.tag_w = mcp.read_buffer_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + readBuffer = new CacheArray(xml_data, &interface_ip, "Read Buffer", + Uncore_device, mcp.clockRate); + children.push_back(readBuffer); + + readBuffer->tdp_stats.reset(); + readBuffer->tdp_stats.readAc.access = readBuffer->l_ip.num_rd_ports * + mcs.duty_cycle; + readBuffer->tdp_stats.writeAc.access = readBuffer->l_ip.num_wr_ports * + mcs.duty_cycle; + readBuffer->rtp_stats.reset(); + readBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + readBuffer->rtp_stats.writeAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + + // Write Buffer + //Support key words first operation + data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); + + interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = mcp.write_buffer_assoc; + interface_ip.nbanks = mcp.write_buffer_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = mcp.write_buffer_tag_width > 0; + interface_ip.tag_w = mcp.write_buffer_tag_width; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + writeBuffer = new CacheArray(xml_data, &interface_ip, "Write Buffer", + Uncore_device, mcp.clockRate); + children.push_back(writeBuffer); + + writeBuffer->tdp_stats.reset(); + writeBuffer->tdp_stats.readAc.access = writeBuffer->l_ip.num_rd_ports * + mcs.duty_cycle; + writeBuffer->tdp_stats.writeAc.access = writeBuffer->l_ip.num_wr_ports * + mcs.duty_cycle; + writeBuffer->rtp_stats.reset(); + writeBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + writeBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + + // TODO: Set up selection logic as a leaf node in tree + //selection and arbitration logic + MC_arb = + new selection_logic(xml_data, is_default, + mcp.req_window_size_per_channel, 1, &interface_ip, + "Arbitration Logic", (mcs.reads + mcs.writes), + mcp.clockRate, Uncore_device); + // MC_arb is not included in the roll-up due to the uninitialized area + //children.push_back(MC_arb); +} -void MCPHY::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - //init stats for Peak - stats_t.readAc.access = 0.5*mcp.num_channels; //time share on buses - stats_t.writeAc.access = 0.5*mcp.num_channels; - tdp_stats = stats_t; - } - else - { - //init stats for runtime power (RTP) - stats_t.readAc.access = mcp.reads; - stats_t.writeAc.access = mcp.writes; - tdp_stats = stats_t; - } - - if (is_tdp) - { - double data_transfer_unit = (mc_type == MC)? 72:16;/*DIMM data width*/ - power = power_t; - power.readOp.dynamic = power.readOp.dynamic * (mcp.peakDataTransferRate*8*1e6/1e9/*change to Gbs*/)*mcp.dataBusWidth/data_transfer_unit*mcp.num_channels/mcp.clockRate; - // divide by clock rate is for match the final computation where *clock is used - //(stats_t.readAc.access*power_t.readOp.dynamic+ -// stats_t.writeAc.access*power_t.readOp.dynamic); - - } - else - { - rt_power = power_t; -// rt_power.readOp.dynamic = (stats_t.readAc.access*power_t.readOp.dynamic+ -// stats_t.writeAc.access*power_t.readOp.dynamic); - - rt_power.readOp.dynamic=power_t.readOp.dynamic*(stats_t.readAc.access + stats_t.writeAc.access)*(mcp.llcBlockSize)*8/1e9/mcp.executionTime*(mcp.executionTime); - rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; +MemoryController::MemoryController(XMLNode* _xml_data, + InputParameter* interface_ip_) + : McPATComponent(_xml_data), interface_ip(*interface_ip_) { + name = "Memory Controller"; + set_mc_param(); + // TODO: Pass params and stats as pointers + children.push_back(new MCFrontEnd(xml_data, &interface_ip, mcp, mcs)); + children.push_back(new MCBackend(xml_data, &interface_ip, mcp, mcs)); + + if (mcp.type==0 || (mcp.type == 1 && mcp.withPHY)) { + children.push_back(new MCPHY(xml_data, &interface_ip, mcp, mcs)); } } -MCFrontEnd::MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) -:XML(XML_interface), - interface_ip(*interface_ip_), - mc_type(mc_type_), - mcp(mcp_), - MC_arb(0), - frontendBuffer(0), - readBuffer(0), - writeBuffer(0) -{ - /* All computations are for a single MC - * - */ - - int tag, data; - bool is_default =true;//indication for default setup - - /* MC frontend engine channels share the same engines but logically partitioned - * For all hardware inside MC. different channels do not share resources. - * TODO: add docodeing/mux stage to steer memory requests to different channels. - */ - - //memory request reorder buffer - tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW; - data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW)/8.0)); - interface_ip.cache_sz = data*XML->sys.mc.req_window_size_per_channel; - interface_ip.line_sz = data; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; - frontendBuffer = new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); - frontendBuffer->area.set_area(frontendBuffer->area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - - //selection and arbitration logic - MC_arb = new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel,1,&interface_ip, Uncore_device); - - //read buffers. - data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte - interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0;//XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device); - readBuffer->area.set_area(readBuffer->area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - - //write buffer - data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte - interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device); - writeBuffer->area.set_area(writeBuffer->area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); +void MemoryController::initialize_params() { + memset(&mcp, 0, sizeof(MCParameters)); } -void MCFrontEnd::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - //init stats for Peak - frontendBuffer->stats_t.readAc.access = frontendBuffer->l_ip.num_search_ports; - frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; - frontendBuffer->tdp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = readBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; - readBuffer->stats_t.writeAc.access = readBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; - readBuffer->tdp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = writeBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; - writeBuffer->stats_t.writeAc.access = writeBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; - writeBuffer->tdp_stats = writeBuffer->stats_t; - - } - else - { - //init stats for runtime power (RTP) - frontendBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads *mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; - //For each channel, each memory word need to check the address data to achieve best scheduling results. - //and this need to be done on all physical DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 - frontendBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; - frontendBuffer->rtp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first - readBuffer->stats_t.writeAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first - readBuffer->rtp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; - writeBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; - writeBuffer->rtp_stats = writeBuffer->stats_t; - } - - frontendBuffer->power_t.reset(); - readBuffer->power_t.reset(); - writeBuffer->power_t.reset(); - -// frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access* -// (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ -// frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); - - frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access + - frontendBuffer->stats_t.writeAc.access)*frontendBuffer->local_result.power.searchOp.dynamic - + frontendBuffer->stats_t.readAc.access * frontendBuffer->local_result.power.readOp.dynamic - + frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic; - - readBuffer->power_t.readOp.dynamic += (readBuffer->stats_t.readAc.access* - readBuffer->local_result.power.readOp.dynamic+ - readBuffer->stats_t.writeAc.access*readBuffer->local_result.power.writeOp.dynamic); - writeBuffer->power_t.readOp.dynamic += (writeBuffer->stats_t.readAc.access* - writeBuffer->local_result.power.readOp.dynamic+ - writeBuffer->stats_t.writeAc.access*writeBuffer->local_result.power.writeOp.dynamic); - - if (is_tdp) - { - power = power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + - writeBuffer->local_result.power)*pppm_lkg; - +void MemoryController::set_mc_param() { + initialize_params(); + + int num_children = xml_data->nChildNode("param"); + int tech_type; + int mat_type; + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_FP_IF("mc_clock", mcp.clockRate); + ASSIGN_INT_IF("tech_type", tech_type); + ASSIGN_ENUM_IF("mc_type", mcp.mc_type, MemoryCtrl_type); + ASSIGN_FP_IF("num_mcs", mcp.num_mcs); + ASSIGN_INT_IF("llc_line_length", mcp.llc_line_length); + ASSIGN_INT_IF("databus_width", mcp.databus_width); + ASSIGN_INT_IF("memory_channels_per_mc", mcp.num_channels); + ASSIGN_INT_IF("req_window_size_per_channel", + mcp.req_window_size_per_channel); + ASSIGN_INT_IF("IO_buffer_size_per_channel", + mcp.IO_buffer_size_per_channel); + ASSIGN_INT_IF("addressbus_width", mcp.addressbus_width); + ASSIGN_INT_IF("opcode_width", mcp.opcodeW); + ASSIGN_INT_IF("type", mcp.type); + ASSIGN_ENUM_IF("LVDS", mcp.LVDS, bool); + ASSIGN_ENUM_IF("withPHY", mcp.withPHY, bool); + ASSIGN_INT_IF("peak_transfer_rate", mcp.peak_transfer_rate); + ASSIGN_INT_IF("number_ranks", mcp.number_ranks); + ASSIGN_INT_IF("reorder_buffer_assoc", mcp.reorder_buffer_assoc); + ASSIGN_INT_IF("reorder_buffer_nbanks", mcp.reorder_buffer_nbanks); + ASSIGN_INT_IF("read_buffer_assoc", mcp.read_buffer_assoc); + ASSIGN_INT_IF("read_buffer_nbanks", mcp.read_buffer_nbanks); + ASSIGN_INT_IF("read_buffer_tag_width", mcp.read_buffer_tag_width); + ASSIGN_INT_IF("write_buffer_assoc", mcp.write_buffer_assoc); + ASSIGN_INT_IF("write_buffer_nbanks", mcp.write_buffer_nbanks); + ASSIGN_INT_IF("write_buffer_tag_width", mcp.write_buffer_tag_width); + ASSIGN_INT_IF("wire_mat_type", mat_type); + ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); + + else { + warnUnrecognizedParam(node_name); + } } - else - { - rt_power = rt_power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + - writeBuffer->local_result.power)*pppm_lkg; - rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; + + if (mcp.mc_type != MC) { + cout << "Unknown memory controller type: Only DRAM controller is " + << "supported for now" << endl; + exit(0); } -} -void MCFrontEnd::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - - if (is_tdp) - { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << frontendBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - - cout <<endl; - cout << indent_str<< "Read Buffer:" << endl; - cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << readBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << readBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout <<endl; - cout << indent_str << "Write Buffer:" << endl; - cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << writeBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << writeBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout <<endl; - } - else - { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage << " W" << endl; - cout <<endl; - cout << indent_str<< "Read Buffer:" << endl; - cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << readBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage << " W" << endl; - cout <<endl; - cout << indent_str << "Write Buffer:" << endl; - cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << writeBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage << " W" << endl; - } + // Change from MHz to Hz + mcp.clockRate *= 1e6; -} + interface_ip.data_arr_ram_cell_tech_type = tech_type; + interface_ip.data_arr_peri_global_tech_type = tech_type; + interface_ip.tag_arr_ram_cell_tech_type = tech_type; + interface_ip.tag_arr_peri_global_tech_type = tech_type; + interface_ip.wire_is_mat_type = mat_type; + interface_ip.wire_os_mat_type = mat_type; + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); -MemoryController::MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_) -:XML(XML_interface), - interface_ip(*interface_ip_), - mc_type(mc_type_), - frontend(0), - transecEngine(0), - PHY(0), - pipeLogic(0) -{ - /* All computations are for a single MC - * - */ - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - interface_ip.wt =Global; - set_mc_param(); - frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ frontend->area.get_area()); - transecEngine = new MCBackend(&interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ transecEngine->area.get_area()); - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - PHY = new MCPHY(&interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ PHY->area.get_area()); - } - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. -// transecEngine.initialize(&interface_ip); -// transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; -// transecEngine.memDataWidth = dataBusWidth; -// transecEngine.memRank = XML->sys.mem.number_ranks; -// //transecEngine.memAccesses=XML->sys.mc.memory_accesses; -// //transecEngine.llcBlocksize=llcBlockSize; -// transecEngine.compute(); -// transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) ; -// area.set_area(area.get_area()+ transecEngine.area.get_area()); -// ///cout<<"area="<<area<<endl; -//// -// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers -// PHY.initialize(&interface_ip); -// PHY.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; -// PHY.memDataWidth = dataBusWidth; -// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power -// //PHY.llcBlocksize=llcBlockSize; -// PHY.compute(); -// PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) ; -// area.set_area(area.get_area()+ PHY.area.get_area()); - ///cout<<"area="<<area<<endl; -// -// interface_ip.pipeline_stages = 5;//normal memory controller has five stages in the pipeline. -// interface_ip.per_stage_vector = addressBusWidth + XML->sys.core[0].opcode_width + dataBusWidth; -// pipeLogic = new pipeline(is_default, &interface_ip); -// //pipeLogic.init_pipeline(is_default, &interface_ip); -// pipeLogic->compute_pipeline(); -// area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6); -// area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing overhead -// -// -//// //clock -//// clockNetwork.init_wire_external(is_default, &interface_ip); -//// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb -//// clockNetwork.end_wiring_level =5;//toplevel metal -//// clockNetwork.start_wiring_level =5;//toplevel metal -//// clockNetwork.num_regs = pipeLogic.tot_stage_vector; -//// clockNetwork.optimize_wire(); + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + ASSIGN_FP_IF("duty_cycle", mcs.duty_cycle); + ASSIGN_FP_IF("perc_load", mcs.perc_load); + ASSIGN_FP_IF("memory_reads", mcs.reads); + ASSIGN_INT_IF("memory_writes", mcs.writes); -} -void MemoryController::computeEnergy(bool is_tdp) -{ - - frontend->computeEnergy(is_tdp); - transecEngine->computeEnergy(is_tdp); - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - PHY->computeEnergy(is_tdp); - } - if (is_tdp) - { - power = power + frontend->power + transecEngine->power; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - power = power + PHY->power; - } - } - else - { - rt_power = rt_power + frontend->rt_power + transecEngine->rt_power; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - rt_power = rt_power + PHY->rt_power; - } - } -} - -void MemoryController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << "Memory Controller:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout<<endl; - cout << indent_str << "Front End Engine:" << endl; - cout << indent_str_next << "Area = " << frontend->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << frontend->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? frontend->power.readOp.longer_channel_leakage:frontend->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << frontend->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout <<endl; - if (plevel >2){ - frontend->displayEnergy(indent+4,is_tdp); - } - cout << indent_str << "Transaction Engine:" << endl; - cout << indent_str_next << "Area = " << transecEngine->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << transecEngine->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? transecEngine->power.readOp.longer_channel_leakage:transecEngine->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << transecEngine->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout <<endl; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - cout << indent_str << "PHY:" << endl; - cout << indent_str_next << "Area = " << PHY->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << PHY->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? PHY->power.readOp.longer_channel_leakage:PHY->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << PHY->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << PHY->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout <<endl; - } - } - else - { - cout << "Memory Controller:" << endl; - cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout<<endl; + else { + warnUnrecognizedStat(node_name); } + } + // Add ECC overhead + mcp.llcBlockSize = int(ceil(mcp.llc_line_length / BITS_PER_BYTE)) + + mcp.llc_line_length; + mcp.dataBusWidth = int(ceil(mcp.databus_width / BITS_PER_BYTE)) + + mcp.databus_width; } -void MemoryController::set_mc_param() -{ - - if (mc_type==MC) - { - mcp.clockRate =XML->sys.mc.mc_clock*2;//DDR double pumped - mcp.clockRate *= 1e6; - mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - - mcp.llcBlockSize =int(ceil(XML->sys.mc.llc_line_length/8.0))+XML->sys.mc.llc_line_length;//ecc overhead - mcp.dataBusWidth =int(ceil(XML->sys.mc.databus_width/8.0)) + XML->sys.mc.databus_width; - mcp.addressBusWidth =int(ceil(XML->sys.mc.addressbus_width));//XML->sys.physical_address_width; - mcp.opcodeW =16; - mcp.num_mcs = XML->sys.mc.number_mcs; - mcp.num_channels = XML->sys.mc.memory_channels_per_mc; - mcp.reads = XML->sys.mc.memory_reads; - mcp.writes = XML->sys.mc.memory_writes; - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. - mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate; - mcp.memRank = XML->sys.mc.number_ranks; - //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers - //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power - //PHY.llcBlocksize=llcBlockSize; - mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared - mcp.LVDS = XML->sys.mc.LVDS; - mcp.type = XML->sys.mc.type; - mcp.withPHY = XML->sys.mc.withPHY; - } -// else if (mc_type==FLASHC) -// { -// mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double pumped -// mcp.clockRate *= 1e6; -// mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); -// -// mcp.llcBlockSize =int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc overhead -// mcp.dataBusWidth =int(ceil(XML->sys.flashc.databus_width/8.0)) + XML->sys.flashc.databus_width; -// mcp.addressBusWidth =int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width; -// mcp.opcodeW =16; -// mcp.num_mcs = XML->sys.flashc.number_mcs; -// mcp.num_channels = XML->sys.flashc.memory_channels_per_mc; -// mcp.reads = XML->sys.flashc.memory_reads; -// mcp.writes = XML->sys.flashc.memory_writes; -// //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. -// mcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; -// mcp.memRank = XML->sys.flashc.number_ranks; -// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers -// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power -// //PHY.llcBlocksize=llcBlockSize; -// mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared -// mcp.LVDS = XML->sys.flashc.LVDS; -// mcp.type = XML->sys.flashc.type; -// } - else - { - cout<<"Unknown memory controller type: neither DRAM controller nor Flash controller" <<endl; - exit(0); - } -} +MCFrontEnd ::~MCFrontEnd() { -MCFrontEnd ::~MCFrontEnd(){ - - if(MC_arb) {delete MC_arb; MC_arb = 0;} - if(frontendBuffer) {delete frontendBuffer; frontendBuffer = 0;} - if(readBuffer) {delete readBuffer; readBuffer = 0;} - if(writeBuffer) {delete writeBuffer; writeBuffer = 0;} + if (MC_arb) { + delete MC_arb; + MC_arb = NULL; + } + if (frontendBuffer) { + delete frontendBuffer; + frontendBuffer = NULL; + } + if (readBuffer) { + delete readBuffer; + readBuffer = NULL; + } + if (writeBuffer) { + delete writeBuffer; + writeBuffer = NULL; + } } -MemoryController ::~MemoryController(){ - - if(frontend) {delete frontend; frontend = 0;} - if(transecEngine) {delete transecEngine; transecEngine = 0;} - if(PHY) {delete PHY; PHY = 0;} - if(pipeLogic) {delete pipeLogic; pipeLogic = 0;} +MemoryController::~MemoryController() { + // TODO: use default constructor to delete children } |