summaryrefslogtreecommitdiff
path: root/ext/mcpat/Niagara1.xml
diff options
context:
space:
mode:
authorAnthony Gutierrez <atgutier@umich.edu>2014-04-01 12:44:30 -0400
committerAnthony Gutierrez <atgutier@umich.edu>2014-04-01 12:44:30 -0400
commite553a7bfa7f0eb47b78632cd63e6e1e814025c9a (patch)
treef69a8e3e0ed55b95bf276b6f857793b9ef7b6490 /ext/mcpat/Niagara1.xml
parent8d665ee166bf5476bb9b73a0016843ff9953c266 (diff)
downloadgem5-e553a7bfa7f0eb47b78632cd63e6e1e814025c9a.tar.xz
ext: add McPAT source
this patch adds the source for mcpat, a power, area, and timing modeling framework.
Diffstat (limited to 'ext/mcpat/Niagara1.xml')
-rw-r--r--ext/mcpat/Niagara1.xml442
1 files changed, 442 insertions, 0 deletions
diff --git a/ext/mcpat/Niagara1.xml b/ext/mcpat/Niagara1.xml
new file mode 100644
index 000000000..ae748e246
--- /dev/null
+++ b/ext/mcpat/Niagara1.xml
@@ -0,0 +1,442 @@
+<?xml version="1.0" ?>
+<component id="root" name="root">
+ <component id="system" name="system">
+ <!--McPAT will skip the components if number is set to 0 -->
+ <param name="number_of_cores" value="8"/>
+ <param name="number_of_L1Directories" value="4"/>
+ <param name="number_of_L2Directories" value="0"/>
+ <param name="number_of_L2s" value="4"/> <!-- This number means how many L2 clusters in each cluster there can be multiple banks/ports -->
+ <param name="number_of_L3s" value="0"/> <!-- This number means how many L3 clusters -->
+ <param name="number_of_NoCs" value="1"/>
+ <param name="homogeneous_cores" value="1"/><!--1 means homo -->
+ <param name="homogeneous_L2s" value="1"/>
+ <param name="homogeneous_L1Directorys" value="1"/>
+ <param name="homogeneous_L2Directorys" value="1"/>
+ <param name="homogeneous_L3s" value="1"/>
+ <param name="homogeneous_ccs" value="1"/><!--cache coherece hardware -->
+ <param name="homogeneous_NoCs" value="1"/>
+ <param name="core_tech_node" value="90"/><!-- nm -->
+ <param name="target_core_clockrate" value="1200"/><!--MHz -->
+ <param name="temperature" value="380"/> <!-- Kelvin -->
+ <param name="number_cache_levels" value="2"/>
+ <param name="interconnect_projection_type" value="0"/><!--0: agressive wire technology; 1: conservative wire technology -->
+ <param name="device_type" value="0"/><!--0: HP(High Performance Type); 1: LSTP(Low standby power) 2: LOP (Low Operating Power) -->
+ <param name="longer_channel_device" value="1"/><!-- 0 no use; 1 use when possible -->
+ <param name="machine_bits" value="64"/>
+ <param name="virtual_address_width" value="64"/>
+ <param name="physical_address_width" value="52"/>
+ <param name="virtual_memory_page_size" value="4096"/>
+ <stat name="total_cycles" value="100000"/>
+ <stat name="idle_cycles" value="0"/>
+ <stat name="busy_cycles" value="100000"/>
+ <!--This page size(B) is complete different from the page size in Main memo secction. this page size is the size of
+ virtual memory from OS/Archi perspective; the page size in Main memo secction is the actuall physical line in a DRAM bank -->
+ <!-- *********************** cores ******************* -->
+ <component id="system.core0" name="core0">
+ <!-- Core property -->
+ <param name="clock_rate" value="1200"/>
+ <param name="instruction_length" value="32"/>
+ <param name="opcode_width" value="9"/>
+ <!-- address width determins the tag_width in Cache, LSQ and buffers in cache controller
+ default value is machine_bits, if not set -->
+ <param name="machine_type" value="1"/><!-- 1 inorder; 0 OOO-->
+ <!-- inorder/OoO -->
+ <param name="number_hardware_threads" value="4"/>
+ <!-- number_instruction_fetch_ports(icache ports) is always 1 in single-thread processor,
+ it only may be more than one in SMT processors. BTB ports always equals to fetch ports since
+ branch information in consective branch instructions in the same fetch group can be read out from BTB once.-->
+ <param name="fetch_width" value="1"/>
+ <!-- fetch_width determins the size of cachelines of L1 cache block -->
+ <param name="number_instruction_fetch_ports" value="1"/>
+ <param name="decode_width" value="1"/>
+ <!-- decode_width determins the number of ports of the
+ renaming table (both RAM and CAM) scheme -->
+ <param name="issue_width" value="1"/>
+ <!-- issue_width determins the number of ports of Issue window and other logic
+ as in the complexity effective proccessors paper; issue_width==dispatch_width -->
+ <param name="commit_width" value="1"/>
+ <!-- commit_width determins the number of ports of register files -->
+ <param name="fp_issue_width" value="1"/>
+ <param name="prediction_width" value="0"/>
+ <!-- number of branch instructions can be predicted simultannouesl-->
+ <!-- Current version of McPAT does not distinguish int and floating point pipelines
+ Theses parameters are reserved for future use.-->
+ <param name="pipelines_per_core" value="1,1"/>
+ <!--integer_pipeline and floating_pipelines, if the floating_pipelines is 0, then the pipeline is shared-->
+ <param name="pipeline_depth" value="6,6"/>
+ <!-- pipeline depth of int and fp, if pipeline is shared, the second number is the average cycles of fp ops -->
+ <!-- issue and exe unit-->
+ <param name="ALU_per_core" value="1"/>
+ <!-- contains an adder, a shifter, and a logical unit -->
+ <param name="MUL_per_core" value="1"/>
+ <!-- For MUL and Div -->
+ <param name="FPU_per_core" value="0.125"/>
+ <!-- buffer between IF and ID stage -->
+ <param name="instruction_buffer_size" value="16"/>
+ <!-- buffer between ID and sche/exe stage -->
+ <param name="decoded_stream_buffer_size" value="16"/>
+ <param name="instruction_window_scheme" value="0"/><!-- 0 PHYREG based, 1 RSBASED-->
+ <!-- McPAT support 2 types of OoO cores, RS based and physical reg based-->
+ <param name="instruction_window_size" value="16"/>
+ <param name="fp_instruction_window_size" value="16"/>
+ <!-- the instruction issue Q as in Alpha 21264; The RS as in Intel P6 -->
+ <param name="ROB_size" value="80"/>
+ <!-- each in-flight instruction has an entry in ROB -->
+ <!-- registers -->
+ <param name="archi_Regs_IRF_size" value="32"/>
+ <param name="archi_Regs_FRF_size" value="32"/>
+ <!-- if OoO processor, phy_reg number is needed for renaming logic,
+ renaming logic is for both integer and floating point insts. -->
+ <param name="phy_Regs_IRF_size" value="80"/>
+ <param name="phy_Regs_FRF_size" value="80"/>
+ <!-- rename logic -->
+ <param name="rename_scheme" value="0"/>
+ <!-- can be RAM based(0) or CAM based(1) rename scheme
+ RAM-based scheme will have free list, status table;
+ CAM-based scheme have the valid bit in the data field of the CAM
+ both RAM and CAM need RAM-based checkpoint table, checkpoint_depth=# of in_flight instructions;
+ Detailed RAT Implementation see TR -->
+ <param name="register_windows_size" value="8"/>
+ <!-- how many windows in the windowed register file, sun processors;
+ no register windowing is used when this number is 0 -->
+ <!-- In OoO cores, loads and stores can be issued whether inorder(Pentium Pro) or (OoO)out-of-order(Alpha),
+ They will always try to exeute out-of-order though. -->
+ <param name="LSU_order" value="inorder"/>
+ <param name="store_buffer_size" value="32"/>
+ <!-- By default, in-order cores do not have load buffers -->
+ <param name="load_buffer_size" value="32"/>
+ <!-- number of ports refer to sustainable concurrent memory accesses -->
+ <param name="memory_ports" value="1"/>
+ <!-- max_allowed_in_flight_memo_instructions determins the # of ports of load and store buffer
+ as well as the ports of Dcache which is connected to LSU -->
+ <!-- dual-pumped Dcache can be used to save the extra read/write ports -->
+ <param name="RAS_size" value="32"/>
+ <!-- general stats, defines simulation periods;require total, idle, and busy cycles for senity check -->
+ <!-- please note: if target architecture is X86, then all the instrucions refer to (fused) micro-ops -->
+ <stat name="total_instructions" value="800000"/>
+ <stat name="int_instructions" value="600000"/>
+ <stat name="fp_instructions" value="20000"/>
+ <stat name="branch_instructions" value="0"/>
+ <stat name="branch_mispredictions" value="0"/>
+ <stat name="load_instructions" value="100000"/>
+ <stat name="store_instructions" value="100000"/>
+ <stat name="committed_instructions" value="800000"/>
+ <stat name="committed_int_instructions" value="600000"/>
+ <stat name="committed_fp_instructions" value="20000"/>
+ <stat name="pipeline_duty_cycle" value="0.6"/><!--<=1, runtime_ipc/peak_ipc; averaged for all cores if homogenous -->
+ <!-- the following cycle stats are used for heterogeneouse cores only,
+ please ignore them if homogeneouse cores -->
+ <stat name="total_cycles" value="100000"/>
+ <stat name="idle_cycles" value="0"/>
+ <stat name="busy_cycles" value="100000"/>
+ <!-- instruction buffer stats -->
+ <!-- ROB stats, both RS and Phy based OoOs have ROB
+ performance simulator should capture the difference on accesses,
+ otherwise, McPAT has to guess based on number of commited instructions. -->
+ <stat name="ROB_reads" value="263886"/>
+ <stat name="ROB_writes" value="263886"/>
+ <!-- RAT accesses -->
+ <stat name="rename_accesses" value="263886"/>
+ <stat name="fp_rename_accesses" value="263886"/>
+ <!-- decode and rename stage use this, should be total ic - nop -->
+ <!-- Inst window stats -->
+ <stat name="inst_window_reads" value="263886"/>
+ <stat name="inst_window_writes" value="263886"/>
+ <stat name="inst_window_wakeup_accesses" value="263886"/>
+ <stat name="fp_inst_window_reads" value="263886"/>
+ <stat name="fp_inst_window_writes" value="263886"/>
+ <stat name="fp_inst_window_wakeup_accesses" value="263886"/>
+ <!-- RF accesses -->
+ <stat name="int_regfile_reads" value="1600000"/>
+ <stat name="float_regfile_reads" value="40000"/>
+ <stat name="int_regfile_writes" value="800000"/>
+ <stat name="float_regfile_writes" value="20000"/>
+ <!-- accesses to the working reg -->
+ <stat name="function_calls" value="5"/>
+ <stat name="context_switches" value="260343"/>
+ <!-- Number of Windowes switches (number of function calls and returns)-->
+ <!-- Alu stats by default, the processor has one FPU that includes the divider and
+ multiplier. The fpu accesses should include accesses to multiplier and divider -->
+ <stat name="ialu_accesses" value="800000"/>
+ <stat name="fpu_accesses" value="10000"/>
+ <stat name="mul_accesses" value="100000"/>
+ <stat name="cdb_alu_accesses" value="1000000"/>
+ <stat name="cdb_mul_accesses" value="0"/>
+ <stat name="cdb_fpu_accesses" value="0"/>
+ <!-- multiple cycle accesses should be counted multiple times,
+ otherwise, McPAT can use internal counter for different floating point instructions
+ to get final accesses. But that needs detailed info for floating point inst mix -->
+ <!-- currently the performance simulator should
+ make sure all the numbers are final numbers,
+ including the explicit read/write accesses,
+ and the implicite accesses such as replacements and etc.
+ Future versions of McPAT may be able to reason the implicite access
+ based on param and stats of last level cache
+ The same rule applies to all cache access stats too! -->
+ <!-- following is AF for max power computation.
+ Do not change them, unless you understand them-->
+ <stat name="IFU_duty_cycle" value="0.25"/>
+ <stat name="LSU_duty_cycle" value="0.25"/>
+ <stat name="MemManU_I_duty_cycle" value="1"/>
+ <stat name="MemManU_D_duty_cycle" value="0.25"/>
+ <stat name="ALU_duty_cycle" value="0.9"/>
+ <stat name="MUL_duty_cycle" value="0.5"/>
+ <stat name="FPU_duty_cycle" value="0.4"/>
+ <stat name="ALU_cdb_duty_cycle" value="0.9"/>
+ <stat name="MUL_cdb_duty_cycle" value="0.5"/>
+ <stat name="FPU_cdb_duty_cycle" value="0.4"/>
+ <component id="system.core0.predictor" name="PBT">
+ <!-- branch predictor; tournament predictor see Alpha implementation -->
+ <param name="local_predictor_size" value="10,3"/>
+ <param name="local_predictor_entries" value="1024"/>
+ <param name="global_predictor_entries" value="4096"/>
+ <param name="global_predictor_bits" value="2"/>
+ <param name="chooser_predictor_entries" value="4096"/>
+ <param name="chooser_predictor_bits" value="2"/>
+ <!-- These parameters can be combined like below in next version
+ <param name="load_predictor" value="10,3,1024"/>
+ <param name="global_predictor" value="4096,2"/>
+ <param name="predictor_chooser" value="4096,2"/>
+ -->
+ </component>
+ <component id="system.core0.itlb" name="itlb">
+ <param name="number_entries" value="64"/>
+ <stat name="total_accesses" value="800000"/>
+ <stat name="total_misses" value="4"/>
+ <stat name="conflicts" value="0"/>
+ <!-- there is no write requests to itlb although writes happen to itlb after miss,
+ which is actually a replacement -->
+ </component>
+ <component id="system.core0.icache" name="icache">
+ <!-- there is no write requests to itlb although writes happen to it after miss,
+ which is actually a replacement -->
+ <param name="icache_config" value="16384,32,4,1,1,3,8,0"/>
+ <!-- the parameters are capacity,block_width, associativity, bank, throughput w.r.t. core clock, latency w.r.t. core clock,output_width, cache policy -->
+ <!-- cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate -->
+ <param name="buffer_sizes" value="16, 16, 16,0"/>
+ <!-- cache controller buffer sizes: miss_buffer_size(MSHR),fill_buffer_size,prefetch_buffer_size,wb_buffer_size-->
+ <stat name="read_accesses" value="200000"/>
+ <stat name="read_misses" value="0"/>
+ <stat name="conflicts" value="0"/>
+ </component>
+ <component id="system.core0.dtlb" name="dtlb">
+ <param name="number_entries" value="64"/>
+ <stat name="total_accesses" value="200000"/>
+ <stat name="total_misses" value="4"/>
+ <stat name="conflicts" value="0"/>
+ </component>
+ <component id="system.core0.dcache" name="dcache">
+ <!-- all the buffer related are optional -->
+ <param name="dcache_config" value="8192,16,4,1,1,3,16,0"/>
+ <param name="buffer_sizes" value="16, 16, 16, 16"/>
+ <!-- cache controller buffer sizes: miss_buffer_size(MSHR),fill_buffer_size,prefetch_buffer_size,wb_buffer_size-->
+ <stat name="read_accesses" value="200000"/>
+ <stat name="write_accesses" value="27276"/>
+ <stat name="read_misses" value="1632"/>
+ <stat name="write_misses" value="183"/>
+ <stat name="conflicts" value="0"/>
+ </component>
+ <component id="system.core0.BTB" name="BTB">
+ <!-- all the buffer related are optional -->
+ <param name="BTB_config" value="8192,4,2,1, 1,3"/>
+ <!-- the parameters are capacity,block_width,associativity,bank, throughput w.r.t. core clock, latency w.r.t. core clock,-->
+ </component>
+ </component>
+ <component id="system.L1Directory0" name="L1Directory0">
+ <param name="Directory_type" value="0"/>
+ <!--0 cam based shadowed tag. 1 directory cache -->
+ <param name="Dir_config" value="2048,1,0,1, 4, 4,8"/>
+ <!-- the parameters are capacity,block_width, associativity,bank, throughput w.r.t. core clock, latency w.r.t. core clock,-->
+ <param name="buffer_sizes" value="8, 8, 8, 8"/>
+ <!-- all the buffer related are optional -->
+ <param name="clockrate" value="1200"/>
+ <param name="ports" value="1,1,1"/>
+ <!-- number of r, w, and rw search ports -->
+ <param name="device_type" value="0"/>
+ <!-- altough there are multiple access types,
+ Performance simulator needs to cast them into reads or writes
+ e.g. the invalidates can be considered as writes -->
+ <stat name="read_accesses" value="800000"/>
+ <stat name="write_accesses" value="27276"/>
+ <stat name="read_misses" value="1632"/>
+ <stat name="write_misses" value="183"/>
+ <stat name="conflicts" value="20"/>
+ <stat name="duty_cycle" value="0.45"/>
+ </component>
+ <component id="system.L2Directory0" name="L2Directory0">
+ <param name="Directory_type" value="1"/>
+ <!--0 cam based shadowed tag. 1 directory cache -->
+ <param name="Dir_config" value="1048576,16,16,1,2, 100"/>
+ <!-- the parameters are capacity,block_width, associativity,bank, throughput w.r.t. core clock, latency w.r.t. core clock,-->
+ <param name="buffer_sizes" value="8, 8, 8, 8"/>
+ <!-- all the buffer related are optional -->
+ <param name="clockrate" value="1200"/>
+ <param name="ports" value="1,1,1"/>
+ <!-- number of r, w, and rw search ports -->
+ <param name="device_type" value="0"/>
+ <!-- altough there are multiple access types,
+ Performance simulator needs to cast them into reads or writes
+ e.g. the invalidates can be considered as writes -->
+ <stat name="read_accesses" value="58824"/>
+ <stat name="write_accesses" value="27276"/>
+ <stat name="read_misses" value="1632"/>
+ <stat name="write_misses" value="183"/>
+ <stat name="conflicts" value="100"/>
+ <stat name="duty_cycle" value="0.45"/>
+ </component>
+ <component id="system.L20" name="L20">
+ <!-- all the buffer related are optional -->
+ <param name="L2_config" value="786432,64,16,1, 4,23, 64, 1"/>
+ <!-- consider 4-way bank interleaving for Niagara 1 -->
+ <!-- the parameters are capacity,block_width, associativity, bank, throughput w.r.t. core clock, latency w.r.t. core clock,output_width, cache policy -->
+ <param name="buffer_sizes" value="16, 16, 16, 16"/>
+ <!-- cache controller buffer sizes: miss_buffer_size(MSHR),fill_buffer_size,prefetch_buffer_size,wb_buffer_size-->
+ <param name="clockrate" value="1200"/>
+ <param name="ports" value="1,1,1"/>
+ <!-- number of r, w, and rw ports -->
+ <param name="device_type" value="0"/>
+ <stat name="read_accesses" value="200000"/>
+ <stat name="write_accesses" value="0"/>
+ <stat name="read_misses" value="0"/>
+ <stat name="write_misses" value="0"/>
+ <stat name="conflicts" value="0"/>
+ <stat name="duty_cycle" value="0.5"/>
+ </component>
+
+<!--**********************************************************************-->
+<component id="system.L30" name="L30">
+ <param name="L3_config" value="1048576,64,16,1, 2,100, 64,1"/>
+ <!-- the parameters are capacity,block_width, associativity, bank, throughput w.r.t. core clock, latency w.r.t. core clock,output_width, cache policy -->
+ <param name="clockrate" value="3500"/>
+ <param name="ports" value="1,1,1"/>
+ <!-- number of r, w, and rw ports -->
+ <param name="device_type" value="0"/>
+ <param name="buffer_sizes" value="16, 16, 16, 16"/>
+ <!-- cache controller buffer sizes: miss_buffer_size(MSHR),fill_buffer_size,prefetch_buffer_size,wb_buffer_size-->
+ <stat name="read_accesses" value="58824"/>
+ <stat name="write_accesses" value="27276"/>
+ <stat name="read_misses" value="1632"/>
+ <stat name="write_misses" value="183"/>
+ <stat name="conflicts" value="0"/>
+ <stat name="duty_cycle" value="0.35"/>
+ </component>
+<!--**********************************************************************-->
+ <component id="system.NoC0" name="noc0">
+ <param name="clockrate" value="1200"/>
+ <param name="type" value="1"/>
+ <!-- 1 NoC, O bus -->
+ <param name="horizontal_nodes" value="2"/>
+ <param name="vertical_nodes" value="1"/>
+ <param name="has_global_link" value="0"/>
+ <!-- 1 has global link, 0 does not have global link -->
+ <param name="link_throughput" value="1"/><!--w.r.t clock -->
+ <param name="link_latency" value="1"/><!--w.r.t clock -->
+ <!-- througput >= latency -->
+ <!-- Router architecture -->
+ <param name="input_ports" value="8"/>
+ <param name="output_ports" value="5"/>
+ <param name="virtual_channel_per_port" value="1"/>
+ <!-- input buffer; in classic routers only input ports need buffers -->
+ <param name="flit_bits" value="136"/>
+ <param name="input_buffer_entries_per_vc" value="2"/><!--VCs within the same ports share input buffers whose size is propotional to the number of VCs-->
+ <param name="chip_coverage" value="1"/>
+ <!-- When multiple NOC present, one NOC will cover part of the whole chip. chip_coverage <=1 -->
+ <stat name="total_accesses" value="360000"/>
+ <!-- This is the number of total accesses within the whole network not for each router -->
+ <stat name="duty_cycle" value="0.6"/>
+ </component>
+
+<!--**********************************************************************-->
+ <component id="system.mem" name="mem">
+ <!-- Main memory property -->
+ <param name="mem_tech_node" value="32"/>
+ <param name="device_clock" value="200"/><!--MHz, this is clock rate of the actual memory device, not the FSB -->
+ <param name="peak_transfer_rate" value="3200"/><!--MB/S-->
+ <param name="internal_prefetch_of_DRAM_chip" value="4"/>
+ <!-- 2 for DDR, 4 for DDR2, 8 for DDR3...-->
+ <!-- the device clock, peak_transfer_rate, and the internal prefetch decide the DIMM property -->
+ <!-- above numbers can be easily found from Wikipedia -->
+ <param name="capacity_per_channel" value="4096"/> <!-- MB -->
+ <!-- capacity_per_Dram_chip=capacity_per_channel/number_of_dimms/number_ranks/Dram_chips_per_rank
+ Current McPAT assumes single DIMMs are used.-->
+ <param name="number_ranks" value="2"/>
+ <param name="num_banks_of_DRAM_chip" value="8"/>
+ <param name="Block_width_of_DRAM_chip" value="64"/> <!-- B -->
+ <param name="output_width_of_DRAM_chip" value="8"/>
+ <!--number of Dram_chips_per_rank=" 72/output_width_of_DRAM_chip-->
+ <!--number of Dram_chips_per_rank=" 72/output_width_of_DRAM_chip-->
+ <param name="page_size_of_DRAM_chip" value="8"/> <!-- 8 or 16 -->
+ <param name="burstlength_of_DRAM_chip" value="8"/>
+ <stat name="memory_accesses" value="1052"/>
+ <stat name="memory_reads" value="1052"/>
+ <stat name="memory_writes" value="1052"/>
+ </component>
+ <component id="system.mc" name="mc">
+ <!-- Memeory controllers are for DDR(2,3...) DIMMs -->
+ <!-- current version of McPAT uses published values for base parameters of memory controller
+ improvments on MC will be added in later versions. -->
+ <param name="type" value="0"/> <!-- 1: low power; 0 high performance -->
+ <param name="mc_clock" value="200"/><!--DIMM IO bus clock rate MHz DDR2-400 for Niagara 1-->
+ <param name="peak_transfer_rate" value="3200"/><!--MB/S-->
+ <param name="block_size" value="64"/><!--B-->
+ <param name="number_mcs" value="4"/>
+ <!-- current McPAT only supports homogeneous memory controllers -->
+ <param name="memory_channels_per_mc" value="1"/>
+ <param name="number_ranks" value="2"/>
+ <param name="withPHY" value="0"/>
+ <!-- # of ranks of each channel-->
+ <param name="req_window_size_per_channel" value="32"/>
+ <param name="IO_buffer_size_per_channel" value="32"/>
+ <param name="databus_width" value="128"/>
+ <param name="addressbus_width" value="51"/>
+ <!-- McPAT will add the control bus width to the addressbus width automatically -->
+ <stat name="memory_accesses" value="33333"/>
+ <stat name="memory_reads" value="16667"/>
+ <stat name="memory_writes" value="16667"/>
+ <!-- McPAT does not track individual mc, instead, it takes the total accesses and calculate
+ the average power per MC or per channel. This is sufficent for most application.
+ Further trackdown can be easily added in later versions. -->
+ </component>
+<!--**********************************************************************-->
+ <component id="system.niu" name="niu">
+ <!-- On chip 10Gb Ethernet NIC, including XAUI Phy and MAC controller -->
+ <!-- For a minimum IP packet size of 84B at 10Gb/s, a new packet arrives every 67.2ns.
+ the low bound of clock rate of a 10Gb MAC is 150Mhz -->
+ <param name="type" value="0"/> <!-- 1: low power; 0 high performance -->
+ <param name="clockrate" value="350"/>
+ <param name="number_units" value="0"/> <!-- unlike PCIe and memory controllers, each Ethernet controller only have one port -->
+ <stat name="duty_cycle" value="1.0"/> <!-- achievable max load <= 1.0 -->
+ <stat name="total_load_perc" value="0.7"/> <!-- ratio of total achived load to total achivable bandwidth -->
+ <!-- McPAT does not track individual nic, instead, it takes the total accesses and calculate
+ the average power per nic or per channel. This is sufficent for most application. -->
+ </component>
+<!--**********************************************************************-->
+ <component id="system.pcie" name="pcie">
+ <!-- On chip PCIe controller, including Phy-->
+ <!-- For a minimum PCIe packet size of 84B at 8Gb/s per lane (PCIe 3.0), a new packet arrives every 84ns.
+ the low bound of clock rate of a PCIe per lane logic is 120Mhz -->
+ <param name="type" value="0"/> <!-- 1: low power; 0 high performance -->
+ <param name="withPHY" value="1"/>
+ <param name="clockrate" value="350"/>
+ <param name="number_units" value="0"/>
+ <param name="num_channels" value="8"/> <!-- 2 ,4 ,8 ,16 ,32 -->
+ <stat name="duty_cycle" value="1.0"/> <!-- achievable max load <= 1.0 -->
+ <stat name="total_load_perc" value="0.7"/> <!-- Percentage of total achived load to total achivable bandwidth -->
+ <!-- McPAT does not track individual pcie controllers, instead, it takes the total accesses and calculate
+ the average power per pcie controller or per channel. This is sufficent for most application. -->
+ </component>
+<!--**********************************************************************-->
+ <component id="system.flashc" name="flashc">
+ <param name="number_flashcs" value="0"/>
+ <param name="type" value="1"/> <!-- 1: low power; 0 high performance -->
+ <param name="withPHY" value="1"/>
+ <param name="peak_transfer_rate" value="200"/><!--Per controller sustainable reak rate MB/S -->
+ <stat name="duty_cycle" value="1.0"/> <!-- achievable max load <= 1.0 -->
+ <stat name="total_load_perc" value="0.7"/> <!-- Percentage of total achived load to total achivable bandwidth -->
+ <!-- McPAT does not track individual flash controller, instead, it takes the total accesses and calculate
+ the average power per fc or per channel. This is sufficent for most application -->
+ </component>
+<!--**********************************************************************-->
+
+ </component>
+</component>