diff options
Diffstat (limited to 'ext/mcpat/core.cc')
-rw-r--r-- | ext/mcpat/core.cc | 7640 |
1 files changed, 3757 insertions, 3883 deletions
diff --git a/ext/mcpat/core.cc b/ext/mcpat/core.cc index ba9106061..b25c23cac 100644 --- a/ext/mcpat/core.cc +++ b/ext/mcpat/core.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -33,491 +34,570 @@ #include <cassert> #include <cmath> #include <iostream> +#include <sstream> #include <string> -#include "XML_Parse.h" #include "basic_circuit.h" +#include "basic_components.h" +#include "common.h" #include "const.h" #include "core.h" #include "io.h" #include "parameter.h" -//#include "globalvar.h" - -InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - IB (0), - BTB (0), - ID_inst (0), - ID_operand (0), - ID_misc (0), - exist(exist_) -{ - if (!exist) return; - int idx, tag, data, size, line, assoc, banks; - bool debug= false, is_default = true; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; - //Assuming all L1 caches are virtually idxed physically tagged. 
- //cache - - size = (int)XML->sys.core[ithCore].icache.icache_config[0]; - line = (int)XML->sys.core[ithCore].icache.icache_config[1]; - assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; - banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0]; - interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1]; - interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2]; - interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - // interface_ip.obj_func_dyn_energy = 0; - // interface_ip.obj_func_dyn_power = 0; - // interface_ip.obj_func_leak_power = 0; - // interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty); - scktRatio = g_tp.sckt_co_eff; - chip_PR_overhead = g_tp.chip_layout_overhead; - macro_PR_overhead = g_tp.macro_layout_overhead; - icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area); - area.set_area(area.get_area()+ 
icache.caches->local_result.area); - //output_data_csv(icache.caches.local_result); - - - /* - *iCache controllers - *miss buffer Each MSHR contains enough state - *to handle one or more accesses of any type to a single memory line. - *Due to the generality of the MSHR mechanism, - *the amount of state involved is non-trivial: - *including the address, pointers to the cache entry and destination register, - *written data, and various other pieces of state. - */ - interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area); - 
area.set_area(area.get_area()+ icache.missb->local_result.area); - //output_data_csv(icache.missb.local_result); - - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = icache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area); - area.set_area(area.get_area()+ icache.ifb->local_result.area); - //output_data_csv(icache.ifb.local_result); - - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area); - area.set_area(area.get_area()+ icache.prefetchb->local_result.area); - //output_data_csv(icache.prefetchb.local_result); - - //Instruction buffer - data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - interface_ip.pure_cam = false; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64? 
- XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions. - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - IB->area.set_area(IB->area.get_area()+ IB->local_result.area); - area.set_area(area.get_area()+ IB->local_result.area); - //output_data_csv(IB.IB.local_result); - - // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; - // inst_decoder.init_decoder(is_default, &interface_ip); - // inst_decoder.full_decoder_power(); - - if (coredynp.predictionW>0) - { - /* - * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged - * It is only a cache without all the buffers in the cache controller since it is more like a - * look up table than a cache with cache controller. 
When access miss, no load from other places - * such as main memory (not actively fill the misses), it is passively updated under two circumstances: - * 1) when BPT@ID stage finds out current is a taken branch while BTB missed - * 2) When BPT@ID stage predicts differently than BTB - * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) - * 4) when EXEU find out wrong target has been provided from BTB. - * - */ - size = XML->sys.core[ithCore].BTB.BTB_config[0]; - line = XML->sys.core[ithCore].BTB.BTB_config[1]; - assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; - banks = XML->sys.core[ithCore].BTB.BTB_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); -// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; - tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:size; - interface_ip.line_sz = debug?64:line; - interface_ip.assoc = debug?8:assoc; - interface_ip.nbanks = debug?1:banks; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - BTB = 
new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty); - BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area); - area.set_area(area.get_area()+ BTB->local_result.area); - ///cout<<"area="<<area<<endl; - - BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp); - area.set_area(area.get_area()+ BPT->area.get_area()); - } - - ID_inst = new inst_decoder(is_default, &interface_ip, - coredynp.opcode_length, 1/*Decoder should not know how many by itself*/, - coredynp.x86, - Core_device, coredynp.core_ty); - - ID_operand = new inst_decoder(is_default, &interface_ip, - coredynp.arch_ireg_width, 1, - coredynp.x86, - Core_device, coredynp.core_ty); - - ID_misc = new inst_decoder(is_default, &interface_ip, - 8/* Prefix field etc upto 14B*/, 1, - coredynp.x86, - Core_device, coredynp.core_ty); - //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer. - //So the dynamic power should be multiplied by a few times. 
- area.set_area(area.get_area()+ (ID_inst->area.get_area() - +ID_operand->area.get_area() - +ID_misc->area.get_area())*coredynp.decodeW); -} +int RegFU::RFWIN_ACCESS_MULTIPLIER = 16; + +// The five bits are: busy, Issued, Finished, speculative, valid +int SchedulerU::ROB_STATUS_BITS = 5; + +InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL), + BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int idx, tag, data, size, line, assoc, banks; + bool is_default = true; + + clockRate = core_params.clockRate; + name = "Instruction Fetch Unit"; + // Check if there is an icache child: + int i; + icache = NULL; + for( i = 0; i < xml_data->nChildNode("component"); i++ ) { + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "CacheUnit") { + XMLCSTR name = childXML->getAttribute("name"); + if (strcmp(name, "Instruction Cache") == 0 || + strcmp(name, "icache") == 0) { + icache = new CacheUnit(childXML, &interface_ip); + children.push_back(icache); + } + } + } + set_params_stats(); -BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - globalBPT(0), - localBPT(0), - L1_localBPT(0), - L2_localBPT(0), - chooser(0), - RAS(0), - exist(exist_) -{ + //Instruction buffer + data = core_params.instruction_length * core_params.peak_issueW; + line = int(ceil(data / BITS_PER_BYTE)); + size = core_params.num_hthreads * 
core_params.instruction_buffer_size * + line; + if (size < MIN_BUFFER_SIZE) { + size = MIN_BUFFER_SIZE; + } + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.instruction_buffer_assoc; + interface_ip.nbanks = core_params.instruction_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0; + interface_ip.tag_w = core_params.instruction_buffer_tag_width; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + core_params.number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + + IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + IB->area.set_area(IB->area.get_area() + IB->local_result.area); + area.set_area(area.get_area() + IB->local_result.area); + + if (core_params.predictionW > 0) { /* - * Branch Predictor, accessed during ID stage. - * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264, - * including global predictor, local two level predictor, and Chooser. - * The Branch predictor also includes a RAS (return address stack) for function calls - * Branch predictors are tagged by thread ID and modeled as 1-way associative $ - * However RAS return address stacks are duplicated for each thread. - * TODO:Data Width need to be computed more precisely * + * BTB branch target buffer, accessed during IF stage. 
Virtually indexed and virtually tagged + * It is only a cache without all the buffers in the cache controller since it is more like a + * look up table than a cache with cache controller. When access miss, no load from other places + * such as main memory (not actively fill the misses), it is passively updated under two circumstances: + * 1) when BPT@ID stage finds out current is a taken branch while BTB missed + * 2) When BPT@ID stage predicts differently than BTB + * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) + * 4) when EXEU find out wrong target has been provided from BTB. + * */ - if (!exist) return; - int tag, data; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.assoc = 1; - interface_ip.pure_cam = false; - if (coredynp.multithreaded) - { - - tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - } - else - { - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - - } - //Global predictor - data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + size = inst_fetch_params.btb_size; + line = inst_fetch_params.btb_block_size; + assoc = inst_fetch_params.btb_assoc; + banks = inst_fetch_params.btb_num_banks; + idx = int(ceil(log2(size / line / assoc))); + tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads))) + + EXTRA_TAG_BITS; + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = assoc; + interface_ip.nbanks = banks; + interface_ip.out_w = line 
* BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; interface_ip.num_se_rd_ports = 0; - globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area); - area.set_area(area.get_area()+ globalBPT->local_result.area); - - //Local BPT (Level 1) - data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area); - area.set_area(area.get_area()+ L1_localBPT->local_result.area); - - //Local BPT (Level 2) - data = 
int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area); - area.set_area(area.get_area()+ L2_localBPT->local_result.area); - - //Chooser - data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty); - chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area); - area.set_area(area.get_area()+ chooser->local_result.area); - - //RAS return address 
stacks are Duplicated for each thread. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - data = int(ceil(coredynp.pc_width/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate; + interface_ip.latency = inst_fetch_params.btb_latency / clockRate; + + BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + BTB->local_result.area); + + BPT = new BranchPredictor(xml_data, &interface_ip, + core_params, core_stats); + area.set_area(area.get_area() + BPT->area.get_area()); + } + + ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder", + is_default, &interface_ip, + core_params.opcode_width, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + + ID_operand = new InstructionDecoder(xml_data, + "Instruction Operand Decoder", + is_default, &interface_ip, + core_params.arch_ireg_width, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + + ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder", + is_default, &interface_ip, + core_params.micro_opcode_length, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + area.set_area(area.get_area()+ (ID_inst->area.get_area() + + ID_operand->area.get_area() + + ID_misc->area.get_area()) + * core_params.decodeW); +} + +void +InstFetchU::set_params_stats() { + int num_children = 
xml_data->nChildNode("component"); + int i; + memset(&inst_fetch_params,0,sizeof(InstFetchParameters)); + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); + + if (!type) + warnMissingComponentType(child->getAttribute("id")); + + STRCMP(type, "BranchTargetBuffer") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("size", inst_fetch_params.btb_size); + ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size); + ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc); + ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks); + ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency); + ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput); + ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports); + + else { + warnUnrecognizedParam(node_name); + } + } + + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("read_accesses", + inst_fetch_stats.btb_read_accesses); + ASSIGN_FP_IF("write_accesses", + inst_fetch_stats.btb_write_accesses); + else { + warnUnrecognizedStat(node_name); + } + } + } + } + + // Parameter sanity check + if (inst_fetch_params.btb_size <= 0) { + errorNonPositiveParam("size"); + } + + if (inst_fetch_params.btb_block_size <= 0) { + errorNonPositiveParam("block_size"); + } + + if (inst_fetch_params.btb_assoc <= 0) { + 
errorNonPositiveParam("assoc"); + } + + if (inst_fetch_params.btb_num_banks <= 0) { + errorNonPositiveParam("num_banks"); + } +} + +BranchPredictor::BranchPredictor(XMLNode* _xml_data, + InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_) + : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL), + L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int size; + + clockRate = core_params.clockRate; + name = "Branch Predictor"; + + // Common interface parameters for the branch predictor structures + interface_ip.pure_cam = false; + + if (core_params.multithreaded) { + tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS); + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + } else { + interface_ip.specific_tag = 0; + interface_ip.tag_w = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + } + + // Parse params and stats from XML + set_params_stats(); + + // Common interface parameters for the branch predictor structures + interface_ip.assoc = branch_pred_params.assoc; + interface_ip.nbanks = branch_pred_params.nbanks; + + //Global predictor + data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE)); + size = data * branch_pred_params.global_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + 
interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + globalBPT->local_result.area); + + //Local BPT (Level 1) + data = int(ceil(branch_pred_params.local_l1_predictor_size / + BITS_PER_BYTE)); + size = data * branch_pred_params.local_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + L1_localBPT = new ArrayST(xml_data, &interface_ip, + "Local Predictor, Level 1", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + L1_localBPT->area.set_area(L1_localBPT->area.get_area() + + L1_localBPT->local_result.area); + area.set_area(area.get_area()+ L1_localBPT->local_result.area); + + //Local BPT (Level 2) + data = int(ceil(branch_pred_params.local_l2_predictor_size / + BITS_PER_BYTE)); + size = data * branch_pred_params.local_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + 
interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + L2_localBPT = new ArrayST(xml_data, &interface_ip, + "Local Predictor, Level 2", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + L2_localBPT->local_result.area); + + //Chooser + data = int(ceil(branch_pred_params.chooser_predictor_bits / + BITS_PER_BYTE)); + size = data * branch_pred_params.chooser_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + chooser->local_result.area); + + //RAS return address stacks are Duplicated for each thread. 
+ data = int(ceil(core_params.pc_width / BITS_PER_BYTE)); + size = data * core_params.RAS_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate, + core_params.opt_local, core_params.core_ty); + RAS->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + RAS->local_result.area * + core_params.num_hthreads); + +} + +void +BranchPredictor::set_params_stats() { + int num_children = xml_data->nChildNode("component"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); + + if (!type) + warnMissingComponentType(child->getAttribute("id")); + + STRCMP(type, "BranchPredictor") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("assoc", branch_pred_params.assoc); + ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks); + ASSIGN_INT_IF("local_l1_predictor_size", + branch_pred_params.local_l1_predictor_size); + 
ASSIGN_INT_IF("local_l2_predictor_size", + branch_pred_params.local_l2_predictor_size); + ASSIGN_INT_IF("local_predictor_entries", + branch_pred_params.local_predictor_entries); + ASSIGN_INT_IF("global_predictor_entries", + branch_pred_params.global_predictor_entries); + ASSIGN_INT_IF("global_predictor_bits", + branch_pred_params.global_predictor_bits); + ASSIGN_INT_IF("chooser_predictor_entries", + branch_pred_params.chooser_predictor_entries); + ASSIGN_INT_IF("chooser_predictor_bits", + branch_pred_params.chooser_predictor_bits); + + else { + warnUnrecognizedParam(node_name); + } + } + // The core reads in the number of branches and the number of + // function calls and these values are passed through the + // core_stats variable, so we don't need to read them in here + } + } +} + +SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), int_inst_window(NULL), + fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL), + fp_instruction_selection(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int size; + int line; + bool is_default = true; + string tmp_name; + + clockRate = core_params.clockRate; + name = "Instruction Scheduler"; + if ((core_params.core_ty == Inorder && core_params.multithreaded)) { + //Instruction issue queue, in-order multi-issue or multithreaded + //processor also has this structure. 
Unified window for Inorder + //processors + //This tag width is the normal thread state bits based on + //Niagara Design + tag = int(log2(core_params.num_hthreads) * core_params.perThreadState); + data = core_params.instruction_length; + line = int(ceil(data / BITS_PER_BYTE)); + size = core_params.instruction_window_size * line; + if (size < MIN_BUFFER_SIZE) { + size = MIN_BUFFER_SIZE; + } + + //NOTE: x86 inst can be very lengthy, up to 15B. + //Source: Intel® 64 and IA-32 Architectures + //Software Developer’s Manual + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.scheduler_assoc; + interface_ip.nbanks = core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_issueW; + interface_ip.num_wr_ports = core_params.peak_issueW; interface_ip.num_se_rd_ports = 0; - RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); - RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); - area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); + interface_ip.num_search_ports = core_params.peak_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + int_inst_window = new ArrayST(xml_data, &interface_ip, + "InstFetchQueue", 
Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + int_inst_window->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + int_inst_window->local_result.area * + core_params.num_pipelines); + Iw_height = int_inst_window->local_result.cache_ht; -} + /* + * selection logic + * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up + * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who + * at the issue stage. + */ -SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - int_inst_window(0), - fp_inst_window(0), - ROB(0), - instruction_selection(0), - exist(exist_) - { - if (!exist) return; - int tag, data; - bool is_default=true; - string tmp_name; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if ((coredynp.core_ty==Inorder && coredynp.multithreaded)) - { - //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors - tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design - data = XML->sys.core[ithCore].instruction_length; - //NOTE: x86 inst can be very lengthy, up to 15B. 
Source: Intel® 64 and IA-32 Architectures - //Software Developer’s Manual - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - //output_data_csv(iRS.RS.local_result); - Iw_height =int_inst_window->local_result.cache_ht; - - /* - * selection logic - * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up - * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who - * at the issue stage. 
- */ - - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads, - &interface_ip, Core_device, coredynp.core_ty); + int_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.instruction_window_size, + core_params.peak_issueW * + core_params.num_hthreads, + &interface_ip, + "Int Instruction Selection Logic", + core_stats.inst_window_wakeup_accesses, + clockRate, Core_device, core_params.core_ty); + + if (core_params.fp_instruction_window_size > 0) { + fp_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.fp_instruction_window_size, + core_params.fp_issueW * + core_params.num_hthreads, + &interface_ip, + "FP Instruction Selection Logic", + core_stats.fp_inst_window_wakeup_accesses, + clockRate, Core_device, + core_params.core_ty); } + } - if (coredynp.core_ty==OOO) - { + if (core_params.core_ty == OOO) { /* * CAM based instruction window * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored @@ -525,3611 +605,3405 @@ SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* in * It is written once and read twice(two operands) before an instruction can be issued. * X86 instruction can be very long up to 15B. add instruction length in XML */ - if(coredynp.scheu_ty==PhysicalRegFile) - { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be stored. - // This underestimate the search power - data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0)); - //Data width being divided by 2 means only after both operands available the whole data will be read out. 
- //This is modeled using two equivalent readouts with half of the data width - tmp_name = "InstIssueQueue"; - } - else - { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be stored. - // This underestimate the search power - data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+ - 2*coredynp.int_data_width)/2.0)/8.0)); - //Data width being divided by 2 means only after both operands available the whole data will be read out. - //This is modeled using two equivalent readouts with half of the data width - - tmp_name = "IntReservationStation"; - } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 2*1.0/clockRate; - interface_ip.latency = 2*1.0/clockRate; + if (core_params.scheu_ty == PhysicalRegFile) { + tag = core_params.phy_ireg_width; + data = int((ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_ireg_width - + core_params.arch_ireg_width)) / + (double)NUM_SOURCE_OPERANDS) / + BITS_PER_BYTE)); + tmp_name = "Integer Instruction Window"; + } else { + tag = core_params.phy_ireg_width; + data = int(ceil(((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_ireg_width - + core_params.arch_ireg_width) + + 2 * core_params.int_data_width) / + (double)NUM_SOURCE_OPERANDS) / + BITS_PER_BYTE)); + tmp_name = "Integer Reservation Station"; + } + + size = data * core_params.instruction_window_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = core_params.scheduler_assoc; + interface_ip.nbanks = 
core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_issueW; + interface_ip.num_wr_ports = core_params.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.peak_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate; + interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate; + int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name, + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + int_inst_window->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + int_inst_window->local_result.area * + core_params.num_pipelines); + Iw_height = int_inst_window->local_result.cache_ht; + + //FU inst window + if (core_params.scheu_ty == PhysicalRegFile) { + tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width; + data = int(ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_freg_width - + core_params.arch_freg_width)) / BITS_PER_BYTE)); + tmp_name = "FP Instruction Window"; + } else { + tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width; + data = int(ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_freg_width - + core_params.arch_freg_width) + + NUM_SOURCE_OPERANDS * core_params.fp_data_width) / + BITS_PER_BYTE)); + tmp_name = "FP Reservation Station"; + } + + size = data * core_params.fp_instruction_window_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = 
core_params.scheduler_assoc; + interface_ip.nbanks = core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - Iw_height =int_inst_window->local_result.cache_ht; - //FU inst window - if(coredynp.scheu_ty==PhysicalRegFile) - { - tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared - data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0)); - tmp_name = "FPIssueQueue"; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.fp_issueW; + interface_ip.num_wr_ports = core_params.fp_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.fp_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fp_inst_window = + new ArrayST(xml_data, &interface_ip, tmp_name, Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + fp_inst_window->output_data.area *= core_params.num_fp_pipelines; 
+ area.set_area(area.get_area() + fp_inst_window->local_result.area + *core_params.num_fp_pipelines); + fp_Iw_height = fp_inst_window->local_result.cache_ht; + + if (core_params.ROB_size > 0) { + /* + * if ROB_size = 0, then the target processor does not support hardware-based + * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which + * means branch must be resolved before instruction issued into instruction window, since + * there is no change to flush miss-predict branch path after instructions are issued in this situation. + * + * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. + * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. + * However, this approach is abandoned due to its high power and poor scalablility. + * McPAT uses current implementation of ROB as circular buffer. + * ROB is written once when instruction is issued and read once when the instruction is committed. * + */ + int robExtra = int(ceil(ROB_STATUS_BITS + + log2(core_params.num_hthreads))); + + if (core_params.scheu_ty == PhysicalRegFile) { + //PC is to id the instruction for recover exception. + //inst is used to map the renamed dest. registers. 
so that + //commit stage can know which reg/RRAT to update + data = int(ceil((robExtra + core_params.pc_width + + core_params.phy_ireg_width) / BITS_PER_BYTE)); + } else { + //in RS based OOO, ROB also contains value of destination reg + data = int(ceil((robExtra + core_params.pc_width + + core_params.phy_ireg_width + + core_params.fp_data_width) / BITS_PER_BYTE)); + } + + interface_ip.cache_sz = data * core_params.ROB_size; + interface_ip.line_sz = data; + interface_ip.assoc = core_params.ROB_assoc; + interface_ip.nbanks = core_params.ROB_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.ROB_tag_width > 0; + interface_ip.tag_w = core_params.ROB_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_commitW; + interface_ip.num_wr_ports = core_params.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ROB->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + ROB->local_result.area * + core_params.num_pipelines); + ROB_height = ROB->local_result.cache_ht; + } + + int_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.instruction_window_size, + core_params.peak_issueW, &interface_ip, + "Int Instruction Selection Logic", + core_stats.inst_window_wakeup_accesses, + clockRate, Core_device, core_params.core_ty); + + if (core_params.fp_instruction_window_size > 0) { + 
fp_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.fp_instruction_window_size, + core_params.fp_issueW, &interface_ip, + "FP Instruction Selection Logic", + core_stats.fp_inst_window_wakeup_accesses, + clockRate, Core_device, + core_params.core_ty); } - else - { - tag = 2*coredynp.phy_ireg_width; - data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+ - 2*coredynp.fp_data_width)/8.0)); - tmp_name = "FPReservationStation"; + + } +} + +LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int line; + int size; + int ldst_opcode = core_params.opcode_width; + + clockRate = core_params.clockRate; + name = "Load/Store Unit"; + + // Check if there is a dcache child: + int i; + dcache = NULL; + for( i = 0; i < xml_data->nChildNode("component"); i++ ) { + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "CacheUnit") { + XMLCSTR name = childXML->getAttribute("name"); + if (strcmp(name, "Data Cache") == 0 || + strcmp(name, "dcache") == 0) { + dcache = new CacheUnit(childXML, &interface_ip); + children.push_back(dcache); + } } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - 
interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + } + + /* + * LSU--in-order processors do not have separate load queue: unified lsq + * partitioned among threads + * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ + */ + tag = ldst_opcode + virtual_address_width + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + line = int(ceil(data_path_width / BITS_PER_BYTE)); + size = core_params.store_buffer_size * line * core_params.num_hthreads; + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.store_buffer_assoc; + interface_ip.nbanks = core_params.store_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + LSQ->local_result.area); + area.set_area(area.get_area()*cdb_overhead); + lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead); + + if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) { + tag = ldst_opcode + virtual_address_width + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + line = int(ceil(data_path_width / BITS_PER_BYTE)); + size = 
core_params.load_buffer_size * line * core_params.num_hthreads; + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.load_buffer_assoc; + interface_ip.nbanks = core_params.load_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_issueW; - interface_ip.num_wr_ports = coredynp.fp_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.fp_issueW; - fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); - area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); - fp_Iw_height =fp_inst_window->local_result.cache_ht; - - if (XML->sys.core[ithCore].ROB_size >0) - { - /* - * if ROB_size = 0, then the target processor does not support hardware-based - * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which - * means branch must be resolved before instruction issued into instruction window, since - * there is no change to flush miss-predict branch path after instructions are issued in this situation. - * - * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. - * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. - * However, this approach is abandoned due to its high power and poor scalablility. - * McPAT uses current implementation of ROB as circular buffer. 
- * ROB is written once when instruction is issued and read once when the instruction is committed. * - */ - int robExtra = int(ceil(5 + log2(coredynp.num_hthreads))); - //5 bits are: busy, Issued, Finished, speculative, valid - if(coredynp.scheu_ty==PhysicalRegFile) - { - //PC is to id the instruction for recover exception. - //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update -// data = int(ceil((robExtra+coredynp.pc_width + -// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0)); - data = int(ceil((robExtra+coredynp.pc_width + - coredynp.phy_ireg_width)/8.0)); - } - else - { - //in RS based OOO, ROB also contains value of destination reg -// data = int(ceil((robExtra+coredynp.pc_width + -// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); - data = int(ceil((robExtra + coredynp.pc_width + - coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); - } - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_commitW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 0; - ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); - 
area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); - ROB_height =ROB->local_result.cache_ht; - } - - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty); + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device, + clockRate, core_params.opt_local, + core_params.core_ty); + LoadQ->area.set_area(LoadQ->area.get_area() + + LoadQ->local_result.area); + area.set_area(area.get_area()*cdb_overhead); + lsq_height = (LSQ->local_result.cache_ht + + LoadQ->local_result.cache_ht) * sqrt(cdb_overhead); } + } -LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - LSQ(0), - exist(exist_) -{ - if (!exist) return; - int idx, tag, data, size, line, assoc, banks; - bool debug= false; - int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7]; - - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - //Dcache - size = (int)XML->sys.core[ithCore].dcache.dcache_config[0]; - line = (int)XML->sys.core[ithCore].dcache.dcache_config[1]; 
- assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2]; - banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0]; - interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1]; - interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2]; - interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area); - area.set_area(area.get_area()+ dcache.caches->local_result.area); - //output_data_csv(dcache.caches.local_result); - - //dCache controllers - //miss buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area); - area.set_area(area.get_area()+ dcache.missb->local_result.area); - //output_data_csv(dcache.missb.local_result); - - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - 
interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area); - area.set_area(area.get_area()+ dcache.ifb->local_result.area); - //output_data_csv(dcache.ifb.local_result); - - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area); - area.set_area(area.get_area()+ dcache.prefetchb->local_result.area); - //output_data_csv(dcache.prefetchb.local_result); - - //WBB - - if (cache_p==Write_back) - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - 
interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area); - area.set_area(area.get_area()+ dcache.wbb->local_result.area); - //output_data_csv(dcache.wbb.local_result); - } - - /* - * LSU--in-order processors do not have separate load queue: unified lsq - * partitioned among threads - * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ - */ - tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS; - data = XML->sys.machine_bits; - interface_ip.is_cache = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; - LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty); - LSQ->area.set_area(LSQ->area.get_area()+ 
LSQ->local_result.area); - area.set_area(area.get_area()+ LSQ->local_result.area); - area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(LSQ.LSQ.local_result); - lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; - LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area); - area.set_area(area.get_area()+ LoadQ->local_result.area); - area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(LoadQ.LoadQ.local_result); - lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ - } +MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL), + interface_ip(*interface_ip_), + 
core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int line; + + clockRate = core_params.clockRate; + name = "Memory Management Unit"; + + set_params_stats(); + + // These are shared between ITLB and DTLB + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + //Itlb TLBs are partioned among threads according to Nigara and Nehalem + tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + data = physical_address_width - int(floor(log2(virtual_memory_page_size))); + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = mem_man_params.itlb_number_entries * line; + interface_ip.line_sz = line; + interface_ip.assoc = mem_man_params.itlb_assoc; + interface_ip.nbanks = mem_man_params.itlb_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.number_instruction_fetch_ports; + interface_ip.throughput = mem_man_params.itlb_throughput / clockRate; + interface_ip.latency = mem_man_params.itlb_latency / clockRate; + itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + itlb->local_result.area); + + //dtlb + tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + data = physical_address_width - 
int(floor(log2(virtual_memory_page_size))); + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line; + interface_ip.line_sz = line; + interface_ip.assoc = mem_man_params.dtlb_assoc; + interface_ip.nbanks = mem_man_params.dtlb_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate; + interface_ip.latency = mem_man_params.dtlb_latency / clockRate; + dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + dtlb->local_result.area); } -MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - itlb(0), - dtlb(0), - exist(exist_) -{ - if (!exist) return; - int tag, data; - bool debug= false; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.specific_tag = 1; - //Itlb TLBs are partioned among threads according to Nigara and Nehalem - tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; - data = XML->sys.physical_address_width- 
int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); - itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area); - area.set_area(area.get_area()+ itlb->local_result.area); - //output_data_csv(itlb.tlb.local_result); - - //dtlb - tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; - data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - 
interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); - dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area); - area.set_area(area.get_area()+ dtlb->local_result.area); - //output_data_csv(dtlb.tlb.local_result); +void +MemManU::set_params_stats() { + memset(&mem_man_params, 0, sizeof(MemoryManagementParams)); + memset(&mem_man_stats, 0, sizeof(MemoryManagementStats)); + int num_children = xml_data->nChildNode("component"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); + + if (!type) + warnMissingComponentType(child->getAttribute("id")); + + STRCMP(type, "InstructionTLB") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("number_entries", + mem_man_params.itlb_number_entries); + ASSIGN_FP_IF("latency", mem_man_params.itlb_latency); + ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput); + ASSIGN_FP_IF("assoc", 
mem_man_params.itlb_assoc); + ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks); + + else { + warnUnrecognizedParam(node_name); + } + } + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("total_accesses", + mem_man_stats.itlb_total_accesses); + ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses); + ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts); + else { + warnUnrecognizedStat(node_name); + } + } + } STRCMP(type, "DataTLB") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("number_entries", + mem_man_params.dtlb_number_entries); + ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency); + ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput); + ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc); + ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks); + + else { + warnUnrecognizedParam(node_name); + } + } + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("read_accesses", + mem_man_stats.dtlb_read_accesses); + ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses); + ASSIGN_FP_IF("write_accesses", + mem_man_stats.dtlb_write_accesses); + 
ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses); + ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts); + + else { + warnUnrecognizedStat(node_name); + } + } + } + } } -RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - IRF (0), - FRF (0), - RFWIN (0), - exist(exist_) - { - /* - * processors have separate architectural register files for each thread. - * therefore, the bypass buses need to travel across all the register files. - */ - if (!exist) return; - int data; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - //**********************************IRF*************************************** - data = coredynp.int_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 
- interface_ip.num_rd_ports = 2*coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead); - area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead); - //area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(IRF.RF.local_result); - - //**********************************FRF*************************************** - data = coredynp.fp_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; +RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + /* + * processors have separate architectural register files for each thread. + * therefore, the bypass buses need to travel across all the register files. 
+ */ + if (!exist) return; + int data; + int line; + + clockRate = core_params.clockRate; + name = "Register File Unit"; + + //**********************************IRF************************************ + data = core_params.int_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.num_IRF_entry * line; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.phy_Regs_IRF_assoc; + interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0; + interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports; + interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + IRF->output_data.area *= core_params.num_hthreads * + core_params.num_pipelines * cdb_overhead; + area.set_area(area.get_area() + IRF->local_result.area * + core_params.num_hthreads * core_params.num_pipelines * + cdb_overhead); + + //**********************************FRF************************************ + data = core_params.fp_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.num_FRF_entry * line; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.phy_Regs_FRF_assoc; + interface_ip.nbanks = 
core_params.phy_Regs_FRF_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0; + interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports; + interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + FRF->output_data.area *= core_params.num_hthreads * + core_params.num_fp_pipelines * cdb_overhead; + area.set_area(area.get_area() + FRF->local_result.area * + core_params.num_hthreads * core_params.num_fp_pipelines * + cdb_overhead); + int_regfile_height = IRF->local_result.cache_ht * + core_params.num_hthreads * sqrt(cdb_overhead); + fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads * + sqrt(cdb_overhead); + //since a EXU is associated with each pipeline, the cdb should not have + //longer length. 
+ + if (core_params.regWindowing) { + //*********************************REG_WIN***************************** + //ECC, and usually 2 regs are transfered together during window + //shifting.Niagara Mega cell + data = core_params.int_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.register_window_size * + IRF->l_ip.cache_sz * core_params.num_hthreads; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.register_window_assoc; + interface_ip.nbanks = core_params.register_window_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.register_window_tag_width > 0; + interface_ip.tag_w = core_params.register_window_tag_width; + interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 
- interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.register_window_rw_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead); - area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead); - //area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(FRF.RF.local_result); - int_regfile_height= IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead); - fp_regfile_height = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead); - //since a EXU is associated with each pipeline, the cdb should not have longer length. 
- if (coredynp.regWindowing) - { - //*********************************REG_WIN************************************ - data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 4.0/clockRate; - interface_ip.latency = 4.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty); - RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); - //output_data_csv(RFWIN.RF.local_result); - } + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = + core_params.register_window_throughput / clockRate; + interface_ip.latency = + core_params.register_window_latency / clockRate; + RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device, + clockRate, core_params.opt_local, + core_params.core_ty); + RFWIN->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + RFWIN->local_result.area * + core_params.num_pipelines); + } +} +EXECU::EXECU(XMLNode* _xml_data, + InputParameter* interface_ip_, double lsq_height_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL), + exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL), + int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL), + fpTagBypass(NULL), interface_ip(*interface_ip_), + lsq_height(lsq_height_), core_params(_core_params), + core_stats(_core_stats), exist(exist_) { + if (!exist) return; + double fu_height = 0.0; + clockRate = core_params.clockRate; + name = "Execution Unit"; + rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats); + if (core_params.core_ty == OOO || + (core_params.core_ty == Inorder && core_params.multithreaded)) { + scheu = new SchedulerU(xml_data, &interface_ip, core_params, + core_stats); + area.set_area(area.get_area() + scheu->area.get_area() ); + } + exeu = new FunctionalUnit(xml_data, 
&interface_ip, core_params, + core_stats, ALU); + area.set_area(area.get_area() + exeu->area.get_area() + + rfu->area.get_area()); + fu_height = exeu->FU_height; + if (core_params.num_fpus > 0) { + fp_u = new FunctionalUnit(xml_data, &interface_ip, + core_params, core_stats, FPU); + area.set_area(area.get_area() + fp_u->area.get_area()); + } + if (core_params.num_muls > 0) { + mul = new FunctionalUnit(xml_data, &interface_ip, + core_params, core_stats, MUL); + area.set_area(area.get_area() + mul->area.get_area()); + fu_height += mul->FU_height; + } + /* + * broadcast logic, including int-broadcast; int_tag-broadcast; + * fp-broadcast; fp_tag-broadcast + * integer by pass has two paths and fp has 3 paths. + * on the same bus there are multiple tri-state drivers and muxes that go + * to different components on the same bus + */ + interface_ip.wt = core_params.execu_broadcast_wt; + interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type; + interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type; + interface_ip.throughput = core_params.broadcast_numerator / clockRate; + interface_ip.latency = core_params.broadcast_numerator / clockRate; + double scheu_Iw_height = 0.0; + double scheu_ROB_height = 0.0; + double scheu_fp_Iw_height = 0.0; + if (scheu) { + scheu_Iw_height = scheu->Iw_height; + scheu_ROB_height = scheu->ROB_height; + scheu_fp_Iw_height = scheu->fp_Iw_height; + } - } - -EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - lsq_height(lsq_height_), - coredynp(dyn_p_), - rfu(0), - scheu(0), - fp_u(0), - exeu(0), - mul(0), - int_bypass(0), - intTagBypass(0), - int_mul_bypass(0), - intTag_mul_Bypass(0), - fp_bypass(0), - fpTagBypass(0), - exist(exist_) -{ - if (!exist) return; - double fu_height = 0.0; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - rfu 
= new RegFU(XML, ithCore, &interface_ip,coredynp); - scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp); - exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU); - area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() ); - fu_height = exeu->FU_height; - if (coredynp.num_fpus >0) - { - fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU); - area.set_area(area.get_area()+ fp_u->area.get_area()); - } - if (coredynp.num_muls >0) - { - mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL); - area.set_area(area.get_area()+ mul->area.get_area()); - fu_height += mul->FU_height; - } - /* - * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast - * integer by pass has two paths and fp has 3 paths. - * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus - */ - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used - interface_ip.wire_os_mat_type = 2; - interface_ip.throughput = 10.0/clockRate; //Do not care - interface_ip.latency = 10.0/clockRate; - } - - if (coredynp.core_ty==Inorder) - { - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 
3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); - - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - else - {//OOO - if (coredynp.scheu_ty==PhysicalRegFile) - { - /* For physical register based OOO, - * data broadcast interconnects cover across functional units, lsq, inst windows and register files, - * while tag broadcast interconnects also cover across ROB - */ - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, 
coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - else - { - /* - * In RS based processor both data and tag are broadcast together, - * covering functional units, lsq, nst windows, register files, and ROBs - */ - int_bypass = new 
interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, 
coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - - - } - area.set_area(area.get_area()+ bypass.area.get_area()); -} + // Common bypass logic parameters + double base_w = core_params.execu_bypass_base_width; + double base_h = core_params.execu_bypass_base_height; + int level = core_params.execu_bypass_start_wiring_level; + double route_over_perc = core_params.execu_bypass_route_over_perc; + Wire_type wire_type = core_params.execu_bypass_wire_type; + int data_w; + double len; + + if (core_params.core_ty == Inorder) { + data_w = int(ceil(data_path_width / 32.0)*32); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, false, + route_over_perc, core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.perThreadState; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, false, + route_over_perc, core_params.opt_local, + core_params.core_ty, wire_type); + + if (core_params.num_muls > 0) { + data_w = int(ceil(data_path_width / 32.0)*32*1.5); + len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.perThreadState; + len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu_Iw_height; + intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag", + Core_device, base_w, 
base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + } + + if (core_params.num_fpus > 0) { + data_w = int(ceil(data_path_width / 32.0)*32*1.5); + len = rfu->fp_regfile_height + fp_u->FU_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.perThreadState; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_Iw_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } else {//OOO + if (core_params.scheu_ty == PhysicalRegFile) { + /* For physical register based OOO, + * data broadcast interconnects cover across functional units, lsq, + * inst windows and register files, + * while tag broadcast interconnects also cover across ROB + */ + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + if (core_params.num_muls > 0) { + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + + 
mul->FU_height + lsq_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + intTag_mul_Bypass = new Interconnect(xml_data, + "Mul Bypass Tag", + Core_device, base_w, + base_h, data_w, len, + &interface_ip, level, + clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + } -RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - iFRAT(0), - fFRAT(0), - iRRAT(0), - fRRAT(0), - ifreeL(0), - ffreeL(0), - idcl(0), - fdcl(0), - RAHT(0), - exist(exist_) - { - /* - * Although renaming logic maybe be used in in-order processors, - * McPAT assumes no renaming logic is used since the performance gain is very limited and - * the only major inorder processor with renaming logic is Itainium - * that is a VLIW processor and different from current McPAT's model. - * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT; - * i,f prefix mean int and fp - * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires. 
- * FRAT will be read twice and written once per instruction; - * RRAT will be write once per instruction when committing and reads out all when context switch - * checkpointing is implicit - * Renaming logic is duplicated for each different hardware threads - * - * No Dual-RAT is needed in RS-based OOO processors, - * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry, - * to make sure all the renamings associated with the ROB to be released are updated at the same time. - * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, - * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, - * - * Both RAM and CAM have same DCL - */ - if (!exist) return; - int tag, data, out_w; -// interface_ip.wire_is_mat_type = 0; -// interface_ip.wire_os_mat_type = 0; -// interface_ip.wt = Global_30; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if (coredynp.core_ty==OOO) - { - //integer pipeline - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions - data = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); -// data = int(ceil(coredynp.phy_ireg_width/8.0)); - out_w = 1;//int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - 
interface_ip.num_rd_ports = 2*coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - -// //RAHT According to Intel, combine GC with FRAT is very costly. -// data = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry); -// out_w = data; -// interface_ip.is_cache = false; -// interface_ip.pure_cam = false; -// interface_ip.pure_ram = true; -// interface_ip.line_sz = data; -// interface_ip.cache_sz = data*coredynp.globalCheckpoint; -// interface_ip.assoc = 1; -// interface_ip.nbanks = 1; -// interface_ip.out_w = out_w*8; -// interface_ip.access_mode = 0; -// interface_ip.throughput = 1.0/clockRate; -// interface_ip.latency = 1.0/clockRate; -// interface_ip.obj_func_dyn_energy = 0; -// interface_ip.obj_func_dyn_power = 0; -// interface_ip.obj_func_leak_power = 0; -// interface_ip.obj_func_cycle_t = 1; -// interface_ip.num_rw_ports = 1;//the extra one port is for GCs -// interface_ip.num_rd_ports = 2*coredynp.decodeW; -// interface_ip.num_wr_ports = coredynp.decodeW; -// interface_ip.num_se_rd_ports = 0; -// iFRAT = new ArrayST(&interface_ip, "Int FrontRAT"); -// iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); -// area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT floating point - data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size; - interface_ip.assoc = 1; - 
interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + if (core_params.num_fpus > 0) { + data_w = int(ceil(core_params.fp_data_width)); + len = rfu->fp_regfile_height + fp_u->FU_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.phy_freg_width; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } else { + /* + * In RS based processor both data and tag are broadcast together, + * covering functional units, lsq, nst windows, register files, and ROBs + */ + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, 
clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + if (core_params.num_muls > 0) { + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + intTag_mul_Bypass = new Interconnect(xml_data, + "Mul Bypass Tag", + Core_device, base_w, + base_h, data_w, len, + &interface_ip, level, + clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + } - } - else if ((coredynp.rm_ty ==CAMbased)) - { - //FRAT - tag = coredynp.arch_ireg_width; - data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 
1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT for FP - tag = coredynp.arch_freg_width; - data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ 
fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + if (core_params.num_fpus > 0) { + data_w = int(ceil(core_params.fp_data_width)); + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.phy_freg_width; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } + } + if (int_bypass) { + children.push_back(int_bypass); + } + if (intTagBypass) { + children.push_back(intTagBypass); + } + if (int_mul_bypass) { + children.push_back(int_mul_bypass); + } + if (intTag_mul_Bypass) { + children.push_back(intTag_mul_Bypass); + } + if (fp_bypass) { + children.push_back(fp_bypass); + } + if (fpTagBypass) { + children.push_back(fpTagBypass); + } - } + area.set_area(area.get_area() + int_bypass->area.get_area() + + intTagBypass->area.get_area()); + if (core_params.num_muls > 0) { + area.set_area(area.get_area() + int_mul_bypass->area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + if (core_params.num_fpus > 0) { + area.set_area(area.get_area() + fp_bypass->area.get_area() + + fpTagBypass->area.get_area()); + } +} - //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = 
data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; +RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL), + fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL), + RAHT(NULL), interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int out_w; + int size; + + // Assumption: + // We make an implicit design assumption based on the specific structure + // that is being modeled. + // 1. RAM-based RATs are direct mapped. However, if the associated + // scheduler is a reservation station style, the RATs are fully + // associative. + // 2. Non-CAM based RATs and free lists do not have tags. + // 3. Free lists are direct mapped. 
+ + const int RAM_BASED_RAT_ASSOC = 1; + const int RS_RAT_ASSOC = 0; + const int NON_CAM_BASED_TAG_WIDTH = 0; + const int FREELIST_ASSOC = 1; + + clockRate = core_params.clockRate; + name = "Rename Unit"; + if (core_params.core_ty == OOO) { + //integer pipeline + if (core_params.scheu_ty == PhysicalRegFile) { + if (core_params.rm_ty == RAMbased) { + //FRAT with global checkpointing (GCs) please see paper tech + //report for detailed explaintions + + data = int(ceil(core_params.phy_ireg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + + size = data * core_params.archi_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iRRAT->area.get_area()); - - //RRAT for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - 
interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FRAT floating point + data = int(ceil(core_params.phy_freg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * 
core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fRRAT->area.get_area()); - - //Freelist of renaming unit always RAM based - //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist - // 2)When instruction commits the Phyregisters/ROB needed to be recycled. - //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if ((core_params.rm_ty == CAMbased)) { + //IRAT + tag = core_params.arch_ireg_width; + //the address of CAM needed to be sent out + data = int(ceil((core_params.arch_ireg_width + 1 * + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_ireg_width / 
BITS_PER_BYTE)); + size = data * core_params.phy_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//TODO - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width; - //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ifreeL->area.get_area()); - - //freelist for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ffreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = 
false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FRAT for FP + tag = core_params.arch_freg_width; + //the address of CAM needed to be sent out + data = int(ceil((core_params.arch_freg_width + 1 * + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); + size = data * core_params.phy_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; - ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ffreeL->area.get_area()); - - idcl = new 
dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); - - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased){ - /* - * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry, - * to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time. - * RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than - * CAM based RAT so that it is more scalable as number of ROB/physical regs increases. - */ - tag = coredynp.phy_ireg_width; - data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= coredynp.commitW;//TODO - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->local_result.adjust_area(); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ 
iFRAT->area.get_area()); - - //FP - tag = coredynp.phy_freg_width; - data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->local_result.adjust_area(); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - //FRAT - tag = coredynp.arch_ireg_width; - data = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - 
interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO - interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT - tag = coredynp.arch_freg_width; - data = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", 
Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + } - } - //No RRAT for RS based OOO - //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + //RRAT is always RAM based, does not have GCs, and is used only for + //record latest non-speculative mapping + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_IRF_size * + NUM_SOURCE_OPERANDS; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.retire_rat_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + 
interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; + interface_ip.num_rd_ports = core_params.commitW; + interface_ip.num_wr_ports = core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + iRRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iRRAT->area.get_area()); + + //RRAT for FP + data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size * + NUM_SOURCE_OPERANDS; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.retire_rat_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire 
RAT", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + fRRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fRRAT->area.get_area()); + + //Freelist of renaming unit always RAM based + //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist + // 2)When instruction commits the Phyregisters/ROB needed to be recycled. + //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.num_ifreelist_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = + core_params.decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ifreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ifreeL->area.get_area()); + + //freelist for FP + data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + 
size = data * core_params.num_ffreelist_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = + core_params.fp_decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ffreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ffreeL->area.get_area()); + + } else if (core_params.scheu_ty == ReservationStation) { + if (core_params.rm_ty == RAMbased) { + tag = core_params.phy_ireg_width; + data = int(ceil(core_params.phy_ireg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RS_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; + 
interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.commitW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->local_result.adjust_area(); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FP + tag = core_params.phy_freg_width; + data = int(ceil(core_params.phy_freg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RS_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.fp_issueW; + interface_ip.is_cache = true; + 
interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->local_result.adjust_area(); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if ((core_params.rm_ty == CAMbased)) { + //FRAT + //the address of CAM needed to be sent out + tag = core_params.arch_ireg_width; + data = int(ceil (core_params.arch_ireg_width + + 1 * core_params.globalCheckpoint / + BITS_PER_BYTE)); + out_w = int(ceil (core_params.arch_ireg_width / + BITS_PER_BYTE)); + size = data * core_params.phy_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FRAT + tag = 
core_params.arch_freg_width; + //the address of CAM needed to be sent out + data = int(ceil(core_params.arch_freg_width + + 1 * core_params.globalCheckpoint / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); + size = data * core_params.phy_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//TODO - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ifreeL->area.get_area()); + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + 
area.set_area(area.get_area() + fFRAT->area.get_area()); - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); + } + //No RRAT for RS based OOO + //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.num_ifreelist_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = + core_params.decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ifreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ifreeL->area.get_area()); } -} - if (coredynp.core_ty==Inorder&& coredynp.issueW>1) - { - /* Dependency check logic will only present when decode(issue) width>1. - * Multiple issue in order processor can do without renaming, but dcl is a must. 
- */ - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); } + idcl = + new dep_resource_conflict_check(xml_data, + "Instruction Dependency Check?", + &interface_ip, core_params, + core_params.phy_ireg_width, + clockRate); + fdcl = + new dep_resource_conflict_check(xml_data, + "FP Dependency Check?", &interface_ip, + core_params, + core_params.phy_freg_width, clockRate); } -Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - ifu (0), - lsu (0), - mmu (0), - exu (0), - rnu (0), - corepipe (0), - undiffCore (0), - l2cache (0) -{ - /* - * initialize, compute and optimize individual components. - */ - - double pipeline_area_per_unit; - if (XML->sys.Private_L2) - { - l2cache = new SharedCache(XML,ithCore, &interface_ip); - - } -// interface_ip.wire_is_mat_type = 2; -// interface_ip.wire_os_mat_type = 2; -// interface_ip.wt =Global_30; - set_core_param(); - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp); - lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp); - mmu = new MemManU (XML, ithCore, &interface_ip,coredynp); - exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp); - undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp); - if (coredynp.core_ty==OOO) - { - rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp); - } - corepipe = new Pipeline(&interface_ip,coredynp); - - if (coredynp.core_ty==OOO) - { - pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0; - if (rnu->exist) - { - rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); - } - } - else { - pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0; - } - - 
//area.set_area(area.get_area()+ corepipe->area.get_area()); - if (ifu->exist) - { - ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + ifu->area.get_area()); - } - if (lsu->exist) - { - lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + lsu->area.get_area()); - } - if (exu->exist) - { - exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area()+exu->area.get_area()); - } - if (mmu->exist) - { - mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area()+mmu->area.get_area()); - } - - if (coredynp.core_ty==OOO) - { - if (rnu->exist) - { - - area.set_area(area.get_area() + rnu->area.get_area()); - } - } - - if (undiffCore->exist) - { - area.set_area(area.get_area() + undiffCore->area.get_area()); - } - - if (XML->sys.Private_L2) - { - area.set_area(area.get_area() + l2cache->area.get_area()); - - } -// //clock power -// clockNetwork.init_wire_external(is_default, &interface_ip); -// clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
rule of thumb -// clockNetwork.end_wiring_level =5;//toplevel metal -// clockNetwork.start_wiring_level =5;//toplevel metal -// clockNetwork.num_regs = corepipe.tot_stage_vector; -// clockNetwork.optimize_wire(); -} +Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_) + : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL), + exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL), + ithCore(_ithCore), interface_ip(*interface_ip_) { + + ostringstream os; + os << ithCore; + name = "Core " + os.str(); + + int i = 0; + XMLNode* childXML; + for (i = 0; i < xml_data->nChildNode("component"); i++) { + childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "CacheUnit") { + XMLCSTR comp_name = childXML->getAttribute("id"); + if (!comp_name) + continue; + + STRCMP(comp_name, "system.L20") { + l2cache = new CacheUnit(childXML, &interface_ip); + children.push_back(l2cache); + } + } + } + set_core_param(); + clockRate = core_params.clockRate; + + ifu = new InstFetchU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(ifu); + lsu = new LoadStoreU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(lsu); + mmu = new MemManU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(mmu); + exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height, + core_params, core_stats); + children.push_back(exu); + undiffCore = new UndiffCore(xml_data, &interface_ip, core_params); + children.push_back(undiffCore); + if (core_params.core_ty == OOO) { + rnu = new RENAMINGU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(rnu); + } + corepipe = new Pipeline(xml_data, &interface_ip, core_params); + children.push_back(corepipe); + + double pipeline_area_per_unit; + if (core_params.core_ty == OOO) { + pipeline_area_per_unit = 
(corepipe->area.get_area() * + core_params.num_pipelines) / 5.0; + if (rnu->exist) { + rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); + } + } else { + pipeline_area_per_unit = (corepipe->area.get_area() * + core_params.num_pipelines) / 4.0; + } -void BranchPredictor::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double r_access; - double w_access; - if (is_tdp) - { - r_access = coredynp.predictionW*coredynp.BR_duty_cycle; - w_access = 0*coredynp.BR_duty_cycle; - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->tdp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->tdp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->tdp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->tdp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = r_access; - RAS->stats_t.writeAc.access = w_access; - RAS->tdp_stats = RAS->stats_t; - } - else - { - //The resolution of BPT accesses is coarse, but this is - //because most simulators cannot track finer grained details - r_access = XML->sys.core[ithCore].branch_instructions; - w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0 - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->rtp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->rtp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->rtp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access 
= r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->rtp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; - RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; - RAS->rtp_stats = RAS->stats_t; - } - - globalBPT->power_t.reset(); - L1_localBPT->power_t.reset(); - L2_localBPT->power_t.reset(); - chooser->power_t.reset(); - RAS->power_t.reset(); - - globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access + - globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic; - L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access + - L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic; - - L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access + - L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic; - - chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access + - chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic; - RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access + - RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic; - - if (is_tdp) - { - globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; - L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; - L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; - chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg; - RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; - - power = power + globalBPT->power + L1_localBPT->power + chooser->power + RAS->power; - } - else - { - globalBPT->rt_power = 
globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; - L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; - L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; - chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg; - RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; - rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + chooser->rt_power + RAS->rt_power; + // Move all of this to computeArea + //area.set_area(area.get_area()+ corepipe->area.get_area()); + if (ifu->exist) { + ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + ifu->area.get_area()); + } + if (lsu->exist) { + lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + lsu->area.get_area()); + } + if (exu->exist) { + exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + exu->area.get_area()); + } + if (mmu->exist) { + mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + mmu->area.get_area()); } -} -void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - if (is_tdp) - { - cout << indent_str<< "Global Predictor:" << endl; - cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str << "Local Predictor:" << endl; - cout << indent_str << "L1_Local Predictor:" << endl; - cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? L1_localBPT->power.readOp.longer_channel_leakage:L1_localBPT->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << L1_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str << "L2_Local Predictor:" << endl; - cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
L2_localBPT->power.readOp.longer_channel_leakage:L2_localBPT->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << L2_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - - cout << indent_str << "Chooser:" << endl; - cout << indent_str_next << "Area = " << chooser->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << chooser->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? chooser->power.readOp.longer_channel_leakage:chooser->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << chooser->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str << "RAS:" << endl; - cout << indent_str_next << "Area = " << RAS->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << RAS->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
RAS->power.readOp.longer_channel_leakage:RAS->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - else - { -// cout << indent_str_next << "Global Predictor Peak Dynamic = " << globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Global Predictor Subthreshold Leakage = " << globalBPT->rt_power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Global Predictor Gate Leakage = " << globalBPT->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Local Predictor Peak Dynamic = " << L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Local Predictor Subthreshold Leakage = " << L1_localBPT->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Local Predictor Gate Leakage = " << L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Chooser Peak Dynamic = " << chooser->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Chooser Subthreshold Leakage = " << chooser->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "RAS Peak Dynamic = " << RAS->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "RAS Subthreshold Leakage = " << RAS->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" << endl; - } + if (core_params.core_ty == OOO) { + if (rnu->exist) { -} - -void InstFetchU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - icache.caches->stats_t.readAc.access = 
icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle; - icache.caches->stats_t.readAc.miss = 0; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; - icache.caches->tdp_stats = icache.caches->stats_t; - - icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports; - icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports; - icache.missb->tdp_stats = icache.missb->stats_t; - - icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports; - icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports; - icache.ifb->tdp_stats = icache.ifb->stats_t; - - icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports; - icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports; - icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; - - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width; - IB->tdp_stats = IB->stats_t; - - if (coredynp.predictionW>0) - { - BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses; - BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses; + area.set_area(area.get_area() + rnu->area.get_area()); } + } - ID_inst->stats_t.readAc.access = coredynp.decodeW; - ID_operand->stats_t.readAc.access = coredynp.decodeW; - ID_misc->stats_t.readAc.access = coredynp.decodeW; - ID_inst->tdp_stats = ID_inst->stats_t; - ID_operand->tdp_stats = ID_operand->stats_t; - ID_misc->tdp_stats = ID_misc->stats_t; - + if (undiffCore->exist) { + area.set_area(area.get_area() + undiffCore->area.get_area()); + } + if (l2cache) { + area.set_area(area.get_area() + l2cache->area.get_area()); } - else - { - //init 
stats for Runtime Dynamic (RTP) - icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses; - icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; - icache.caches->rtp_stats = icache.caches->stats_t; +} - icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->rtp_stats = icache.missb->stats_t; - icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->rtp_stats = icache.ifb->stats_t; +void BranchPredictor::computeEnergy() { + if (!exist) return; + + // ASSUMPTION: All instructions access the branch predictors at Fetch and + // only branch instrucions update the predictors regardless + // of the correctness of the prediction. 
+ double tdp_read_accesses = + core_params.predictionW * core_stats.BR_duty_cycle; + globalBPT->tdp_stats.reset(); + globalBPT->tdp_stats.readAc.access = tdp_read_accesses; + globalBPT->tdp_stats.writeAc.access = 0; + globalBPT->rtp_stats.reset(); + globalBPT->rtp_stats.readAc.access = core_stats.total_instructions; + globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + globalBPT->power_t.reset(); + globalBPT->power_t.readOp.dynamic += + globalBPT->local_result.power.readOp.dynamic * + globalBPT->tdp_stats.readAc.access + + globalBPT->local_result.power.writeOp.dynamic * + globalBPT->tdp_stats.writeAc.access; + globalBPT->power_t = globalBPT->power_t + + globalBPT->local_result.power * pppm_lkg; + globalBPT->rt_power.reset(); + globalBPT->rt_power.readOp.dynamic += + globalBPT->local_result.power.readOp.dynamic * + globalBPT->rtp_stats.readAc.access + + globalBPT->local_result.power.writeOp.dynamic * + globalBPT->rtp_stats.writeAc.access; + + L1_localBPT->tdp_stats.reset(); + L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses; + L1_localBPT->tdp_stats.writeAc.access = 0; + L1_localBPT->rtp_stats.reset(); + L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions; + L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + L1_localBPT->power_t.reset(); + L1_localBPT->power_t.readOp.dynamic += + L1_localBPT->local_result.power.readOp.dynamic * + L1_localBPT->tdp_stats.readAc.access + + L1_localBPT->local_result.power.writeOp.dynamic * + L1_localBPT->tdp_stats.writeAc.access; + L1_localBPT->power_t = L1_localBPT->power_t + + L1_localBPT->local_result.power * pppm_lkg; + L1_localBPT->rt_power.reset(); + L1_localBPT->rt_power.readOp.dynamic += + L1_localBPT->local_result.power.readOp.dynamic * + L1_localBPT->rtp_stats.readAc.access + + L1_localBPT->local_result.power.writeOp.dynamic * + L1_localBPT->rtp_stats.writeAc.access; + + L2_localBPT->tdp_stats.reset(); + L2_localBPT->tdp_stats.readAc.access = 
tdp_read_accesses; + L2_localBPT->tdp_stats.writeAc.access = 0; + L2_localBPT->rtp_stats.reset(); + L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions; + L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + L2_localBPT->power_t.reset(); + L2_localBPT->power_t.readOp.dynamic += + L2_localBPT->local_result.power.readOp.dynamic * + L2_localBPT->tdp_stats.readAc.access + + L2_localBPT->local_result.power.writeOp.dynamic * + L2_localBPT->tdp_stats.writeAc.access; + L2_localBPT->power_t = L2_localBPT->power_t + + L2_localBPT->local_result.power * pppm_lkg; + L2_localBPT->rt_power.reset(); + L2_localBPT->rt_power.readOp.dynamic += + L2_localBPT->local_result.power.readOp.dynamic * + L2_localBPT->rtp_stats.readAc.access + + L2_localBPT->local_result.power.writeOp.dynamic * + L2_localBPT->rtp_stats.writeAc.access; + + chooser->tdp_stats.reset(); + chooser->tdp_stats.readAc.access = tdp_read_accesses; + chooser->tdp_stats.writeAc.access = 0; + chooser->rtp_stats.reset(); + chooser->rtp_stats.readAc.access = core_stats.total_instructions; + chooser->rtp_stats.writeAc.access = core_stats.branch_instructions; + chooser->power_t.reset(); + chooser->power_t.readOp.dynamic += + chooser->local_result.power.readOp.dynamic * + chooser->tdp_stats.readAc.access + + chooser->local_result.power.writeOp.dynamic * + chooser->tdp_stats.writeAc.access; + chooser->power_t = + chooser->power_t + chooser->local_result.power * pppm_lkg; + chooser->rt_power.reset(); + chooser->rt_power.readOp.dynamic += + chooser->local_result.power.readOp.dynamic * + chooser->rtp_stats.readAc.access + + chooser->local_result.power.writeOp.dynamic * + chooser->rtp_stats.writeAc.access; + + RAS->tdp_stats.reset(); + RAS->tdp_stats.readAc.access = tdp_read_accesses; + RAS->tdp_stats.writeAc.access = 0; + RAS->rtp_stats.reset(); + RAS->rtp_stats.readAc.access = core_stats.function_calls; + RAS->rtp_stats.writeAc.access = core_stats.function_calls; + RAS->power_t.reset(); + 
RAS->power_t.readOp.dynamic += + RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access + + RAS->local_result.power.writeOp.dynamic * + RAS->tdp_stats.writeAc.access; + RAS->power_t = RAS->power_t + RAS->local_result.power * + core_params.pppm_lkg_multhread; + RAS->rt_power.reset(); + RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic * + RAS->rtp_stats.readAc.access + + RAS->local_result.power.writeOp.dynamic * + RAS->rtp_stats.writeAc.access; + + output_data.reset(); + if (globalBPT) { + globalBPT->output_data.peak_dynamic_power = + globalBPT->power_t.readOp.dynamic * clockRate; + globalBPT->output_data.runtime_dynamic_energy = + globalBPT->rt_power.readOp.dynamic; + output_data += globalBPT->output_data; + } + if (L1_localBPT) { + L1_localBPT->output_data.peak_dynamic_power = + L1_localBPT->power_t.readOp.dynamic * clockRate; + L1_localBPT->output_data.runtime_dynamic_energy = + L1_localBPT->rt_power.readOp.dynamic; + output_data += L1_localBPT->output_data; + } + if (L2_localBPT) { + L2_localBPT->output_data.peak_dynamic_power = + L2_localBPT->power_t.readOp.dynamic * clockRate; + L2_localBPT->output_data.runtime_dynamic_energy = + L2_localBPT->rt_power.readOp.dynamic; + output_data += L2_localBPT->output_data; + } + if (chooser) { + chooser->output_data.peak_dynamic_power = + chooser->power_t.readOp.dynamic * clockRate; + chooser->output_data.runtime_dynamic_energy = + chooser->rt_power.readOp.dynamic; + output_data += chooser->output_data; + } + if (RAS) { + RAS->output_data.peak_dynamic_power = + RAS->power_t.readOp.dynamic * clockRate; + RAS->output_data.subthreshold_leakage_power = + RAS->power_t.readOp.leakage * core_params.num_hthreads; + RAS->output_data.gate_leakage_power = + RAS->power_t.readOp.gate_leakage * core_params.num_hthreads; + RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic; + output_data += RAS->output_data; + } +} - icache.prefetchb->stats_t.readAc.access = 
icache.caches->stats_t.readAc.miss; - icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; +void BranchPredictor::displayData(uint32_t indent, int plevel) { + if (!exist) return; - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions; - IB->rtp_stats = IB->stats_t; + McPATComponent::displayData(indent, plevel); - if (coredynp.predictionW>0) - { - BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions; - BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions; - BTB->rtp_stats = BTB->stats_t; - } + globalBPT->displayData(indent + 4, plevel); + L1_localBPT->displayData(indent + 4, plevel); + L2_localBPT->displayData(indent + 4, plevel); + chooser->displayData(indent + 4, plevel); + RAS->displayData(indent + 4, plevel); +} - ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_inst->rtp_stats = ID_inst->stats_t; - ID_operand->rtp_stats = ID_operand->stats_t; - ID_misc->rtp_stats = ID_misc->stats_t; +void InstFetchU::computeEnergy() { + if (!exist) return; + if (BPT) { + BPT->computeEnergy(); } - icache.power_t.reset(); + IB->tdp_stats.reset(); + IB->tdp_stats.readAc.access = core_params.peak_issueW; + IB->tdp_stats.writeAc.access = core_params.peak_issueW; + IB->rtp_stats.reset(); + IB->rtp_stats.readAc.access = core_stats.total_instructions; + IB->rtp_stats.writeAc.access = core_stats.total_instructions; IB->power_t.reset(); -// ID_inst->power_t.reset(); -// ID_operand->power_t.reset(); -// ID_misc->power_t.reset(); - if (coredynp.predictionW>0) - { + IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic * + 
IB->tdp_stats.readAc.access + + IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access; + IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg; + IB->rt_power.reset(); + IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic * + IB->rtp_stats.readAc.access + + IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access; + + if (core_params.predictionW > 0) { + BTB->tdp_stats.reset(); + BTB->tdp_stats.readAc.access = core_params.predictionW; + BTB->tdp_stats.writeAc.access = 0; + BTB->rtp_stats.reset(); + BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses; + BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses; BTB->power_t.reset(); + BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic * + BTB->tdp_stats.readAc.access + + BTB->local_result.power.writeOp.dynamic * + BTB->tdp_stats.writeAc.access; + BTB->rt_power.reset(); + BTB->rt_power.readOp.dynamic += + BTB->local_result.power.readOp.dynamic * + BTB->rtp_stats.readAc.access + + BTB->local_result.power.writeOp.dynamic * + BTB->rtp_stats.writeAc.access; } - icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+ - //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ - icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel - icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache - icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic + - icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic + - 
icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic; - icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic + - icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic; + ID_inst->tdp_stats.reset(); + ID_inst->tdp_stats.readAc.access = core_params.decodeW; + ID_inst->power_t.reset(); + ID_inst->power_t = ID_misc->power; + ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic * + ID_inst->tdp_stats.readAc.access; + ID_inst->rtp_stats.reset(); + ID_inst->rtp_stats.readAc.access = core_stats.total_instructions; + ID_inst->rt_power.reset(); + ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic * + ID_inst->rtp_stats.readAc.access; + + ID_operand->tdp_stats.reset(); + ID_operand->tdp_stats.readAc.access = core_params.decodeW; + ID_operand->power_t.reset(); + ID_operand->power_t = ID_misc->power; + ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic * + ID_operand->tdp_stats.readAc.access; + ID_operand->rtp_stats.reset(); + ID_operand->rtp_stats.readAc.access = core_stats.total_instructions; + ID_operand->rt_power.reset(); + ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic * + ID_operand->rtp_stats.readAc.access; + + ID_misc->tdp_stats.reset(); + ID_misc->tdp_stats.readAc.access = core_params.decodeW; + ID_misc->power_t.reset(); + ID_misc->power_t = ID_misc->power; + ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic * + ID_misc->tdp_stats.readAc.access; + ID_misc->rtp_stats.reset(); + ID_misc->rtp_stats.readAc.access = core_stats.total_instructions; + ID_misc->rt_power.reset(); + ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic * + ID_misc->rtp_stats.readAc.access; + + power.reset(); + rt_power.reset(); + McPATComponent::computeEnergy(); + + output_data.reset(); + if (icache) { + output_data += icache->output_data; + } + if (IB) { + 
IB->output_data.peak_dynamic_power = + IB->power_t.readOp.dynamic * clockRate; + IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic; + output_data += IB->output_data; + } + if (BTB) { + BTB->output_data.peak_dynamic_power = + BTB->power_t.readOp.dynamic * clockRate; + BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic; + output_data += BTB->output_data; + } + if (BPT) { + output_data += BPT->output_data; + } + if (ID_inst) { + ID_inst->output_data.peak_dynamic_power = + ID_inst->power_t.readOp.dynamic * clockRate; + ID_inst->output_data.runtime_dynamic_energy = + ID_inst->rt_power.readOp.dynamic; + output_data += ID_inst->output_data; + } + if (ID_operand) { + ID_operand->output_data.peak_dynamic_power = + ID_operand->power_t.readOp.dynamic * clockRate; + ID_operand->output_data.runtime_dynamic_energy = + ID_operand->rt_power.readOp.dynamic; + output_data += ID_operand->output_data; + } + if (ID_misc) { + ID_misc->output_data.peak_dynamic_power = + ID_misc->power_t.readOp.dynamic * clockRate; + ID_misc->output_data.runtime_dynamic_energy = + ID_misc->rt_power.readOp.dynamic; + output_data += ID_misc->output_data; + } +} - IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access + - IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic; +void InstFetchU::displayData(uint32_t indent, int plevel) { + if (!exist) return; - if (coredynp.predictionW>0) - { - BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access + - BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic; + McPATComponent::displayData(indent, plevel); - BPT->computeEnergy(is_tdp); + if (core_params.predictionW > 0) { + BTB->displayData(indent + 4, plevel); + if (BPT->exist) { + BPT->displayData(indent + 4, plevel); } + } + IB->displayData(indent + 4, plevel); + ID_inst->displayData(indent + 4, plevel); + ID_operand->displayData(indent + 4, plevel); + 
ID_misc->displayData(indent + 4, plevel); +} - if (is_tdp) - { -// icache.power = icache.power_t + -// (icache.caches->local_result.power)*pppm_lkg + -// (icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power)*pppm_Isub; - icache.power = icache.power_t + - (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power)*pppm_lkg; - - IB->power = IB->power_t + IB->local_result.power*pppm_lkg; - power = power + icache.power + IB->power; - if (coredynp.predictionW>0) - { - BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg; - power = power + BTB->power + BPT->power; - } +void RENAMINGU::computeEnergy() { + if (!exist) return; + + idcl->tdp_stats.reset(); + idcl->rtp_stats.reset(); + idcl->power_t.reset(); + idcl->rt_power.reset(); + if (core_params.core_ty == OOO) { + idcl->tdp_stats.readAc.access = core_params.decodeW; + idcl->rtp_stats.readAc.access = 3 * core_params.decodeW * + core_params.decodeW * core_stats.rename_reads; + } else if (core_params.issueW > 1) { + idcl->tdp_stats.readAc.access = core_params.decodeW; + idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions; + } + idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access * + idcl->power.readOp.dynamic; + idcl->power_t.readOp.leakage = idcl->power.readOp.leakage * + core_params.num_hthreads; + idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage * + core_params.num_hthreads; + idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access * + idcl->power.readOp.dynamic; + + fdcl->tdp_stats.reset(); + fdcl->rtp_stats.reset(); + fdcl->power_t.reset(); + fdcl->rt_power.reset(); + if (core_params.core_ty == OOO) { + fdcl->tdp_stats.readAc.access = core_params.decodeW; + fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW * + core_params.fp_issueW * core_stats.fp_rename_writes; + } else if (core_params.issueW > 1) { + 
fdcl->tdp_stats.readAc.access = core_params.decodeW; + fdcl->rtp_stats.readAc.access = core_stats.fp_instructions; + } + fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access * + fdcl->power.readOp.dynamic; + fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage * + core_params.num_hthreads; + fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage * + core_params.num_hthreads; + fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access * + fdcl->power.readOp.dynamic; + + if (iRRAT) { + iRRAT->tdp_stats.reset(); + iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->rtp_stats.reset(); + iRRAT->rtp_stats.readAc.access = core_stats.rename_writes; + iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes; + iRRAT->power_t.reset(); + iRRAT->power_t.readOp.dynamic += + iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic + + iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; + iRRAT->rt_power.reset(); + iRRAT->rt_power.readOp.dynamic += + iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic + + iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; + iRRAT->power_t.readOp.leakage = + iRRAT->power.readOp.leakage * core_params.num_hthreads; + iRRAT->power_t.readOp.gate_leakage = + iRRAT->power.readOp.gate_leakage * core_params.num_hthreads; + } - ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; - ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; - ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; - - ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; - ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; - ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; - - power = power + (ID_inst->power + - ID_operand->power + - ID_misc->power); - } - else - { -// icache.rt_power = icache.power_t + -// 
(icache.caches->local_result.power)*pppm_lkg + -// (icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power)*pppm_Isub; - - icache.rt_power = icache.power_t + - (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power)*pppm_lkg; - - IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg; - rt_power = rt_power + icache.rt_power + IB->rt_power; - if (coredynp.predictionW>0) - { - BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg; - rt_power = rt_power + BTB->rt_power + BPT->rt_power; - } + if (ifreeL) { + ifreeL->tdp_stats.reset(); + ifreeL->tdp_stats.readAc.access = core_params.decodeW; + ifreeL->tdp_stats.writeAc.access = core_params.decodeW; + ifreeL->rtp_stats.reset(); + if (core_params.scheu_ty == PhysicalRegFile) { + ifreeL->rtp_stats.readAc.access = core_stats.rename_reads; + ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes; + } else if (core_params.scheu_ty == ReservationStation) { + ifreeL->rtp_stats.readAc.access = + core_stats.rename_reads + core_stats.fp_rename_reads; + ifreeL->rtp_stats.writeAc.access = + 2 * (core_stats.rename_writes + core_stats.fp_rename_writes); + } + ifreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic + + ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; + ifreeL->rt_power.reset(); + ifreeL->rt_power.readOp.dynamic += + ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic + + ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; + ifreeL->power_t.readOp.leakage = + ifreeL->power.readOp.leakage * core_params.num_hthreads; + ifreeL->power_t.readOp.gate_leakage = + ifreeL->power.readOp.gate_leakage * core_params.num_hthreads; + } - ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access; - 
ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access; - ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access; + if (fRRAT) { + fRRAT->tdp_stats.reset(); + fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->rtp_stats.reset(); + fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes; + fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; + fRRAT->power_t.reset(); + fRRAT->power_t.readOp.dynamic += + fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic + + fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; + fRRAT->rt_power.reset(); + fRRAT->rt_power.readOp.dynamic += + fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic + + fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; + fRRAT->power_t.readOp.leakage = + fRRAT->power.readOp.leakage * core_params.num_hthreads; + fRRAT->power_t.readOp.gate_leakage = + fRRAT->power.readOp.gate_leakage * core_params.num_hthreads; + } - rt_power = rt_power + (ID_inst->rt_power + - ID_operand->rt_power + - ID_misc->rt_power); + if (ffreeL) { + ffreeL->tdp_stats.reset(); + ffreeL->tdp_stats.readAc.access = core_params.decodeW; + ffreeL->tdp_stats.writeAc.access = core_params.decodeW; + ffreeL->rtp_stats.reset(); + ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads; + ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes; + ffreeL->power_t.reset(); + ffreeL->power_t.readOp.dynamic += + ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic + + ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; + ffreeL->rt_power.reset(); + ffreeL->rt_power.readOp.dynamic += + ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic + + ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; + ffreeL->power_t.readOp.leakage = + 
ffreeL->power.readOp.leakage * core_params.num_hthreads; + ffreeL->power_t.readOp.gate_leakage = + ffreeL->power.readOp.gate_leakage * core_params.num_hthreads; } -} -void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - if (is_tdp) - { - - cout << indent_str<< "Instruction Cache:" << endl; - cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (coredynp.predictionW>0) - { - cout << indent_str<< "Branch Target Buffer:" << endl; - cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (BPT->exist) - { - cout << indent_str<< "Branch Predictor:" << endl; - cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (plevel>3) - { - BPT->displayEnergy(indent+4, plevel, is_tdp); - } - } - } - cout << indent_str<< "Instruction Buffer:" << endl; - cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str<< "Instruction Decoder:" << endl; - cout << indent_str_next << "Area = " << (ID_inst->area.get_area() + - ID_operand->area.get_area() + - ID_misc->area.get_area())*coredynp.decodeW*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << (ID_inst->power.readOp.dynamic + - ID_operand->power.readOp.dynamic + - ID_misc->power.readOp.dynamic)*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? (ID_inst->power.readOp.longer_channel_leakage + - ID_operand->power.readOp.longer_channel_leakage + - ID_misc->power.readOp.longer_channel_leakage): - (ID_inst->power.readOp.leakage + - ID_operand->power.readOp.leakage + - ID_misc->power.readOp.leakage)) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << (ID_inst->power.readOp.gate_leakage + - ID_operand->power.readOp.gate_leakage + - ID_misc->power.readOp.gate_leakage) << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << (ID_inst->rt_power.readOp.dynamic + - ID_operand->rt_power.readOp.dynamic + - ID_misc->rt_power.readOp.dynamic)/executionTime << " W" << endl; - cout <<endl; - } - else - { -// cout << indent_str_next << "Instruction Cache Peak Dynamic = " << icache.rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " << icache.rt_power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Instruction Cache Gate Leakage = " << icache.rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Instruction Buffer Peak Dynamic = " << IB->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << 
indent_str_next << "Instruction Buffer Subthreshold Leakage = " << IB->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Instruction Buffer Gate Leakage = " << IB->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Branch Target Buffer Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Branch Target Buffer Subthreshold Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Branch Target Buffer Gate Leakage = " << BTB->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Branch Predictor Peak Dynamic = " << BPT->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Branch Predictor Subthreshold Leakage = " << BPT->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Branch Predictor Gate Leakage = " << BPT->rt_power.readOp.gate_leakage << " W" << endl; + if (iFRAT) { + tdp_stats.reset(); + if (core_params.rm_ty == RAMbased) { + iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports; + iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports; + } else if ((core_params.rm_ty == CAMbased)) { + iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; + } + rtp_stats.reset(); + iFRAT->rtp_stats.readAc.access = core_stats.rename_reads; + iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes; + if (core_params.scheu_ty == ReservationStation && + core_params.rm_ty == RAMbased) { + iFRAT->rtp_stats.searchAc.access = + core_stats.committed_int_instructions; + } + iFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access + * (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->tdp_stats.writeAc.access + * iFRAT->local_result.power.writeOp.dynamic + + 
iFRAT->tdp_stats.searchAc.access + * iFRAT->local_result.power.searchOp.dynamic; + iFRAT->power_t.readOp.leakage = + iFRAT->power.readOp.leakage * core_params.num_hthreads; + iFRAT->power_t.readOp.gate_leakage = + iFRAT->power.readOp.gate_leakage * core_params.num_hthreads; + iFRAT->rt_power.reset(); + iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access + * (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->rtp_stats.writeAc.access + * iFRAT->local_result.power.writeOp.dynamic + + iFRAT->rtp_stats.searchAc.access + * iFRAT->local_result.power.searchOp.dynamic; + } + + if (fFRAT) { + tdp_stats.reset(); + fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports; + if ((core_params.rm_ty == CAMbased)) { + fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports; + } else if (core_params.rm_ty == RAMbased) { + fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports; + if (core_params.scheu_ty == ReservationStation) { + fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports; + } } + rtp_stats.reset(); + fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads; + fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; + if (core_params.scheu_ty == ReservationStation && + core_params.rm_ty == RAMbased) { + fFRAT->rtp_stats.searchAc.access = + core_stats.committed_fp_instructions; + } + fFRAT->power_t.reset(); + fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access + * (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->tdp_stats.writeAc.access + * fFRAT->local_result.power.writeOp.dynamic + + fFRAT->tdp_stats.searchAc.access + * fFRAT->local_result.power.searchOp.dynamic; + fFRAT->power_t.readOp.leakage = + fFRAT->power.readOp.leakage * core_params.num_hthreads; + fFRAT->power_t.readOp.gate_leakage = + fFRAT->power.readOp.gate_leakage * core_params.num_hthreads; + fFRAT->rt_power.reset(); + fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access + 
* (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->rtp_stats.writeAc.access + * fFRAT->local_result.power.writeOp.dynamic + + fFRAT->rtp_stats.searchAc.access + * fFRAT->local_result.power.searchOp.dynamic; + } + output_data.reset(); + if (iFRAT) { + iFRAT->output_data.peak_dynamic_power = + iFRAT->power_t.readOp.dynamic * clockRate; + iFRAT->output_data.subthreshold_leakage_power = + iFRAT->power_t.readOp.leakage; + iFRAT->output_data.gate_leakage_power = + iFRAT->power_t.readOp.gate_leakage; + iFRAT->output_data.runtime_dynamic_energy = + iFRAT->rt_power.readOp.dynamic; + output_data += iFRAT->output_data; + } + if (fFRAT) { + fFRAT->output_data.peak_dynamic_power = + fFRAT->power_t.readOp.dynamic * clockRate; + fFRAT->output_data.subthreshold_leakage_power = + fFRAT->power_t.readOp.leakage; + fFRAT->output_data.gate_leakage_power = + fFRAT->power_t.readOp.gate_leakage; + fFRAT->output_data.runtime_dynamic_energy = + fFRAT->rt_power.readOp.dynamic; + output_data += fFRAT->output_data; + } + if (iRRAT) { + iRRAT->output_data.peak_dynamic_power = + iRRAT->power_t.readOp.dynamic * clockRate; + iRRAT->output_data.subthreshold_leakage_power = + iRRAT->power_t.readOp.leakage; + iRRAT->output_data.gate_leakage_power = + iRRAT->power_t.readOp.gate_leakage; + iRRAT->output_data.runtime_dynamic_energy = + iRRAT->rt_power.readOp.dynamic; + output_data += iRRAT->output_data; + } + if (fRRAT) { + fRRAT->output_data.peak_dynamic_power = + fRRAT->power_t.readOp.dynamic * clockRate; + fRRAT->output_data.subthreshold_leakage_power = + fRRAT->power_t.readOp.leakage; + fRRAT->output_data.gate_leakage_power = + fRRAT->power_t.readOp.gate_leakage; + fRRAT->output_data.runtime_dynamic_energy = + fRRAT->rt_power.readOp.dynamic; + output_data += fRRAT->output_data; + } + if (ifreeL) { + ifreeL->output_data.peak_dynamic_power = + ifreeL->power_t.readOp.dynamic * clockRate; + ifreeL->output_data.subthreshold_leakage_power = + 
ifreeL->power_t.readOp.leakage; + ifreeL->output_data.gate_leakage_power = + ifreeL->power_t.readOp.gate_leakage; + ifreeL->output_data.runtime_dynamic_energy = + ifreeL->rt_power.readOp.dynamic; + output_data += ifreeL->output_data; + } + if (ffreeL) { + ffreeL->output_data.peak_dynamic_power = + ffreeL->power_t.readOp.dynamic * clockRate; + ffreeL->output_data.subthreshold_leakage_power = + ffreeL->power_t.readOp.leakage; + ffreeL->output_data.gate_leakage_power = + ffreeL->power_t.readOp.gate_leakage; + ffreeL->output_data.runtime_dynamic_energy = + ffreeL->rt_power.readOp.dynamic; + output_data += ffreeL->output_data; + } + if (idcl) { + idcl->output_data.peak_dynamic_power = + idcl->power_t.readOp.dynamic * clockRate; + idcl->output_data.subthreshold_leakage_power = + idcl->power_t.readOp.leakage; + idcl->output_data.gate_leakage_power = + idcl->power_t.readOp.gate_leakage; + idcl->output_data.runtime_dynamic_energy = + idcl->rt_power.readOp.dynamic; + output_data += idcl->output_data; + } + if (fdcl) { + fdcl->output_data.peak_dynamic_power = + fdcl->power_t.readOp.dynamic * clockRate; + fdcl->output_data.subthreshold_leakage_power = + fdcl->power_t.readOp.leakage; + fdcl->output_data.gate_leakage_power = + fdcl->power_t.readOp.gate_leakage; + fdcl->output_data.runtime_dynamic_energy = + fdcl->rt_power.readOp.dynamic; + output_data += fdcl->output_data; + } + if (RAHT) { + output_data += RAHT->output_data; + } } -void RENAMINGU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double pppm_t[4] = {1,1,1,1}; - if (is_tdp) - {//init stats for Peak - if (coredynp.core_ty==OOO){ - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = 
fFRAT->stats_t; - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; - - ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;; - ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports; - ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports; - ffreeL->tdp_stats = ffreeL->stats_t; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->stats_t.searchAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->stats_t.searchAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - 
fFRAT->tdp_stats = fFRAT->stats_t; - } - //Unified free list for both int and fp - ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports; - ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - else - { - if (coredynp.issueW>1) - { - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - } +void RENAMINGU::displayData(uint32_t indent, int plevel) { + if (!exist) return; - } - else - {//init stats for Runtime Dynamic (RTP) - if (coredynp.core_ty==OOO){ - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - - iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = 
XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; - - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes; - ifreeL->rtp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - ffreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].fp_rename_writes; - ffreeL->rtp_stats = ffreeL->stats_t; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs. - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions; - fFRAT->rtp_stats = fFRAT->stats_t; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - //Unified free list for both int and fp since the ROB act as physcial registers - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + - XML->sys.core[ithCore].fp_rename_reads; - ifreeL->stats_t.writeAc.access = 2*(XML->sys.core[ithCore].rename_writes + - 
XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group - //are terminated early - ifreeL->rtp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads; - fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - else - { - if (coredynp.issueW>1) - { - idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions; - fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - } + McPATComponent::displayData(indent, plevel); + if (core_params.core_ty == OOO) { + iFRAT->displayData(indent + 4, plevel); + fFRAT->displayData(indent + 4, plevel); + ifreeL->displayData(indent + 4, plevel); + + if (core_params.scheu_ty == PhysicalRegFile) { + iRRAT->displayData(indent + 4, plevel); + fRRAT->displayData(indent + 4, plevel); + ffreeL->displayData(indent + 4, plevel); } - /* Compute engine */ - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) - 
+iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - ifreeL->power_t.reset(); - ffreeL->power_t.reset(); - - iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic - +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic - +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic); - ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic - +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); - ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic - +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic); + } + idcl->displayData(indent + 4, plevel); + fdcl->displayData(indent + 4, plevel); +} - } - else if (coredynp.scheu_ty==ReservationStation) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic - +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic - +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic); - } - else if 
((coredynp.rm_ty ==CAMbased)) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - ifreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic - +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); - } +void SchedulerU::computeEnergy() { + if (!exist) return; - } - else - { - if (coredynp.issueW>1) - { - idcl->power_t.reset(); - fdcl->power_t.reset(); - set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); - idcl->power_t = idcl->power * pppm_t; - set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); - fdcl->power_t = fdcl->power * pppm_t; - } + double ROB_duty_cycle; + ROB_duty_cycle = 1; - } + if (int_instruction_selection) { + int_instruction_selection->computeEnergy(); + } - //assign value to tpd and rtp - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - 
ffreeL->power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread; - power = power + (iFRAT->power + fFRAT->power) - + (iRRAT->power + fRRAT->power) - + (ifreeL->power + ffreeL->power); - } - else if (coredynp.scheu_ty==ReservationStation) - { - iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - power = power + (iFRAT->power + fFRAT->power) - + ifreeL->power; - } - } - else - { - power = power + idcl->power_t + fdcl->power_t; - } + if (fp_instruction_selection) { + fp_instruction_selection->computeEnergy(); + } + if (int_inst_window) { + int_inst_window->tdp_stats.reset(); + int_inst_window->rtp_stats.reset(); + int_inst_window->power_t.reset(); + int_inst_window->rt_power.reset(); + if (core_params.core_ty == OOO) { + int_inst_window->tdp_stats.readAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.writeAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.searchAc.access = + core_params.issueW * core_params.num_pipelines; + + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->tdp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->tdp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->tdp_stats.writeAc.access; + + int_inst_window->rtp_stats.readAc.access = + core_stats.inst_window_reads; + int_inst_window->rtp_stats.writeAc.access = + core_stats.inst_window_writes; + int_inst_window->rtp_stats.searchAc.access = + core_stats.inst_window_wakeup_accesses; + + int_inst_window->rt_power.readOp.dynamic += + 
int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->rtp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->rtp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->rtp_stats.writeAc.access; + } else if (core_params.multithreaded) { + int_inst_window->tdp_stats.readAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.writeAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.searchAc.access = + core_params.issueW * core_params.num_pipelines; + + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->tdp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->tdp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->tdp_stats.writeAc.access; + + int_inst_window->rtp_stats.readAc.access = + core_stats.int_instructions + core_stats.fp_instructions; + int_inst_window->rtp_stats.writeAc.access = + core_stats.int_instructions + core_stats.fp_instructions; + int_inst_window->rtp_stats.searchAc.access = + 2 * (core_stats.int_instructions + core_stats.fp_instructions); + + int_inst_window->rt_power.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->rtp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->rtp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->rtp_stats.writeAc.access; } - else - { - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - iRRAT->rt_power 
= iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread; - rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) - + (iRRAT->rt_power + fRRAT->rt_power) - + (ifreeL->rt_power + ffreeL->rt_power); - } - else if (coredynp.scheu_ty==ReservationStation) - { - iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) - + ifreeL->rt_power; - } - } - else - { - rt_power = rt_power + idcl->power_t + fdcl->power_t; - } + } - } -} + if (fp_inst_window) { + fp_inst_window->tdp_stats.reset(); + fp_inst_window->tdp_stats.readAc.access = + fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines; + fp_inst_window->tdp_stats.writeAc.access = + fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines; + fp_inst_window->tdp_stats.searchAc.access = + fp_inst_window->l_ip.num_search_ports * + core_params.num_fp_pipelines; + + fp_inst_window->rtp_stats.reset(); + fp_inst_window->rtp_stats.readAc.access = + core_stats.fp_inst_window_reads; + fp_inst_window->rtp_stats.writeAc.access = + core_stats.fp_inst_window_writes; + fp_inst_window->rtp_stats.searchAc.access = + core_stats.fp_inst_window_wakeup_accesses; + + fp_inst_window->power_t.reset(); + fp_inst_window->power_t.readOp.dynamic += + fp_inst_window->power.readOp.dynamic * + fp_inst_window->tdp_stats.readAc.access + + fp_inst_window->power.searchOp.dynamic * + 
fp_inst_window->tdp_stats.searchAc.access + + fp_inst_window->power.writeOp.dynamic * + fp_inst_window->tdp_stats.writeAc.access; + + fp_inst_window->rt_power.reset(); + fp_inst_window->rt_power.readOp.dynamic += + fp_inst_window->power.readOp.dynamic * + fp_inst_window->rtp_stats.readAc.access + + fp_inst_window->power.searchOp.dynamic * + fp_inst_window->rtp_stats.searchAc.access + + fp_inst_window->power.writeOp.dynamic * + fp_inst_window->rtp_stats.writeAc.access; + } -void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - if (is_tdp) - { - - if (coredynp.core_ty==OOO) - { - cout << indent_str<< "Int Front End RAT:" << endl; - cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str<< "FP Front End RAT:" << endl; - cout << indent_str_next << "Area = " << fFRAT->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << fFRAT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
fFRAT->power.readOp.longer_channel_leakage:fFRAT->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << fFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str<<"Free List:" << endl; - cout << indent_str_next << "Area = " << ifreeL->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ifreeL->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? ifreeL->power.readOp.longer_channel_leakage:ifreeL->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ifreeL->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - - if (coredynp.scheu_ty==PhysicalRegFile) - { - cout << indent_str<< "Int Retire RAT: " << endl; - cout << indent_str_next << "Area = " << iRRAT->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << iRRAT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? iRRAT->power.readOp.longer_channel_leakage:iRRAT->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << iRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str<< "FP Retire RAT:" << endl; - cout << indent_str_next << "Area = " << fRRAT->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << fRRAT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
fRRAT->power.readOp.longer_channel_leakage:fRRAT->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << fRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str<< "FP Free List:" << endl; - cout << indent_str_next << "Area = " << ffreeL->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ffreeL->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? ffreeL->power.readOp.longer_channel_leakage:ffreeL->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ffreeL->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - } - else - { - cout << indent_str<< "Int DCL:" << endl; - cout << indent_str_next << "Peak Dynamic = " << idcl->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? idcl->power.readOp.longer_channel_leakage:idcl->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << idcl->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout << indent_str<<"FP DCL:" << endl; - cout << indent_str_next << "Peak Dynamic = " << fdcl->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
fdcl->power.readOp.longer_channel_leakage:fdcl->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << fdcl->rt_power.readOp.dynamic/executionTime << " W" << endl; - } - } - else - { - if (coredynp.core_ty==OOO) - { - cout << indent_str_next << "Int Front End RAT Peak Dynamic = " << iFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " << iFRAT->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Int Front End RAT Gate Leakage = " << iFRAT->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "FP Front End RAT Peak Dynamic = " << fFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " << fFRAT->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Front End RAT Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Free List Peak Dynamic = " << ifreeL->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Free List Subthreshold Leakage = " << ifreeL->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; - if (coredynp.scheu_ty==PhysicalRegFile) - { - cout << indent_str_next << "Int Retire RAT Peak Dynamic = " << iRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " << iRRAT->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Int Retire RAT Gate Leakage = " << iRRAT->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "FP Retire RAT Peak Dynamic = " << fRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << 
"FP Retire RAT Subthreshold Leakage = " << fRRAT->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Retire RAT Gate Leakage = " << fRRAT->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "FP Free List Peak Dynamic = " << ffreeL->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "FP Free List Subthreshold Leakage = " << ffreeL->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; - } - } - else - { - cout << indent_str_next << "Int DCL Peak Dynamic = " << idcl->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Int DCL Subthreshold Leakage = " << idcl->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "FP DCL Peak Dynamic = " << fdcl->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "FP DCL Subthreshold Leakage = " << fdcl->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage << " W" << endl; - } - } + if (ROB) { + ROB->tdp_stats.reset(); + ROB->tdp_stats.readAc.access = core_params.commitW * + core_params.num_pipelines * ROB_duty_cycle; + ROB->tdp_stats.writeAc.access = core_params.issueW * + core_params.num_pipelines * ROB_duty_cycle; + ROB->rtp_stats.reset(); + ROB->rtp_stats.readAc.access = core_stats.ROB_reads; + ROB->rtp_stats.writeAc.access = core_stats.ROB_writes; + ROB->power_t.reset(); + ROB->power_t.readOp.dynamic += + ROB->local_result.power.readOp.dynamic * + ROB->tdp_stats.readAc.access + + ROB->local_result.power.writeOp.dynamic * + ROB->tdp_stats.writeAc.access; + ROB->rt_power.reset(); + ROB->rt_power.readOp.dynamic += + ROB->local_result.power.readOp.dynamic * + ROB->rtp_stats.readAc.access + + 
ROB->local_result.power.writeOp.dynamic * + ROB->rtp_stats.writeAc.access; + } + + output_data.reset(); + if (int_inst_window) { + int_inst_window->output_data.subthreshold_leakage_power = + int_inst_window->power_t.readOp.leakage; + int_inst_window->output_data.gate_leakage_power = + int_inst_window->power_t.readOp.gate_leakage; + int_inst_window->output_data.peak_dynamic_power = + int_inst_window->power_t.readOp.dynamic * clockRate; + int_inst_window->output_data.runtime_dynamic_energy = + int_inst_window->rt_power.readOp.dynamic; + output_data += int_inst_window->output_data; + } + if (fp_inst_window) { + fp_inst_window->output_data.subthreshold_leakage_power = + fp_inst_window->power_t.readOp.leakage; + fp_inst_window->output_data.gate_leakage_power = + fp_inst_window->power_t.readOp.gate_leakage; + fp_inst_window->output_data.peak_dynamic_power = + fp_inst_window->power_t.readOp.dynamic * clockRate; + fp_inst_window->output_data.runtime_dynamic_energy = + fp_inst_window->rt_power.readOp.dynamic; + output_data += fp_inst_window->output_data; + } + if (ROB) { + ROB->output_data.peak_dynamic_power = + ROB->power_t.readOp.dynamic * clockRate; + ROB->output_data.runtime_dynamic_energy = + ROB->rt_power.readOp.dynamic; + output_data += ROB->output_data; + } + // Integer and FP instruction selection logic is not included in the + // roll-up due to the uninitialized area + /* + if (int_instruction_selection) { + output_data += int_instruction_selection->output_data; + } + if (fp_instruction_selection) { + output_data += fp_instruction_selection->output_data; + } + */ } +void SchedulerU::displayData(uint32_t indent, int plevel) { + if (!exist) return; -void SchedulerU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double ROB_duty_cycle; -// ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 -// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? 
(coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 -// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1; - ROB_duty_cycle = 1; - //init stats - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines; - fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines; - fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines; - fp_inst_window->tdp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle; - ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle; - ROB->tdp_stats = ROB->stats_t; - - /* - * When inst commits, ROB must be read. - * Because for Physcial register based cores, physical register tag in ROB - * need to be read out and write into RRAT/CAM based RAT. - * For RS based cores, register content that stored in ROB must be - * read out and stored in architectural registers. - * - * if no-register is involved, the ROB read out operation when instruction commits can be ignored. - * assuming 20% insts. belong this type. 
- * TODO: ROB duty_cycle need to be revisited - */ - } + McPATComponent::displayData(indent, plevel); - } - else if (coredynp.multithreaded) - { - int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - } + if (core_params.core_ty == OOO) { + int_inst_window->displayData(indent + 4, plevel); + fp_inst_window->displayData(indent + 4, plevel); + if (core_params.ROB_size > 0) { + ROB->displayData(indent + 4, plevel); + } + } else if (core_params.multithreaded) { + int_inst_window->displayData(indent + 4, plevel); + } - } - else - {//rtp - if (coredynp.core_ty==OOO) - { - int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads; - int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes; - int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses; - int_inst_window->rtp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads; - fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes; - fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; - fp_inst_window->rtp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size >0) - { - - ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; - ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; - /* ROB need to be updated in RS based OOO when new values are produced, - * this update may happen before the commit stage when ROB entry is released - * 1. ROB write at instruction inserted in - * 2. 
ROB write as results produced (for RS based OOO only) - * 3. ROB read as instruction committed. For RS based OOO, data values are read out and sent to ARF - * For Physical reg based OOO, no data stored in ROB, but register tags need to be - * read out and used to set the RRAT and to recycle the register tag to free list buffer - */ - ROB->rtp_stats = ROB->stats_t; - } + // Integer and FP instruction selection logic is not included in the + // roll-up due to the uninitialized area + /* + if (int_instruction_selection) { + int_instruction_selection->displayData(indent + 4, plevel); + } + if (fp_instruction_selection) { + fp_instruction_selection->displayData(indent + 4, plevel); + } + */ +} - } - else if (coredynp.multithreaded) - { - int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions); - int_inst_window->rtp_stats = int_inst_window->stats_t; - } +void LoadStoreU::computeEnergy() { + if (!exist) return; + + LSQ->tdp_stats.reset(); + LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LSQ->rtp_stats.reset(); + // Flush overhead conidered + LSQ->rtp_stats.readAc.access = (core_stats.load_instructions + + core_stats.store_instructions) * 2; + LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions + + core_stats.store_instructions) * 2; + LSQ->power_t.reset(); + //every memory access invloves at least two operations on LSQ + LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access * + (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->tdp_stats.writeAc.access * 
LSQ->local_result.power.writeOp.dynamic; + LSQ->rt_power.reset(); + //every memory access invloves at least two operations on LSQ + LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access * + (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic; + + if (LoadQ) { + LoadQ->tdp_stats.reset(); + LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LoadQ->rtp_stats.reset(); + LoadQ->rtp_stats.readAc.access = core_stats.load_instructions + + core_stats.store_instructions; + LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions + + core_stats.store_instructions; + LoadQ->power_t.reset(); + //every memory access invloves at least two operations on LoadQ + LoadQ->power_t.readOp.dynamic += + LoadQ->tdp_stats.readAc.access * + (LoadQ->local_result.power.searchOp.dynamic + + LoadQ->local_result.power.readOp.dynamic) + + LoadQ->tdp_stats.writeAc.access * + LoadQ->local_result.power.writeOp.dynamic; + LoadQ->rt_power.reset(); + //every memory access invloves at least two operations on LoadQ + LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access * + (LoadQ->local_result.power.searchOp.dynamic + + LoadQ->local_result.power.readOp.dynamic) + + LoadQ->rtp_stats.writeAc.access * + LoadQ->local_result.power.writeOp.dynamic; } - //computation engine - if (coredynp.core_ty==OOO) - { - int_inst_window->power_t.reset(); - fp_inst_window->power_t.reset(); - - /* each instruction needs to write to scheduler, read out when all resources and source operands are ready - * two search ops with one for each source operand - * - */ - int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access - + int_inst_window->local_result.power.searchOp.dynamic * 
int_inst_window->stats_t.searchAc.access - + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access - + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic; - - fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access - + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access - + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access - + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; - - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->power_t.reset(); - ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access + - ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic; - } + McPATComponent::computeEnergy(); + + output_data.reset(); + if (dcache) { + output_data += dcache->output_data; + } + if (LSQ) { + LSQ->output_data.peak_dynamic_power = + LSQ->power_t.readOp.dynamic * clockRate; + LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic; + output_data += LSQ->output_data; + } + if (LoadQ) { + LoadQ->output_data.peak_dynamic_power = + LoadQ->power_t.readOp.dynamic * clockRate; + LoadQ->output_data.runtime_dynamic_energy = + LoadQ->rt_power.readOp.dynamic; + output_data += LoadQ->output_data; + } +} +void LoadStoreU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + McPATComponent::displayData(indent, plevel); + if (LoadQ) { + LoadQ->displayData(indent + 4, plevel); + } + LSQ->displayData(indent + 4, plevel); - } - else if (coredynp.multithreaded) - { - int_inst_window->power_t.reset(); - int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access - + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access - 
+ int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access - + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; - } +} - //assign values - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - power = power + int_inst_window->power + fp_inst_window->power; - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg; - power = power + ROB->power; - } +void MemManU::computeEnergy() { + if (!exist) return; - } - else if (coredynp.multithreaded) - { - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - power = power + int_inst_window->power; - } + itlb->tdp_stats.reset(); + itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports; + itlb->tdp_stats.readAc.miss = 0; + itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access - + itlb->tdp_stats.readAc.miss; + itlb->rtp_stats.reset(); + itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses; + itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses; - } - else - {//rtp - if (coredynp.core_ty==OOO) - { - int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power; - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg; - rt_power = rt_power + 
ROB->rt_power; - } + itlb->power_t.reset(); + //FA spent most power in tag, so use total access not hits + itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access * + itlb->local_result.power.searchOp.dynamic + + itlb->tdp_stats.readAc.miss * + itlb->local_result.power.writeOp.dynamic; + itlb->rt_power.reset(); + //FA spent most power in tag, so use total access not hits + itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access * + itlb->local_result.power.searchOp.dynamic + + itlb->rtp_stats.writeAc.access * + itlb->local_result.power.writeOp.dynamic; + + dtlb->tdp_stats.reset(); + dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + dtlb->tdp_stats.readAc.miss = 0; + dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access - + dtlb->tdp_stats.readAc.miss; + dtlb->rtp_stats.reset(); + dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses + + mem_man_stats.dtlb_write_misses; + dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses + + mem_man_stats.dtlb_read_misses; - } - else if (coredynp.multithreaded) - { - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power; - } + dtlb->power_t.reset(); + //FA spent most power in tag, so use total access not hits + dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access * + dtlb->local_result.power.searchOp.dynamic + + dtlb->tdp_stats.readAc.miss * + dtlb->local_result.power.writeOp.dynamic; + dtlb->rt_power.reset(); + //FA spent most power in tag, so use total access not hits + dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access * + dtlb->local_result.power.searchOp.dynamic + + dtlb->rtp_stats.writeAc.access * + dtlb->local_result.power.writeOp.dynamic; + + output_data.reset(); + if (itlb) { + itlb->output_data.peak_dynamic_power = 
itlb->power_t.readOp.dynamic * + clockRate; + itlb->output_data.runtime_dynamic_energy = + itlb->rt_power.readOp.dynamic; + output_data += itlb->output_data; + } + if (dtlb) { + dtlb->output_data.peak_dynamic_power = + dtlb->power_t.readOp.dynamic * clockRate; + dtlb->output_data.runtime_dynamic_energy = + dtlb->rt_power.readOp.dynamic; + output_data += dtlb->output_data; } -// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); -// cout<<"Scheduler power="<<power.readOp.dynamic<<"leakage="<<power.readOp.leakage<<endl; -// cout<<"IW="<<int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.readAc.access + -// + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access<<"leakage="<<int_inst_window->local_result.power.readOp.leakage<<endl; -// cout<<"selection"<<instruction_selection->power.readOp.dynamic<<"leakage"<<instruction_selection->power.readOp.leakage<<endl; } -void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - cout << indent_str << "Instruction Window:" << endl; - cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str << "FP Instruction Window:" << endl; - cout << indent_str_next << "Area = " << fp_inst_window->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << fp_inst_window->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? fp_inst_window->power.readOp.longer_channel_leakage:fp_inst_window->power.readOp.leakage ) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << fp_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (XML->sys.core[ithCore].ROB_size >0) - { - cout << indent_str<<"ROB:" << endl; - cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - } - else if (coredynp.multithreaded) - { - cout << indent_str << "Instruction Window:" << endl; - cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - } - else - { - if (coredynp.core_ty==OOO) - { - cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "FP Instruction Window Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "FP Instruction Window Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Instruction Window Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - if (XML->sys.core[ithCore].ROB_size >0) - { - cout << indent_str_next << 
"ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl; - } - } - else if (coredynp.multithreaded) - { - cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - } - } +void MemManU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + + McPATComponent::displayData(indent, plevel); + itlb->displayData(indent + 4, plevel); + dtlb->displayData(indent + 4, plevel); } -void LoadStoreU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; - dcache.caches->stats_t.readAc.miss = 0; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; - dcache.caches->stats_t.writeAc.miss = 0; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; - dcache.caches->tdp_stats = dcache.caches->stats_t; - - dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports; - dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports; - dcache.missb->tdp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports; - dcache.ifb->stats_t.writeAc.access = 
dcache.ifb->l_ip.num_search_ports; - dcache.ifb->tdp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports; - dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports; - dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; - if (cache_p==Write_back) - { - dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->tdp_stats = dcache.wbb->stats_t; - } +void RegFU::computeEnergy() { + /* + * Architecture RF and physical RF cannot be present at the same time. + * Therefore, the RF stats can only refer to either ARF or PRF; + * And the same stats can be used for both. + */ + if (!exist) return; + + IRF->tdp_stats.reset(); + IRF->tdp_stats.readAc.access = + core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS * + (core_stats.ALU_duty_cycle * 1.1 + + (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) * + core_params.num_pipelines; + IRF->tdp_stats.writeAc.access = + core_params.issueW * + (core_stats.ALU_duty_cycle * 1.1 + + (core_params.num_muls > 0 ? 
core_stats.MUL_duty_cycle : 0)) * + core_params.num_pipelines; + IRF->rtp_stats.reset(); + IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads; + IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes; + if (core_params.regWindowing) { + IRF->rtp_stats.readAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + IRF->rtp_stats.writeAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + } + IRF->power_t.reset(); + IRF->power_t.readOp.dynamic += IRF->tdp_stats.readAc.access * + IRF->local_result.power.readOp.dynamic + + IRF->tdp_stats.writeAc.access * + IRF->local_result.power.writeOp.dynamic; + IRF->rt_power.reset(); + IRF->rt_power.readOp.dynamic += + IRF->rtp_stats.readAc.access * IRF->local_result.power.readOp.dynamic + + IRF->rtp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic; + + FRF->tdp_stats.reset(); + FRF->tdp_stats.readAc.access = + FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 * + core_params.num_fp_pipelines; + FRF->tdp_stats.writeAc.access = + FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 * + core_params.num_fp_pipelines; + FRF->rtp_stats.reset(); + FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads; + FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes; + if (core_params.regWindowing) { + FRF->rtp_stats.readAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + FRF->rtp_stats.writeAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + } + FRF->power_t.reset(); + FRF->power_t.readOp.dynamic += + FRF->tdp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + + FRF->tdp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; + FRF->rt_power.reset(); + FRF->rt_power.readOp.dynamic += + FRF->rtp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + + FRF->rtp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; + + if (core_params.regWindowing) { + RFWIN->tdp_stats.reset(); + 
RFWIN->tdp_stats.readAc.access = 0; + RFWIN->tdp_stats.writeAc.access = 0; + RFWIN->rtp_stats.reset(); + RFWIN->rtp_stats.readAc.access = + core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; + RFWIN->rtp_stats.writeAc.access = + core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; + RFWIN->power_t.reset(); + RFWIN->power_t.readOp.dynamic += + RFWIN->tdp_stats.readAc.access * + RFWIN->local_result.power.readOp.dynamic + + RFWIN->tdp_stats.writeAc.access * + RFWIN->local_result.power.writeOp.dynamic; + RFWIN->rt_power.reset(); + RFWIN->rt_power.readOp.dynamic += + RFWIN->rtp_stats.readAc.access * + RFWIN->local_result.power.readOp.dynamic + + RFWIN->rtp_stats.writeAc.access * + RFWIN->local_result.power.writeOp.dynamic; + } - LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - LSQ->tdp_stats = LSQ->stats_t; - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - LoadQ->tdp_stats = LoadQ->stats_t; - } - } - else - { - //init stats for Runtime Dynamic (RTP) - dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses; - dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses; - dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; - dcache.caches->rtp_stats = dcache.caches->stats_t; - - if (cache_p==Write_back) - { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - 
dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - - dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->rtp_stats = dcache.wbb->stats_t; - } - else - { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - } + output_data.reset(); + if (IRF) { + IRF->output_data.peak_dynamic_power = + IRF->power_t.readOp.dynamic * clockRate; + IRF->output_data.subthreshold_leakage_power *= + core_params.num_hthreads; + IRF->output_data.gate_leakage_power *= core_params.num_hthreads; + IRF->output_data.runtime_dynamic_energy = IRF->rt_power.readOp.dynamic; + output_data += IRF->output_data; + } + if (FRF) { + FRF->output_data.peak_dynamic_power = + FRF->power_t.readOp.dynamic * clockRate; + FRF->output_data.subthreshold_leakage_power *= + core_params.num_hthreads; + FRF->output_data.gate_leakage_power *= core_params.num_hthreads; + FRF->output_data.runtime_dynamic_energy = 
FRF->rt_power.readOp.dynamic; + output_data += FRF->output_data; + } + if (RFWIN) { + RFWIN->output_data.peak_dynamic_power = + RFWIN->power_t.readOp.dynamic * clockRate; + RFWIN->output_data.runtime_dynamic_energy = + RFWIN->rt_power.readOp.dynamic; + output_data += RFWIN->output_data; + } +} - LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered - LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2; - LSQ->rtp_stats = LSQ->stats_t; +void RegFU::displayData(uint32_t indent, int plevel) { + if (!exist) return; - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->rtp_stats = LoadQ->stats_t; - } + McPATComponent::displayData(indent, plevel); - } + IRF->displayData(indent + 4, plevel); + FRF->displayData(indent + 4, plevel); + if (core_params.regWindowing) { + RFWIN->displayData(indent + 4, plevel); + } +} - dcache.power_t.reset(); - LSQ->power_t.reset(); - dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+ - dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+ - dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+ - dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic); +void EXECU::computeEnergy() { + if (!exist) return; - if (cache_p==Write_back) - {//write miss will generate a write later - dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic; - } + 
int_bypass->set_params_stats(core_params.execu_int_bypass_ports, + core_stats.ALU_cdb_duty_cycle, + core_stats.cdb_alu_accesses); - dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic + - dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic + - dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic; - dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic + - dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic; - if (cache_p==Write_back) - { - dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic - + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic; - } + intTagBypass->set_params_stats(core_params.execu_int_bypass_ports, + core_stats.ALU_cdb_duty_cycle, + core_stats.cdb_alu_accesses); - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->power_t.reset(); - LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+ - LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ - - LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) - + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ - - } - else - { - LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + 
LSQ->local_result.power.readOp.dynamic) - + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ - - } - - if (is_tdp) - { -// dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg + -// (dcache.missb->local_result.power + -// dcache.ifb->local_result.power + -// dcache.prefetchb->local_result.power + -// dcache.wbb->local_result.power)*pppm_Isub; - dcache.power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power) *pppm_lkg; - if (cache_p==Write_back) - { - dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg; - } + if (core_params.num_muls > 0) { + int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports, + core_stats.MUL_cdb_duty_cycle, + core_stats.cdb_mul_accesses); - LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; - power = power + dcache.power + LSQ->power; + intTag_mul_Bypass->set_params_stats(core_params.execu_mul_bypass_ports, + core_stats.MUL_cdb_duty_cycle, + core_stats.cdb_mul_accesses); + } - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; - power = power + LoadQ->power; - } + if (core_params.num_fpus > 0) { + fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports, + core_stats.FPU_cdb_duty_cycle, + core_stats.cdb_fpu_accesses); + + fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports, + core_stats.FPU_cdb_duty_cycle, + core_stats.cdb_fpu_accesses); } - else - { -// dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + -// dcache.missb->local_result.power + -// dcache.ifb->local_result.power + -// dcache.prefetchb->local_result.power + -// dcache.wbb->local_result.power)*pppm_lkg; - dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + - 
dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power )*pppm_lkg; - - if (cache_p==Write_back) - { - dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg; - } - LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; - rt_power = rt_power + dcache.rt_power + LSQ->rt_power; + McPATComponent::computeEnergy(); - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; - rt_power = rt_power + LoadQ->rt_power; - } + if (rfu) { + rfu->computeEnergy(); + output_data += rfu->output_data; + } + if (scheu) { + scheu->computeEnergy(); + output_data += scheu->output_data; + } + if (fp_u) { + fp_u->computeEnergy(); + output_data += fp_u->output_data; + } + if (exeu) { + exeu->computeEnergy(); + output_data += exeu->output_data; + } + if (mul) { + mul->computeEnergy(); + output_data += mul->output_data; } } +void EXECU::displayData(uint32_t indent, int plevel) { + if (!exist) return; -void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - if (is_tdp) - { - cout << indent_str << "Data Cache:" << endl; - cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (coredynp.core_ty==Inorder) - { - cout << indent_str << "Load/Store Queue:" << endl; - cout << indent_str_next << "Area = " << LSQ->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - else - - { - if (XML->sys.core[ithCore].load_buffer_size >0) - { - cout << indent_str << "LoadQ:" << endl; - cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - cout << indent_str<< "StoreQ:" << endl; - cout << indent_str_next << "Area = " << LSQ->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime<< " W" << endl; - cout <<endl; - } - } - else - { - cout << indent_str_next << "Data Cache Peak Dynamic = " << dcache.rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Data Cache Subthreshold Leakage = " << dcache.rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Data Cache Gate Leakage = " << dcache.rt_power.readOp.gate_leakage << " W" << endl; - if (coredynp.core_ty==Inorder) - { - cout << indent_str_next << "Load/Store Queue Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Load/Store Queue Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl; - } - else - { - cout << indent_str_next << "LoadQ Peak Dynamic = " << LoadQ->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "LoadQ Subthreshold Leakage = " << LoadQ->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "StoreQ Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl; - } - } + McPATComponent::displayData(indent, plevel); + rfu->displayData(indent + 4, plevel); + if (scheu) { + scheu->displayData(indent + 4, plevel); + } + exeu->displayData(indent + 4, plevel); + if (core_params.num_fpus > 0) { + 
fp_u->displayData(indent + 4, plevel); + } + if (core_params.num_muls > 0) { + mul->displayData(indent + 4, plevel); + } } -void MemManU::computeEnergy(bool is_tdp) -{ - - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports; - itlb->stats_t.readAc.miss = 0; - itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->tdp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dtlb->stats_t.readAc.miss = 0; - dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->tdp_stats = dtlb->stats_t; - } - else - { - //init stats for Runtime Dynamic (RTP) - itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; - itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; - itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->rtp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; - dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; - dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->rtp_stats = dtlb->stats_t; +void Core::computeEnergy() { + ifu->computeEnergy(); + lsu->computeEnergy(); + mmu->computeEnergy(); + exu->computeEnergy(); + if (core_params.core_ty == OOO) { + rnu->computeEnergy(); } - itlb->power_t.reset(); - dtlb->power_t.reset(); - itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits - +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic; - dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits - +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic; - - if 
(is_tdp) - { - itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg; - dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; - power = power + itlb->power + dtlb->power; - } - else - { - itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg; - dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; - rt_power = rt_power + itlb->rt_power + dtlb->rt_power; - } + output_data.reset(); + if (ifu) { + output_data += ifu->output_data; + } + if (lsu) { + output_data += lsu->output_data; + } + if (mmu) { + output_data += mmu->output_data; + } + if (exu) { + output_data += exu->output_data; + } + if (rnu) { + output_data += rnu->output_data; + } + if (corepipe) { + output_data += corepipe->output_data; + } + if (undiffCore) { + output_data += undiffCore->output_data; + } + if (l2cache) { + output_data += l2cache->output_data; + } } -void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - - - if (is_tdp) - { - cout << indent_str << "Itlb:" << endl; - cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str<< "Dtlb:" << endl; - cout << indent_str_next << "Area = " << dtlb->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << dtlb->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? dtlb->power.readOp.longer_channel_leakage:dtlb->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << dtlb->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - else - { - cout << indent_str_next << "Itlb Peak Dynamic = " << itlb->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Dtlb Peak Dynamic = " << dtlb->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage << " W" << endl; - } - -} +InstFetchU ::~InstFetchU() { -void RegFU::computeEnergy(bool is_tdp) -{ -/* - * Architecture RF and physical RF cannot be present at the same time. - * Therefore, the RF stats can only refer to either ARF or PRF; - * And the same stats can be used for both. 
- */ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - IRF->stats_t.readAc.access = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+ - (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; - IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+ - (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; - //Rule of Thumb: about 10% RF related instructions do not need to access ALUs - IRF->tdp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; - FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; - FRF->tdp_stats = FRF->stats_t; - if (coredynp.regWindowing) - { - RFWIN->stats_t.readAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->tdp_stats = RFWIN->stats_t; - } - } - else - { - //init stats for Runtime Dynamic (RTP) - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; - FRF->rtp_stats = FRF->stats_t; - if (coredynp.regWindowing) - { - RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls*16; - RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16; - RFWIN->rtp_stats = RFWIN->stats_t; - - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + - XML->sys.core[ithCore].function_calls*16; - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + - XML->sys.core[ithCore].function_calls*16; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + - 
XML->sys.core[ithCore].function_calls*16;; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+ - XML->sys.core[ithCore].function_calls*16;; - FRF->rtp_stats = FRF->stats_t; - } + if (!exist) return; + if (IB) { + delete IB; + IB = NULL; } - IRF->power_t.reset(); - FRF->power_t.reset(); - IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic - +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic); - FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic - +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic); - if (coredynp.regWindowing) - { - RFWIN->power_t.reset(); - RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic + - RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic); - } - - if (is_tdp) - { - IRF->power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread; - FRF->power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread; - power = power + (IRF->power + FRF->power); - if (coredynp.regWindowing) - { - RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; - power = power + RFWIN->power; - } + if (ID_inst) { + delete ID_inst; + ID_inst = NULL; + } + if (ID_operand) { + delete ID_operand; + ID_operand = NULL; + } + if (ID_misc) { + delete ID_misc; + ID_misc = NULL; + } + if (core_params.predictionW > 0) { + if (BTB) { + delete BTB; + BTB = NULL; } - else - { - IRF->rt_power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread; - FRF->rt_power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread; - rt_power = rt_power + (IRF->power_t + FRF->power_t); - if (coredynp.regWindowing) - { - RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; - rt_power = rt_power + RFWIN->rt_power; - } + if (BPT) { + delete BPT; + BPT = NULL; } + } + if (icache) { + 
delete icache; + } } +BranchPredictor ::~BranchPredictor() { -void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { cout << indent_str << "Integer RF:" << endl; - cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - cout << indent_str<< "Floating Point RF:" << endl; - cout << indent_str_next << "Area = " << FRF->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << FRF->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? FRF->power.readOp.longer_channel_leakage:FRF->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (coredynp.regWindowing) - { - cout << indent_str << "Register Windows:" << endl; - cout << indent_str_next << "Area = " << RFWIN->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << RFWIN->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
RFWIN->power.readOp.longer_channel_leakage:RFWIN->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << RFWIN->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - } - else - { - cout << indent_str_next << "Integer RF Peak Dynamic = " << IRF->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Integer RF Subthreshold Leakage = " << IRF->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Floating Point RF Peak Dynamic = " << FRF->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " << FRF->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Floating Point RF Gate Leakage = " << FRF->rt_power.readOp.gate_leakage << " W" << endl; - if (coredynp.regWindowing) - { - cout << indent_str_next << "Register Windows Peak Dynamic = " << RFWIN->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Register Windows Subthreshold Leakage = " << RFWIN->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Register Windows Gate Leakage = " << RFWIN->rt_power.readOp.gate_leakage << " W" << endl; - } - } + if (!exist) return; + if (globalBPT) { + delete globalBPT; + globalBPT = NULL; + } + if (localBPT) { + delete localBPT; + localBPT = NULL; + } + if (L1_localBPT) { + delete L1_localBPT; + L1_localBPT = NULL; + } + if (L2_localBPT) { + delete L2_localBPT; + L2_localBPT = NULL; + } + if (chooser) { + delete chooser; + chooser = NULL; + } + if (RAS) { + delete RAS; + RAS = NULL; + } } +RENAMINGU ::~RENAMINGU() { -void EXECU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double pppm_t[4] = {1,1,1,1}; -// rfu->power.reset(); -// 
rfu->rt_power.reset(); -// scheu->power.reset(); -// scheu->rt_power.reset(); -// exeu->power.reset(); -// exeu->rt_power.reset(); - - rfu->computeEnergy(is_tdp); - scheu->computeEnergy(is_tdp); - exeu->computeEnergy(is_tdp); - if (coredynp.num_fpus >0) - { - fp_u->computeEnergy(is_tdp); - } - if (coredynp.num_muls >0) - { - mul->computeEnergy(is_tdp); - } - - if (is_tdp) - { - set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. - bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t; - if (coredynp.num_muls >0) - { - set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. - bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; - power = power + mul->power; - } - if (coredynp.num_fpus>0) - { - set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction. - bypass.power = bypass.power + fp_bypass->power*pppm_t + fpTagBypass->power*pppm_t ; - power = power + fp_u->power; - } - - power = power + rfu->power + exeu->power + bypass.power + scheu->power; - } - else - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses); - bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t; - bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t; - - if (coredynp.num_muls >0) - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction. 
- bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; - rt_power = rt_power + mul->rt_power; - } - - if (coredynp.num_fpus>0) - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses); - bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t; - bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t; - rt_power = rt_power + fp_u->rt_power; - } - rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power; - } + if (!exist) return; + if (iFRAT) { + delete iFRAT; + iFRAT = NULL; + } + if (fFRAT) { + delete fFRAT; + fFRAT = NULL; + } + if (iRRAT) { + delete iRRAT; + iRRAT = NULL; + } + if (iFRAT) { + delete iFRAT; + iFRAT = NULL; + } + if (ifreeL) { + delete ifreeL; + ifreeL = NULL; + } + if (ffreeL) { + delete ffreeL; + ffreeL = NULL; + } + if (idcl) { + delete idcl; + idcl = NULL; + } + if (fdcl) { + delete fdcl; + fdcl = NULL; + } + if (RAHT) { + delete RAHT; + RAHT = NULL; + } } -void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - -// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl; - if (is_tdp) - { - cout << indent_str << "Register Files:" << endl; - cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (plevel>3){ - rfu->displayEnergy(indent+4,is_tdp); - } - cout << indent_str << "Instruction Scheduler:" << endl; - cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (plevel>3){ - scheu->displayEnergy(indent+4,is_tdp); - } - exeu->displayEnergy(indent,is_tdp); - if (coredynp.num_fpus>0) - { - fp_u->displayEnergy(indent,is_tdp); - } - if (coredynp.num_muls >0) - { - mul->displayEnergy(indent,is_tdp); - } - cout << indent_str << "Results Broadcast Bus:" << endl; - cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - } - else - { - cout << indent_str_next << "Register Files Peak Dynamic = " << rfu->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Register Files Subthreshold Leakage = " << rfu->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Register Files Gate Leakage = " << rfu->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " << scheu->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " << scheu->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Instruction Sheduler Gate Leakage = " << scheu->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " << bypass.rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Results Broadcast Bus Subthreshold Leakage = " << bypass.rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Results Broadcast Bus Gate Leakage = " << bypass.rt_power.readOp.gate_leakage << " W" << endl; - } +LoadStoreU ::~LoadStoreU() { + if (!exist) return; + if (LSQ) { + delete LSQ; + LSQ = NULL; + } + if (dcache) { + delete dcache; + dcache = NULL; + } } -void Core::computeEnergy(bool is_tdp) -{ - //power_point_product_masks - double pppm_t[4] = {1,1,1,1}; - double rtp_pipeline_coe; - double num_units = 4.0; - if (is_tdp) - { - ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - - if (coredynp.core_ty==OOO) - { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - 
set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - if (rnu->exist) - { - rnu->power = rnu->power + corepipe->power*pppm_t; - power = power + rnu->power; - } - } - - if (ifu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); -// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; - ifu->power = ifu->power + corepipe->power*pppm_t; -// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; -// cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; - power = power + ifu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (lsu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - lsu->power = lsu->power + corepipe->power*pppm_t; -// cout << "LSU = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + lsu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (exu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - exu->power = exu->power + corepipe->power*pppm_t; -// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + exu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (mmu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - mmu->power = mmu->power + 
corepipe->power*pppm_t; -// cout << "MMU = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + mmu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } +MemManU ::~MemManU() { - power = power + undiffCore->power; + if (!exist) return; + if (itlb) { + delete itlb; + itlb = NULL; + } + if (dtlb) { + delete dtlb; + dtlb = NULL; + } +} - if (XML->sys.Private_L2) - { +RegFU ::~RegFU() { - l2cache->computeEnergy(is_tdp); - set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1); - //l2cache->power = l2cache->power*pppm_t; - power = power + l2cache->power*pppm_t; - } - } - else - { - ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - if (coredynp.core_ty==OOO) - { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - if (rnu->exist) - { - rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t; - - rt_power = rt_power + rnu->rt_power; - } - } - else - { - if (XML->sys.homogeneous_cores==1) - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; - } - else - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - } + if (!exist) return; + if (IRF) { + delete IRF; + IRF = NULL; + } + if (FRF) { + delete FRF; + FRF = NULL; + } + if (RFWIN) { + delete RFWIN; + RFWIN = NULL; + } +} - if (ifu->exist) - { - ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + ifu->rt_power ; - } - if (lsu->exist) - { - lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + lsu->rt_power; - } - if (exu->exist) - { - 
exu->rt_power = exu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + exu->rt_power; - } - if (mmu->exist) - { - mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + mmu->rt_power ; - } +SchedulerU ::~SchedulerU() { - rt_power = rt_power + undiffCore->power; -// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - if (XML->sys.Private_L2) - { + if (!exist) return; + if (int_inst_window) { + delete int_inst_window; + int_inst_window = NULL; + } + if (fp_inst_window) { + // Release the FP window itself; deleting int_inst_window here (already NULL) was a no-op and leaked fp_inst_window. + delete fp_inst_window; + fp_inst_window = NULL; + } + if (ROB) { + delete ROB; + ROB = NULL; + } + if (int_instruction_selection) { + delete int_instruction_selection; + int_instruction_selection = NULL; + } + if (fp_instruction_selection) { + delete fp_instruction_selection; + fp_instruction_selection = NULL; + } +} - l2cache->computeEnergy(is_tdp); - //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); - //l2cache->rt_power = l2cache->rt_power*pppm_t; - rt_power = rt_power + l2cache->rt_power; - } - } +EXECU ::~EXECU() { + if (!exist) return; + if (int_bypass) { + delete int_bypass; + int_bypass = NULL; + } + if (intTagBypass) { + delete intTagBypass; + intTagBypass = NULL; + } + if (int_mul_bypass) { + delete int_mul_bypass; + int_mul_bypass = NULL; + } + if (intTag_mul_Bypass) { + delete intTag_mul_Bypass; + intTag_mul_Bypass = NULL; + } + if (fp_bypass) { + delete fp_bypass; + fp_bypass = NULL; + } + if (fpTagBypass) { + delete fpTagBypass; + fpTagBypass = NULL; + } + if (fp_u) { + delete fp_u; + fp_u = NULL; + } + if (exeu) { + delete exeu; + exeu = NULL; + } + if (mul) { + delete mul; + mul = NULL; + } + if (rfu) { + delete rfu; + rfu = NULL; + } + if (scheu) { + delete scheu; + scheu = NULL; + } } -void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - if (is_tdp) - { - cout << "Core:" 
<< endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout<<endl; - if (ifu->exist) - { - cout << indent_str << "Instruction Fetch Unit:" << endl; - cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (plevel >2){ - ifu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (coredynp.core_ty==OOO) - { - if (rnu->exist) - { - cout << indent_str<< "Renaming Unit:" << endl; - cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (plevel >2){ - rnu->displayEnergy(indent+4,plevel,is_tdp); - } - } +Core::~Core() { - } - if (lsu->exist) - { - cout << indent_str<< "Load Store Unit:" << endl; - cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (plevel >2){ - lsu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (mmu->exist) - { - cout << indent_str<< "Memory Management Unit:" << endl; - cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (plevel >2){ - mmu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (exu->exist) - { - cout << indent_str<< "Execution Unit:" << endl; - cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <<endl; - if (plevel >2){ - exu->displayEnergy(indent+4,plevel,is_tdp); - } - } -// if (plevel >2) -// { -// if (undiffCore->exist) -// { -// cout << indent_str << "Undifferentiated Core" << endl; -// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl; -// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl; -//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel? 
undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl; -// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl; -// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; -// cout <<endl; -// } -// } - if (XML->sys.Private_L2) - { - - l2cache->displayEnergy(4,is_tdp); - } - - } - else - { -// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl; - } -} -InstFetchU ::~InstFetchU(){ - - if (!exist) 
return; - if(IB) {delete IB; IB = 0;} - if(ID_inst) {delete ID_inst; ID_inst = 0;} - if(ID_operand) {delete ID_operand; ID_operand = 0;} - if(ID_misc) {delete ID_misc; ID_misc = 0;} - if (coredynp.predictionW>0) - { - if(BTB) {delete BTB; BTB = 0;} - if(BPT) {delete BPT; BPT = 0;} - } + if (ifu) { + delete ifu; + ifu = NULL; + } + if (lsu) { + delete lsu; + lsu = NULL; + } + if (rnu) { + delete rnu; + rnu = NULL; + } + if (mmu) { + delete mmu; + mmu = NULL; + } + if (exu) { + delete exu; + exu = NULL; + } + if (corepipe) { + delete corepipe; + corepipe = NULL; + } + if (undiffCore) { + delete undiffCore; + undiffCore = NULL; + } + if (l2cache) { + delete l2cache; + l2cache = NULL; + } } -BranchPredictor ::~BranchPredictor(){ +void Core::initialize_params() { + memset(&core_params, 0, sizeof(CoreParameters)); + core_params.peak_issueW = -1; + core_params.peak_commitW = -1; +} - if (!exist) return; - if(globalBPT) {delete globalBPT; globalBPT = 0;} - if(localBPT) {delete localBPT; localBPT = 0;} - if(L1_localBPT) {delete L1_localBPT; L1_localBPT = 0;} - if(L2_localBPT) {delete L2_localBPT; L2_localBPT = 0;} - if(chooser) {delete chooser; chooser = 0;} - if(RAS) {delete RAS; RAS = 0;} - } +void Core::initialize_stats() { + memset(&core_stats, 0, sizeof(CoreStatistics)); + core_stats.IFU_duty_cycle = 1.0; + core_stats.ALU_duty_cycle = 1.0; + core_stats.FPU_duty_cycle = 1.0; + core_stats.MUL_duty_cycle = 1.0; + core_stats.ALU_cdb_duty_cycle = 1.0; + core_stats.FPU_cdb_duty_cycle = 1.0; + core_stats.MUL_cdb_duty_cycle = 1.0; + core_stats.pipeline_duty_cycle = 1.0; + core_stats.IFU_duty_cycle = 1.0; + core_stats.LSU_duty_cycle = 1.0; + core_stats.MemManU_D_duty_cycle = 1.0; + core_stats.MemManU_I_duty_cycle = 1.0; +} -RENAMINGU ::~RENAMINGU(){ - - if (!exist) return; - if(iFRAT ) {delete iFRAT; iFRAT = 0;} - if(fFRAT ) {delete fFRAT; fFRAT =0;} - if(iRRAT) {delete iRRAT; iRRAT = 0;} - if(iFRAT) {delete iFRAT; iFRAT = 0;} - if(ifreeL) {delete ifreeL;ifreeL= 0;} - 
if(ffreeL) {delete ffreeL;ffreeL= 0;} - if(idcl) {delete idcl; idcl = 0;} - if(fdcl) {delete fdcl; fdcl = 0;} - if(RAHT) {delete RAHT; RAHT = 0;} +void Core::set_core_param() { + initialize_params(); + initialize_stats(); + + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_STR_IF("name", name); + ASSIGN_INT_IF("opt_local", core_params.opt_local); + ASSIGN_FP_IF("clock_rate", core_params.clockRate); + ASSIGN_INT_IF("instruction_length", core_params.instruction_length); + ASSIGN_INT_IF("opcode_width", core_params.opcode_width); + ASSIGN_INT_IF("x86", core_params.x86); + ASSIGN_INT_IF("Embedded", core_params.Embedded); + ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type); + ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length); + ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads); + ASSIGN_INT_IF("fetch_width", core_params.fetchW); + ASSIGN_INT_IF("decode_width", core_params.decodeW); + ASSIGN_INT_IF("issue_width", core_params.issueW); + ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW); + ASSIGN_INT_IF("commit_width", core_params.commitW); + ASSIGN_INT_IF("prediction_width", core_params.predictionW); + ASSIGN_INT_IF("ALU_per_core", core_params.num_alus); + ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus); + ASSIGN_INT_IF("MUL_per_core", core_params.num_muls); + ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW); + ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty, + Scheduler_type); + ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type); + ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size); + ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size); 
+ ASSIGN_INT_IF("ROB_size", core_params.ROB_size); + ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc); + ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks); + ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width); + ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc); + ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks); + ASSIGN_INT_IF("register_window_size", + core_params.register_window_size); + ASSIGN_INT_IF("register_window_throughput", + core_params.register_window_throughput); + ASSIGN_INT_IF("register_window_latency", + core_params.register_window_latency); + ASSIGN_INT_IF("register_window_assoc", + core_params.register_window_assoc); + ASSIGN_INT_IF("register_window_nbanks", + core_params.register_window_nbanks); + ASSIGN_INT_IF("register_window_tag_width", + core_params.register_window_tag_width); + ASSIGN_INT_IF("register_window_rw_ports", + core_params.register_window_rw_ports); + ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size); + ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc); + ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks); + ASSIGN_INT_IF("phy_Regs_IRF_tag_width", + core_params.phy_Regs_IRF_tag_width); + ASSIGN_INT_IF("phy_Regs_IRF_rd_ports", + core_params.phy_Regs_IRF_rd_ports); + ASSIGN_INT_IF("phy_Regs_IRF_wr_ports", + core_params.phy_Regs_IRF_wr_ports); + ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size); + ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc); + ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks); + ASSIGN_INT_IF("phy_Regs_FRF_tag_width", + core_params.phy_Regs_FRF_tag_width); + ASSIGN_INT_IF("phy_Regs_FRF_rd_ports", + core_params.phy_Regs_FRF_rd_ports); + ASSIGN_INT_IF("phy_Regs_FRF_wr_ports", + core_params.phy_Regs_FRF_wr_ports); + ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks); + ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports); + 
ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks); + ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports); + ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks); + ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports); + ASSIGN_INT_IF("memory_ports", core_params.memory_ports); + ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size); + ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc); + ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks); + ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size); + ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc); + ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks); + ASSIGN_INT_IF("instruction_window_size", + core_params.instruction_window_size); + ASSIGN_INT_IF("fp_instruction_window_size", + core_params.fp_instruction_window_size); + ASSIGN_INT_IF("instruction_buffer_size", + core_params.instruction_buffer_size); + ASSIGN_INT_IF("instruction_buffer_assoc", + core_params.instruction_buffer_assoc); + ASSIGN_INT_IF("instruction_buffer_nbanks", + core_params.instruction_buffer_nbanks); + ASSIGN_INT_IF("instruction_buffer_tag_width", + core_params.instruction_buffer_tag_width); + ASSIGN_INT_IF("number_instruction_fetch_ports", + core_params.number_instruction_fetch_ports); + ASSIGN_INT_IF("RAS_size", core_params.RAS_size); + ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt, + Wire_type); + ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type); + ASSIGN_INT_IF("execu_int_bypass_ports", + core_params.execu_int_bypass_ports); + ASSIGN_INT_IF("execu_mul_bypass_ports", + core_params.execu_mul_bypass_ports); + ASSIGN_INT_IF("execu_fp_bypass_ports", + core_params.execu_fp_bypass_ports); + ASSIGN_ENUM_IF("execu_bypass_wire_type", + core_params.execu_bypass_wire_type, Wire_type); + ASSIGN_FP_IF("execu_bypass_base_width", + core_params.execu_bypass_base_width); + 
ASSIGN_FP_IF("execu_bypass_base_height", + core_params.execu_bypass_base_height); + ASSIGN_INT_IF("execu_bypass_start_wiring_level", + core_params.execu_bypass_start_wiring_level); + ASSIGN_FP_IF("execu_bypass_route_over_perc", + core_params.execu_bypass_route_over_perc); + ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator); + ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages); + ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages); + ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines); + ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines); + ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint); + ASSIGN_INT_IF("perThreadState", core_params.perThreadState); + ASSIGN_INT_IF("instruction_length", core_params.instruction_length); + + else { + warnUnrecognizedParam(node_name); } + } -LoadStoreU ::~LoadStoreU(){ - - if (!exist) return; - if(LSQ) {delete LSQ; LSQ = 0;} + // Change from MHz to Hz + core_params.clockRate *= 1e6; + clockRate = core_params.clockRate; + + core_params.peak_commitW = core_params.peak_issueW; + core_params.fp_decodeW = core_params.fp_issueW; + + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle); + ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle); + ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle); + ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle); + ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle); + ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle); + ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle); + ASSIGN_FP_IF("total_cycles", core_stats.total_cycles); + 
ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles); + ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles); + ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle); + ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle); + ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle); + ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle); + ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle); + ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses); + ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses); + ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses); + ASSIGN_FP_IF("function_calls", core_stats.function_calls); + ASSIGN_FP_IF("total_instructions", core_stats.total_instructions); + ASSIGN_FP_IF("int_instructions", core_stats.int_instructions); + ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions); + ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions); + ASSIGN_FP_IF("branch_mispredictions", + core_stats.branch_mispredictions); + ASSIGN_FP_IF("load_instructions", core_stats.load_instructions); + ASSIGN_FP_IF("store_instructions", core_stats.store_instructions); + ASSIGN_FP_IF("committed_instructions", + core_stats.committed_instructions); + ASSIGN_FP_IF("committed_int_instructions", + core_stats.committed_int_instructions); + ASSIGN_FP_IF("committed_fp_instructions", + core_stats.committed_fp_instructions); + ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads); + ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes); + ASSIGN_FP_IF("rename_reads", core_stats.rename_reads); + ASSIGN_FP_IF("rename_writes", core_stats.rename_writes); + ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads); + ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes); + ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads); + ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes); + ASSIGN_FP_IF("inst_window_wakeup_accesses", + core_stats.inst_window_wakeup_accesses); + 
ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads); + ASSIGN_FP_IF("fp_inst_window_writes", + core_stats.fp_inst_window_writes); + ASSIGN_FP_IF("fp_inst_window_wakeup_accesses", + core_stats.fp_inst_window_wakeup_accesses); + ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads); + ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads); + ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes); + ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes); + ASSIGN_FP_IF("context_switches", core_stats.context_switches); + ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses); + ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses); + ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses); + + else { + warnUnrecognizedStat(node_name); } + } -MemManU ::~MemManU(){ + // Initialize a few variables + core_params.multithreaded = core_params.num_hthreads > 1 ? true : false; + core_params.pc_width = virtual_address_width; + core_params.v_address_width = virtual_address_width; + core_params.p_address_width = physical_address_width; + core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32; + core_params.fp_data_width = core_params.int_data_width; + core_params.arch_ireg_width = + int(ceil(log2(core_params.archi_Regs_IRF_size))); + core_params.arch_freg_width + = int(ceil(log2(core_params.archi_Regs_FRF_size))); + core_params.num_IRF_entry = core_params.archi_Regs_IRF_size; + core_params.num_FRF_entry = core_params.archi_Regs_FRF_size; + + if (core_params.instruction_length <= 0) { + errorNonPositiveParam("instruction_length"); + } - if (!exist) return; - if(itlb) {delete itlb; itlb = 0;} - if(dtlb) {delete dtlb; dtlb = 0;} - } + if (core_params.num_hthreads <= 0) { + errorNonPositiveParam("number_hardware_threads"); + } -RegFU ::~RegFU(){ + if (core_params.opcode_width <= 0) { + errorNonPositiveParam("opcode_width"); + } - if (!exist) return; - if(IRF) {delete IRF; IRF = 0;} - if(FRF) {delete FRF; 
FRF = 0;} - if(RFWIN) {delete RFWIN; RFWIN = 0;} - } + if (core_params.instruction_buffer_size <= 0) { + errorNonPositiveParam("instruction_buffer_size"); + } -SchedulerU ::~SchedulerU(){ + if (core_params.number_instruction_fetch_ports <= 0) { + errorNonPositiveParam("number_instruction_fetch_ports"); + } - if (!exist) return; - if(int_inst_window) {delete int_inst_window; int_inst_window = 0;} - if(fp_inst_window) {delete int_inst_window; int_inst_window = 0;} - if(ROB) {delete ROB; ROB = 0;} - if(instruction_selection) {delete instruction_selection;instruction_selection = 0;} - } + if (core_params.peak_issueW <= 0) { + errorNonPositiveParam("peak_issue_width"); + } else { + assert(core_params.peak_commitW > 0); + } -EXECU ::~EXECU(){ - - if (!exist) return; - if(int_bypass) {delete int_bypass; int_bypass = 0;} - if(intTagBypass) {delete intTagBypass; intTagBypass =0;} - if(int_mul_bypass) {delete int_mul_bypass; int_mul_bypass = 0;} - if(intTag_mul_Bypass) {delete intTag_mul_Bypass; intTag_mul_Bypass =0;} - if(fp_bypass) {delete fp_bypass;fp_bypass = 0;} - if(fpTagBypass) {delete fpTagBypass;fpTagBypass = 0;} - if(fp_u) {delete fp_u;fp_u = 0;} - if(exeu) {delete exeu;exeu = 0;} - if(mul) {delete mul;mul = 0;} - if(rfu) {delete rfu;rfu = 0;} - if(scheu) {delete scheu; scheu = 0;} + if (core_params.core_ty == OOO) { + if (core_params.scheu_ty == PhysicalRegFile) { + core_params.phy_ireg_width = + int(ceil(log2(core_params.phy_Regs_IRF_size))); + core_params.phy_freg_width = + int(ceil(log2(core_params.phy_Regs_FRF_size))); + core_params.num_ifreelist_entries = + core_params.num_IRF_entry = core_params.phy_Regs_IRF_size; + core_params.num_ffreelist_entries = + core_params.num_FRF_entry = core_params.phy_Regs_FRF_size; + } else if (core_params.scheu_ty == ReservationStation) { + core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size))); + core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size))); + core_params.num_ifreelist_entries = 
core_params.ROB_size; + core_params.num_ffreelist_entries = core_params.ROB_size; } + } -Core ::~Core(){ + core_params.regWindowing = + (core_params.register_window_size > 0 && + core_params.core_ty == Inorder) ? true : false; - if(ifu) {delete ifu; ifu = 0;} - if(lsu) {delete lsu; lsu = 0;} - if(rnu) {delete rnu; rnu = 0;} - if(mmu) {delete mmu; mmu = 0;} - if(exu) {delete exu; exu = 0;} - if(corepipe) {delete corepipe; corepipe = 0;} - if(undiffCore) {delete undiffCore;undiffCore = 0;} - if(l2cache) {delete l2cache;l2cache = 0;} + if (core_params.regWindowing) { + if (core_params.register_window_throughput <= 0) { + errorNonPositiveParam("register_window_throughput"); + } else if (core_params.register_window_latency <= 0) { + errorNonPositiveParam("register_window_latency"); } + } -void Core::set_core_param() -{ - coredynp.opt_local = XML->sys.core[ithCore].opt_local; - coredynp.x86 = XML->sys.core[ithCore].x86; - coredynp.Embedded = XML->sys.Embedded; - coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type; - coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme; - coredynp.fetchW = XML->sys.core[ithCore].fetch_width; - coredynp.decodeW = XML->sys.core[ithCore].decode_width; - coredynp.issueW = XML->sys.core[ithCore].issue_width; - coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width; - coredynp.commitW = XML->sys.core[ithCore].commit_width; - coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width; - coredynp.predictionW = XML->sys.core[ithCore].prediction_width; - coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width; - coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; - coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core; - coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core; - coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core; - - - coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads; - coredynp.multithreaded = coredynp.num_hthreads>1? 
true:false; - coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; - coredynp.pc_width = XML->sys.virtual_address_width; - - coredynp.opcode_length = XML->sys.core[ithCore].opcode_width; - coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width; - coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0]; - coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0]; - coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1]; - coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1]; - coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32; - coredynp.fp_data_width = coredynp.int_data_width; - coredynp.v_address_width = XML->sys.virtual_address_width; - coredynp.p_address_width = XML->sys.physical_address_width; - - coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme; - coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size))); - coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size))); - coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size; - coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size; - coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle; - coredynp.total_cycles = XML->sys.core[ithCore].total_cycles; - coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles; - coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles; - - //Max power duty cycle for peak power estimation -// if (coredynp.core_ty==OOO) -// { -// coredynp.IFU_duty_cycle = 1; -// coredynp.LSU_duty_cycle = 1; -// coredynp.MemManU_I_duty_cycle =1; -// coredynp.MemManU_D_duty_cycle =1; -// coredynp.ALU_duty_cycle =1; -// coredynp.MUL_duty_cycle =1; -// coredynp.FPU_duty_cycle =1; -// coredynp.ALU_cdb_duty_cycle =1; -// coredynp.MUL_cdb_duty_cycle =1; -// coredynp.FPU_cdb_duty_cycle =1; -// } -// else -// { - coredynp.IFU_duty_cycle = 
XML->sys.core[ithCore].IFU_duty_cycle; - coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle; - coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle; - coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle; - coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle; - coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle; - coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle; - coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle; - coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle; - coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle; - coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle; -// } - - - if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder))) - { - cout<<"Invalid Core Type"<<endl; - exit(0); - } -// if (coredynp.core_ty==OOO) -// { -// cout<<"OOO processor models are being updated and will be available in next release"<<endl; -// exit(0); -// } - if (!((coredynp.scheu_ty==PhysicalRegFile)||(coredynp.scheu_ty==ReservationStation))) - { - cout<<"Invalid OOO Scheduler Type"<<endl; - exit(0); - } + set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads, + core_params.num_hthreads, 0); - if (!((coredynp.rm_ty ==RAMbased)||(coredynp.rm_ty ==CAMbased))) - { - cout<<"Invalid OOO Renaming Type"<<endl; - exit(0); - } + if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) { + cout << "Invalid Core Type" << endl; + exit(0); + } -if (coredynp.core_ty==OOO) -{ - if (coredynp.scheu_ty==PhysicalRegFile) - { - coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_IRF_size))); - coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size))); - coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size; - coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = 
XML->sys.core[ithCore].phy_Regs_FRF_size; - } - else if (coredynp.scheu_ty==ReservationStation) - {//ROB serves as Phy RF in RS based OOO - coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size; - coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size; + if (!((core_params.scheu_ty == PhysicalRegFile) || + (core_params.scheu_ty == ReservationStation))) { + cout << "Invalid OOO Scheduler Type" << endl; + exit(0); + } - } + if (!((core_params.rm_ty == RAMbased) || + (core_params.rm_ty == CAMbased))) { + cout << "Invalid OOO Renaming Type" << endl; + exit(0); + } } - coredynp.globalCheckpoint = 32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See TR for reference. - coredynp.perThreadState = 8; - coredynp.instruction_length = 32; - coredynp.clockRate = XML->sys.core[ithCore].clock_rate; - coredynp.clockRate *= 1e6; - coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false; - coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate; - set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0); -} |