/***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #include #include #include #include #include #include #include "basic_circuit.h" #include "basic_components.h" #include "common.h" #include "const.h" #include "core.h" #include "io.h" #include "parameter.h" int RegFU::RFWIN_ACCESS_MULTIPLIER = 16; // The five bits are: busy, Issued, Finished, speculative, valid int SchedulerU::ROB_STATUS_BITS = 5; InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_, const CoreParameters & _core_params, const CoreStatistics & _core_stats, bool exist_) : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL), BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL), interface_ip(*interface_ip_), core_params(_core_params), core_stats(_core_stats), exist(exist_) { if (!exist) return; int idx, tag, data, size, line, assoc, banks; bool is_default = true; clockRate = core_params.clockRate; name = "Instruction Fetch Unit"; // Check if there is an icache child: int i; icache = NULL; for( i = 0; i < xml_data->nChildNode("component"); i++ ) { XMLNode* childXML = xml_data->getChildNodePtr("component", &i); XMLCSTR type = childXML->getAttribute("type"); if (!type) warnMissingComponentType(childXML->getAttribute("id")); STRCMP(type, "CacheUnit") { XMLCSTR name = childXML->getAttribute("name"); if (strcmp(name, "Instruction Cache") == 0 || strcmp(name, "icache") == 0) { icache = new CacheUnit(childXML, &interface_ip); children.push_back(icache); } } } set_params_stats(); //Instruction buffer data = core_params.instruction_length * core_params.peak_issueW; line = int(ceil(data / BITS_PER_BYTE)); size = core_params.num_hthreads * core_params.instruction_buffer_size * line; if (size < MIN_BUFFER_SIZE) { size = MIN_BUFFER_SIZE; } interface_ip.cache_sz = size; interface_ip.line_sz = line; interface_ip.assoc = core_params.instruction_buffer_assoc; interface_ip.nbanks = core_params.instruction_buffer_nbanks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0; interface_ip.tag_w = core_params.instruction_buffer_tag_width; interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_ram = true; interface_ip.pure_cam = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer", Core_device, clockRate, core_params.opt_local, core_params.core_ty); IB->area.set_area(IB->area.get_area() + IB->local_result.area); area.set_area(area.get_area() + IB->local_result.area); if (core_params.predictionW > 0) { /* * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged * It is only a cache without all the buffers in the cache controller since it is more like a * look up table than a cache with cache controller. When access miss, no load from other places * such as main memory (not actively fill the misses), it is passively updated under two circumstances: * 1) when BPT@ID stage finds out current is a taken branch while BTB missed * 2) When BPT@ID stage predicts differently than BTB * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) * 4) when EXEU find out wrong target has been provided from BTB. * */ size = inst_fetch_params.btb_size; line = inst_fetch_params.btb_block_size; assoc = inst_fetch_params.btb_assoc; banks = inst_fetch_params.btb_num_banks; idx = int(ceil(log2(size / line / assoc))); tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; interface_ip.cache_sz = size; interface_ip.line_sz = line; interface_ip.assoc = assoc; interface_ip.nbanks = banks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; interface_ip.num_rd_ports = core_params.predictionW; interface_ip.num_wr_ports = core_params.predictionW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = true; interface_ip.pure_ram = false; interface_ip.pure_cam = false; interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate; interface_ip.latency = inst_fetch_params.btb_latency / clockRate; BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer", Core_device, clockRate, core_params.opt_local, core_params.core_ty); area.set_area(area.get_area() + BTB->local_result.area); BPT = new BranchPredictor(xml_data, &interface_ip, core_params, core_stats); area.set_area(area.get_area() + BPT->area.get_area()); } ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder", is_default, &interface_ip, core_params.opcode_width, core_params.decodeW, core_params.x86, clockRate, Core_device, core_params.core_ty); ID_operand = new InstructionDecoder(xml_data, "Instruction Operand Decoder", is_default, &interface_ip, core_params.arch_ireg_width, core_params.decodeW, core_params.x86, clockRate, Core_device, core_params.core_ty); ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder", is_default, &interface_ip, core_params.micro_opcode_length, core_params.decodeW, core_params.x86, clockRate, Core_device, core_params.core_ty); area.set_area(area.get_area()+ (ID_inst->area.get_area() + ID_operand->area.get_area() + ID_misc->area.get_area()) * core_params.decodeW); } void InstFetchU::set_params_stats() { int num_children = xml_data->nChildNode("component"); int i; memset(&inst_fetch_params,0,sizeof(InstFetchParameters)); for (i = 0; i < num_children; i++) { XMLNode* child = xml_data->getChildNodePtr("component", &i); XMLCSTR type = child->getAttribute("type"); if (!type) warnMissingComponentType(child->getAttribute("id")); STRCMP(type, "BranchTargetBuffer") { int sub_num_children = child->nChildNode("param"); int j; for (j = 0; j < sub_num_children; j++) { XMLNode* paramNode = child->getChildNodePtr("param", &j); XMLCSTR node_name = paramNode->getAttribute("name"); XMLCSTR value = paramNode->getAttribute("value"); if (!node_name) warnMissingParamName(paramNode->getAttribute("id")); ASSIGN_INT_IF("size", inst_fetch_params.btb_size); ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size); ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc); ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks); ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency); ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput); ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports); else { warnUnrecognizedParam(node_name); } } sub_num_children = child->nChildNode("stat"); for (j = 0; j < sub_num_children; j++) { XMLNode* statNode = child->getChildNodePtr("stat", &j); XMLCSTR node_name = statNode->getAttribute("name"); XMLCSTR value = statNode->getAttribute("value"); if (!node_name) warnMissingStatName(statNode->getAttribute("id")); ASSIGN_FP_IF("read_accesses", inst_fetch_stats.btb_read_accesses); ASSIGN_FP_IF("write_accesses", inst_fetch_stats.btb_write_accesses); else { warnUnrecognizedStat(node_name); } } } } // Parameter sanity check if (inst_fetch_params.btb_size <= 0) { errorNonPositiveParam("size"); } if (inst_fetch_params.btb_block_size <= 0) { errorNonPositiveParam("block_size"); } if (inst_fetch_params.btb_assoc <= 0) { errorNonPositiveParam("assoc"); } if (inst_fetch_params.btb_num_banks <= 0) { errorNonPositiveParam("num_banks"); } } BranchPredictor::BranchPredictor(XMLNode* _xml_data, InputParameter* interface_ip_, const CoreParameters & _core_params, const CoreStatistics & _core_stats, bool exist_) : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL), L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL), interface_ip(*interface_ip_), core_params(_core_params), core_stats(_core_stats), exist(exist_) { if (!exist) return; int tag; int data; int size; clockRate = core_params.clockRate; name = "Branch Predictor"; // Common interface parameters for the branch predictor structures interface_ip.pure_cam = false; if (core_params.multithreaded) { tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS); interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.is_cache = true; interface_ip.pure_ram = false; } else { interface_ip.specific_tag = 0; interface_ip.tag_w = 0; interface_ip.is_cache = false; interface_ip.pure_ram = true; } // Parse params and stats from XML set_params_stats(); // Common interface parameters for the branch predictor structures interface_ip.assoc = branch_pred_params.assoc; interface_ip.nbanks = branch_pred_params.nbanks; //Global predictor data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE)); size = data * branch_pred_params.global_predictor_entries; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.predictionW; interface_ip.num_wr_ports = core_params.predictionW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor", Core_device, clockRate, core_params.opt_local, core_params.core_ty); area.set_area(area.get_area() + globalBPT->local_result.area); //Local BPT (Level 1) data = int(ceil(branch_pred_params.local_l1_predictor_size / BITS_PER_BYTE)); size = data * branch_pred_params.local_predictor_entries; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.predictionW; interface_ip.num_wr_ports = core_params.predictionW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; L1_localBPT = new ArrayST(xml_data, &interface_ip, "Local Predictor, Level 1", Core_device, clockRate, core_params.opt_local, core_params.core_ty); L1_localBPT->area.set_area(L1_localBPT->area.get_area() + L1_localBPT->local_result.area); area.set_area(area.get_area()+ L1_localBPT->local_result.area); //Local BPT (Level 2) data = int(ceil(branch_pred_params.local_l2_predictor_size / BITS_PER_BYTE)); size = data * branch_pred_params.local_predictor_entries; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.predictionW; interface_ip.num_wr_ports = core_params.predictionW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; L2_localBPT = new ArrayST(xml_data, &interface_ip, "Local Predictor, Level 2", Core_device, clockRate, core_params.opt_local, core_params.core_ty); area.set_area(area.get_area() + L2_localBPT->local_result.area); //Chooser data = int(ceil(branch_pred_params.chooser_predictor_bits / BITS_PER_BYTE)); size = data * branch_pred_params.chooser_predictor_entries; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.predictionW; interface_ip.num_wr_ports = core_params.predictionW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser", Core_device, clockRate, core_params.opt_local, core_params.core_ty); area.set_area(area.get_area() + chooser->local_result.area); //RAS return address stacks are Duplicated for each thread. data = int(ceil(core_params.pc_width / BITS_PER_BYTE)); size = data * core_params.RAS_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.predictionW; interface_ip.num_wr_ports = core_params.predictionW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate, core_params.opt_local, core_params.core_ty); RAS->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + RAS->local_result.area * core_params.num_hthreads); } void BranchPredictor::set_params_stats() { int num_children = xml_data->nChildNode("component"); int i; for (i = 0; i < num_children; i++) { XMLNode* child = xml_data->getChildNodePtr("component", &i); XMLCSTR type = child->getAttribute("type"); if (!type) warnMissingComponentType(child->getAttribute("id")); STRCMP(type, "BranchPredictor") { int sub_num_children = child->nChildNode("param"); int j; for (j = 0; j < sub_num_children; j++) { XMLNode* paramNode = child->getChildNodePtr("param", &j); XMLCSTR node_name = paramNode->getAttribute("name"); XMLCSTR value = paramNode->getAttribute("value"); if (!node_name) warnMissingParamName(paramNode->getAttribute("id")); ASSIGN_INT_IF("assoc", branch_pred_params.assoc); ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks); ASSIGN_INT_IF("local_l1_predictor_size", branch_pred_params.local_l1_predictor_size); ASSIGN_INT_IF("local_l2_predictor_size", branch_pred_params.local_l2_predictor_size); ASSIGN_INT_IF("local_predictor_entries", branch_pred_params.local_predictor_entries); ASSIGN_INT_IF("global_predictor_entries", branch_pred_params.global_predictor_entries); ASSIGN_INT_IF("global_predictor_bits", branch_pred_params.global_predictor_bits); ASSIGN_INT_IF("chooser_predictor_entries", branch_pred_params.chooser_predictor_entries); ASSIGN_INT_IF("chooser_predictor_bits", branch_pred_params.chooser_predictor_bits); else { warnUnrecognizedParam(node_name); } } // The core reads in the number of branches and the number of // function calls and these values are passed through the // core_stats variable, so we don't need to read them in here } } } SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_, const CoreParameters & _core_params, const CoreStatistics & _core_stats, bool exist_) : McPATComponent(_xml_data), int_inst_window(NULL), fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL), fp_instruction_selection(NULL), interface_ip(*interface_ip_), core_params(_core_params), core_stats(_core_stats), exist(exist_) { if (!exist) return; int tag; int data; int size; int line; bool is_default = true; string tmp_name; clockRate = core_params.clockRate; name = "Instruction Scheduler"; if ((core_params.core_ty == Inorder && core_params.multithreaded)) { //Instruction issue queue, in-order multi-issue or multithreaded //processor also has this structure. Unified window for Inorder //processors //This tag width is the normal thread state bits based on //Niagara Design tag = int(log2(core_params.num_hthreads) * core_params.perThreadState); data = core_params.instruction_length; line = int(ceil(data / BITS_PER_BYTE)); size = core_params.instruction_window_size * line; if (size < MIN_BUFFER_SIZE) { size = MIN_BUFFER_SIZE; } //NOTE: x86 inst can be very lengthy, up to 15B. //Source: Intel® 64 and IA-32 Architectures //Software Developer’s Manual interface_ip.cache_sz = size; interface_ip.line_sz = line; interface_ip.assoc = core_params.scheduler_assoc; interface_ip.nbanks = core_params.scheduler_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.peak_issueW; interface_ip.num_wr_ports = core_params.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.peak_issueW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; int_inst_window = new ArrayST(xml_data, &interface_ip, "InstFetchQueue", Core_device, clockRate, core_params.opt_local, core_params.core_ty); int_inst_window->output_data.area *= core_params.num_pipelines; area.set_area(area.get_area() + int_inst_window->local_result.area * core_params.num_pipelines); Iw_height = int_inst_window->local_result.cache_ht; /* * selection logic * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who * at the issue stage. */ int_instruction_selection = new selection_logic(xml_data, is_default, core_params.instruction_window_size, core_params.peak_issueW * core_params.num_hthreads, &interface_ip, "Int Instruction Selection Logic", core_stats.inst_window_wakeup_accesses, clockRate, Core_device, core_params.core_ty); if (core_params.fp_instruction_window_size > 0) { fp_instruction_selection = new selection_logic(xml_data, is_default, core_params.fp_instruction_window_size, core_params.fp_issueW * core_params.num_hthreads, &interface_ip, "FP Instruction Selection Logic", core_stats.fp_inst_window_wakeup_accesses, clockRate, Core_device, core_params.core_ty); } } if (core_params.core_ty == OOO) { /* * CAM based instruction window * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored * It is written once and read twice(two operands) before an instruction can be issued. * X86 instruction can be very long up to 15B. add instruction length in XML */ if (core_params.scheu_ty == PhysicalRegFile) { tag = core_params.phy_ireg_width; data = int((ceil((core_params.instruction_length + NUM_SOURCE_OPERANDS * (core_params.phy_ireg_width - core_params.arch_ireg_width)) / (double)NUM_SOURCE_OPERANDS) / BITS_PER_BYTE)); tmp_name = "Integer Instruction Window"; } else { tag = core_params.phy_ireg_width; data = int(ceil(((core_params.instruction_length + NUM_SOURCE_OPERANDS * (core_params.phy_ireg_width - core_params.arch_ireg_width) + 2 * core_params.int_data_width) / (double)NUM_SOURCE_OPERANDS) / BITS_PER_BYTE)); tmp_name = "Integer Reservation Station"; } size = data * core_params.instruction_window_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = core_params.scheduler_assoc; interface_ip.nbanks = core_params.scheduler_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.peak_issueW; interface_ip.num_wr_ports = core_params.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.peak_issueW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate; interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate; int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name, Core_device, clockRate, core_params.opt_local, core_params.core_ty); int_inst_window->output_data.area *= core_params.num_pipelines; area.set_area(area.get_area() + int_inst_window->local_result.area * core_params.num_pipelines); Iw_height = int_inst_window->local_result.cache_ht; //FU inst window if (core_params.scheu_ty == PhysicalRegFile) { tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width; data = int(ceil((core_params.instruction_length + NUM_SOURCE_OPERANDS * (core_params.phy_freg_width - core_params.arch_freg_width)) / BITS_PER_BYTE)); tmp_name = "FP Instruction Window"; } else { tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width; data = int(ceil((core_params.instruction_length + NUM_SOURCE_OPERANDS * (core_params.phy_freg_width - core_params.arch_freg_width) + NUM_SOURCE_OPERANDS * core_params.fp_data_width) / BITS_PER_BYTE)); tmp_name = "FP Reservation Station"; } size = data * core_params.fp_instruction_window_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = core_params.scheduler_assoc; interface_ip.nbanks = core_params.scheduler_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.fp_issueW; interface_ip.num_wr_ports = core_params.fp_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.fp_issueW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; fp_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name, Core_device, clockRate, core_params.opt_local, core_params.core_ty); fp_inst_window->output_data.area *= core_params.num_fp_pipelines; area.set_area(area.get_area() + fp_inst_window->local_result.area *core_params.num_fp_pipelines); fp_Iw_height = fp_inst_window->local_result.cache_ht; if (core_params.ROB_size > 0) { /* * if ROB_size = 0, then the target processor does not support hardware-based * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which * means branch must be resolved before instruction issued into instruction window, since * there is no change to flush miss-predict branch path after instructions are issued in this situation. * * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. * However, this approach is abandoned due to its high power and poor scalablility. * McPAT uses current implementation of ROB as circular buffer. * ROB is written once when instruction is issued and read once when the instruction is committed. * */ int robExtra = int(ceil(ROB_STATUS_BITS + log2(core_params.num_hthreads))); if (core_params.scheu_ty == PhysicalRegFile) { //PC is to id the instruction for recover exception. //inst is used to map the renamed dest. registers. so that //commit stage can know which reg/RRAT to update data = int(ceil((robExtra + core_params.pc_width + core_params.phy_ireg_width) / BITS_PER_BYTE)); } else { //in RS based OOO, ROB also contains value of destination reg data = int(ceil((robExtra + core_params.pc_width + core_params.phy_ireg_width + core_params.fp_data_width) / BITS_PER_BYTE)); } interface_ip.cache_sz = data * core_params.ROB_size; interface_ip.line_sz = data; interface_ip.assoc = core_params.ROB_assoc; interface_ip.nbanks = core_params.ROB_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = core_params.ROB_tag_width > 0; interface_ip.tag_w = core_params.ROB_tag_width; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.peak_commitW; interface_ip.num_wr_ports = core_params.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer", Core_device, clockRate, core_params.opt_local, core_params.core_ty); ROB->output_data.area *= core_params.num_pipelines; area.set_area(area.get_area() + ROB->local_result.area * core_params.num_pipelines); ROB_height = ROB->local_result.cache_ht; } int_instruction_selection = new selection_logic(xml_data, is_default, core_params.instruction_window_size, core_params.peak_issueW, &interface_ip, "Int Instruction Selection Logic", core_stats.inst_window_wakeup_accesses, clockRate, Core_device, core_params.core_ty); if (core_params.fp_instruction_window_size > 0) { fp_instruction_selection = new selection_logic(xml_data, is_default, core_params.fp_instruction_window_size, core_params.fp_issueW, &interface_ip, "FP Instruction Selection Logic", core_stats.fp_inst_window_wakeup_accesses, clockRate, Core_device, core_params.core_ty); } } } LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_, const CoreParameters & _core_params, const CoreStatistics & _core_stats, bool exist_) : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL), interface_ip(*interface_ip_), core_params(_core_params), core_stats(_core_stats), exist(exist_) { if (!exist) return; int tag; int line; int size; int ldst_opcode = core_params.opcode_width; clockRate = core_params.clockRate; name = "Load/Store Unit"; // Check if there is a dcache child: int i; dcache = NULL; for( i = 0; i < xml_data->nChildNode("component"); i++ ) { XMLNode* childXML = xml_data->getChildNodePtr("component", &i); XMLCSTR type = childXML->getAttribute("type"); if (!type) warnMissingComponentType(childXML->getAttribute("id")); STRCMP(type, "CacheUnit") { XMLCSTR name = childXML->getAttribute("name"); if (strcmp(name, "Data Cache") == 0 || strcmp(name, "dcache") == 0) { dcache = new CacheUnit(childXML, &interface_ip); children.push_back(dcache); } } } /* * LSU--in-order processors do not have separate load queue: unified lsq * partitioned among threads * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ */ tag = ldst_opcode + virtual_address_width + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; line = int(ceil(data_path_width / BITS_PER_BYTE)); size = core_params.store_buffer_size * line * core_params.num_hthreads; interface_ip.cache_sz = size; interface_ip.line_sz = line; interface_ip.assoc = core_params.store_buffer_assoc; interface_ip.nbanks = core_params.store_buffer_nbanks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.memory_ports; interface_ip.num_wr_ports = core_params.memory_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.memory_ports; interface_ip.is_cache = true; interface_ip.pure_ram = false; interface_ip.pure_cam = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device, clockRate, core_params.opt_local, core_params.core_ty); area.set_area(area.get_area() + LSQ->local_result.area); area.set_area(area.get_area()*cdb_overhead); lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead); if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) { tag = ldst_opcode + virtual_address_width + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; line = int(ceil(data_path_width / BITS_PER_BYTE)); size = core_params.load_buffer_size * line * core_params.num_hthreads; interface_ip.cache_sz = size; interface_ip.line_sz = line; interface_ip.assoc = core_params.load_buffer_assoc; interface_ip.nbanks = core_params.load_buffer_nbanks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.memory_ports; interface_ip.num_wr_ports = core_params.memory_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.memory_ports; interface_ip.is_cache = true; interface_ip.pure_ram = false; interface_ip.pure_cam = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device, clockRate, core_params.opt_local, core_params.core_ty); LoadQ->area.set_area(LoadQ->area.get_area() + LoadQ->local_result.area); area.set_area(area.get_area()*cdb_overhead); lsq_height = (LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht) * sqrt(cdb_overhead); } } MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, const CoreParameters & _core_params, const CoreStatistics & _core_stats, bool exist_) : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL), interface_ip(*interface_ip_), core_params(_core_params), core_stats(_core_stats), exist(exist_) { if (!exist) return; int tag; int data; int line; clockRate = core_params.clockRate; name = "Memory Management Unit"; set_params_stats(); // These are shared between ITLB and DTLB interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; //Itlb TLBs are partioned among threads according to Nigara and Nehalem tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; data = physical_address_width - int(floor(log2(virtual_memory_page_size))); line = int(ceil(data / BITS_PER_BYTE)); interface_ip.cache_sz = mem_man_params.itlb_number_entries * line; interface_ip.line_sz = line; interface_ip.assoc = mem_man_params.itlb_assoc; interface_ip.nbanks = mem_man_params.itlb_nbanks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.number_instruction_fetch_ports; interface_ip.throughput = mem_man_params.itlb_throughput / clockRate; interface_ip.latency = mem_man_params.itlb_latency / clockRate; itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device, clockRate, core_params.opt_local, core_params.core_ty); area.set_area(area.get_area() + itlb->local_result.area); //dtlb tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; data = physical_address_width - int(floor(log2(virtual_memory_page_size))); line = int(ceil(data / BITS_PER_BYTE)); interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line; interface_ip.line_sz = line; interface_ip.assoc = mem_man_params.dtlb_assoc; interface_ip.nbanks = mem_man_params.dtlb_nbanks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.memory_ports; interface_ip.num_wr_ports = core_params.memory_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.memory_ports; interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate; interface_ip.latency = mem_man_params.dtlb_latency / clockRate; dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device, clockRate, core_params.opt_local, core_params.core_ty); area.set_area(area.get_area() + dtlb->local_result.area); } void MemManU::set_params_stats() { memset(&mem_man_params, 0, sizeof(MemoryManagementParams)); memset(&mem_man_stats, 0, sizeof(MemoryManagementStats)); int num_children = xml_data->nChildNode("component"); int i; for (i = 0; i < num_children; i++) { XMLNode* child = xml_data->getChildNodePtr("component", &i); XMLCSTR type = child->getAttribute("type"); if (!type) warnMissingComponentType(child->getAttribute("id")); STRCMP(type, "InstructionTLB") { int sub_num_children = child->nChildNode("param"); int j; for (j = 0; j < sub_num_children; j++) { XMLNode* paramNode = child->getChildNodePtr("param", &j); XMLCSTR node_name = paramNode->getAttribute("name"); XMLCSTR value = paramNode->getAttribute("value"); if (!node_name) warnMissingParamName(paramNode->getAttribute("id")); ASSIGN_INT_IF("number_entries", mem_man_params.itlb_number_entries); ASSIGN_FP_IF("latency", mem_man_params.itlb_latency); ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput); ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc); ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks); else { warnUnrecognizedParam(node_name); } } sub_num_children = child->nChildNode("stat"); for (j = 0; j < sub_num_children; j++) { XMLNode* statNode = child->getChildNodePtr("stat", &j); XMLCSTR node_name = statNode->getAttribute("name"); XMLCSTR value = statNode->getAttribute("value"); if (!node_name) warnMissingStatName(statNode->getAttribute("id")); ASSIGN_FP_IF("total_accesses", mem_man_stats.itlb_total_accesses); ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses); ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts); else { warnUnrecognizedStat(node_name); } } } STRCMP(type, "DataTLB") { int sub_num_children = child->nChildNode("param"); int j; for (j = 0; j < sub_num_children; j++) { XMLNode* paramNode = child->getChildNodePtr("param", &j); XMLCSTR node_name = paramNode->getAttribute("name"); XMLCSTR value = paramNode->getAttribute("value"); if (!node_name) warnMissingParamName(paramNode->getAttribute("id")); ASSIGN_INT_IF("number_entries", mem_man_params.dtlb_number_entries); ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency); ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput); ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc); ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks); else { warnUnrecognizedParam(node_name); } } sub_num_children = child->nChildNode("stat"); for (j = 0; j < sub_num_children; j++) { XMLNode* statNode = child->getChildNodePtr("stat", &j); XMLCSTR node_name = statNode->getAttribute("name"); XMLCSTR value = statNode->getAttribute("value"); if (!node_name) warnMissingStatName(statNode->getAttribute("id")); ASSIGN_FP_IF("read_accesses", mem_man_stats.dtlb_read_accesses); ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses); ASSIGN_FP_IF("write_accesses", mem_man_stats.dtlb_write_accesses); ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses); ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts); else { warnUnrecognizedStat(node_name); } } } } } RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_, const CoreParameters & _core_params, const CoreStatistics & _core_stats, bool exist_) : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL), interface_ip(*interface_ip_), core_params(_core_params), core_stats(_core_stats), exist(exist_) { /* * processors have separate architectural register files for each thread. * therefore, the bypass buses need to travel across all the register files. */ if (!exist) return; int data; int line; clockRate = core_params.clockRate; name = "Register File Unit"; //**********************************IRF************************************ data = core_params.int_data_width; line = int(ceil(data / BITS_PER_BYTE)); interface_ip.cache_sz = core_params.num_IRF_entry * line; interface_ip.line_sz = line; interface_ip.assoc = core_params.phy_Regs_IRF_assoc; interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0; interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports; interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File", Core_device, clockRate, core_params.opt_local, core_params.core_ty); IRF->output_data.area *= core_params.num_hthreads * core_params.num_pipelines * cdb_overhead; area.set_area(area.get_area() + IRF->local_result.area * core_params.num_hthreads * core_params.num_pipelines * cdb_overhead); //**********************************FRF************************************ data = core_params.fp_data_width; line = int(ceil(data / BITS_PER_BYTE)); interface_ip.cache_sz = core_params.num_FRF_entry * line; interface_ip.line_sz = line; interface_ip.assoc = core_params.phy_Regs_FRF_assoc; interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0; interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 0; interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports; interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device, clockRate, core_params.opt_local, core_params.core_ty); FRF->output_data.area *= core_params.num_hthreads * core_params.num_fp_pipelines * cdb_overhead; area.set_area(area.get_area() + FRF->local_result.area * core_params.num_hthreads * core_params.num_fp_pipelines * cdb_overhead); int_regfile_height = IRF->local_result.cache_ht * core_params.num_hthreads * sqrt(cdb_overhead); fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads * sqrt(cdb_overhead); //since a EXU is associated with each pipeline, the cdb should not have //longer length. if (core_params.regWindowing) { //*********************************REG_WIN***************************** //ECC, and usually 2 regs are transfered together during window //shifting.Niagara Mega cell data = core_params.int_data_width; line = int(ceil(data / BITS_PER_BYTE)); interface_ip.cache_sz = core_params.register_window_size * IRF->l_ip.cache_sz * core_params.num_hthreads; interface_ip.line_sz = line; interface_ip.assoc = core_params.register_window_assoc; interface_ip.nbanks = core_params.register_window_nbanks; interface_ip.out_w = line * BITS_PER_BYTE; interface_ip.specific_tag = core_params.register_window_tag_width > 0; interface_ip.tag_w = core_params.register_window_tag_width; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.register_window_rw_ports; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = core_params.register_window_throughput / clockRate; interface_ip.latency = core_params.register_window_latency / clockRate; RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device, clockRate, core_params.opt_local, core_params.core_ty); RFWIN->output_data.area *= core_params.num_pipelines; area.set_area(area.get_area() + RFWIN->local_result.area * core_params.num_pipelines); } } EXECU::EXECU(XMLNode* _xml_data, InputParameter* interface_ip_, double lsq_height_, const CoreParameters & _core_params, const CoreStatistics & _core_stats, bool exist_) : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL), exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL), int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL), fpTagBypass(NULL), interface_ip(*interface_ip_), lsq_height(lsq_height_), core_params(_core_params), core_stats(_core_stats), exist(exist_) { if (!exist) return; double fu_height = 0.0; clockRate = core_params.clockRate; name = "Execution Unit"; rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats); if (core_params.core_ty == OOO || (core_params.core_ty == Inorder && core_params.multithreaded)) { scheu = new SchedulerU(xml_data, &interface_ip, core_params, core_stats); area.set_area(area.get_area() + scheu->area.get_area() ); } exeu = new FunctionalUnit(xml_data, &interface_ip, core_params, core_stats, ALU); area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area()); fu_height = exeu->FU_height; if (core_params.num_fpus > 0) { fp_u = new FunctionalUnit(xml_data, &interface_ip, core_params, core_stats, FPU); area.set_area(area.get_area() + fp_u->area.get_area()); } if (core_params.num_muls > 0) { mul = new FunctionalUnit(xml_data, &interface_ip, core_params, core_stats, MUL); area.set_area(area.get_area() + mul->area.get_area()); fu_height += mul->FU_height; } /* * broadcast logic, including int-broadcast; int_tag-broadcast; * fp-broadcast; fp_tag-broadcast * integer by pass has two paths and fp has 3 paths. * on the same bus there are multiple tri-state drivers and muxes that go * to different components on the same bus */ interface_ip.wt = core_params.execu_broadcast_wt; interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type; interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type; interface_ip.throughput = core_params.broadcast_numerator / clockRate; interface_ip.latency = core_params.broadcast_numerator / clockRate; double scheu_Iw_height = 0.0; double scheu_ROB_height = 0.0; double scheu_fp_Iw_height = 0.0; if (scheu) { scheu_Iw_height = scheu->Iw_height; scheu_ROB_height = scheu->ROB_height; scheu_fp_Iw_height = scheu->fp_Iw_height; } // Common bypass logic parameters double base_w = core_params.execu_bypass_base_width; double base_h = core_params.execu_bypass_base_height; int level = core_params.execu_bypass_start_wiring_level; double route_over_perc = core_params.execu_bypass_route_over_perc; Wire_type wire_type = core_params.execu_bypass_wire_type; int data_w; double len; if (core_params.core_ty == Inorder) { data_w = int(ceil(data_path_width / 32.0)*32); len = rfu->int_regfile_height + exeu->FU_height + lsq_height; int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.perThreadState; len = rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu_Iw_height; intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); if (core_params.num_muls > 0) { data_w = int(ceil(data_path_width / 32.0)*32*1.5); len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height; int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.perThreadState; len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu_Iw_height; intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); } if (core_params.num_fpus > 0) { data_w = int(ceil(data_path_width / 32.0)*32*1.5); len = rfu->fp_regfile_height + fp_u->FU_height; fp_bypass = new Interconnect(xml_data, "FP Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.perThreadState; len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu_Iw_height; fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); } } else {//OOO if (core_params.scheu_ty == PhysicalRegFile) { /* For physical register based OOO, * data broadcast interconnects cover across functional units, lsq, * inst windows and register files, * while tag broadcast interconnects also cover across ROB */ data_w = int(ceil(core_params.int_data_width)); len = rfu->int_regfile_height + exeu->FU_height + lsq_height; int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.phy_ireg_width; len = rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu_Iw_height + scheu_ROB_height; intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); if (core_params.num_muls > 0) { data_w = int(ceil(core_params.int_data_width)); len = rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height; int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.phy_ireg_width; len = rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu_Iw_height + scheu_ROB_height; intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); } if (core_params.num_fpus > 0) { data_w = int(ceil(core_params.fp_data_width)); len = rfu->fp_regfile_height + fp_u->FU_height; fp_bypass = new Interconnect(xml_data, "FP Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.phy_freg_width; len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu_fp_Iw_height + scheu_ROB_height; fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); } } else { /* * In RS based processor both data and tag are broadcast together, * covering functional units, lsq, nst windows, register files, and ROBs */ data_w = int(ceil(core_params.int_data_width)); len = rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu_Iw_height + scheu_ROB_height; int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.phy_ireg_width; len = rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu_Iw_height + scheu_ROB_height; intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); if (core_params.num_muls > 0) { data_w = int(ceil(core_params.int_data_width)); len = rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu_Iw_height + scheu_ROB_height; int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.phy_ireg_width; len = rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu_Iw_height + scheu_ROB_height; intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); } if (core_params.num_fpus > 0) { data_w = int(ceil(core_params.fp_data_width)); len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu_fp_Iw_height + scheu_ROB_height; fp_bypass = new Interconnect(xml_data, "FP Bypass Data", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); data_w = core_params.phy_freg_width; len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu_fp_Iw_height + scheu_ROB_height; fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", Core_device, base_w, base_h, data_w, len, &interface_ip, level, clockRate, false, route_over_perc, core_params.opt_local, core_params.core_ty, wire_type); } } } if (int_bypass) { children.push_back(int_bypass); } if (intTagBypass) { children.push_back(intTagBypass); } if (int_mul_bypass) { children.push_back(int_mul_bypass); } if (intTag_mul_Bypass) { children.push_back(intTag_mul_Bypass); } if (fp_bypass) { children.push_back(fp_bypass); } if (fpTagBypass) { children.push_back(fpTagBypass); } area.set_area(area.get_area() + int_bypass->area.get_area() + intTagBypass->area.get_area()); if (core_params.num_muls > 0) { area.set_area(area.get_area() + int_mul_bypass->area.get_area() + intTag_mul_Bypass->area.get_area()); } if (core_params.num_fpus > 0) { area.set_area(area.get_area() + fp_bypass->area.get_area() + fpTagBypass->area.get_area()); } } RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_, const CoreParameters & _core_params, const CoreStatistics & _core_stats, bool exist_) : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL), fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL), RAHT(NULL), interface_ip(*interface_ip_), core_params(_core_params), core_stats(_core_stats), exist(exist_) { if (!exist) return; int tag; int data; int out_w; int size; // Assumption: // We make an implicit design assumption based on the specific structure // that is being modeled. // 1. RAM-based RATs are direct mapped. However, if the associated // scheduler is a reservation station style, the RATs are fully // associative. // 2. Non-CAM based RATs and free lists do not have tags. // 3. Free lists are direct mapped. const int RAM_BASED_RAT_ASSOC = 1; const int RS_RAT_ASSOC = 0; const int NON_CAM_BASED_TAG_WIDTH = 0; const int FREELIST_ASSOC = 1; clockRate = core_params.clockRate; name = "Rename Unit"; if (core_params.core_ty == OOO) { //integer pipeline if (core_params.scheu_ty == PhysicalRegFile) { if (core_params.rm_ty == RAMbased) { //FRAT with global checkpointing (GCs) please see paper tech //report for detailed explaintions data = int(ceil(core_params.phy_ireg_width * (1 + core_params.globalCheckpoint) / BITS_PER_BYTE)); out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); size = data * core_params.archi_Regs_IRF_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = RAM_BASED_RAT_ASSOC; interface_ip.nbanks = core_params.front_rat_nbanks; interface_ip.out_w = out_w * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.front_rat_rw_ports; interface_ip.num_rd_ports = NUM_SOURCE_OPERANDS * core_params.decodeW; interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); iFRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + iFRAT->area.get_area()); //FRAT floating point data = int(ceil(core_params.phy_freg_width * (1 + core_params.globalCheckpoint) / BITS_PER_BYTE)); out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); size = data * core_params.archi_Regs_FRF_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = RAM_BASED_RAT_ASSOC; interface_ip.nbanks = core_params.front_rat_nbanks; interface_ip.out_w = out_w * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.front_rat_rw_ports; interface_ip.num_rd_ports = NUM_SOURCE_OPERANDS * core_params.fp_decodeW; interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); fFRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + fFRAT->area.get_area()); } else if ((core_params.rm_ty == CAMbased)) { //IRAT tag = core_params.arch_ireg_width; //the address of CAM needed to be sent out data = int(ceil((core_params.arch_ireg_width + 1 * core_params.globalCheckpoint) / BITS_PER_BYTE)); out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE)); size = data * core_params.phy_Regs_IRF_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = CAM_ASSOC; interface_ip.nbanks = core_params.front_rat_nbanks; interface_ip.out_w = out_w * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.front_rat_rw_ports; interface_ip.num_rd_ports = core_params.decodeW; interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = NUM_SOURCE_OPERANDS * core_params.decodeW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); iFRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + iFRAT->area.get_area()); //FRAT for FP tag = core_params.arch_freg_width; //the address of CAM needed to be sent out data = int(ceil((core_params.arch_freg_width + 1 * core_params.globalCheckpoint) / BITS_PER_BYTE)); out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); size = data * core_params.phy_Regs_FRF_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = CAM_ASSOC; interface_ip.nbanks = core_params.front_rat_nbanks; interface_ip.out_w = out_w * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.front_rat_rw_ports; interface_ip.num_rd_ports = core_params.fp_decodeW; interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = NUM_SOURCE_OPERANDS * core_params.fp_decodeW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); fFRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + fFRAT->area.get_area()); } //RRAT is always RAM based, does not have GCs, and is used only for //record latest non-speculative mapping data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); size = data * core_params.archi_Regs_IRF_size * NUM_SOURCE_OPERANDS; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = RAM_BASED_RAT_ASSOC; interface_ip.nbanks = core_params.retire_rat_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; interface_ip.num_rd_ports = core_params.commitW; interface_ip.num_wr_ports = core_params.commitW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); iRRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + iRRAT->area.get_area()); //RRAT for FP data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); size = data * core_params.archi_Regs_FRF_size * NUM_SOURCE_OPERANDS; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = RAM_BASED_RAT_ASSOC; interface_ip.nbanks = core_params.retire_rat_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; interface_ip.num_rd_ports = core_params.fp_decodeW; interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); fRRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + fRRAT->area.get_area()); //Freelist of renaming unit always RAM based //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist // 2)When instruction commits the Phyregisters/ROB needed to be recycled. //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); size = data * core_params.num_ifreelist_entries; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = FREELIST_ASSOC; interface_ip.nbanks = core_params.freelist_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.freelist_rw_ports; interface_ip.num_rd_ports = core_params.decodeW; interface_ip.num_wr_ports = core_params.decodeW - 1 + core_params.commitW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List", Core_device, clockRate, core_params.opt_local, core_params.core_ty); ifreeL->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + ifreeL->area.get_area()); //freelist for FP data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); size = data * core_params.num_ffreelist_entries; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = FREELIST_ASSOC; interface_ip.nbanks = core_params.freelist_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.freelist_rw_ports; interface_ip.num_rd_ports = core_params.fp_decodeW; interface_ip.num_wr_ports = core_params.fp_decodeW - 1 + core_params.commitW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List", Core_device, clockRate, core_params.opt_local, core_params.core_ty); ffreeL->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + ffreeL->area.get_area()); } else if (core_params.scheu_ty == ReservationStation) { if (core_params.rm_ty == RAMbased) { tag = core_params.phy_ireg_width; data = int(ceil(core_params.phy_ireg_width * (1 + core_params.globalCheckpoint) / BITS_PER_BYTE)); out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); size = data * core_params.archi_Regs_IRF_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = RS_RAT_ASSOC; interface_ip.nbanks = core_params.front_rat_nbanks; interface_ip.out_w = out_w * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.front_rat_rw_ports; interface_ip.num_rd_ports = NUM_SOURCE_OPERANDS * core_params.decodeW; interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.commitW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); iFRAT->local_result.adjust_area(); iFRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + iFRAT->area.get_area()); //FP tag = core_params.phy_freg_width; data = int(ceil(core_params.phy_freg_width * (1 + core_params.globalCheckpoint) / BITS_PER_BYTE)); out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); size = data * core_params.archi_Regs_FRF_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = RS_RAT_ASSOC; interface_ip.nbanks = core_params.front_rat_nbanks; interface_ip.out_w = out_w * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.front_rat_rw_ports; interface_ip.num_rd_ports = NUM_SOURCE_OPERANDS * core_params.fp_decodeW; interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = core_params.fp_issueW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); fFRAT->local_result.adjust_area(); fFRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + fFRAT->area.get_area()); } else if ((core_params.rm_ty == CAMbased)) { //FRAT //the address of CAM needed to be sent out tag = core_params.arch_ireg_width; data = int(ceil (core_params.arch_ireg_width + 1 * core_params.globalCheckpoint / BITS_PER_BYTE)); out_w = int(ceil (core_params.arch_ireg_width / BITS_PER_BYTE)); size = data * core_params.phy_Regs_IRF_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = CAM_ASSOC; interface_ip.nbanks = core_params.front_rat_nbanks; interface_ip.out_w = out_w * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.front_rat_rw_ports; interface_ip.num_rd_ports = core_params.decodeW; interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = NUM_SOURCE_OPERANDS * core_params.decodeW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); iFRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + iFRAT->area.get_area()); //FRAT tag = core_params.arch_freg_width; //the address of CAM needed to be sent out data = int(ceil(core_params.arch_freg_width + 1 * core_params.globalCheckpoint / BITS_PER_BYTE)); out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); size = data * core_params.phy_Regs_FRF_size; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = CAM_ASSOC; interface_ip.nbanks = core_params.front_rat_nbanks; interface_ip.out_w = out_w * BITS_PER_BYTE; interface_ip.specific_tag = tag > 0; interface_ip.tag_w = tag; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.front_rat_rw_ports; interface_ip.num_rd_ports = core_params.decodeW; interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = NUM_SOURCE_OPERANDS * core_params.fp_decodeW; interface_ip.is_cache = true; interface_ip.pure_cam = false; interface_ip.pure_ram = false; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", Core_device, clockRate, core_params.opt_local, core_params.core_ty); fFRAT->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + fFRAT->area.get_area()); } //No RRAT for RS based OOO //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); size = data * core_params.num_ifreelist_entries; interface_ip.cache_sz = size; interface_ip.line_sz = data; interface_ip.assoc = FREELIST_ASSOC; interface_ip.nbanks = core_params.freelist_nbanks; interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = core_params.freelist_rw_ports; interface_ip.num_rd_ports = core_params.decodeW; interface_ip.num_wr_ports = core_params.decodeW - 1 + core_params.commitW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; interface_ip.is_cache = false; interface_ip.pure_cam = false; interface_ip.pure_ram = true; interface_ip.throughput = 1.0 / clockRate; interface_ip.latency = 1.0 / clockRate; ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List", Core_device, clockRate, core_params.opt_local, core_params.core_ty); ifreeL->output_data.area *= core_params.num_hthreads; area.set_area(area.get_area() + ifreeL->area.get_area()); } } idcl = new dep_resource_conflict_check(xml_data, "Instruction Dependency Check?", &interface_ip, core_params, core_params.phy_ireg_width, clockRate); fdcl = new dep_resource_conflict_check(xml_data, "FP Dependency Check?", &interface_ip, core_params, core_params.phy_freg_width, clockRate); } Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_) : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL), exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL), ithCore(_ithCore), interface_ip(*interface_ip_) { ostringstream os; os << ithCore; name = "Core " + os.str(); int i = 0; XMLNode* childXML; for (i = 0; i < xml_data->nChildNode("component"); i++) { childXML = xml_data->getChildNodePtr("component", &i); XMLCSTR type = childXML->getAttribute("type"); if (!type) warnMissingComponentType(childXML->getAttribute("id")); STRCMP(type, "CacheUnit") { XMLCSTR comp_name = childXML->getAttribute("id"); if (!comp_name) continue; STRCMP(comp_name, "system.L20") { l2cache = new CacheUnit(childXML, &interface_ip); children.push_back(l2cache); } } } set_core_param(); clockRate = core_params.clockRate; ifu = new InstFetchU(xml_data, &interface_ip, core_params, core_stats); children.push_back(ifu); lsu = new LoadStoreU(xml_data, &interface_ip, core_params, core_stats); children.push_back(lsu); mmu = new MemManU(xml_data, &interface_ip, core_params, core_stats); children.push_back(mmu); exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height, core_params, core_stats); children.push_back(exu); undiffCore = new UndiffCore(xml_data, &interface_ip, core_params); children.push_back(undiffCore); if (core_params.core_ty == OOO) { rnu = new RENAMINGU(xml_data, &interface_ip, core_params, core_stats); children.push_back(rnu); } corepipe = new Pipeline(xml_data, &interface_ip, core_params); children.push_back(corepipe); double pipeline_area_per_unit; if (core_params.core_ty == OOO) { pipeline_area_per_unit = (corepipe->area.get_area() * core_params.num_pipelines) / 5.0; if (rnu->exist) { rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); } } else { pipeline_area_per_unit = (corepipe->area.get_area() * core_params.num_pipelines) / 4.0; } // Move all of this to computeArea //area.set_area(area.get_area()+ corepipe->area.get_area()); if (ifu->exist) { ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + ifu->area.get_area()); } if (lsu->exist) { lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + lsu->area.get_area()); } if (exu->exist) { exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + exu->area.get_area()); } if (mmu->exist) { mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + mmu->area.get_area()); } if (core_params.core_ty == OOO) { if (rnu->exist) { area.set_area(area.get_area() + rnu->area.get_area()); } } if (undiffCore->exist) { area.set_area(area.get_area() + undiffCore->area.get_area()); } if (l2cache) { area.set_area(area.get_area() + l2cache->area.get_area()); } } void BranchPredictor::computeEnergy() { if (!exist) return; // ASSUMPTION: All instructions access the branch predictors at Fetch and // only branch instrucions update the predictors regardless // of the correctness of the prediction. double tdp_read_accesses = core_params.predictionW * core_stats.BR_duty_cycle; globalBPT->tdp_stats.reset(); globalBPT->tdp_stats.readAc.access = tdp_read_accesses; globalBPT->tdp_stats.writeAc.access = 0; globalBPT->rtp_stats.reset(); globalBPT->rtp_stats.readAc.access = core_stats.total_instructions; globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; globalBPT->power_t.reset(); globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic * globalBPT->tdp_stats.readAc.access + globalBPT->local_result.power.writeOp.dynamic * globalBPT->tdp_stats.writeAc.access; globalBPT->power_t = globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; globalBPT->rt_power.reset(); globalBPT->rt_power.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic * globalBPT->rtp_stats.readAc.access + globalBPT->local_result.power.writeOp.dynamic * globalBPT->rtp_stats.writeAc.access; L1_localBPT->tdp_stats.reset(); L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses; L1_localBPT->tdp_stats.writeAc.access = 0; L1_localBPT->rtp_stats.reset(); L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions; L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; L1_localBPT->power_t.reset(); L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic * L1_localBPT->tdp_stats.readAc.access + L1_localBPT->local_result.power.writeOp.dynamic * L1_localBPT->tdp_stats.writeAc.access; L1_localBPT->power_t = L1_localBPT->power_t + L1_localBPT->local_result.power * pppm_lkg; L1_localBPT->rt_power.reset(); L1_localBPT->rt_power.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic * L1_localBPT->rtp_stats.readAc.access + L1_localBPT->local_result.power.writeOp.dynamic * L1_localBPT->rtp_stats.writeAc.access; L2_localBPT->tdp_stats.reset(); L2_localBPT->tdp_stats.readAc.access = tdp_read_accesses; L2_localBPT->tdp_stats.writeAc.access = 0; L2_localBPT->rtp_stats.reset(); L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions; L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; L2_localBPT->power_t.reset(); L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic * L2_localBPT->tdp_stats.readAc.access + L2_localBPT->local_result.power.writeOp.dynamic * L2_localBPT->tdp_stats.writeAc.access; L2_localBPT->power_t = L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; L2_localBPT->rt_power.reset(); L2_localBPT->rt_power.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic * L2_localBPT->rtp_stats.readAc.access + L2_localBPT->local_result.power.writeOp.dynamic * L2_localBPT->rtp_stats.writeAc.access; chooser->tdp_stats.reset(); chooser->tdp_stats.readAc.access = tdp_read_accesses; chooser->tdp_stats.writeAc.access = 0; chooser->rtp_stats.reset(); chooser->rtp_stats.readAc.access = core_stats.total_instructions; chooser->rtp_stats.writeAc.access = core_stats.branch_instructions; chooser->power_t.reset(); chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic * chooser->tdp_stats.readAc.access + chooser->local_result.power.writeOp.dynamic * chooser->tdp_stats.writeAc.access; chooser->power_t = chooser->power_t + chooser->local_result.power * pppm_lkg; chooser->rt_power.reset(); chooser->rt_power.readOp.dynamic += chooser->local_result.power.readOp.dynamic * chooser->rtp_stats.readAc.access + chooser->local_result.power.writeOp.dynamic * chooser->rtp_stats.writeAc.access; RAS->tdp_stats.reset(); RAS->tdp_stats.readAc.access = tdp_read_accesses; RAS->tdp_stats.writeAc.access = 0; RAS->rtp_stats.reset(); RAS->rtp_stats.readAc.access = core_stats.function_calls; RAS->rtp_stats.writeAc.access = core_stats.function_calls; RAS->power_t.reset(); RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access + RAS->local_result.power.writeOp.dynamic * RAS->tdp_stats.writeAc.access; RAS->power_t = RAS->power_t + RAS->local_result.power * core_params.pppm_lkg_multhread; RAS->rt_power.reset(); RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic * RAS->rtp_stats.readAc.access + RAS->local_result.power.writeOp.dynamic * RAS->rtp_stats.writeAc.access; output_data.reset(); if (globalBPT) { globalBPT->output_data.peak_dynamic_power = globalBPT->power_t.readOp.dynamic * clockRate; globalBPT->output_data.runtime_dynamic_energy = globalBPT->rt_power.readOp.dynamic; output_data += globalBPT->output_data; } if (L1_localBPT) { L1_localBPT->output_data.peak_dynamic_power = L1_localBPT->power_t.readOp.dynamic * clockRate; L1_localBPT->output_data.runtime_dynamic_energy = L1_localBPT->rt_power.readOp.dynamic; output_data += L1_localBPT->output_data; } if (L2_localBPT) { L2_localBPT->output_data.peak_dynamic_power = L2_localBPT->power_t.readOp.dynamic * clockRate; L2_localBPT->output_data.runtime_dynamic_energy = L2_localBPT->rt_power.readOp.dynamic; output_data += L2_localBPT->output_data; } if (chooser) { chooser->output_data.peak_dynamic_power = chooser->power_t.readOp.dynamic * clockRate; chooser->output_data.runtime_dynamic_energy = chooser->rt_power.readOp.dynamic; output_data += chooser->output_data; } if (RAS) { RAS->output_data.peak_dynamic_power = RAS->power_t.readOp.dynamic * clockRate; RAS->output_data.subthreshold_leakage_power = RAS->power_t.readOp.leakage * core_params.num_hthreads; RAS->output_data.gate_leakage_power = RAS->power_t.readOp.gate_leakage * core_params.num_hthreads; RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic; output_data += RAS->output_data; } } void BranchPredictor::displayData(uint32_t indent, int plevel) { if (!exist) return; McPATComponent::displayData(indent, plevel); globalBPT->displayData(indent + 4, plevel); L1_localBPT->displayData(indent + 4, plevel); L2_localBPT->displayData(indent + 4, plevel); chooser->displayData(indent + 4, plevel); RAS->displayData(indent + 4, plevel); } void InstFetchU::computeEnergy() { if (!exist) return; if (BPT) { BPT->computeEnergy(); } IB->tdp_stats.reset(); IB->tdp_stats.readAc.access = core_params.peak_issueW; IB->tdp_stats.writeAc.access = core_params.peak_issueW; IB->rtp_stats.reset(); IB->rtp_stats.readAc.access = core_stats.total_instructions; IB->rtp_stats.writeAc.access = core_stats.total_instructions; IB->power_t.reset(); IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic * IB->tdp_stats.readAc.access + IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access; IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg; IB->rt_power.reset(); IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic * IB->rtp_stats.readAc.access + IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access; if (core_params.predictionW > 0) { BTB->tdp_stats.reset(); BTB->tdp_stats.readAc.access = core_params.predictionW; BTB->tdp_stats.writeAc.access = 0; BTB->rtp_stats.reset(); BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses; BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses; BTB->power_t.reset(); BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic * BTB->tdp_stats.readAc.access + BTB->local_result.power.writeOp.dynamic * BTB->tdp_stats.writeAc.access; BTB->rt_power.reset(); BTB->rt_power.readOp.dynamic += BTB->local_result.power.readOp.dynamic * BTB->rtp_stats.readAc.access + BTB->local_result.power.writeOp.dynamic * BTB->rtp_stats.writeAc.access; } ID_inst->tdp_stats.reset(); ID_inst->tdp_stats.readAc.access = core_params.decodeW; ID_inst->power_t.reset(); ID_inst->power_t = ID_misc->power; ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic * ID_inst->tdp_stats.readAc.access; ID_inst->rtp_stats.reset(); ID_inst->rtp_stats.readAc.access = core_stats.total_instructions; ID_inst->rt_power.reset(); ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic * ID_inst->rtp_stats.readAc.access; ID_operand->tdp_stats.reset(); ID_operand->tdp_stats.readAc.access = core_params.decodeW; ID_operand->power_t.reset(); ID_operand->power_t = ID_misc->power; ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic * ID_operand->tdp_stats.readAc.access; ID_operand->rtp_stats.reset(); ID_operand->rtp_stats.readAc.access = core_stats.total_instructions; ID_operand->rt_power.reset(); ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic * ID_operand->rtp_stats.readAc.access; ID_misc->tdp_stats.reset(); ID_misc->tdp_stats.readAc.access = core_params.decodeW; ID_misc->power_t.reset(); ID_misc->power_t = ID_misc->power; ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic * ID_misc->tdp_stats.readAc.access; ID_misc->rtp_stats.reset(); ID_misc->rtp_stats.readAc.access = core_stats.total_instructions; ID_misc->rt_power.reset(); ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic * ID_misc->rtp_stats.readAc.access; power.reset(); rt_power.reset(); McPATComponent::computeEnergy(); output_data.reset(); if (icache) { output_data += icache->output_data; } if (IB) { IB->output_data.peak_dynamic_power = IB->power_t.readOp.dynamic * clockRate; IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic; output_data += IB->output_data; } if (BTB) { BTB->output_data.peak_dynamic_power = BTB->power_t.readOp.dynamic * clockRate; BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic; output_data += BTB->output_data; } if (BPT) { output_data += BPT->output_data; } if (ID_inst) { ID_inst->output_data.peak_dynamic_power = ID_inst->power_t.readOp.dynamic * clockRate; ID_inst->output_data.runtime_dynamic_energy = ID_inst->rt_power.readOp.dynamic; output_data += ID_inst->output_data; } if (ID_operand) { ID_operand->output_data.peak_dynamic_power = ID_operand->power_t.readOp.dynamic * clockRate; ID_operand->output_data.runtime_dynamic_energy = ID_operand->rt_power.readOp.dynamic; output_data += ID_operand->output_data; } if (ID_misc) { ID_misc->output_data.peak_dynamic_power = ID_misc->power_t.readOp.dynamic * clockRate; ID_misc->output_data.runtime_dynamic_energy = ID_misc->rt_power.readOp.dynamic; output_data += ID_misc->output_data; } } void InstFetchU::displayData(uint32_t indent, int plevel) { if (!exist) return; McPATComponent::displayData(indent, plevel); if (core_params.predictionW > 0) { BTB->displayData(indent + 4, plevel); if (BPT->exist) { BPT->displayData(indent + 4, plevel); } } IB->displayData(indent + 4, plevel); ID_inst->displayData(indent + 4, plevel); ID_operand->displayData(indent + 4, plevel); ID_misc->displayData(indent + 4, plevel); } void RENAMINGU::computeEnergy() { if (!exist) return; idcl->tdp_stats.reset(); idcl->rtp_stats.reset(); idcl->power_t.reset(); idcl->rt_power.reset(); if (core_params.core_ty == OOO) { idcl->tdp_stats.readAc.access = core_params.decodeW; idcl->rtp_stats.readAc.access = 3 * core_params.decodeW * core_params.decodeW * core_stats.rename_reads; } else if (core_params.issueW > 1) { idcl->tdp_stats.readAc.access = core_params.decodeW; idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions; } idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access * idcl->power.readOp.dynamic; idcl->power_t.readOp.leakage = idcl->power.readOp.leakage * core_params.num_hthreads; idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage * core_params.num_hthreads; idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access * idcl->power.readOp.dynamic; fdcl->tdp_stats.reset(); fdcl->rtp_stats.reset(); fdcl->power_t.reset(); fdcl->rt_power.reset(); if (core_params.core_ty == OOO) { fdcl->tdp_stats.readAc.access = core_params.decodeW; fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW * core_params.fp_issueW * core_stats.fp_rename_writes; } else if (core_params.issueW > 1) { fdcl->tdp_stats.readAc.access = core_params.decodeW; fdcl->rtp_stats.readAc.access = core_stats.fp_instructions; } fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access * fdcl->power.readOp.dynamic; fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage * core_params.num_hthreads; fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage * core_params.num_hthreads; fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access * fdcl->power.readOp.dynamic; if (iRRAT) { iRRAT->tdp_stats.reset(); iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports; iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports; iRRAT->rtp_stats.reset(); iRRAT->rtp_stats.readAc.access = core_stats.rename_writes; iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes; iRRAT->power_t.reset(); iRRAT->power_t.readOp.dynamic += iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic + iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; iRRAT->rt_power.reset(); iRRAT->rt_power.readOp.dynamic += iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic + iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; iRRAT->power_t.readOp.leakage = iRRAT->power.readOp.leakage * core_params.num_hthreads; iRRAT->power_t.readOp.gate_leakage = iRRAT->power.readOp.gate_leakage * core_params.num_hthreads; } if (ifreeL) { ifreeL->tdp_stats.reset(); ifreeL->tdp_stats.readAc.access = core_params.decodeW; ifreeL->tdp_stats.writeAc.access = core_params.decodeW; ifreeL->rtp_stats.reset(); if (core_params.scheu_ty == PhysicalRegFile) { ifreeL->rtp_stats.readAc.access = core_stats.rename_reads; ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes; } else if (core_params.scheu_ty == ReservationStation) { ifreeL->rtp_stats.readAc.access = core_stats.rename_reads + core_stats.fp_rename_reads; ifreeL->rtp_stats.writeAc.access = 2 * (core_stats.rename_writes + core_stats.fp_rename_writes); } ifreeL->power_t.reset(); ifreeL->power_t.readOp.dynamic += ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic + ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; ifreeL->rt_power.reset(); ifreeL->rt_power.readOp.dynamic += ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic + ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; ifreeL->power_t.readOp.leakage = ifreeL->power.readOp.leakage * core_params.num_hthreads; ifreeL->power_t.readOp.gate_leakage = ifreeL->power.readOp.gate_leakage * core_params.num_hthreads; } if (fRRAT) { fRRAT->tdp_stats.reset(); fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports; fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports; fRRAT->rtp_stats.reset(); fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes; fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; fRRAT->power_t.reset(); fRRAT->power_t.readOp.dynamic += fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic + fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; fRRAT->rt_power.reset(); fRRAT->rt_power.readOp.dynamic += fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic + fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; fRRAT->power_t.readOp.leakage = fRRAT->power.readOp.leakage * core_params.num_hthreads; fRRAT->power_t.readOp.gate_leakage = fRRAT->power.readOp.gate_leakage * core_params.num_hthreads; } if (ffreeL) { ffreeL->tdp_stats.reset(); ffreeL->tdp_stats.readAc.access = core_params.decodeW; ffreeL->tdp_stats.writeAc.access = core_params.decodeW; ffreeL->rtp_stats.reset(); ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads; ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes; ffreeL->power_t.reset(); ffreeL->power_t.readOp.dynamic += ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic + ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; ffreeL->rt_power.reset(); ffreeL->rt_power.readOp.dynamic += ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic + ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; ffreeL->power_t.readOp.leakage = ffreeL->power.readOp.leakage * core_params.num_hthreads; ffreeL->power_t.readOp.gate_leakage = ffreeL->power.readOp.gate_leakage * core_params.num_hthreads; } if (iFRAT) { tdp_stats.reset(); if (core_params.rm_ty == RAMbased) { iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports; iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports; } else if ((core_params.rm_ty == CAMbased)) { iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports; iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; } rtp_stats.reset(); iFRAT->rtp_stats.readAc.access = core_stats.rename_reads; iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes; if (core_params.scheu_ty == ReservationStation && core_params.rm_ty == RAMbased) { iFRAT->rtp_stats.searchAc.access = core_stats.committed_int_instructions; } iFRAT->power_t.reset(); iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access * (iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) + iFRAT->tdp_stats.writeAc.access * iFRAT->local_result.power.writeOp.dynamic + iFRAT->tdp_stats.searchAc.access * iFRAT->local_result.power.searchOp.dynamic; iFRAT->power_t.readOp.leakage = iFRAT->power.readOp.leakage * core_params.num_hthreads; iFRAT->power_t.readOp.gate_leakage = iFRAT->power.readOp.gate_leakage * core_params.num_hthreads; iFRAT->rt_power.reset(); iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access * (iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) + iFRAT->rtp_stats.writeAc.access * iFRAT->local_result.power.writeOp.dynamic + iFRAT->rtp_stats.searchAc.access * iFRAT->local_result.power.searchOp.dynamic; } if (fFRAT) { tdp_stats.reset(); fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports; if ((core_params.rm_ty == CAMbased)) { fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports; } else if (core_params.rm_ty == RAMbased) { fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports; if (core_params.scheu_ty == ReservationStation) { fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports; } } rtp_stats.reset(); fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads; fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; if (core_params.scheu_ty == ReservationStation && core_params.rm_ty == RAMbased) { fFRAT->rtp_stats.searchAc.access = core_stats.committed_fp_instructions; } fFRAT->power_t.reset(); fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access * (fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) + fFRAT->tdp_stats.writeAc.access * fFRAT->local_result.power.writeOp.dynamic + fFRAT->tdp_stats.searchAc.access * fFRAT->local_result.power.searchOp.dynamic; fFRAT->power_t.readOp.leakage = fFRAT->power.readOp.leakage * core_params.num_hthreads; fFRAT->power_t.readOp.gate_leakage = fFRAT->power.readOp.gate_leakage * core_params.num_hthreads; fFRAT->rt_power.reset(); fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access * (fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) + fFRAT->rtp_stats.writeAc.access * fFRAT->local_result.power.writeOp.dynamic + fFRAT->rtp_stats.searchAc.access * fFRAT->local_result.power.searchOp.dynamic; } output_data.reset(); if (iFRAT) { iFRAT->output_data.peak_dynamic_power = iFRAT->power_t.readOp.dynamic * clockRate; iFRAT->output_data.subthreshold_leakage_power = iFRAT->power_t.readOp.leakage; iFRAT->output_data.gate_leakage_power = iFRAT->power_t.readOp.gate_leakage; iFRAT->output_data.runtime_dynamic_energy = iFRAT->rt_power.readOp.dynamic; output_data += iFRAT->output_data; } if (fFRAT) { fFRAT->output_data.peak_dynamic_power = fFRAT->power_t.readOp.dynamic * clockRate; fFRAT->output_data.subthreshold_leakage_power = fFRAT->power_t.readOp.leakage; fFRAT->output_data.gate_leakage_power = fFRAT->power_t.readOp.gate_leakage; fFRAT->output_data.runtime_dynamic_energy = fFRAT->rt_power.readOp.dynamic; output_data += fFRAT->output_data; } if (iRRAT) { iRRAT->output_data.peak_dynamic_power = iRRAT->power_t.readOp.dynamic * clockRate; iRRAT->output_data.subthreshold_leakage_power = iRRAT->power_t.readOp.leakage; iRRAT->output_data.gate_leakage_power = iRRAT->power_t.readOp.gate_leakage; iRRAT->output_data.runtime_dynamic_energy = iRRAT->rt_power.readOp.dynamic; output_data += iRRAT->output_data; } if (fRRAT) { fRRAT->output_data.peak_dynamic_power = fRRAT->power_t.readOp.dynamic * clockRate; fRRAT->output_data.subthreshold_leakage_power = fRRAT->power_t.readOp.leakage; fRRAT->output_data.gate_leakage_power = fRRAT->power_t.readOp.gate_leakage; fRRAT->output_data.runtime_dynamic_energy = fRRAT->rt_power.readOp.dynamic; output_data += fRRAT->output_data; } if (ifreeL) { ifreeL->output_data.peak_dynamic_power = ifreeL->power_t.readOp.dynamic * clockRate; ifreeL->output_data.subthreshold_leakage_power = ifreeL->power_t.readOp.leakage; ifreeL->output_data.gate_leakage_power = ifreeL->power_t.readOp.gate_leakage; ifreeL->output_data.runtime_dynamic_energy = ifreeL->rt_power.readOp.dynamic; output_data += ifreeL->output_data; } if (ffreeL) { ffreeL->output_data.peak_dynamic_power = ffreeL->power_t.readOp.dynamic * clockRate; ffreeL->output_data.subthreshold_leakage_power = ffreeL->power_t.readOp.leakage; ffreeL->output_data.gate_leakage_power = ffreeL->power_t.readOp.gate_leakage; ffreeL->output_data.runtime_dynamic_energy = ffreeL->rt_power.readOp.dynamic; output_data += ffreeL->output_data; } if (idcl) { idcl->output_data.peak_dynamic_power = idcl->power_t.readOp.dynamic * clockRate; idcl->output_data.subthreshold_leakage_power = idcl->power_t.readOp.leakage; idcl->output_data.gate_leakage_power = idcl->power_t.readOp.gate_leakage; idcl->output_data.runtime_dynamic_energy = idcl->rt_power.readOp.dynamic; output_data += idcl->output_data; } if (fdcl) { fdcl->output_data.peak_dynamic_power = fdcl->power_t.readOp.dynamic * clockRate; fdcl->output_data.subthreshold_leakage_power = fdcl->power_t.readOp.leakage; fdcl->output_data.gate_leakage_power = fdcl->power_t.readOp.gate_leakage; fdcl->output_data.runtime_dynamic_energy = fdcl->rt_power.readOp.dynamic; output_data += fdcl->output_data; } if (RAHT) { output_data += RAHT->output_data; } } void RENAMINGU::displayData(uint32_t indent, int plevel) { if (!exist) return; McPATComponent::displayData(indent, plevel); if (core_params.core_ty == OOO) { iFRAT->displayData(indent + 4, plevel); fFRAT->displayData(indent + 4, plevel); ifreeL->displayData(indent + 4, plevel); if (core_params.scheu_ty == PhysicalRegFile) { iRRAT->displayData(indent + 4, plevel); fRRAT->displayData(indent + 4, plevel); ffreeL->displayData(indent + 4, plevel); } } idcl->displayData(indent + 4, plevel); fdcl->displayData(indent + 4, plevel); } void SchedulerU::computeEnergy() { if (!exist) return; double ROB_duty_cycle; ROB_duty_cycle = 1; if (int_instruction_selection) { int_instruction_selection->computeEnergy(); } if (fp_instruction_selection) { fp_instruction_selection->computeEnergy(); } if (int_inst_window) { int_inst_window->tdp_stats.reset(); int_inst_window->rtp_stats.reset(); int_inst_window->power_t.reset(); int_inst_window->rt_power.reset(); if (core_params.core_ty == OOO) { int_inst_window->tdp_stats.readAc.access = core_params.issueW * core_params.num_pipelines; int_inst_window->tdp_stats.writeAc.access = core_params.issueW * core_params.num_pipelines; int_inst_window->tdp_stats.searchAc.access = core_params.issueW * core_params.num_pipelines; int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->tdp_stats.readAc.access + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->tdp_stats.searchAc.access + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->tdp_stats.writeAc.access; int_inst_window->rtp_stats.readAc.access = core_stats.inst_window_reads; int_inst_window->rtp_stats.writeAc.access = core_stats.inst_window_writes; int_inst_window->rtp_stats.searchAc.access = core_stats.inst_window_wakeup_accesses; int_inst_window->rt_power.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->rtp_stats.readAc.access + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->rtp_stats.searchAc.access + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->rtp_stats.writeAc.access; } else if (core_params.multithreaded) { int_inst_window->tdp_stats.readAc.access = core_params.issueW * core_params.num_pipelines; int_inst_window->tdp_stats.writeAc.access = core_params.issueW * core_params.num_pipelines; int_inst_window->tdp_stats.searchAc.access = core_params.issueW * core_params.num_pipelines; int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->tdp_stats.readAc.access + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->tdp_stats.searchAc.access + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->tdp_stats.writeAc.access; int_inst_window->rtp_stats.readAc.access = core_stats.int_instructions + core_stats.fp_instructions; int_inst_window->rtp_stats.writeAc.access = core_stats.int_instructions + core_stats.fp_instructions; int_inst_window->rtp_stats.searchAc.access = 2 * (core_stats.int_instructions + core_stats.fp_instructions); int_inst_window->rt_power.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->rtp_stats.readAc.access + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->rtp_stats.searchAc.access + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->rtp_stats.writeAc.access; } } if (fp_inst_window) { fp_inst_window->tdp_stats.reset(); fp_inst_window->tdp_stats.readAc.access = fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines; fp_inst_window->tdp_stats.writeAc.access = fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines; fp_inst_window->tdp_stats.searchAc.access = fp_inst_window->l_ip.num_search_ports * core_params.num_fp_pipelines; fp_inst_window->rtp_stats.reset(); fp_inst_window->rtp_stats.readAc.access = core_stats.fp_inst_window_reads; fp_inst_window->rtp_stats.writeAc.access = core_stats.fp_inst_window_writes; fp_inst_window->rtp_stats.searchAc.access = core_stats.fp_inst_window_wakeup_accesses; fp_inst_window->power_t.reset(); fp_inst_window->power_t.readOp.dynamic += fp_inst_window->power.readOp.dynamic * fp_inst_window->tdp_stats.readAc.access + fp_inst_window->power.searchOp.dynamic * fp_inst_window->tdp_stats.searchAc.access + fp_inst_window->power.writeOp.dynamic * fp_inst_window->tdp_stats.writeAc.access; fp_inst_window->rt_power.reset(); fp_inst_window->rt_power.readOp.dynamic += fp_inst_window->power.readOp.dynamic * fp_inst_window->rtp_stats.readAc.access + fp_inst_window->power.searchOp.dynamic * fp_inst_window->rtp_stats.searchAc.access + fp_inst_window->power.writeOp.dynamic * fp_inst_window->rtp_stats.writeAc.access; } if (ROB) { ROB->tdp_stats.reset(); ROB->tdp_stats.readAc.access = core_params.commitW * core_params.num_pipelines * ROB_duty_cycle; ROB->tdp_stats.writeAc.access = core_params.issueW * core_params.num_pipelines * ROB_duty_cycle; ROB->rtp_stats.reset(); ROB->rtp_stats.readAc.access = core_stats.ROB_reads; ROB->rtp_stats.writeAc.access = core_stats.ROB_writes; ROB->power_t.reset(); ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic * ROB->tdp_stats.readAc.access + ROB->local_result.power.writeOp.dynamic * ROB->tdp_stats.writeAc.access; ROB->rt_power.reset(); ROB->rt_power.readOp.dynamic += ROB->local_result.power.readOp.dynamic * ROB->rtp_stats.readAc.access + ROB->local_result.power.writeOp.dynamic * ROB->rtp_stats.writeAc.access; } output_data.reset(); if (int_inst_window) { int_inst_window->output_data.subthreshold_leakage_power = int_inst_window->power_t.readOp.leakage; int_inst_window->output_data.gate_leakage_power = int_inst_window->power_t.readOp.gate_leakage; int_inst_window->output_data.peak_dynamic_power = int_inst_window->power_t.readOp.dynamic * clockRate; int_inst_window->output_data.runtime_dynamic_energy = int_inst_window->rt_power.readOp.dynamic; output_data += int_inst_window->output_data; } if (fp_inst_window) { fp_inst_window->output_data.subthreshold_leakage_power = fp_inst_window->power_t.readOp.leakage; fp_inst_window->output_data.gate_leakage_power = fp_inst_window->power_t.readOp.gate_leakage; fp_inst_window->output_data.peak_dynamic_power = fp_inst_window->power_t.readOp.dynamic * clockRate; fp_inst_window->output_data.runtime_dynamic_energy = fp_inst_window->rt_power.readOp.dynamic; output_data += fp_inst_window->output_data; } if (ROB) { ROB->output_data.peak_dynamic_power = ROB->power_t.readOp.dynamic * clockRate; ROB->output_data.runtime_dynamic_energy = ROB->rt_power.readOp.dynamic; output_data += ROB->output_data; } // Integer and FP instruction selection logic is not included in the // roll-up due to the uninitialized area /* if (int_instruction_selection) { output_data += int_instruction_selection->output_data; } if (fp_instruction_selection) { output_data += fp_instruction_selection->output_data; } */ } void SchedulerU::displayData(uint32_t indent, int plevel) { if (!exist) return; McPATComponent::displayData(indent, plevel); if (core_params.core_ty == OOO) { int_inst_window->displayData(indent + 4, plevel); fp_inst_window->displayData(indent + 4, plevel); if (core_params.ROB_size > 0) { ROB->displayData(indent + 4, plevel); } } else if (core_params.multithreaded) { int_inst_window->displayData(indent + 4, plevel); } // Integer and FP instruction selection logic is not included in the // roll-up due to the uninitialized area /* if (int_instruction_selection) { int_instruction_selection->displayData(indent + 4, plevel); } if (fp_instruction_selection) { fp_instruction_selection->displayData(indent + 4, plevel); } */ } void LoadStoreU::computeEnergy() { if (!exist) return; LSQ->tdp_stats.reset(); LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports * core_stats.LSU_duty_cycle; LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports * core_stats.LSU_duty_cycle; LSQ->rtp_stats.reset(); // Flush overhead conidered LSQ->rtp_stats.readAc.access = (core_stats.load_instructions + core_stats.store_instructions) * 2; LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions + core_stats.store_instructions) * 2; LSQ->power_t.reset(); //every memory access invloves at least two operations on LSQ LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access * (LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) + LSQ->tdp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic; LSQ->rt_power.reset(); //every memory access invloves at least two operations on LSQ LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access * (LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) + LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic; if (LoadQ) { LoadQ->tdp_stats.reset(); LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports * core_stats.LSU_duty_cycle; LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports * core_stats.LSU_duty_cycle; LoadQ->rtp_stats.reset(); LoadQ->rtp_stats.readAc.access = core_stats.load_instructions + core_stats.store_instructions; LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions + core_stats.store_instructions; LoadQ->power_t.reset(); //every memory access invloves at least two operations on LoadQ LoadQ->power_t.readOp.dynamic += LoadQ->tdp_stats.readAc.access * (LoadQ->local_result.power.searchOp.dynamic + LoadQ->local_result.power.readOp.dynamic) + LoadQ->tdp_stats.writeAc.access * LoadQ->local_result.power.writeOp.dynamic; LoadQ->rt_power.reset(); //every memory access invloves at least two operations on LoadQ LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access * (LoadQ->local_result.power.searchOp.dynamic + LoadQ->local_result.power.readOp.dynamic) + LoadQ->rtp_stats.writeAc.access * LoadQ->local_result.power.writeOp.dynamic; } McPATComponent::computeEnergy(); output_data.reset(); if (dcache) { output_data += dcache->output_data; } if (LSQ) { LSQ->output_data.peak_dynamic_power = LSQ->power_t.readOp.dynamic * clockRate; LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic; output_data += LSQ->output_data; } if (LoadQ) { LoadQ->output_data.peak_dynamic_power = LoadQ->power_t.readOp.dynamic * clockRate; LoadQ->output_data.runtime_dynamic_energy = LoadQ->rt_power.readOp.dynamic; output_data += LoadQ->output_data; } } void LoadStoreU::displayData(uint32_t indent, int plevel) { if (!exist) return; McPATComponent::displayData(indent, plevel); if (LoadQ) { LoadQ->displayData(indent + 4, plevel); } LSQ->displayData(indent + 4, plevel); } void MemManU::computeEnergy() { if (!exist) return; itlb->tdp_stats.reset(); itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports; itlb->tdp_stats.readAc.miss = 0; itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access - itlb->tdp_stats.readAc.miss; itlb->rtp_stats.reset(); itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses; itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses; itlb->power_t.reset(); //FA spent most power in tag, so use total access not hits itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access * itlb->local_result.power.searchOp.dynamic + itlb->tdp_stats.readAc.miss * itlb->local_result.power.writeOp.dynamic; itlb->rt_power.reset(); //FA spent most power in tag, so use total access not hits itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access * itlb->local_result.power.searchOp.dynamic + itlb->rtp_stats.writeAc.access * itlb->local_result.power.writeOp.dynamic; dtlb->tdp_stats.reset(); dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports * core_stats.LSU_duty_cycle; dtlb->tdp_stats.readAc.miss = 0; dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access - dtlb->tdp_stats.readAc.miss; dtlb->rtp_stats.reset(); dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses + mem_man_stats.dtlb_write_misses; dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses + mem_man_stats.dtlb_read_misses; dtlb->power_t.reset(); //FA spent most power in tag, so use total access not hits dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access * dtlb->local_result.power.searchOp.dynamic + dtlb->tdp_stats.readAc.miss * dtlb->local_result.power.writeOp.dynamic; dtlb->rt_power.reset(); //FA spent most power in tag, so use total access not hits dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access * dtlb->local_result.power.searchOp.dynamic + dtlb->rtp_stats.writeAc.access * dtlb->local_result.power.writeOp.dynamic; output_data.reset(); if (itlb) { itlb->output_data.peak_dynamic_power = itlb->power_t.readOp.dynamic * clockRate; itlb->output_data.runtime_dynamic_energy = itlb->rt_power.readOp.dynamic; output_data += itlb->output_data; } if (dtlb) { dtlb->output_data.peak_dynamic_power = dtlb->power_t.readOp.dynamic * clockRate; dtlb->output_data.runtime_dynamic_energy = dtlb->rt_power.readOp.dynamic; output_data += dtlb->output_data; } } void MemManU::displayData(uint32_t indent, int plevel) { if (!exist) return; McPATComponent::displayData(indent, plevel); itlb->displayData(indent + 4, plevel); dtlb->displayData(indent + 4, plevel); } void RegFU::computeEnergy() { /* * Architecture RF and physical RF cannot be present at the same time. * Therefore, the RF stats can only refer to either ARF or PRF; * And the same stats can be used for both. */ if (!exist) return; IRF->tdp_stats.reset(); IRF->tdp_stats.readAc.access = core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS * (core_stats.ALU_duty_cycle * 1.1 + (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) * core_params.num_pipelines; IRF->tdp_stats.writeAc.access = core_params.issueW * (core_stats.ALU_duty_cycle * 1.1 + (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) * core_params.num_pipelines; IRF->rtp_stats.reset(); IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads; IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes; if (core_params.regWindowing) { IRF->rtp_stats.readAc.access += core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; IRF->rtp_stats.writeAc.access += core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; } IRF->power_t.reset(); IRF->power_t.readOp.dynamic += IRF->tdp_stats.readAc.access * IRF->local_result.power.readOp.dynamic + IRF->tdp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic; IRF->rt_power.reset(); IRF->rt_power.readOp.dynamic += IRF->rtp_stats.readAc.access * IRF->local_result.power.readOp.dynamic + IRF->rtp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic; FRF->tdp_stats.reset(); FRF->tdp_stats.readAc.access = FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 * core_params.num_fp_pipelines; FRF->tdp_stats.writeAc.access = FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 * core_params.num_fp_pipelines; FRF->rtp_stats.reset(); FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads; FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes; if (core_params.regWindowing) { FRF->rtp_stats.readAc.access += core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; FRF->rtp_stats.writeAc.access += core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; } FRF->power_t.reset(); FRF->power_t.readOp.dynamic += FRF->tdp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + FRF->tdp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; FRF->rt_power.reset(); FRF->rt_power.readOp.dynamic += FRF->rtp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + FRF->rtp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; if (core_params.regWindowing) { RFWIN->tdp_stats.reset(); RFWIN->tdp_stats.readAc.access = 0; RFWIN->tdp_stats.writeAc.access = 0; RFWIN->rtp_stats.reset(); RFWIN->rtp_stats.readAc.access = core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; RFWIN->rtp_stats.writeAc.access = core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; RFWIN->power_t.reset(); RFWIN->power_t.readOp.dynamic += RFWIN->tdp_stats.readAc.access * RFWIN->local_result.power.readOp.dynamic + RFWIN->tdp_stats.writeAc.access * RFWIN->local_result.power.writeOp.dynamic; RFWIN->rt_power.reset(); RFWIN->rt_power.readOp.dynamic += RFWIN->rtp_stats.readAc.access * RFWIN->local_result.power.readOp.dynamic + RFWIN->rtp_stats.writeAc.access * RFWIN->local_result.power.writeOp.dynamic; } output_data.reset(); if (IRF) { IRF->output_data.peak_dynamic_power = IRF->power_t.readOp.dynamic * clockRate; IRF->output_data.subthreshold_leakage_power *= core_params.num_hthreads; IRF->output_data.gate_leakage_power *= core_params.num_hthreads; IRF->output_data.runtime_dynamic_energy = IRF->rt_power.readOp.dynamic; output_data += IRF->output_data; } if (FRF) { FRF->output_data.peak_dynamic_power = FRF->power_t.readOp.dynamic * clockRate; FRF->output_data.subthreshold_leakage_power *= core_params.num_hthreads; FRF->output_data.gate_leakage_power *= core_params.num_hthreads; FRF->output_data.runtime_dynamic_energy = FRF->rt_power.readOp.dynamic; output_data += FRF->output_data; } if (RFWIN) { RFWIN->output_data.peak_dynamic_power = RFWIN->power_t.readOp.dynamic * clockRate; RFWIN->output_data.runtime_dynamic_energy = RFWIN->rt_power.readOp.dynamic; output_data += RFWIN->output_data; } } void RegFU::displayData(uint32_t indent, int plevel) { if (!exist) return; McPATComponent::displayData(indent, plevel); IRF->displayData(indent + 4, plevel); FRF->displayData(indent + 4, plevel); if (core_params.regWindowing) { RFWIN->displayData(indent + 4, plevel); } } void EXECU::computeEnergy() { if (!exist) return; int_bypass->set_params_stats(core_params.execu_int_bypass_ports, core_stats.ALU_cdb_duty_cycle, core_stats.cdb_alu_accesses); intTagBypass->set_params_stats(core_params.execu_int_bypass_ports, core_stats.ALU_cdb_duty_cycle, core_stats.cdb_alu_accesses); if (core_params.num_muls > 0) { int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports, core_stats.MUL_cdb_duty_cycle, core_stats.cdb_mul_accesses); intTag_mul_Bypass->set_params_stats(core_params.execu_mul_bypass_ports, core_stats.MUL_cdb_duty_cycle, core_stats.cdb_mul_accesses); } if (core_params.num_fpus > 0) { fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports, core_stats.FPU_cdb_duty_cycle, core_stats.cdb_fpu_accesses); fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports, core_stats.FPU_cdb_duty_cycle, core_stats.cdb_fpu_accesses); } McPATComponent::computeEnergy(); if (rfu) { rfu->computeEnergy(); output_data += rfu->output_data; } if (scheu) { scheu->computeEnergy(); output_data += scheu->output_data; } if (fp_u) { fp_u->computeEnergy(); output_data += fp_u->output_data; } if (exeu) { exeu->computeEnergy(); output_data += exeu->output_data; } if (mul) { mul->computeEnergy(); output_data += mul->output_data; } } void EXECU::displayData(uint32_t indent, int plevel) { if (!exist) return; McPATComponent::displayData(indent, plevel); rfu->displayData(indent + 4, plevel); if (scheu) { scheu->displayData(indent + 4, plevel); } exeu->displayData(indent + 4, plevel); if (core_params.num_fpus > 0) { fp_u->displayData(indent + 4, plevel); } if (core_params.num_muls > 0) { mul->displayData(indent + 4, plevel); } } void Core::computeEnergy() { ifu->computeEnergy(); lsu->computeEnergy(); mmu->computeEnergy(); exu->computeEnergy(); if (core_params.core_ty == OOO) { rnu->computeEnergy(); } output_data.reset(); if (ifu) { output_data += ifu->output_data; } if (lsu) { output_data += lsu->output_data; } if (mmu) { output_data += mmu->output_data; } if (exu) { output_data += exu->output_data; } if (rnu) { output_data += rnu->output_data; } if (corepipe) { output_data += corepipe->output_data; } if (undiffCore) { output_data += undiffCore->output_data; } if (l2cache) { output_data += l2cache->output_data; } } InstFetchU ::~InstFetchU() { if (!exist) return; if (IB) { delete IB; IB = NULL; } if (ID_inst) { delete ID_inst; ID_inst = NULL; } if (ID_operand) { delete ID_operand; ID_operand = NULL; } if (ID_misc) { delete ID_misc; ID_misc = NULL; } if (core_params.predictionW > 0) { if (BTB) { delete BTB; BTB = NULL; } if (BPT) { delete BPT; BPT = NULL; } } if (icache) { delete icache; } } BranchPredictor ::~BranchPredictor() { if (!exist) return; if (globalBPT) { delete globalBPT; globalBPT = NULL; } if (localBPT) { delete localBPT; localBPT = NULL; } if (L1_localBPT) { delete L1_localBPT; L1_localBPT = NULL; } if (L2_localBPT) { delete L2_localBPT; L2_localBPT = NULL; } if (chooser) { delete chooser; chooser = NULL; } if (RAS) { delete RAS; RAS = NULL; } } RENAMINGU ::~RENAMINGU() { if (!exist) return; if (iFRAT) { delete iFRAT; iFRAT = NULL; } if (fFRAT) { delete fFRAT; fFRAT = NULL; } if (iRRAT) { delete iRRAT; iRRAT = NULL; } if (iFRAT) { delete iFRAT; iFRAT = NULL; } if (ifreeL) { delete ifreeL; ifreeL = NULL; } if (ffreeL) { delete ffreeL; ffreeL = NULL; } if (idcl) { delete idcl; idcl = NULL; } if (fdcl) { delete fdcl; fdcl = NULL; } if (RAHT) { delete RAHT; RAHT = NULL; } } LoadStoreU ::~LoadStoreU() { if (!exist) return; if (LSQ) { delete LSQ; LSQ = NULL; } if (dcache) { delete dcache; dcache = NULL; } } MemManU ::~MemManU() { if (!exist) return; if (itlb) { delete itlb; itlb = NULL; } if (dtlb) { delete dtlb; dtlb = NULL; } } RegFU ::~RegFU() { if (!exist) return; if (IRF) { delete IRF; IRF = NULL; } if (FRF) { delete FRF; FRF = NULL; } if (RFWIN) { delete RFWIN; RFWIN = NULL; } } SchedulerU ::~SchedulerU() { if (!exist) return; if (int_inst_window) { delete int_inst_window; int_inst_window = NULL; } if (fp_inst_window) { delete int_inst_window; int_inst_window = NULL; } if (ROB) { delete ROB; ROB = NULL; } if (int_instruction_selection) { delete int_instruction_selection; int_instruction_selection = NULL; } if (fp_instruction_selection) { delete fp_instruction_selection; fp_instruction_selection = NULL; } } EXECU ::~EXECU() { if (!exist) return; if (int_bypass) { delete int_bypass; int_bypass = NULL; } if (intTagBypass) { delete intTagBypass; intTagBypass = NULL; } if (int_mul_bypass) { delete int_mul_bypass; int_mul_bypass = NULL; } if (intTag_mul_Bypass) { delete intTag_mul_Bypass; intTag_mul_Bypass = NULL; } if (fp_bypass) { delete fp_bypass; fp_bypass = NULL; } if (fpTagBypass) { delete fpTagBypass; fpTagBypass = NULL; } if (fp_u) { delete fp_u; fp_u = NULL; } if (exeu) { delete exeu; exeu = NULL; } if (mul) { delete mul; mul = NULL; } if (rfu) { delete rfu; rfu = NULL; } if (scheu) { delete scheu; scheu = NULL; } } Core::~Core() { if (ifu) { delete ifu; ifu = NULL; } if (lsu) { delete lsu; lsu = NULL; } if (rnu) { delete rnu; rnu = NULL; } if (mmu) { delete mmu; mmu = NULL; } if (exu) { delete exu; exu = NULL; } if (corepipe) { delete corepipe; corepipe = NULL; } if (undiffCore) { delete undiffCore; undiffCore = NULL; } if (l2cache) { delete l2cache; l2cache = NULL; } } void Core::initialize_params() { memset(&core_params, 0, sizeof(CoreParameters)); core_params.peak_issueW = -1; core_params.peak_commitW = -1; } void Core::initialize_stats() { memset(&core_stats, 0, sizeof(CoreStatistics)); core_stats.IFU_duty_cycle = 1.0; core_stats.ALU_duty_cycle = 1.0; core_stats.FPU_duty_cycle = 1.0; core_stats.MUL_duty_cycle = 1.0; core_stats.ALU_cdb_duty_cycle = 1.0; core_stats.FPU_cdb_duty_cycle = 1.0; core_stats.MUL_cdb_duty_cycle = 1.0; core_stats.pipeline_duty_cycle = 1.0; core_stats.IFU_duty_cycle = 1.0; core_stats.LSU_duty_cycle = 1.0; core_stats.MemManU_D_duty_cycle = 1.0; core_stats.MemManU_I_duty_cycle = 1.0; } void Core::set_core_param() { initialize_params(); initialize_stats(); int num_children = xml_data->nChildNode("param"); int i; for (i = 0; i < num_children; i++) { XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); XMLCSTR node_name = paramNode->getAttribute("name"); XMLCSTR value = paramNode->getAttribute("value"); if (!node_name) warnMissingParamName(paramNode->getAttribute("id")); ASSIGN_STR_IF("name", name); ASSIGN_INT_IF("opt_local", core_params.opt_local); ASSIGN_FP_IF("clock_rate", core_params.clockRate); ASSIGN_INT_IF("instruction_length", core_params.instruction_length); ASSIGN_INT_IF("opcode_width", core_params.opcode_width); ASSIGN_INT_IF("x86", core_params.x86); ASSIGN_INT_IF("Embedded", core_params.Embedded); ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type); ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length); ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads); ASSIGN_INT_IF("fetch_width", core_params.fetchW); ASSIGN_INT_IF("decode_width", core_params.decodeW); ASSIGN_INT_IF("issue_width", core_params.issueW); ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW); ASSIGN_INT_IF("commit_width", core_params.commitW); ASSIGN_INT_IF("prediction_width", core_params.predictionW); ASSIGN_INT_IF("ALU_per_core", core_params.num_alus); ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus); ASSIGN_INT_IF("MUL_per_core", core_params.num_muls); ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW); ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty, Scheduler_type); ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type); ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size); ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size); ASSIGN_INT_IF("ROB_size", core_params.ROB_size); ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc); ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks); ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width); ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc); ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks); ASSIGN_INT_IF("register_window_size", core_params.register_window_size); ASSIGN_INT_IF("register_window_throughput", core_params.register_window_throughput); ASSIGN_INT_IF("register_window_latency", core_params.register_window_latency); ASSIGN_INT_IF("register_window_assoc", core_params.register_window_assoc); ASSIGN_INT_IF("register_window_nbanks", core_params.register_window_nbanks); ASSIGN_INT_IF("register_window_tag_width", core_params.register_window_tag_width); ASSIGN_INT_IF("register_window_rw_ports", core_params.register_window_rw_ports); ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size); ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc); ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks); ASSIGN_INT_IF("phy_Regs_IRF_tag_width", core_params.phy_Regs_IRF_tag_width); ASSIGN_INT_IF("phy_Regs_IRF_rd_ports", core_params.phy_Regs_IRF_rd_ports); ASSIGN_INT_IF("phy_Regs_IRF_wr_ports", core_params.phy_Regs_IRF_wr_ports); ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size); ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc); ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks); ASSIGN_INT_IF("phy_Regs_FRF_tag_width", core_params.phy_Regs_FRF_tag_width); ASSIGN_INT_IF("phy_Regs_FRF_rd_ports", core_params.phy_Regs_FRF_rd_ports); ASSIGN_INT_IF("phy_Regs_FRF_wr_ports", core_params.phy_Regs_FRF_wr_ports); ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks); ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports); ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks); ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports); ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks); ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports); ASSIGN_INT_IF("memory_ports", core_params.memory_ports); ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size); ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc); ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks); ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size); ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc); ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks); ASSIGN_INT_IF("instruction_window_size", core_params.instruction_window_size); ASSIGN_INT_IF("fp_instruction_window_size", core_params.fp_instruction_window_size); ASSIGN_INT_IF("instruction_buffer_size", core_params.instruction_buffer_size); ASSIGN_INT_IF("instruction_buffer_assoc", core_params.instruction_buffer_assoc); ASSIGN_INT_IF("instruction_buffer_nbanks", core_params.instruction_buffer_nbanks); ASSIGN_INT_IF("instruction_buffer_tag_width", core_params.instruction_buffer_tag_width); ASSIGN_INT_IF("number_instruction_fetch_ports", core_params.number_instruction_fetch_ports); ASSIGN_INT_IF("RAS_size", core_params.RAS_size); ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt, Wire_type); ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type); ASSIGN_INT_IF("execu_int_bypass_ports", core_params.execu_int_bypass_ports); ASSIGN_INT_IF("execu_mul_bypass_ports", core_params.execu_mul_bypass_ports); ASSIGN_INT_IF("execu_fp_bypass_ports", core_params.execu_fp_bypass_ports); ASSIGN_ENUM_IF("execu_bypass_wire_type", core_params.execu_bypass_wire_type, Wire_type); ASSIGN_FP_IF("execu_bypass_base_width", core_params.execu_bypass_base_width); ASSIGN_FP_IF("execu_bypass_base_height", core_params.execu_bypass_base_height); ASSIGN_INT_IF("execu_bypass_start_wiring_level", core_params.execu_bypass_start_wiring_level); ASSIGN_FP_IF("execu_bypass_route_over_perc", core_params.execu_bypass_route_over_perc); ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator); ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages); ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages); ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines); ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines); ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint); ASSIGN_INT_IF("perThreadState", core_params.perThreadState); ASSIGN_INT_IF("instruction_length", core_params.instruction_length); else { warnUnrecognizedParam(node_name); } } // Change from MHz to Hz core_params.clockRate *= 1e6; clockRate = core_params.clockRate; core_params.peak_commitW = core_params.peak_issueW; core_params.fp_decodeW = core_params.fp_issueW; num_children = xml_data->nChildNode("stat"); for (i = 0; i < num_children; i++) { XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); XMLCSTR node_name = statNode->getAttribute("name"); XMLCSTR value = statNode->getAttribute("value"); if (!node_name) warnMissingStatName(statNode->getAttribute("id")); ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle); ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle); ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle); ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle); ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle); ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle); ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle); ASSIGN_FP_IF("total_cycles", core_stats.total_cycles); ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles); ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles); ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle); ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle); ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle); ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle); ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle); ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses); ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses); ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses); ASSIGN_FP_IF("function_calls", core_stats.function_calls); ASSIGN_FP_IF("total_instructions", core_stats.total_instructions); ASSIGN_FP_IF("int_instructions", core_stats.int_instructions); ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions); ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions); ASSIGN_FP_IF("branch_mispredictions", core_stats.branch_mispredictions); ASSIGN_FP_IF("load_instructions", core_stats.load_instructions); ASSIGN_FP_IF("store_instructions", core_stats.store_instructions); ASSIGN_FP_IF("committed_instructions", core_stats.committed_instructions); ASSIGN_FP_IF("committed_int_instructions", core_stats.committed_int_instructions); ASSIGN_FP_IF("committed_fp_instructions", core_stats.committed_fp_instructions); ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads); ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes); ASSIGN_FP_IF("rename_reads", core_stats.rename_reads); ASSIGN_FP_IF("rename_writes", core_stats.rename_writes); ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads); ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes); ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads); ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes); ASSIGN_FP_IF("inst_window_wakeup_accesses", core_stats.inst_window_wakeup_accesses); ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads); ASSIGN_FP_IF("fp_inst_window_writes", core_stats.fp_inst_window_writes); ASSIGN_FP_IF("fp_inst_window_wakeup_accesses", core_stats.fp_inst_window_wakeup_accesses); ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads); ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads); ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes); ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes); ASSIGN_FP_IF("context_switches", core_stats.context_switches); ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses); ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses); ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses); else { warnUnrecognizedStat(node_name); } } // Initialize a few variables core_params.multithreaded = core_params.num_hthreads > 1 ? true : false; core_params.pc_width = virtual_address_width; core_params.v_address_width = virtual_address_width; core_params.p_address_width = physical_address_width; core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32; core_params.fp_data_width = core_params.int_data_width; core_params.arch_ireg_width = int(ceil(log2(core_params.archi_Regs_IRF_size))); core_params.arch_freg_width = int(ceil(log2(core_params.archi_Regs_FRF_size))); core_params.num_IRF_entry = core_params.archi_Regs_IRF_size; core_params.num_FRF_entry = core_params.archi_Regs_FRF_size; if (core_params.instruction_length <= 0) { errorNonPositiveParam("instruction_length"); } if (core_params.num_hthreads <= 0) { errorNonPositiveParam("number_hardware_threads"); } if (core_params.opcode_width <= 0) { errorNonPositiveParam("opcode_width"); } if (core_params.instruction_buffer_size <= 0) { errorNonPositiveParam("instruction_buffer_size"); } if (core_params.number_instruction_fetch_ports <= 0) { errorNonPositiveParam("number_instruction_fetch_ports"); } if (core_params.peak_issueW <= 0) { errorNonPositiveParam("peak_issue_width"); } else { assert(core_params.peak_commitW > 0); } if (core_params.core_ty == OOO) { if (core_params.scheu_ty == PhysicalRegFile) { core_params.phy_ireg_width = int(ceil(log2(core_params.phy_Regs_IRF_size))); core_params.phy_freg_width = int(ceil(log2(core_params.phy_Regs_FRF_size))); core_params.num_ifreelist_entries = core_params.num_IRF_entry = core_params.phy_Regs_IRF_size; core_params.num_ffreelist_entries = core_params.num_FRF_entry = core_params.phy_Regs_FRF_size; } else if (core_params.scheu_ty == ReservationStation) { core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size))); core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size))); core_params.num_ifreelist_entries = core_params.ROB_size; core_params.num_ffreelist_entries = core_params.ROB_size; } } core_params.regWindowing = (core_params.register_window_size > 0 && core_params.core_ty == Inorder) ? true : false; if (core_params.regWindowing) { if (core_params.register_window_throughput <= 0) { errorNonPositiveParam("register_window_throughput"); } else if (core_params.register_window_latency <= 0) { errorNonPositiveParam("register_window_latency"); } } set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads, core_params.num_hthreads, 0); if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) { cout << "Invalid Core Type" << endl; exit(0); } if (!((core_params.scheu_ty == PhysicalRegFile) || (core_params.scheu_ty == ReservationStation))) { cout << "Invalid OOO Scheduler Type" << endl; exit(0); } if (!((core_params.rm_ty == RAMbased) || (core_params.rm_ty == CAMbased))) { cout << "Invalid OOO Renaming Type" << endl; exit(0); } }