summaryrefslogtreecommitdiff
path: root/ext/mcpat/core.cc
diff options
context:
space:
mode:
authorYasuko Eckert <yasuko.eckert@amd.com>2014-06-03 13:32:59 -0700
committerYasuko Eckert <yasuko.eckert@amd.com>2014-06-03 13:32:59 -0700
commit0deef376d96bfe0a3a2496714ac22471d9ee818a (patch)
tree43d383a5bc4315863240dd61f7a4077ce2ac86e7 /ext/mcpat/core.cc
parent1104199115a6ff5ed04f92ba6391f18728765014 (diff)
downloadgem5-0deef376d96bfe0a3a2496714ac22471d9ee818a.tar.xz
ext: McPAT interface changes and fixes
This patch includes software engineering changes and some generic bug fixes Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known issues/concerns we did not have a chance to address in this patch. High-level changes in this patch include: 1) Making XML parsing modular and hierarchical: - Shift parsing responsibility into the components - Read XML in a (mostly) context-free recursive manner so that McPAT input files can contain arbitrary component hierarchies 2) Making power, energy, and area calculations a hierarchical and recursive process - Components track their subcomponents and recursively call compute functions in stages - Make C++ object hierarchy reflect inheritance of classes of components with similar structures - Simplify computeArea() and computeEnergy() functions to eliminate successive calls to calculate separate TDP vs. runtime energy - Remove Processor component (now unnecessary) and introduce a more abstract System component 3) Standardizing McPAT output across all components - Use a single, common data structure for storing and printing McPAT output - Recursively call print functions through component hierarchy 4) For caches, allow splitting data array and tag array reads and writes for better accuracy 5) Improving the usability of CACTI by printing more helpful warning and error messages 6) Minor: Impose more rigorous code style for clarity (more work still to be done) Overall, these changes greatly reduce the amount of replicated code, and they improve McPAT runtime and decrease memory footprint.
Diffstat (limited to 'ext/mcpat/core.cc')
-rw-r--r--ext/mcpat/core.cc7640
1 files changed, 3757 insertions, 3883 deletions
diff --git a/ext/mcpat/core.cc b/ext/mcpat/core.cc
index ba9106061..b25c23cac 100644
--- a/ext/mcpat/core.cc
+++ b/ext/mcpat/core.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -33,491 +34,570 @@
#include <cassert>
#include <cmath>
#include <iostream>
+#include <sstream>
#include <string>
-#include "XML_Parse.h"
#include "basic_circuit.h"
+#include "basic_components.h"
+#include "common.h"
#include "const.h"
#include "core.h"
#include "io.h"
#include "parameter.h"
-//#include "globalvar.h"
-
-InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- IB (0),
- BTB (0),
- ID_inst (0),
- ID_operand (0),
- ID_misc (0),
- exist(exist_)
-{
- if (!exist) return;
- int idx, tag, data, size, line, assoc, banks;
- bool debug= false, is_default = true;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7];
- //Assuming all L1 caches are virtually idxed physically tagged.
- //cache
-
- size = (int)XML->sys.core[ithCore].icache.icache_config[0];
- line = (int)XML->sys.core[ithCore].icache.icache_config[1];
- assoc = (int)XML->sys.core[ithCore].icache.icache_config[2];
- banks = (int)XML->sys.core[ithCore].icache.icache_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0];
- interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1];
- interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2];
- interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- // interface_ip.obj_func_dyn_energy = 0;
- // interface_ip.obj_func_dyn_power = 0;
- // interface_ip.obj_func_leak_power = 0;
- // interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty);
- scktRatio = g_tp.sckt_co_eff;
- chip_PR_overhead = g_tp.chip_layout_overhead;
- macro_PR_overhead = g_tp.macro_layout_overhead;
- icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area);
- area.set_area(area.get_area()+ icache.caches->local_result.area);
- //output_data_csv(icache.caches.local_result);
-
-
- /*
- *iCache controllers
- *miss buffer Each MSHR contains enough state
- *to handle one or more accesses of any type to a single memory line.
- *Due to the generality of the MSHR mechanism,
- *the amount of state involved is non-trivial:
- *including the address, pointers to the cache entry and destination register,
- *written data, and various other pieces of state.
- */
- interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area);
- area.set_area(area.get_area()+ icache.missb->local_result.area);
- //output_data_csv(icache.missb.local_result);
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = icache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area);
- area.set_area(area.get_area()+ icache.ifb->local_result.area);
- //output_data_csv(icache.ifb.local_result);
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ icache.prefetchb->local_result.area);
- //output_data_csv(icache.prefetchb.local_result);
-
- //Instruction buffer
- data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer.
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
- interface_ip.pure_cam = false;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64?
- XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions.
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- IB->area.set_area(IB->area.get_area()+ IB->local_result.area);
- area.set_area(area.get_area()+ IB->local_result.area);
- //output_data_csv(IB.IB.local_result);
-
- // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width;
- // inst_decoder.init_decoder(is_default, &interface_ip);
- // inst_decoder.full_decoder_power();
-
- if (coredynp.predictionW>0)
- {
- /*
- * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
- * It is only a cache without all the buffers in the cache controller since it is more like a
- * look up table than a cache with cache controller. When access miss, no load from other places
- * such as main memory (not actively fill the misses), it is passively updated under two circumstances:
- * 1) when BPT@ID stage finds out current is a taken branch while BTB missed
- * 2) When BPT@ID stage predicts differently than BTB
- * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
- * 4) when EXEU find out wrong target has been provided from BTB.
- *
- */
- size = XML->sys.core[ithCore].BTB.BTB_config[0];
- line = XML->sys.core[ithCore].BTB.BTB_config[1];
- assoc = XML->sys.core[ithCore].BTB.BTB_config[2];
- banks = XML->sys.core[ithCore].BTB.BTB_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
-// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
- tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- interface_ip.pure_cam = false;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:size;
- interface_ip.line_sz = debug?64:line;
- interface_ip.assoc = debug?8:assoc;
- interface_ip.nbanks = debug?1:banks;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area);
- area.set_area(area.get_area()+ BTB->local_result.area);
- ///cout<<"area="<<area<<endl;
-
- BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp);
- area.set_area(area.get_area()+ BPT->area.get_area());
- }
-
- ID_inst = new inst_decoder(is_default, &interface_ip,
- coredynp.opcode_length, 1/*Decoder should not know how many by itself*/,
- coredynp.x86,
- Core_device, coredynp.core_ty);
-
- ID_operand = new inst_decoder(is_default, &interface_ip,
- coredynp.arch_ireg_width, 1,
- coredynp.x86,
- Core_device, coredynp.core_ty);
-
- ID_misc = new inst_decoder(is_default, &interface_ip,
- 8/* Prefix field etc upto 14B*/, 1,
- coredynp.x86,
- Core_device, coredynp.core_ty);
- //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer.
- //So the dynamic power should be multiplied by a few times.
- area.set_area(area.get_area()+ (ID_inst->area.get_area()
- +ID_operand->area.get_area()
- +ID_misc->area.get_area())*coredynp.decodeW);
-}
+int RegFU::RFWIN_ACCESS_MULTIPLIER = 16;  // NOTE(review): the meaning of this factor is not visible here; presumably it scales register-window accesses -- confirm against RegFU
+
+// The five bits are: busy, Issued, Finished, speculative, valid
+int SchedulerU::ROB_STATUS_BITS = 5;
+
+InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL),
+ BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ int idx, tag, data, size, line, assoc, banks;
+ bool is_default = true;
+
+ clockRate = core_params.clockRate;
+ name = "Instruction Fetch Unit";
+ // Check if there is an icache child:
+ int i;
+ icache = NULL;
+ for( i = 0; i < xml_data->nChildNode("component"); i++ ) {
+ XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = childXML->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(childXML->getAttribute("id"));
+
+ STRCMP(type, "CacheUnit") {
+ XMLCSTR name = childXML->getAttribute("name");
+ if (strcmp(name, "Instruction Cache") == 0 ||
+ strcmp(name, "icache") == 0) {
+ icache = new CacheUnit(childXML, &interface_ip);
+ children.push_back(icache);
+ }
+ }
+ }
+ set_params_stats();
-BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- globalBPT(0),
- localBPT(0),
- L1_localBPT(0),
- L2_localBPT(0),
- chooser(0),
- RAS(0),
- exist(exist_)
-{
+ //Instruction buffer
+ data = core_params.instruction_length * core_params.peak_issueW;
+ line = int(ceil(data / BITS_PER_BYTE));
+ size = core_params.num_hthreads * core_params.instruction_buffer_size *
+ line;
+ if (size < MIN_BUFFER_SIZE) {
+ size = MIN_BUFFER_SIZE;
+ }
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.instruction_buffer_assoc;
+ interface_ip.nbanks = core_params.instruction_buffer_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0;
+ interface_ip.tag_w = core_params.instruction_buffer_tag_width;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports =
+ core_params.number_instruction_fetch_ports;
+ interface_ip.num_rd_ports = 0;
+ interface_ip.num_wr_ports = 0;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_ram = true;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+
+ IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ IB->area.set_area(IB->area.get_area() + IB->local_result.area);
+ area.set_area(area.get_area() + IB->local_result.area);
+
+ if (core_params.predictionW > 0) {
/*
- * Branch Predictor, accessed during ID stage.
- * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264,
- * including global predictor, local two level predictor, and Chooser.
- * The Branch predictor also includes a RAS (return address stack) for function calls
- * Branch predictors are tagged by thread ID and modeled as 1-way associative $
- * However RAS return address stacks are duplicated for each thread.
- * TODO:Data Width need to be computed more precisely *
+ * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
+ * It is only a cache without all the buffers in the cache controller since it is more like a
+ * look up table than a cache with cache controller. When access miss, no load from other places
+ * such as main memory (not actively fill the misses), it is passively updated under two circumstances:
+ * 1) when BPT@ID stage finds out current is a taken branch while BTB missed
+ * 2) When BPT@ID stage predicts differently than BTB
+ * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
+ * 4) when EXEU find out wrong target has been provided from BTB.
+ *
*/
- if (!exist) return;
- int tag, data;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- interface_ip.assoc = 1;
- interface_ip.pure_cam = false;
- if (coredynp.multithreaded)
- {
-
- tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS);
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
-
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- }
- else
- {
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
-
- }
- //Global predictor
- data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ size = inst_fetch_params.btb_size;
+ line = inst_fetch_params.btb_block_size;
+ assoc = inst_fetch_params.btb_assoc;
+ banks = inst_fetch_params.btb_num_banks;
+ idx = int(ceil(log2(size / line / assoc)));
+ tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads)))
+ + EXTRA_TAG_BITS;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = assoc;
+ interface_ip.nbanks = banks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 1;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
interface_ip.num_se_rd_ports = 0;
- globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area);
- area.set_area(area.get_area()+ globalBPT->local_result.area);
-
- //Local BPT (Level 1)
- data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area);
- area.set_area(area.get_area()+ L1_localBPT->local_result.area);
-
- //Local BPT (Level 2)
- data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area);
- area.set_area(area.get_area()+ L2_localBPT->local_result.area);
-
- //Chooser
- data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty);
- chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area);
- area.set_area(area.get_area()+ chooser->local_result.area);
-
- //RAS return address stacks are Duplicated for each thread.
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
- data = int(ceil(coredynp.pc_width/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate;
+ interface_ip.latency = inst_fetch_params.btb_latency / clockRate;
+
+ BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + BTB->local_result.area);
+
+ BPT = new BranchPredictor(xml_data, &interface_ip,
+ core_params, core_stats);
+ area.set_area(area.get_area() + BPT->area.get_area());
+ }
+
+ ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder",
+ is_default, &interface_ip,
+ core_params.opcode_width,
+ core_params.decodeW,
+ core_params.x86, clockRate,
+ Core_device, core_params.core_ty);
+
+ ID_operand = new InstructionDecoder(xml_data,
+ "Instruction Operand Decoder",
+ is_default, &interface_ip,
+ core_params.arch_ireg_width,
+ core_params.decodeW,
+ core_params.x86, clockRate,
+ Core_device, core_params.core_ty);
+
+ ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder",
+ is_default, &interface_ip,
+ core_params.micro_opcode_length,
+ core_params.decodeW,
+ core_params.x86, clockRate,
+ Core_device, core_params.core_ty);
+ area.set_area(area.get_area()+ (ID_inst->area.get_area()
+ + ID_operand->area.get_area()
+ + ID_misc->area.get_area())
+ * core_params.decodeW);
+}
+// Reads branch-target-buffer parameters and statistics from the "component" children of xml_data, then checks the BTB geometry.
+void
+InstFetchU::set_params_stats() {
+ int num_children = xml_data->nChildNode("component");
+ int i;
+ memset(&inst_fetch_params,0,sizeof(InstFetchParameters));  // zero every parameter first; inst_fetch_stats is not reset in this function
+ for (i = 0; i < num_children; i++) {
+ XMLNode* child = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = child->getAttribute("type");
+ // NOTE(review): type is still NULL at the STRCMP below unless the warning helper does not return -- confirm.
+ if (!type)
+ warnMissingComponentType(child->getAttribute("id"));
+ // Presumably STRCMP expands to a string-equality `if` (macro defined elsewhere), so only "BranchTargetBuffer" children are read here.
+ STRCMP(type, "BranchTargetBuffer") {
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");  // not referenced directly below; presumably consumed inside the ASSIGN_* macros -- confirm
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+ // The ASSIGN_INT_IF lines end in a bare `else`, so they presumably expand to an if / else-if chain keyed on node_name (macro defined elsewhere).
+ ASSIGN_INT_IF("size", inst_fetch_params.btb_size);
+ ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size);
+ ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc);
+ ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks);
+ ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency);
+ ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput);
+ ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports);
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ // Second pass over the same child: its "stat" nodes fill inst_fetch_stats.
+ sub_num_children = child->nChildNode("stat");
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* statNode = child->getChildNodePtr("stat", &j);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");  // as above, presumably read by the ASSIGN_* macros -- confirm
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("read_accesses",
+ inst_fetch_stats.btb_read_accesses);
+ ASSIGN_FP_IF("write_accesses",
+ inst_fetch_stats.btb_write_accesses);
+ else {
+ warnUnrecognizedStat(node_name);
+ }
+ }
+ }
+ }
+
+ // Parameter sanity check. The fields were zeroed by the memset above, so each check also fires when no BranchTargetBuffer child supplied that value.
+ if (inst_fetch_params.btb_size <= 0) {
+ errorNonPositiveParam("size");
+ }
+
+ if (inst_fetch_params.btb_block_size <= 0) {
+ errorNonPositiveParam("block_size");
+ }
+
+ if (inst_fetch_params.btb_assoc <= 0) {
+ errorNonPositiveParam("assoc");
+ }
+
+ if (inst_fetch_params.btb_num_banks <= 0) {
+ errorNonPositiveParam("num_banks");
+ }
+}
+
+BranchPredictor::BranchPredictor(XMLNode* _xml_data,
+ InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats,
+ bool exist_)
+ : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL),
+ L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return; // exist == false: build nothing, every sub-array pointer stays NULL
+ int tag; // tag width, only computed for multithreaded cores
+ int data; // width of one entry: its bit count divided by BITS_PER_BYTE, rounded up
+ int size; // data * number of entries, handed to the array model as cache_sz
+ // Builds five ArrayST tables (global, local L1, local L2, chooser, RAS) and adds each area to this component.
+ clockRate = core_params.clockRate;
+ name = "Branch Predictor";
+
+ // Interface parameters shared by all branch predictor structures
+ interface_ip.pure_cam = false;
+
+ if (core_params.multithreaded) { // tagged arrays: tag is log2(num_hthreads) + EXTRA_TAG_BITS
+ tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS);
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ } else { // single-threaded: no tag, arrays are modeled as pure RAM
+ interface_ip.specific_tag = 0;
+ interface_ip.tag_w = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_ram = true;
+ }
+
+ // Fill branch_pred_params (assoc, nbanks, table sizes) from the XML subtree
+ set_params_stats();
+
+ // assoc and nbanks are set once here and reused by every table built below
+ interface_ip.assoc = branch_pred_params.assoc;
+ interface_ip.nbanks = branch_pred_params.nbanks;
+
+ // Global predictor table
+ data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE));
+ size = data * branch_pred_params.global_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + globalBPT->local_result.area);
+
+ // Local predictor, level 1 table
+ data = int(ceil(branch_pred_params.local_l1_predictor_size /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.local_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ L1_localBPT = new ArrayST(xml_data, &interface_ip,
+ "Local Predictor, Level 1",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ L1_localBPT->area.set_area(L1_localBPT->area.get_area() +
+ L1_localBPT->local_result.area); // NOTE(review): only this table updates its own area member; confirm intended
+ area.set_area(area.get_area()+ L1_localBPT->local_result.area);
+
+ // Local predictor, level 2 table (uses the same local_predictor_entries count as level 1)
+ data = int(ceil(branch_pred_params.local_l2_predictor_size /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.local_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ L2_localBPT = new ArrayST(xml_data, &interface_ip,
+ "Local Predictor, Level 2",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + L2_localBPT->local_result.area);
+
+ // Chooser table
+ data = int(ceil(branch_pred_params.chooser_predictor_bits /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.chooser_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + chooser->local_result.area);
+
+ // Return address stack (RAS): one array is modeled and its area is scaled by num_hthreads
+ data = int(ceil(core_params.pc_width / BITS_PER_BYTE));
+ size = data * core_params.RAS_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false; // RAS is modeled as pure RAM regardless of the multithreaded setting
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate,
+ core_params.opt_local, core_params.core_ty);
+ RAS->output_data.area *= core_params.num_hthreads; // reported RAS area covers one copy per hardware thread
+ area.set_area(area.get_area() + RAS->local_result.area *
+ core_params.num_hthreads);
+
+}
+
+void
+BranchPredictor::set_params_stats() {
+ int num_children = xml_data->nChildNode("component"); // Scan child components for type BranchPredictor and load its params
+ int i; // passed by pointer to getChildNodePtr below
+ for (i = 0; i < num_children; i++) {
+ XMLNode* child = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = child->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(child->getAttribute("id"));
+
+ STRCMP(type, "BranchPredictor") { // NOTE(review): type may still be NULL here unless the warning above aborts; confirm
+ int sub_num_children = child->nChildNode("param");
+ int j; // passed by pointer to getChildNodePtr below
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value"); // not referenced directly here; presumably consumed inside ASSIGN_INT_IF, TODO confirm
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("assoc", branch_pred_params.assoc);
+ ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks);
+ ASSIGN_INT_IF("local_l1_predictor_size",
+ branch_pred_params.local_l1_predictor_size);
+ ASSIGN_INT_IF("local_l2_predictor_size",
+ branch_pred_params.local_l2_predictor_size);
+ ASSIGN_INT_IF("local_predictor_entries",
+ branch_pred_params.local_predictor_entries);
+ ASSIGN_INT_IF("global_predictor_entries",
+ branch_pred_params.global_predictor_entries);
+ ASSIGN_INT_IF("global_predictor_bits",
+ branch_pred_params.global_predictor_bits);
+ ASSIGN_INT_IF("chooser_predictor_entries",
+ branch_pred_params.chooser_predictor_entries);
+ ASSIGN_INT_IF("chooser_predictor_bits",
+ branch_pred_params.chooser_predictor_bits);
+
+ else { // presumably reached when no ASSIGN_INT_IF name matched (macro body not shown), TODO confirm
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ // The core reads in the number of branches and the number of
+ // function calls and these values are passed through the
+ // core_stats variable, so we don't need to read them in here
+ }
+ }
+}
+
+SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), int_inst_window(NULL),
+ fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL),
+ fp_instruction_selection(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return; // exist == false: build nothing, every sub-component pointer stays NULL
+ int tag; // tag width handed to the array model as tag_w
+ int data; // entry width; bits in the in-order branch, bytes in the OOO branch
+ int size; // total array size handed to the array model as cache_sz
+ int line; // in-order branch only: instruction length divided by BITS_PER_BYTE, rounded up
+ bool is_default = true; // forwarded unchanged to every selection_logic constructor
+ string tmp_name; // display name chosen per scheduler type in the OOO branch
+ // Builds the instruction window(s), optional ROB and selection logic for a multithreaded in-order or an OOO core.
+ clockRate = core_params.clockRate;
+ name = "Instruction Scheduler";
+ if ((core_params.core_ty == Inorder && core_params.multithreaded)) {
+ //Instruction issue queue, in-order multi-issue or multithreaded
+ //processor also has this structure. Unified window for Inorder
+ //processors
+ //This tag width is the normal thread state bits based on
+ //Niagara Design
+ tag = int(log2(core_params.num_hthreads) * core_params.perThreadState);
+ data = core_params.instruction_length;
+ line = int(ceil(data / BITS_PER_BYTE));
+ size = core_params.instruction_window_size * line;
+ if (size < MIN_BUFFER_SIZE) { // clamp the window to MIN_BUFFER_SIZE
+ size = MIN_BUFFER_SIZE;
+ }
+
+ //NOTE: x86 inst can be very lengthy, up to 15B.
+ //Source: Intel® 64 and IA-32 Architectures
+ //Software Developer’s Manual
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
 interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
 interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_issueW;
+ interface_ip.num_wr_ports = core_params.peak_issueW;
 interface_ip.num_se_rd_ports = 0;
- RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty);
- RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
- area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
+ interface_ip.num_search_ports = core_params.peak_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ int_inst_window = new ArrayST(xml_data, &interface_ip,
+ "InstFetchQueue", Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ int_inst_window->output_data.area *= core_params.num_pipelines; // reported area is scaled by the pipeline count
+ area.set_area(area.get_area() + int_inst_window->local_result.area *
+ core_params.num_pipelines);
+ Iw_height = int_inst_window->local_result.cache_ht;
-}
+ /*
+ * selection logic
+ * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
+ * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
+ * at the issue stage.
+ */
-SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- int_inst_window(0),
- fp_inst_window(0),
- ROB(0),
- instruction_selection(0),
- exist(exist_)
- {
- if (!exist) return;
- int tag, data;
- bool is_default=true;
- string tmp_name;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- if ((coredynp.core_ty==Inorder && coredynp.multithreaded))
- {
- //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors
- tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design
- data = XML->sys.core[ithCore].instruction_length;
- //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures
- //Software Developer’s Manual
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.peak_issueW;
- int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
- int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- //output_data_csv(iRS.RS.local_result);
- Iw_height =int_inst_window->local_result.cache_ht;
-
- /*
- * selection logic
- * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
- * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
- * at the issue stage.
- */
-
- instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
- coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads,
- &interface_ip, Core_device, coredynp.core_ty);
+ int_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.instruction_window_size,
+ core_params.peak_issueW *
+ core_params.num_hthreads,
+ &interface_ip,
+ "Int Instruction Selection Logic",
+ core_stats.inst_window_wakeup_accesses,
+ clockRate, Core_device, core_params.core_ty);
+
+ if (core_params.fp_instruction_window_size > 0) { // FP selection logic only when an FP window is configured
+ fp_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.fp_instruction_window_size,
+ core_params.fp_issueW *
+ core_params.num_hthreads,
+ &interface_ip,
+ "FP Instruction Selection Logic",
+ core_stats.fp_inst_window_wakeup_accesses,
+ clockRate, Core_device,
+ core_params.core_ty);
 }
+ }
- if (coredynp.core_ty==OOO)
- {
+ if (core_params.core_ty == OOO) {
 /*
 * CAM based instruction window
 * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored
@@ -525,3611 +605,3405 @@ SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* in
 * It is written once and read twice(two operands) before an instruction can be issued.
 * X86 instruction can be very long up to 15B. add instruction length in XML
 */
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- tag = coredynp.phy_ireg_width;
- // Each time only half of the tag is compared, but two tag should be stored.
- // This underestimate the search power
- data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0));
- //Data width being divided by 2 means only after both operands available the whole data will be read out.
- //This is modeled using two equivalent readouts with half of the data width
- tmp_name = "InstIssueQueue";
- }
- else
- {
- tag = coredynp.phy_ireg_width;
- // Each time only half of the tag is compared, but two tag should be stored.
- // This underestimate the search power
- data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+
- 2*coredynp.int_data_width)/2.0)/8.0));
- //Data width being divided by 2 means only after both operands available the whole data will be read out.
- //This is modeled using two equivalent readouts with half of the data width
-
- tmp_name = "IntReservationStation";
- }
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 2*1.0/clockRate;
- interface_ip.latency = 2*1.0/clockRate;
+ if (core_params.scheu_ty == PhysicalRegFile) { // issue queue: entry width excludes operand data
+ tag = core_params.phy_ireg_width;
+ data = int((ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_ireg_width -
+ core_params.arch_ireg_width)) /
+ (double)NUM_SOURCE_OPERANDS) /
+ BITS_PER_BYTE));
+ tmp_name = "Integer Instruction Window";
+ } else { // reservation station: entry width also includes 2 * int_data_width
+ tag = core_params.phy_ireg_width;
+ data = int(ceil(((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_ireg_width -
+ core_params.arch_ireg_width) +
+ 2 * core_params.int_data_width) /
+ (double)NUM_SOURCE_OPERANDS) /
+ BITS_PER_BYTE));
+ tmp_name = "Integer Reservation Station";
+ }
+
+ size = data * core_params.instruction_window_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_issueW;
+ interface_ip.num_wr_ports = core_params.peak_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.peak_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate; // integer window timing is scaled by the operand count; the FP window below is not
+ interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate;
+ int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name,
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ int_inst_window->output_data.area *= core_params.num_pipelines; // reported area is scaled by the pipeline count
+ area.set_area(area.get_area() + int_inst_window->local_result.area *
+ core_params.num_pipelines);
+ Iw_height = int_inst_window->local_result.cache_ht;
+
+ // Floating-point instruction window or reservation station
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width;
+ data = int(ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_freg_width -
+ core_params.arch_freg_width)) / BITS_PER_BYTE));
+ tmp_name = "FP Instruction Window";
+ } else {
+ tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width; // NOTE(review): uses phy_ireg_width while the data width below uses phy_freg_width; confirm
+ data = int(ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_freg_width -
+ core_params.arch_freg_width) +
+ NUM_SOURCE_OPERANDS * core_params.fp_data_width) /
+ BITS_PER_BYTE));
+ tmp_name = "FP Reservation Station";
+ }
+
+ size = data * core_params.fp_instruction_window_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
 interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
 interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.peak_issueW;
- int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
- int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- Iw_height =int_inst_window->local_result.cache_ht;
- //FU inst window
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared
- data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0));
- tmp_name = "FPIssueQueue";
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.fp_issueW;
+ interface_ip.num_wr_ports = core_params.fp_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.fp_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fp_inst_window =
+ new ArrayST(xml_data, &interface_ip, tmp_name, Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ fp_inst_window->output_data.area *= core_params.num_fp_pipelines; // reported area is scaled by the FP pipeline count
+ area.set_area(area.get_area() + fp_inst_window->local_result.area
+ *core_params.num_fp_pipelines);
+ fp_Iw_height = fp_inst_window->local_result.cache_ht;
+
+ if (core_params.ROB_size > 0) {
+ /*
+ * If ROB_size = 0, then the target processor does not support hardware-based
+ * speculation, i.e., the processor allows OOO issue as well as OOO completion, which
+ * means branches must be resolved before instructions are issued into the instruction window, since
+ * there is no chance to flush a mispredicted branch path after instructions are issued in this situation.
+ *
+ * ROB size = number of in-flight instructions. The ROB is unified for int and fp instructions.
+ * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
+ * However, this approach was abandoned due to its high power and poor scalability.
+ * McPAT models the current style of ROB implementation, a circular buffer.
+ * The ROB is written once when an instruction is issued and read once when the instruction is committed.
+ */
+ int robExtra = int(ceil(ROB_STATUS_BITS +
+ log2(core_params.num_hthreads)));
+
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ //PC is to id the instruction for recover exception.
+ //inst is used to map the renamed dest. registers. so that
+ //commit stage can know which reg/RRAT to update
+ data = int(ceil((robExtra + core_params.pc_width +
+ core_params.phy_ireg_width) / BITS_PER_BYTE));
+ } else {
+ //in RS based OOO, ROB also contains value of destination reg
+ data = int(ceil((robExtra + core_params.pc_width +
+ core_params.phy_ireg_width +
+ core_params.fp_data_width) / BITS_PER_BYTE));
+ }
+
+ interface_ip.cache_sz = data * core_params.ROB_size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.ROB_assoc;
+ interface_ip.nbanks = core_params.ROB_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.ROB_tag_width > 0;
+ interface_ip.tag_w = core_params.ROB_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_commitW; // read ports follow commit width, write ports follow issue width
+ interface_ip.num_wr_ports = core_params.peak_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ROB->output_data.area *= core_params.num_pipelines; // reported area is scaled by the pipeline count
+ area.set_area(area.get_area() + ROB->local_result.area *
+ core_params.num_pipelines);
+ ROB_height = ROB->local_result.cache_ht;
+ }
+
+ int_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.instruction_window_size,
+ core_params.peak_issueW, &interface_ip,
+ "Int Instruction Selection Logic",
+ core_stats.inst_window_wakeup_accesses,
+ clockRate, Core_device, core_params.core_ty);
+
+ if (core_params.fp_instruction_window_size > 0) { // FP selection logic only when an FP window is configured
+ fp_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.fp_instruction_window_size,
+ core_params.fp_issueW, &interface_ip,
+ "FP Instruction Selection Logic",
+ core_stats.fp_inst_window_wakeup_accesses,
+ clockRate, Core_device,
+ core_params.core_ty);
 }
- else
- {
- tag = 2*coredynp.phy_ireg_width;
- data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+
- 2*coredynp.fp_data_width)/8.0));
- tmp_name = "FPReservationStation";
+
+ }
+}
+
+LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return; // exist == false: build nothing, dcache, LSQ and LoadQ stay NULL
+ int tag; // queue tag width handed to the array model as tag_w
+ int line; // queue entry width: data_path_width divided by BITS_PER_BYTE, rounded up
+ int size; // total queue size handed to the array model as cache_sz
+ int ldst_opcode = core_params.opcode_width; // opcode bits included in the queue tag
+ // Attaches an optional data cache child, then models the store queue and, for OOO cores, a separate load queue.
+ clockRate = core_params.clockRate;
+ name = "Load/Store Unit";
+
+ // Check if there is a dcache child:
+ int i; // passed by pointer to getChildNodePtr below
+ dcache = NULL; // already NULL from the initializer list
+ for( i = 0; i < xml_data->nChildNode("component"); i++ ) {
+ XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = childXML->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(childXML->getAttribute("id"));
+
+ STRCMP(type, "CacheUnit") {
+ XMLCSTR name = childXML->getAttribute("name"); // NOTE(review): hides the name assigned above and is not NULL-checked before strcmp; confirm
+ if (strcmp(name, "Data Cache") == 0 ||
+ strcmp(name, "dcache") == 0) {
+ dcache = new CacheUnit(childXML, &interface_ip);
+ children.push_back(dcache);
+ }
 }
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ }
+
+ /*
+ * LSU--in-order processors do not have separate load queue: unified lsq
+ * partitioned among threads
+ * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
+ */
+ tag = ldst_opcode + virtual_address_width +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
+ line = int(ceil(data_path_width / BITS_PER_BYTE));
+ size = core_params.store_buffer_size * line * core_params.num_hthreads;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.store_buffer_assoc;
+ interface_ip.nbanks = core_params.store_buffer_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.memory_ports;
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + LSQ->local_result.area);
+ area.set_area(area.get_area()*cdb_overhead); // inflate the accumulated area by cdb_overhead
+ lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead); // height grows by the square root of the area factor
+
+ if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) { // OOO core with a load buffer: add a separate load queue
+ tag = ldst_opcode + virtual_address_width +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
+ line = int(ceil(data_path_width / BITS_PER_BYTE));
+ size = core_params.load_buffer_size * line * core_params.num_hthreads;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.load_buffer_assoc;
+ interface_ip.nbanks = core_params.load_buffer_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
 interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
 interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_issueW;
- interface_ip.num_wr_ports = coredynp.fp_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.fp_issueW;
- fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
- fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
- area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
- fp_Iw_height =fp_inst_window->local_result.cache_ht;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- /*
- * if ROB_size = 0, then the target processor does not support hardware-based
- * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which
- * means branch must be resolved before instruction issued into instruction window, since
- * there is no change to flush miss-predict branch path after instructions are issued in this situation.
- *
- * ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
- * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
- * However, this approach is abandoned due to its high power and poor scalablility.
- * McPAT uses current implementation of ROB as circular buffer.
- * ROB is written once when instruction is issued and read once when the instruction is committed. *
- */
- int robExtra = int(ceil(5 + log2(coredynp.num_hthreads)));
- //5 bits are: busy, Issued, Finished, speculative, valid
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- //PC is to id the instruction for recover exception.
- //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update
-// data = int(ceil((robExtra+coredynp.pc_width +
-// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0));
- data = int(ceil((robExtra+coredynp.pc_width +
- coredynp.phy_ireg_width)/8.0));
- }
- else
- {
- //in RS based OOO, ROB also contains value of destination reg
-// data = int(ceil((robExtra+coredynp.pc_width +
-// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
- data = int(ceil((robExtra + coredynp.pc_width +
- coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
- }
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_commitW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = 0;
- ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
- ROB_height =ROB->local_result.cache_ht;
- }
-
- instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
- coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty);
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.memory_ports;
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device,
+ clockRate, core_params.opt_local,
+ core_params.core_ty);
+ LoadQ->area.set_area(LoadQ->area.get_area() +
+ LoadQ->local_result.area); // NOTE(review): LoadQ area is not added to this unit area, unlike LSQ above; confirm
+ area.set_area(area.get_area()*cdb_overhead); // NOTE(review): cdb_overhead is applied a second time to the running total; confirm
+ lsq_height = (LSQ->local_result.cache_ht +
+ LoadQ->local_result.cache_ht) * sqrt(cdb_overhead);
 }
+
 }
-LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- LSQ(0),
- exist(exist_)
-{
- if (!exist) return;
- int idx, tag, data, size, line, assoc, banks;
- bool debug= false;
- int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7];
-
- interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- //Dcache
- size = (int)XML->sys.core[ithCore].dcache.dcache_config[0];
- line = (int)XML->sys.core[ithCore].dcache.dcache_config[1];
- assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2];
- banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0];
- interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1];
- interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2];
- interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area);
- area.set_area(area.get_area()+ dcache.caches->local_result.area);
- //output_data_csv(dcache.caches.local_result);
-
- //dCache controllers
- //miss buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area);
- area.set_area(area.get_area()+ dcache.missb->local_result.area);
- //output_data_csv(dcache.missb.local_result);
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = dcache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area);
- area.set_area(area.get_area()+ dcache.ifb->local_result.area);
- //output_data_csv(dcache.ifb.local_result);
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ dcache.prefetchb->local_result.area);
- //output_data_csv(dcache.prefetchb.local_result);
-
- //WBB
-
- if (cache_p==Write_back)
- {
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = dcache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area);
- area.set_area(area.get_area()+ dcache.wbb->local_result.area);
- //output_data_csv(dcache.wbb.local_result);
- }
-
- /*
- * LSU--in-order processors do not have separate load queue: unified lsq
- * partitioned among threads
- * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
- */
- tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS;
- data = XML->sys.machine_bits;
- interface_ip.is_cache = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports;
- LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty);
- LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area);
- area.set_area(area.get_area()+ LSQ->local_result.area);
- area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(LSQ.LSQ.local_result);
- lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
-
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports;
- LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
- LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area);
- area.set_area(area.get_area()+ LoadQ->local_result.area);
- area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(LoadQ.LoadQ.local_result);
- lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
- }
+MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, // Models the memory management unit: builds the instruction TLB and data TLB arrays.
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_) // exist_ == false leaves itlb/dtlb NULL (see the early return below)
+ : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return; // nothing is parsed or built for a non-existent unit
+ int tag;
+ int data;
+ int line;
+
+ clockRate = core_params.clockRate;
+ name = "Memory Management Unit";
+
+ set_params_stats(); // fills mem_man_params / mem_man_stats from the XML before they are read below
+
+ // These three flags are set once here and are shared by the ITLB and DTLB configurations
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ //ITLB: TLBs are partitioned among threads according to Niagara and Nehalem
+ tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; // tag bits = virtual page number bits + bits to index the hardware threads + EXTRA_TAG_BITS
+ data = physical_address_width - int(floor(log2(virtual_memory_page_size))); // payload bits = physical address width minus the page-offset bits
+ line = int(ceil(data / BITS_PER_BYTE)); // entry size in bytes; NOTE(review): this rounds up only if BITS_PER_BYTE is floating-point -- confirm
+
+ interface_ip.cache_sz = mem_man_params.itlb_number_entries * line; // total bytes = number of entries * bytes per entry
+ interface_ip.line_sz = line;
+ interface_ip.assoc = mem_man_params.itlb_assoc;
+ interface_ip.nbanks = mem_man_params.itlb_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE; // output width in bits, i.e. one whole entry
+ interface_ip.specific_tag = tag > 0; // flag is set only when the computed tag width is positive
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports; // ITLB read/write and search ports both follow the instruction fetch port count
+ interface_ip.num_rd_ports = 0;
+ interface_ip.num_wr_ports = 0;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.number_instruction_fetch_ports;
+ interface_ip.throughput = mem_man_params.itlb_throughput / clockRate; // presumably the XML value is in cycles and this converts it to seconds -- confirm
+ interface_ip.latency = mem_man_params.itlb_latency / clockRate;
+ itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + itlb->local_result.area); // accumulate the ITLB area into this unit's total
+
+ //DTLB: same tag and entry layout as the ITLB; differs in the dtlb_* params and in using memory_ports for the read, write and search ports
+ tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
+ data = physical_address_width - int(floor(log2(virtual_memory_page_size)));
+ line = int(ceil(data / BITS_PER_BYTE)); // same byte rounding (and the same BITS_PER_BYTE caveat) as for the ITLB above
+
+ interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = mem_man_params.dtlb_assoc;
+ interface_ip.nbanks = mem_man_params.dtlb_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0; // unlike the ITLB, the DTLB uses separate read and write ports instead of read/write ports
+ interface_ip.num_rd_ports = core_params.memory_ports;
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate;
+ interface_ip.latency = mem_man_params.dtlb_latency / clockRate;
+ dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + dtlb->local_result.area); // accumulate the DTLB area into this unit's total
}
-MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- itlb(0),
- dtlb(0),
- exist(exist_)
-{
- if (!exist) return;
- int tag, data;
- bool debug= false;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.specific_tag = 1;
- //Itlb TLBs are partioned among threads according to Nigara and Nehalem
- tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
- data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty);
- itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area);
- area.set_area(area.get_area()+ itlb->local_result.area);
- //output_data_csv(itlb.tlb.local_result);
-
- //dtlb
- tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
- data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
- dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty);
- dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area);
- area.set_area(area.get_area()+ dtlb->local_result.area);
- //output_data_csv(dtlb.tlb.local_result);
+void
+MemManU::set_params_stats() { // Reads ITLB/DTLB params and stats from this node's XML "component" children into mem_man_params / mem_man_stats.
+ memset(&mem_man_params, 0, sizeof(MemoryManagementParams)); // zero both structs first, so fields the XML never names stay 0
+ memset(&mem_man_stats, 0, sizeof(MemoryManagementStats));
+ int num_children = xml_data->nChildNode("component");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* child = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = child->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(child->getAttribute("id")); // NOTE(review): type is still handed to STRCMP below when it is NULL -- confirm the warn helper does not return
+
+ STRCMP(type, "InstructionTLB") { // STRCMP is a macro defined elsewhere; presumably it opens a branch taken when type matches -- confirm
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("number_entries", // ASSIGN_*_IF are macros defined elsewhere; presumably each stores value into the field when node_name matches -- confirm
+ mem_man_params.itlb_number_entries);
+ ASSIGN_FP_IF("latency", mem_man_params.itlb_latency);
+ ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput);
+ ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc); // NOTE(review): assoc and nbanks go through the FP macro while number_entries uses the INT one -- confirm the field types
+ ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks);
+
+ else { // presumably reached only when no name above matched; depends on the macro expansion
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ sub_num_children = child->nChildNode("stat"); // second pass over the same component: its "stat" children
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* statNode = child->getChildNodePtr("stat", &j);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("total_accesses",
+ mem_man_stats.itlb_total_accesses);
+ ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses);
+ ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts);
+ else {
+ warnUnrecognizedStat(node_name);
+ }
+ }
+ } STRCMP(type, "DataTLB") { // DataTLB is handled like InstructionTLB, writing the dtlb_* fields; its stats are split into read and write counters
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("number_entries",
+ mem_man_params.dtlb_number_entries);
+ ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency);
+ ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput);
+ ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc);
+ ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks);
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ sub_num_children = child->nChildNode("stat");
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* statNode = child->getChildNodePtr("stat", &j);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("read_accesses",
+ mem_man_stats.dtlb_read_accesses);
+ ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses);
+ ASSIGN_FP_IF("write_accesses",
+ mem_man_stats.dtlb_write_accesses);
+ ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses);
+ ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts);
+
+ else {
+ warnUnrecognizedStat(node_name);
+ }
+ }
+ } // component types other than the two above are not handled in this function
+ }
}
-RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- IRF (0),
- FRF (0),
- RFWIN (0),
- exist(exist_)
- {
- /*
- * processors have separate architectural register files for each thread.
- * therefore, the bypass buses need to travel across all the register files.
- */
- if (!exist) return;
- int data;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- //**********************************IRF***************************************
- data = coredynp.int_data_width;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 2*coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
- IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
- area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
- //area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(IRF.RF.local_result);
-
- //**********************************FRF***************************************
- data = coredynp.fp_data_width;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ /*
+ * Builds the array models of the register file unit: the integer register
+ * file (IRF), the FP register file (FRF) and, only when
+ * core_params.regWindowing is set, the register-window array (RFWIN).
+ * All three are configured as pure RAM (is_cache and pure_cam false).
+ * When exist is false the constructor returns before building anything,
+ * leaving IRF, FRF and RFWIN NULL.
+ *
+ * Processors have separate architectural register files for each thread;
+ * therefore, the bypass buses need to travel across all the register files.
+ */
+ if (!exist) return;
+ int data;
+ int line;
+
+ clockRate = core_params.clockRate;
+ name = "Register File Unit";
+
+ //**********************************IRF************************************
+ data = core_params.int_data_width;
+ line = int(ceil(data / BITS_PER_BYTE)); // register width in bytes; NOTE(review): this rounds up only if BITS_PER_BYTE is floating-point -- confirm
+
+ interface_ip.cache_sz = core_params.num_IRF_entry * line; // total bytes = number of registers * bytes per register
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.phy_Regs_IRF_assoc;
+ interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0; // flag is set only when a positive tag width is configured
+ interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports;
+ interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate; // one clock period for both throughput and latency
+ interface_ip.latency = 1.0 / clockRate;
+ IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ IRF->output_data.area *= core_params.num_hthreads * // scale the reported IRF area by thread count, integer pipeline count and cdb_overhead
+ core_params.num_pipelines * cdb_overhead;
+ area.set_area(area.get_area() + IRF->local_result.area * // add the same scaled IRF area to this unit's total
+ core_params.num_hthreads * core_params.num_pipelines *
+ cdb_overhead);
+
+ //**********************************FRF************************************
+ data = core_params.fp_data_width;
+ line = int(ceil(data / BITS_PER_BYTE));
+
+ interface_ip.cache_sz = core_params.num_FRF_entry * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.phy_Regs_FRF_assoc;
+ interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0;
+ interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports;
+ interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ FRF->output_data.area *= core_params.num_hthreads * // FRF area is scaled by the FP pipeline count, not the integer one
+ core_params.num_fp_pipelines * cdb_overhead;
+ area.set_area(area.get_area() + FRF->local_result.area *
+ core_params.num_hthreads * core_params.num_fp_pipelines *
+ cdb_overhead);
+ int_regfile_height = IRF->local_result.cache_ht * // heights scale with thread count and sqrt(cdb_overhead), but not with the pipeline count
+ core_params.num_hthreads * sqrt(cdb_overhead);
+ fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads *
+ sqrt(cdb_overhead);
+ //since a EXU is associated with each pipeline, the cdb should not have
+ //longer length.
+
+ if (core_params.regWindowing) {
+ //*********************************REG_WIN*****************************
+ //ECC, and usually 2 regs are transferred together during window
+ //shifting. Niagara Mega cell
+ data = core_params.int_data_width;
+ line = int(ceil(data / BITS_PER_BYTE));
+
+ interface_ip.cache_sz = core_params.register_window_size * // window storage = window size * IRF array size in bytes * thread count
+ IRF->l_ip.cache_sz * core_params.num_hthreads;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.register_window_assoc;
+ interface_ip.nbanks = core_params.register_window_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.register_window_tag_width > 0;
+ interface_ip.tag_w = core_params.register_window_tag_width;
+ interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.register_window_rw_ports; // the window array uses only read/write ports; dedicated read and write ports are zero
+ interface_ip.num_rd_ports = 0;
+ interface_ip.num_wr_ports = 0;
interface_ip.num_se_rd_ports = 0;
- FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
- FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
- area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
- //area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(FRF.RF.local_result);
- int_regfile_height= IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
- fp_regfile_height = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
- //since a EXU is associated with each pipeline, the cdb should not have longer length.
- if (coredynp.regWindowing)
- {
- //*********************************REG_WIN************************************
- data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 4.0/clockRate;
- interface_ip.latency = 4.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty);
- RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
- //output_data_csv(RFWIN.RF.local_result);
- }
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = // unlike IRF/FRF, window timing comes from core_params instead of a fixed single clock period
+ core_params.register_window_throughput / clockRate;
+ interface_ip.latency =
+ core_params.register_window_latency / clockRate;
+ RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device,
+ clockRate, core_params.opt_local,
+ core_params.core_ty);
+ RFWIN->output_data.area *= core_params.num_pipelines; // window area scales with the pipeline count only (no thread or cdb_overhead factor)
+ area.set_area(area.get_area() + RFWIN->local_result.area *
+ core_params.num_pipelines);
+ }
+}
+EXECU::EXECU(XMLNode* _xml_data, // Builds the "Execution Unit" component and its sub-components
+ InputParameter* interface_ip_, double lsq_height_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL), // every sub-component pointer starts NULL
+ exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL),
+ int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL),
+ fpTagBypass(NULL), interface_ip(*interface_ip_),
+ lsq_height(lsq_height_), core_params(_core_params),
+ core_stats(_core_stats), exist(exist_) {
+ if (!exist) return; // unit not present: nothing is built, all pointers stay NULL
+ double fu_height = 0.0; // NOTE(review): accumulated below but not read again in this constructor
+ clockRate = core_params.clockRate;
+ name = "Execution Unit";
+ rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats); // always built
+ if (core_params.core_ty == OOO || // a scheduler is only built for OOO cores ...
+ (core_params.core_ty == Inorder && core_params.multithreaded)) { // ... or multithreaded in-order cores
+ scheu = new SchedulerU(xml_data, &interface_ip, core_params,
+ core_stats);
+ area.set_area(area.get_area() + scheu->area.get_area() );
+ }
+ exeu = new FunctionalUnit(xml_data, &interface_ip, core_params, // the ALU functional unit is always built
+ core_stats, ALU);
+ area.set_area(area.get_area() + exeu->area.get_area() + // rfu's area is added here together with the ALU's
+ rfu->area.get_area());
+ fu_height = exeu->FU_height;
+ if (core_params.num_fpus > 0) { // FPU is optional; fp_u stays NULL otherwise
+ fp_u = new FunctionalUnit(xml_data, &interface_ip,
+ core_params, core_stats, FPU);
+ area.set_area(area.get_area() + fp_u->area.get_area());
+ }
+ if (core_params.num_muls > 0) { // multiplier is optional; mul stays NULL otherwise
+ mul = new FunctionalUnit(xml_data, &interface_ip,
+ core_params, core_stats, MUL);
+ area.set_area(area.get_area() + mul->area.get_area());
+ fu_height += mul->FU_height;
+ }
+ /*
+ * Broadcast (bypass) logic: int data broadcast, int tag broadcast,
+ * fp data broadcast and fp tag broadcast.
+ * The integer bypass has two paths and the fp bypass has three paths.
+ * Each bus carries multiple tri-state drivers and muxes that feed the
+ * different components attached to that bus.
+ */
+ interface_ip.wt = core_params.execu_broadcast_wt; // wire settings used by the interconnects built later
+ interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type; // both wire material fields take the same value
+ interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type;
+ interface_ip.throughput = core_params.broadcast_numerator / clockRate; // throughput and latency are set to the same value
+ interface_ip.latency = core_params.broadcast_numerator / clockRate;
+ double scheu_Iw_height = 0.0; // scheduler heights default to 0 so the length sums
+ double scheu_ROB_height = 0.0; // further down can be computed without testing
+ double scheu_fp_Iw_height = 0.0; // whether a scheduler was built
+ if (scheu) {
+ scheu_Iw_height = scheu->Iw_height;
+ scheu_ROB_height = scheu->ROB_height;
+ scheu_fp_Iw_height = scheu->fp_Iw_height;
+ }
- }
-
-EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- lsq_height(lsq_height_),
- coredynp(dyn_p_),
- rfu(0),
- scheu(0),
- fp_u(0),
- exeu(0),
- mul(0),
- int_bypass(0),
- intTagBypass(0),
- int_mul_bypass(0),
- intTag_mul_Bypass(0),
- fp_bypass(0),
- fpTagBypass(0),
- exist(exist_)
-{
- if (!exist) return;
- double fu_height = 0.0;
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- rfu = new RegFU(XML, ithCore, &interface_ip,coredynp);
- scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp);
- exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU);
- area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() );
- fu_height = exeu->FU_height;
- if (coredynp.num_fpus >0)
- {
- fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU);
- area.set_area(area.get_area()+ fp_u->area.get_area());
- }
- if (coredynp.num_muls >0)
- {
- mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL);
- area.set_area(area.get_area()+ mul->area.get_area());
- fu_height += mul->FU_height;
- }
- /*
- * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast
- * integer by pass has two paths and fp has 3 paths.
- * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus
- */
- if (XML->sys.Embedded)
- {
- interface_ip.wt =Global_30;
- interface_ip.wire_is_mat_type = 0;
- interface_ip.wire_os_mat_type = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- }
- else
- {
- interface_ip.wt =Global;
- interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used
- interface_ip.wire_os_mat_type = 2;
- interface_ip.throughput = 10.0/clockRate; //Do not care
- interface_ip.latency = 10.0/clockRate;
- }
-
- if (coredynp.core_ty==Inorder)
- {
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32),
- rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area());
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
-
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
- rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
- rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
- else
- {//OOO
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- /* For physical register based OOO,
- * data broadcast interconnects cover across functional units, lsq, inst windows and register files,
- * while tag broadcast interconnects also cover across ROB
- */
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
-
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
- rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
- else
- {
- /*
- * In RS based processor both data and tag are broadcast together,
- * covering functional units, lsq, nst windows, register files, and ROBs
- */
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
-
-
- }
- area.set_area(area.get_area()+ bypass.area.get_area());
-}
+ // Parameters shared by every bypass Interconnect constructed below
+ double base_w = core_params.execu_bypass_base_width;
+ double base_h = core_params.execu_bypass_base_height;
+ int level = core_params.execu_bypass_start_wiring_level;
+ double route_over_perc = core_params.execu_bypass_route_over_perc;
+ Wire_type wire_type = core_params.execu_bypass_wire_type;
+ int data_w; // reassigned before each Interconnect; presumably the bus width -- confirm against Interconnect
+ double len; // reassigned before each Interconnect; a sum of component heights
+
+ if (core_params.core_ty == Inorder) {
+ data_w = int(ceil(data_path_width / 32.0)*32); // rounds up to a multiple of 32; NOTE(review): data_path_width is not declared in this constructor -- confirm where it is defined
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height; // int register file + ALU + LSQ heights
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate, false,
+ route_over_perc, core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height + // same as the data path plus the
+ scheu_Iw_height; // scheduler Iw height (0 if no scheduler)
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate, false,
+ route_over_perc, core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ if (core_params.num_muls > 0) { // mul is non-NULL under this same condition
+ data_w = int(ceil(data_path_width / 32.0)*32*1.5); // 1.5x the rounded data path width, truncated to int
+ len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + // NOTE(review): uses fp_regfile_height here, while the OOO branches use int_regfile_height for the mul bypass -- confirm intended
+ lsq_height;
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + // data-path length plus scheduler Iw height
+ lsq_height + scheu_Iw_height;
+ intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
+
+ if (core_params.num_fpus > 0) { // fp_u is non-NULL under this same condition
+ data_w = int(ceil(data_path_width / 32.0)*32*1.5); // same width formula as the mul data bypass
+ len = rfu->fp_regfile_height + fp_u->FU_height; // fp register file + FPU heights only (no LSQ term)
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + // tag path also adds the LSQ and
+ scheu_Iw_height; // scheduler Iw heights
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ } else {//OOO: every core_ty other than Inorder takes this branch
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ /* For physical-register-file based OOO cores,
+ * data broadcast interconnects span the functional units, lsq,
+ * inst windows and register files,
+ * while tag broadcast interconnects additionally span the ROB.
+ */
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height; // NOTE(review): no scheu_Iw_height term although the comment above lists inst windows -- confirm
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height + // data-path length plus scheduler Iw
+ scheu_Iw_height + scheu_ROB_height; // and ROB heights
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ if (core_params.num_muls > 0) { // mul is non-NULL under this same condition
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height + // int data-path length plus the
+ mul->FU_height + lsq_height; // multiplier height
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height + // mul data-path length plus
+ mul->FU_height + lsq_height + scheu_Iw_height + // scheduler Iw and ROB heights
+ scheu_ROB_height;
+ intTag_mul_Bypass = new Interconnect(xml_data,
+ "Mul Bypass Tag",
+ Core_device, base_w,
+ base_h, data_w, len,
+ &interface_ip, level,
+ clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
-RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- iFRAT(0),
- fFRAT(0),
- iRRAT(0),
- fRRAT(0),
- ifreeL(0),
- ffreeL(0),
- idcl(0),
- fdcl(0),
- RAHT(0),
- exist(exist_)
- {
- /*
- * Although renaming logic maybe be used in in-order processors,
- * McPAT assumes no renaming logic is used since the performance gain is very limited and
- * the only major inorder processor with renaming logic is Itainium
- * that is a VLIW processor and different from current McPAT's model.
- * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT;
- * i,f prefix mean int and fp
- * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires.
- * FRAT will be read twice and written once per instruction;
- * RRAT will be write once per instruction when committing and reads out all when context switch
- * checkpointing is implicit
- * Renaming logic is duplicated for each different hardware threads
- *
- * No Dual-RAT is needed in RS-based OOO processors,
- * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
- * to make sure all the renamings associated with the ROB to be released are updated at the same time.
- * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag,
- * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag,
- *
- * Both RAM and CAM have same DCL
- */
- if (!exist) return;
- int tag, data, out_w;
-// interface_ip.wire_is_mat_type = 0;
-// interface_ip.wire_os_mat_type = 0;
-// interface_ip.wt = Global_30;
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- if (coredynp.core_ty==OOO)
- {
- //integer pipeline
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions
- data = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
-// data = int(ceil(coredynp.phy_ireg_width/8.0));
- out_w = 1;//int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
-// //RAHT According to Intel, combine GC with FRAT is very costly.
-// data = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry);
-// out_w = data;
-// interface_ip.is_cache = false;
-// interface_ip.pure_cam = false;
-// interface_ip.pure_ram = true;
-// interface_ip.line_sz = data;
-// interface_ip.cache_sz = data*coredynp.globalCheckpoint;
-// interface_ip.assoc = 1;
-// interface_ip.nbanks = 1;
-// interface_ip.out_w = out_w*8;
-// interface_ip.access_mode = 0;
-// interface_ip.throughput = 1.0/clockRate;
-// interface_ip.latency = 1.0/clockRate;
-// interface_ip.obj_func_dyn_energy = 0;
-// interface_ip.obj_func_dyn_power = 0;
-// interface_ip.obj_func_leak_power = 0;
-// interface_ip.obj_func_cycle_t = 1;
-// interface_ip.num_rw_ports = 1;//the extra one port is for GCs
-// interface_ip.num_rd_ports = 2*coredynp.decodeW;
-// interface_ip.num_wr_ports = coredynp.decodeW;
-// interface_ip.num_se_rd_ports = 0;
-// iFRAT = new ArrayST(&interface_ip, "Int FrontRAT");
-// iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
-// area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT floating point
- data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ if (core_params.num_fpus > 0) { // fp_u is non-NULL under this same condition
+ data_w = int(ceil(core_params.fp_data_width));
+ len = rfu->fp_regfile_height + fp_u->FU_height; // fp register file + FPU heights only
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_freg_width;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + // tag path adds LSQ, fp Iw and
+ scheu_fp_Iw_height + scheu_ROB_height; // ROB heights
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ } else { // scheu_ty is anything other than PhysicalRegFile
+ /*
+ * In RS (reservation-station) based processors both data and tag are
+ * broadcast together, covering functional units, lsq, inst windows,
+ * register files, and ROBs, so each data/tag pair uses the same length.
+ */
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height + scheu_ROB_height;
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height + // same expression as the data path above
+ scheu_Iw_height + scheu_ROB_height;
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ if (core_params.num_muls > 0) { // mul is non-NULL under this same condition
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height + // int path length plus the multiplier height
+ mul->FU_height + lsq_height + scheu_Iw_height +
+ scheu_ROB_height;
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height + // same expression as the mul data path above
+ mul->FU_height + lsq_height + scheu_Iw_height +
+ scheu_ROB_height;
+ intTag_mul_Bypass = new Interconnect(xml_data,
+ "Mul Bypass Tag",
+ Core_device, base_w,
+ base_h, data_w, len,
+ &interface_ip, level,
+ clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- //FRAT
- tag = coredynp.arch_ireg_width;
- data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.decodeW;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT for FP
- tag = coredynp.arch_freg_width;
- data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ if (core_params.num_fpus > 0) { // fp_u is non-NULL under this same condition
+ data_w = int(ceil(core_params.fp_data_width));
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + // fp register file, FPU, LSQ,
+ scheu_fp_Iw_height + scheu_ROB_height; // fp Iw and ROB heights
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_freg_width;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + // same expression as the data path above
+ scheu_fp_Iw_height + scheu_ROB_height;
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ }
+ }
+ if (int_bypass) { // from here on: add each interconnect that was actually built to children
+ children.push_back(int_bypass);
+ }
+ if (intTagBypass) {
+ children.push_back(intTagBypass);
+ }
+ if (int_mul_bypass) { // NULL unless num_muls > 0
+ children.push_back(int_mul_bypass);
+ }
+ if (intTag_mul_Bypass) {
+ children.push_back(intTag_mul_Bypass);
+ }
+ if (fp_bypass) { // NULL unless num_fpus > 0
+ children.push_back(fp_bypass);
+ }
+ if (fpTagBypass) {
+ children.push_back(fpTagBypass);
+ }
- }
+ area.set_area(area.get_area() + int_bypass->area.get_area() + // dereferenced without a NULL check: every core-type
+ intTagBypass->area.get_area()); // branch above constructs both int interconnects
+ if (core_params.num_muls > 0) { // same condition under which the mul interconnects were built
+ area.set_area(area.get_area() + int_mul_bypass->area.get_area() +
+ intTag_mul_Bypass->area.get_area());
+ }
+ if (core_params.num_fpus > 0) { // same condition under which the fp interconnects were built
+ area.set_area(area.get_area() + fp_bypass->area.get_area() +
+ fpTagBypass->area.get_area());
+ }
+}
- //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL),
+ fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL),
+ RAHT(NULL), interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ int tag;
+ int data;
+ int out_w;
+ int size;
+
+ // Assumption:
+ // We make an implicit design assumption based on the specific structure
+ // that is being modeled.
+ // 1. RAM-based RATs are direct mapped. However, if the associated
+ // scheduler is a reservation station style, the RATs are fully
+ // associative.
+ // 2. Non-CAM based RATs and free lists do not have tags.
+ // 3. Free lists are direct mapped.
+
+ const int RAM_BASED_RAT_ASSOC = 1;
+ const int RS_RAT_ASSOC = 0;
+ const int NON_CAM_BASED_TAG_WIDTH = 0;
+ const int FREELIST_ASSOC = 1;
+
+ clockRate = core_params.clockRate;
+ name = "Rename Unit";
+ if (core_params.core_ty == OOO) {
+ //integer pipeline
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ if (core_params.rm_ty == RAMbased) {
+ //FRAT with global checkpointing (GCs); please see the paper/tech
+ //report for detailed explanations
+
+ data = int(ceil(core_params.phy_ireg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+
+ size = data * core_params.archi_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
- iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iRRAT->area.get_area());
-
- //RRAT for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT floating point
+ data = int(ceil(core_params.phy_freg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fRRAT->area.get_area());
-
- //Freelist of renaming unit always RAM based
- //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist
- // 2)When instruction commits the Phyregisters/ROB needed to be recycled.
- //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+
+ } else if ((core_params.rm_ty == CAMbased)) {
+ //IRAT
+ tag = core_params.arch_ireg_width;
+ //the address of CAM needed to be sent out
+ data = int(ceil((core_params.arch_ireg_width + 1 *
+ core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//TODO
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width;
- //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
- ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ifreeL->area.get_area());
-
- //freelist for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ffreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT for FP
+ tag = core_params.arch_freg_width;
+ //the address of CAM needed to be sent out
+ data = int(ceil((core_params.arch_freg_width + 1 *
+ core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ffreeL->area.get_area());
-
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
-
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased){
- /*
- * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
- * to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time.
- * RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than
- * CAM based RAT so that it is more scalable as number of ROB/physical regs increases.
- */
- tag = coredynp.phy_ireg_width;
- data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= coredynp.commitW;//TODO
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->local_result.adjust_area();
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FP
- tag = coredynp.phy_freg_width;
- data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->local_result.adjust_area();
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- //FRAT
- tag = coredynp.arch_ireg_width;
- data = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO
- interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT
- tag = coredynp.arch_freg_width;
- data = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+ }
- }
- //No RRAT for RS based OOO
- //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ //RRAT is always RAM based, does not have GCs, and is used only to
+ //record the latest non-speculative mapping
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_IRF_size *
+ NUM_SOURCE_OPERANDS;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.retire_rat_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.commitW;
+ interface_ip.num_wr_ports = core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ iRRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iRRAT->area.get_area());
+
+ //RRAT for FP
+ data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size *
+ NUM_SOURCE_OPERANDS;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.retire_rat_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ fRRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fRRAT->area.get_area());
+
+ //Freelist of renaming unit always RAM based
+ //Recycling happens at two places: 1) when the DCL check finds a WAW, the Phyregisters/ROB entries are recycled directly into the freelist
+ // 2) when an instruction commits, the Phyregisters/ROB entries need to be recycled.
+ //Therefore num_wr port = decode-1 (-1 means at least one phy reg will be used for the current renaming group) + commit width
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ifreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports =
+ core_params.decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ifreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ifreeL->area.get_area());
+
+ //freelist for FP
+ data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ffreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports =
+ core_params.fp_decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ffreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ffreeL->area.get_area());
+
+ } else if (core_params.scheu_ty == ReservationStation) {
+ if (core_params.rm_ty == RAMbased) {
+ tag = core_params.phy_ireg_width;
+ data = int(ceil(core_params.phy_ireg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RS_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.commitW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->local_result.adjust_area();
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FP
+ tag = core_params.phy_freg_width;
+ data = int(ceil(core_params.phy_freg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RS_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.fp_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->local_result.adjust_area();
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+
+ } else if ((core_params.rm_ty == CAMbased)) {
+ //FRAT
+ //the address of CAM needed to be sent out
+ tag = core_params.arch_ireg_width;
+ data = int(ceil (core_params.arch_ireg_width +
+ 1 * core_params.globalCheckpoint /
+ BITS_PER_BYTE));
+ out_w = int(ceil (core_params.arch_ireg_width /
+ BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT
+ tag = core_params.arch_freg_width;
+ //the address of CAM needed to be sent out
+ data = int(ceil(core_params.arch_freg_width +
+ 1 * core_params.globalCheckpoint /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//TODO
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ifreeL->area.get_area());
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
+ }
+ //No RRAT for RS based OOO
+ //Freelist of renaming unit of RS based OOO is unified for both int and fp renaming unit since the ROB is unified
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ifreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports =
+ core_params.decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ifreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ifreeL->area.get_area());
}
-}
- if (coredynp.core_ty==Inorder&& coredynp.issueW>1)
- {
- /* Dependency check logic will only present when decode(issue) width>1.
- * Multiple issue in order processor can do without renaming, but dcl is a must.
- */
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
}
+ idcl =
+ new dep_resource_conflict_check(xml_data,
+ "Instruction Dependency Check?",
+ &interface_ip, core_params,
+ core_params.phy_ireg_width,
+ clockRate);
+ fdcl =
+ new dep_resource_conflict_check(xml_data,
+ "FP Dependency Check?", &interface_ip,
+ core_params,
+ core_params.phy_freg_width, clockRate);
}
-Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- ifu (0),
- lsu (0),
- mmu (0),
- exu (0),
- rnu (0),
- corepipe (0),
- undiffCore (0),
- l2cache (0)
-{
- /*
- * initialize, compute and optimize individual components.
- */
-
- double pipeline_area_per_unit;
- if (XML->sys.Private_L2)
- {
- l2cache = new SharedCache(XML,ithCore, &interface_ip);
-
- }
-// interface_ip.wire_is_mat_type = 2;
-// interface_ip.wire_os_mat_type = 2;
-// interface_ip.wt =Global_30;
- set_core_param();
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp);
- lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp);
- mmu = new MemManU (XML, ithCore, &interface_ip,coredynp);
- exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp);
- undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp);
- if (coredynp.core_ty==OOO)
- {
- rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp);
- }
- corepipe = new Pipeline(&interface_ip,coredynp);
-
- if (coredynp.core_ty==OOO)
- {
- pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0;
- if (rnu->exist)
- {
- rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
- }
- }
- else {
- pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0;
- }
-
- //area.set_area(area.get_area()+ corepipe->area.get_area());
- if (ifu->exist)
- {
- ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area() + ifu->area.get_area());
- }
- if (lsu->exist)
- {
- lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area() + lsu->area.get_area());
- }
- if (exu->exist)
- {
- exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area()+exu->area.get_area());
- }
- if (mmu->exist)
- {
- mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area()+mmu->area.get_area());
- }
-
- if (coredynp.core_ty==OOO)
- {
- if (rnu->exist)
- {
-
- area.set_area(area.get_area() + rnu->area.get_area());
- }
- }
-
- if (undiffCore->exist)
- {
- area.set_area(area.get_area() + undiffCore->area.get_area());
- }
-
- if (XML->sys.Private_L2)
- {
- area.set_area(area.get_area() + l2cache->area.get_area());
-
- }
-// //clock power
-// clockNetwork.init_wire_external(is_default, &interface_ip);
-// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
-// clockNetwork.end_wiring_level =5;//toplevel metal
-// clockNetwork.start_wiring_level =5;//toplevel metal
-// clockNetwork.num_regs = corepipe.tot_stage_vector;
-// clockNetwork.optimize_wire();
-}
+Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_) // Builds one core: scans XML children, creates the sub-units, sums their areas.
+ : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL),
+ exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL),
+ ithCore(_ithCore), interface_ip(*interface_ip_) {
+
+ ostringstream os;
+ os << ithCore;
+ name = "Core " + os.str(); // component name is "Core <ithCore>"
+
+ int i = 0;
+ XMLNode* childXML;
+ for (i = 0; i < xml_data->nChildNode("component"); i++) { // visit each "component" child of this core's XML node
+ childXML = xml_data->getChildNodePtr("component", &i); // NOTE(review): i is passed by pointer; confirm whether the callee modifies the loop index
+ XMLCSTR type = childXML->getAttribute("type");
+ if (!type) // NOTE(review): type is still handed to STRCMP below after this warning; confirm the warning does not return
+ warnMissingComponentType(childXML->getAttribute("id"));
+
+ STRCMP(type, "CacheUnit") { // STRCMP is a project macro; presumably a string-equality guard (TODO confirm)
+ XMLCSTR comp_name = childXML->getAttribute("id");
+ if (!comp_name) // cache nodes without an id are skipped
+ continue;
+
+ STRCMP(comp_name, "system.L20") { // the cache whose id is "system.L20" becomes this core's l2cache
+ l2cache = new CacheUnit(childXML, &interface_ip);
+ children.push_back(l2cache);
+ }
+ }
+ }
+ set_core_param(); // runs before clockRate is read from core_params on the next line
+ clockRate = core_params.clockRate;
+
+ ifu = new InstFetchU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(ifu);
+ lsu = new LoadStoreU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(lsu);
+ mmu = new MemManU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(mmu);
+ exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height, // reads lsu->lsq_height, so lsu has to be constructed first
+ core_params, core_stats);
+ children.push_back(exu);
+ undiffCore = new UndiffCore(xml_data, &interface_ip, core_params);
+ children.push_back(undiffCore);
+ if (core_params.core_ty == OOO) { // the renaming unit is only created for OOO cores; rnu stays NULL otherwise
+ rnu = new RENAMINGU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(rnu);
+ }
+ corepipe = new Pipeline(xml_data, &interface_ip, core_params);
+ children.push_back(corepipe);
+
+ double pipeline_area_per_unit; // share of the total pipeline area that each unit absorbs below
+ if (core_params.core_ty == OOO) {
+ pipeline_area_per_unit = (corepipe->area.get_area() * // OOO: split five ways (rnu, ifu, lsu, exu, mmu)
+ core_params.num_pipelines) / 5.0;
+ if (rnu->exist) {
+ rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
+ }
+ } else {
+ pipeline_area_per_unit = (corepipe->area.get_area() * // otherwise split four ways (ifu, lsu, exu, mmu)
+ core_params.num_pipelines) / 4.0;
+ }
-void BranchPredictor::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double r_access;
- double w_access;
- if (is_tdp)
- {
- r_access = coredynp.predictionW*coredynp.BR_duty_cycle;
- w_access = 0*coredynp.BR_duty_cycle;
- globalBPT->stats_t.readAc.access = r_access;
- globalBPT->stats_t.writeAc.access = w_access;
- globalBPT->tdp_stats = globalBPT->stats_t;
-
- L1_localBPT->stats_t.readAc.access = r_access;
- L1_localBPT->stats_t.writeAc.access = w_access;
- L1_localBPT->tdp_stats = L1_localBPT->stats_t;
-
- L2_localBPT->stats_t.readAc.access = r_access;
- L2_localBPT->stats_t.writeAc.access = w_access;
- L2_localBPT->tdp_stats = L2_localBPT->stats_t;
-
- chooser->stats_t.readAc.access = r_access;
- chooser->stats_t.writeAc.access = w_access;
- chooser->tdp_stats = chooser->stats_t;
-
- RAS->stats_t.readAc.access = r_access;
- RAS->stats_t.writeAc.access = w_access;
- RAS->tdp_stats = RAS->stats_t;
- }
- else
- {
- //The resolution of BPT accesses is coarse, but this is
- //because most simulators cannot track finer grained details
- r_access = XML->sys.core[ithCore].branch_instructions;
- w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0
- globalBPT->stats_t.readAc.access = r_access;
- globalBPT->stats_t.writeAc.access = w_access;
- globalBPT->rtp_stats = globalBPT->stats_t;
-
- L1_localBPT->stats_t.readAc.access = r_access;
- L1_localBPT->stats_t.writeAc.access = w_access;
- L1_localBPT->rtp_stats = L1_localBPT->stats_t;
-
- L2_localBPT->stats_t.readAc.access = r_access;
- L2_localBPT->stats_t.writeAc.access = w_access;
- L2_localBPT->rtp_stats = L2_localBPT->stats_t;
-
- chooser->stats_t.readAc.access = r_access;
- chooser->stats_t.writeAc.access = w_access;
- chooser->rtp_stats = chooser->stats_t;
-
- RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls;
- RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls;
- RAS->rtp_stats = RAS->stats_t;
- }
-
- globalBPT->power_t.reset();
- L1_localBPT->power_t.reset();
- L2_localBPT->power_t.reset();
- chooser->power_t.reset();
- RAS->power_t.reset();
-
- globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access +
- globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic;
- L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access +
- L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic;
-
- L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access +
- L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic;
-
- chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access +
- chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic;
- RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access +
- RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic;
-
- if (is_tdp)
- {
- globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
- L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
- L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
- chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg;
- RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
-
- power = power + globalBPT->power + L1_localBPT->power + chooser->power + RAS->power;
- }
- else
- {
- globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
- L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
- L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
- chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg;
- RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
- rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + chooser->rt_power + RAS->rt_power;
+ // Move all of this to computeArea
+ //area.set_area(area.get_area()+ corepipe->area.get_area());
+ if (ifu->exist) { // each existing unit first absorbs its pipeline share, then is added to the core area
+ ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + ifu->area.get_area());
+ }
+ if (lsu->exist) {
+ lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + lsu->area.get_area());
+ }
+ if (exu->exist) {
+ exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + exu->area.get_area());
+ }
+ if (mmu->exist) {
+ mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + mmu->area.get_area());
}
-}
-void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
- if (is_tdp)
- {
- cout << indent_str<< "Global Predictor:" << endl;
- cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "Local Predictor:" << endl;
- cout << indent_str << "L1_Local Predictor:" << endl;
- cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? L1_localBPT->power.readOp.longer_channel_leakage:L1_localBPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << L1_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "L2_Local Predictor:" << endl;
- cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? L2_localBPT->power.readOp.longer_channel_leakage:L2_localBPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << L2_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
-
- cout << indent_str << "Chooser:" << endl;
- cout << indent_str_next << "Area = " << chooser->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << chooser->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? chooser->power.readOp.longer_channel_leakage:chooser->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << chooser->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "RAS:" << endl;
- cout << indent_str_next << "Area = " << RAS->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << RAS->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? RAS->power.readOp.longer_channel_leakage:RAS->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
-// cout << indent_str_next << "Global Predictor Peak Dynamic = " << globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Global Predictor Subthreshold Leakage = " << globalBPT->rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Global Predictor Gate Leakage = " << globalBPT->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Local Predictor Peak Dynamic = " << L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Local Predictor Subthreshold Leakage = " << L1_localBPT->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Local Predictor Gate Leakage = " << L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Chooser Peak Dynamic = " << chooser->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Chooser Subthreshold Leakage = " << chooser->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "RAS Peak Dynamic = " << RAS->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "RAS Subthreshold Leakage = " << RAS->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" << endl;
- }
+ if (core_params.core_ty == OOO) { // rnu is only non-NULL for OOO cores, so it is only dereferenced here
+ if (rnu->exist) {
-}
-
-void InstFetchU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- icache.caches->stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle;
- icache.caches->stats_t.readAc.miss = 0;
- icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
- icache.caches->tdp_stats = icache.caches->stats_t;
-
- icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports;
- icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports;
- icache.missb->tdp_stats = icache.missb->stats_t;
-
- icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.ifb->tdp_stats = icache.ifb->stats_t;
-
- icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports;
- icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.prefetchb->tdp_stats = icache.prefetchb->stats_t;
-
- IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width;
- IB->tdp_stats = IB->stats_t;
-
- if (coredynp.predictionW>0)
- {
- BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses;
- BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses;
+ area.set_area(area.get_area() + rnu->area.get_area());
}
+ }
- ID_inst->stats_t.readAc.access = coredynp.decodeW;
- ID_operand->stats_t.readAc.access = coredynp.decodeW;
- ID_misc->stats_t.readAc.access = coredynp.decodeW;
- ID_inst->tdp_stats = ID_inst->stats_t;
- ID_operand->tdp_stats = ID_operand->stats_t;
- ID_misc->tdp_stats = ID_misc->stats_t;
-
+ if (undiffCore->exist) { // undiffCore gets no pipeline share; its area is added as-is
+ area.set_area(area.get_area() + undiffCore->area.get_area());
+ }
+ if (l2cache) { // non-NULL only when a "system.L20" CacheUnit child was found above
+ area.set_area(area.get_area() + l2cache->area.get_area());
}
- else
- {
- //init stats for Runtime Dynamic (RTP)
- icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses;
- icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses;
- icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
- icache.caches->rtp_stats = icache.caches->stats_t;
+}
- icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.missb->rtp_stats = icache.missb->stats_t;
- icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.ifb->rtp_stats = icache.ifb->stats_t;
+void BranchPredictor::computeEnergy() { // Fills TDP and runtime stats/energy for the five predictor arrays and sums their output_data.
+ if (!exist) return;
+
+ // ASSUMPTION: All instructions access the branch predictors at Fetch and
+ // only branch instructions update the predictors regardless
+ // of the correctness of the prediction.
+ double tdp_read_accesses = // TDP read count, reused for all five arrays below
+ core_params.predictionW * core_stats.BR_duty_cycle;
+ globalBPT->tdp_stats.reset();
+ globalBPT->tdp_stats.readAc.access = tdp_read_accesses;
+ globalBPT->tdp_stats.writeAc.access = 0; // no writes are counted in the TDP case
+ globalBPT->rtp_stats.reset();
+ globalBPT->rtp_stats.readAc.access = core_stats.total_instructions; // runtime reads: every instruction
+ globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; // runtime writes: branches only
+ globalBPT->power_t.reset();
+ globalBPT->power_t.readOp.dynamic += // power_t dynamic: TDP accesses weighted by the per-access read/write values
+ globalBPT->local_result.power.readOp.dynamic *
+ globalBPT->tdp_stats.readAc.access +
+ globalBPT->local_result.power.writeOp.dynamic *
+ globalBPT->tdp_stats.writeAc.access;
+ globalBPT->power_t = globalBPT->power_t + // then local_result.power scaled by pppm_lkg is added on top
+ globalBPT->local_result.power * pppm_lkg;
+ globalBPT->rt_power.reset();
+ globalBPT->rt_power.readOp.dynamic += // rt_power dynamic: same weighting with the runtime stats; no pppm_lkg term here
+ globalBPT->local_result.power.readOp.dynamic *
+ globalBPT->rtp_stats.readAc.access +
+ globalBPT->local_result.power.writeOp.dynamic *
+ globalBPT->rtp_stats.writeAc.access;
+
+ L1_localBPT->tdp_stats.reset(); // L1 local table: same recipe as globalBPT
+ L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses;
+ L1_localBPT->tdp_stats.writeAc.access = 0;
+ L1_localBPT->rtp_stats.reset();
+ L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions;
+ L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
+ L1_localBPT->power_t.reset();
+ L1_localBPT->power_t.readOp.dynamic +=
+ L1_localBPT->local_result.power.readOp.dynamic *
+ L1_localBPT->tdp_stats.readAc.access +
+ L1_localBPT->local_result.power.writeOp.dynamic *
+ L1_localBPT->tdp_stats.writeAc.access;
+ L1_localBPT->power_t = L1_localBPT->power_t +
+ L1_localBPT->local_result.power * pppm_lkg;
+ L1_localBPT->rt_power.reset();
+ L1_localBPT->rt_power.readOp.dynamic +=
+ L1_localBPT->local_result.power.readOp.dynamic *
+ L1_localBPT->rtp_stats.readAc.access +
+ L1_localBPT->local_result.power.writeOp.dynamic *
+ L1_localBPT->rtp_stats.writeAc.access;
+
+ L2_localBPT->tdp_stats.reset(); // L2 local table
+ L2_localBPT->tdp_stats.readAc.access = tdp_read_accesses;
+ L2_localBPT->tdp_stats.writeAc.access = 0;
+ L2_localBPT->rtp_stats.reset();
+ L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions; // NOTE(review): reads use branch_instructions here but total_instructions for the other tables; confirm intended
+ L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
+ L2_localBPT->power_t.reset();
+ L2_localBPT->power_t.readOp.dynamic +=
+ L2_localBPT->local_result.power.readOp.dynamic *
+ L2_localBPT->tdp_stats.readAc.access +
+ L2_localBPT->local_result.power.writeOp.dynamic *
+ L2_localBPT->tdp_stats.writeAc.access;
+ L2_localBPT->power_t = L2_localBPT->power_t +
+ L2_localBPT->local_result.power * pppm_lkg;
+ L2_localBPT->rt_power.reset();
+ L2_localBPT->rt_power.readOp.dynamic +=
+ L2_localBPT->local_result.power.readOp.dynamic *
+ L2_localBPT->rtp_stats.readAc.access +
+ L2_localBPT->local_result.power.writeOp.dynamic *
+ L2_localBPT->rtp_stats.writeAc.access;
+
+ chooser->tdp_stats.reset(); // chooser: same recipe as globalBPT
+ chooser->tdp_stats.readAc.access = tdp_read_accesses;
+ chooser->tdp_stats.writeAc.access = 0;
+ chooser->rtp_stats.reset();
+ chooser->rtp_stats.readAc.access = core_stats.total_instructions;
+ chooser->rtp_stats.writeAc.access = core_stats.branch_instructions;
+ chooser->power_t.reset();
+ chooser->power_t.readOp.dynamic +=
+ chooser->local_result.power.readOp.dynamic *
+ chooser->tdp_stats.readAc.access +
+ chooser->local_result.power.writeOp.dynamic *
+ chooser->tdp_stats.writeAc.access;
+ chooser->power_t =
+ chooser->power_t + chooser->local_result.power * pppm_lkg;
+ chooser->rt_power.reset();
+ chooser->rt_power.readOp.dynamic +=
+ chooser->local_result.power.readOp.dynamic *
+ chooser->rtp_stats.readAc.access +
+ chooser->local_result.power.writeOp.dynamic *
+ chooser->rtp_stats.writeAc.access;
+
+ RAS->tdp_stats.reset(); // RAS: differs in runtime counts and in the scaling term
+ RAS->tdp_stats.readAc.access = tdp_read_accesses;
+ RAS->tdp_stats.writeAc.access = 0;
+ RAS->rtp_stats.reset();
+ RAS->rtp_stats.readAc.access = core_stats.function_calls; // runtime reads and writes both use function_calls
+ RAS->rtp_stats.writeAc.access = core_stats.function_calls;
+ RAS->power_t.reset();
+ RAS->power_t.readOp.dynamic +=
+ RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access +
+ RAS->local_result.power.writeOp.dynamic *
+ RAS->tdp_stats.writeAc.access;
+ RAS->power_t = RAS->power_t + RAS->local_result.power * // scaled by core_params.pppm_lkg_multhread rather than pppm_lkg
+ core_params.pppm_lkg_multhread;
+ RAS->rt_power.reset();
+ RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic *
+ RAS->rtp_stats.readAc.access +
+ RAS->local_result.power.writeOp.dynamic *
+ RAS->rtp_stats.writeAc.access;
+
+ output_data.reset(); // rebuild this component's totals from the sub-arrays
+ if (globalBPT) { // NOTE(review): these pointers were already dereferenced above, so the checks below cannot guard against NULL
+ globalBPT->output_data.peak_dynamic_power =
+ globalBPT->power_t.readOp.dynamic * clockRate;
+ globalBPT->output_data.runtime_dynamic_energy =
+ globalBPT->rt_power.readOp.dynamic;
+ output_data += globalBPT->output_data;
+ }
+ if (L1_localBPT) {
+ L1_localBPT->output_data.peak_dynamic_power =
+ L1_localBPT->power_t.readOp.dynamic * clockRate;
+ L1_localBPT->output_data.runtime_dynamic_energy =
+ L1_localBPT->rt_power.readOp.dynamic;
+ output_data += L1_localBPT->output_data;
+ }
+ if (L2_localBPT) {
+ L2_localBPT->output_data.peak_dynamic_power =
+ L2_localBPT->power_t.readOp.dynamic * clockRate;
+ L2_localBPT->output_data.runtime_dynamic_energy =
+ L2_localBPT->rt_power.readOp.dynamic;
+ output_data += L2_localBPT->output_data;
+ }
+ if (chooser) {
+ chooser->output_data.peak_dynamic_power =
+ chooser->power_t.readOp.dynamic * clockRate;
+ chooser->output_data.runtime_dynamic_energy =
+ chooser->rt_power.readOp.dynamic;
+ output_data += chooser->output_data;
+ }
+ if (RAS) {
+ RAS->output_data.peak_dynamic_power =
+ RAS->power_t.readOp.dynamic * clockRate;
+ RAS->output_data.subthreshold_leakage_power = // RAS is the only array whose leakage fields are written in this function
+ RAS->power_t.readOp.leakage * core_params.num_hthreads;
+ RAS->output_data.gate_leakage_power = // NOTE(review): power_t already carries the pppm_lkg_multhread term; confirm the extra num_hthreads factor is intended
+ RAS->power_t.readOp.gate_leakage * core_params.num_hthreads;
+ RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic;
+ output_data += RAS->output_data;
+ }
+}
- icache.prefetchb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.prefetchb->rtp_stats = icache.prefetchb->stats_t;
+void BranchPredictor::displayData(uint32_t indent, int plevel) { // Calls the base-class displayData, then displayData on each predictor array.
+ if (!exist) return;
- IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions;
- IB->rtp_stats = IB->stats_t;
+ McPATComponent::displayData(indent, plevel); // base-class call at the caller's indent
- if (coredynp.predictionW>0)
- {
- BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions;
- BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions;
- BTB->rtp_stats = BTB->stats_t;
- }
+ globalBPT->displayData(indent + 4, plevel); // sub-arrays are passed indent + 4; the pointers are not NULL-checked here
+ L1_localBPT->displayData(indent + 4, plevel);
+ L2_localBPT->displayData(indent + 4, plevel);
+ chooser->displayData(indent + 4, plevel);
+ RAS->displayData(indent + 4, plevel);
+}
- ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_inst->rtp_stats = ID_inst->stats_t;
- ID_operand->rtp_stats = ID_operand->stats_t;
- ID_misc->rtp_stats = ID_misc->stats_t;
+void InstFetchU::computeEnergy() { // Computes TDP/runtime stats and energy for BPT, IB, BTB and the three decoders, then sums output_data.
+ if (!exist) return;
+ if (BPT) { // the branch predictor computes its own energy first, when present
+ BPT->computeEnergy();
}
- icache.power_t.reset();
+ IB->tdp_stats.reset();
+ IB->tdp_stats.readAc.access = core_params.peak_issueW; // TDP: peak_issueW reads and writes
+ IB->tdp_stats.writeAc.access = core_params.peak_issueW;
+ IB->rtp_stats.reset();
+ IB->rtp_stats.readAc.access = core_stats.total_instructions; // runtime: one read and one write per instruction
+ IB->rtp_stats.writeAc.access = core_stats.total_instructions;
IB->power_t.reset();
-// ID_inst->power_t.reset();
-// ID_operand->power_t.reset();
-// ID_misc->power_t.reset();
- if (coredynp.predictionW>0)
- {
+ IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic * // TDP accesses weighted by per-access read/write values
+ IB->tdp_stats.readAc.access +
+ IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access;
+ IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg; // local_result.power scaled by pppm_lkg is added on top
+ IB->rt_power.reset();
+ IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic * // same weighting with the runtime stats
+ IB->rtp_stats.readAc.access +
+ IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access;
+
+ if (core_params.predictionW > 0) { // BTB stats are only computed when predictionW > 0
+ BTB->tdp_stats.reset();
+ BTB->tdp_stats.readAc.access = core_params.predictionW;
+ BTB->tdp_stats.writeAc.access = 0;
+ BTB->rtp_stats.reset();
+ BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses;
+ BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses;
BTB->power_t.reset();
+ BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic * // unlike IB, no pppm_lkg term is added to BTB->power_t in this function
+ BTB->tdp_stats.readAc.access +
+ BTB->local_result.power.writeOp.dynamic *
+ BTB->tdp_stats.writeAc.access;
+ BTB->rt_power.reset();
+ BTB->rt_power.readOp.dynamic +=
+ BTB->local_result.power.readOp.dynamic *
+ BTB->rtp_stats.readAc.access +
+ BTB->local_result.power.writeOp.dynamic *
+ BTB->rtp_stats.writeAc.access;
}
- icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+
- //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+
- icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel
- icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache
- icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic +
- icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
- icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic +
- icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic;
- icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic +
- icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic;
+ ID_inst->tdp_stats.reset(); // decoders: only read accesses are set (decodeW for TDP, total_instructions at runtime)
+ ID_inst->tdp_stats.readAc.access = core_params.decodeW;
+ ID_inst->power_t.reset();
+ ID_inst->power_t = ID_misc->power; // NOTE(review): copies ID_misc->power, not ID_inst->power; looks like a copy-paste slip, confirm
+ ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic * // the dynamic field is then overwritten from ID_inst's own power
+ ID_inst->tdp_stats.readAc.access;
+ ID_inst->rtp_stats.reset();
+ ID_inst->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_inst->rt_power.reset();
+ ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic *
+ ID_inst->rtp_stats.readAc.access;
+
+ ID_operand->tdp_stats.reset();
+ ID_operand->tdp_stats.readAc.access = core_params.decodeW;
+ ID_operand->power_t.reset();
+ ID_operand->power_t = ID_misc->power; // NOTE(review): same pattern, ID_misc->power rather than ID_operand->power; confirm
+ ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic *
+ ID_operand->tdp_stats.readAc.access;
+ ID_operand->rtp_stats.reset();
+ ID_operand->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_operand->rt_power.reset();
+ ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic *
+ ID_operand->rtp_stats.readAc.access;
+
+ ID_misc->tdp_stats.reset();
+ ID_misc->tdp_stats.readAc.access = core_params.decodeW;
+ ID_misc->power_t.reset();
+ ID_misc->power_t = ID_misc->power;
+ ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic *
+ ID_misc->tdp_stats.readAc.access;
+ ID_misc->rtp_stats.reset();
+ ID_misc->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_misc->rt_power.reset();
+ ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic *
+ ID_misc->rtp_stats.readAc.access;
+
+ power.reset();
+ rt_power.reset();
+ McPATComponent::computeEnergy(); // base-class pass; presumably handles registered children such as icache (TODO confirm)
+
+ output_data.reset(); // rebuilt from the sub-components below, after the base-class pass
+ if (icache) {
+ output_data += icache->output_data; // icache output_data is not filled in by this function
+ }
+ if (IB) { // NOTE(review): IB and the decoders were already dereferenced above, so these checks cannot guard against NULL
+ IB->output_data.peak_dynamic_power =
+ IB->power_t.readOp.dynamic * clockRate;
+ IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic;
+ output_data += IB->output_data;
+ }
+ if (BTB) { // NOTE(review): stats above are gated on predictionW > 0, this on the pointer; confirm the two conditions agree
+ BTB->output_data.peak_dynamic_power =
+ BTB->power_t.readOp.dynamic * clockRate;
+ BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic;
+ output_data += BTB->output_data;
+ }
+ if (BPT) {
+ output_data += BPT->output_data; // filled by BPT->computeEnergy() at the top of this function
+ }
+ if (ID_inst) {
+ ID_inst->output_data.peak_dynamic_power =
+ ID_inst->power_t.readOp.dynamic * clockRate;
+ ID_inst->output_data.runtime_dynamic_energy =
+ ID_inst->rt_power.readOp.dynamic;
+ output_data += ID_inst->output_data;
+ }
+ if (ID_operand) {
+ ID_operand->output_data.peak_dynamic_power =
+ ID_operand->power_t.readOp.dynamic * clockRate;
+ ID_operand->output_data.runtime_dynamic_energy =
+ ID_operand->rt_power.readOp.dynamic;
+ output_data += ID_operand->output_data;
+ }
+ if (ID_misc) {
+ ID_misc->output_data.peak_dynamic_power =
+ ID_misc->power_t.readOp.dynamic * clockRate;
+ ID_misc->output_data.runtime_dynamic_energy =
+ ID_misc->rt_power.readOp.dynamic;
+ output_data += ID_misc->output_data;
+ }
+}
- IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access +
- IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic;
+void InstFetchU::displayData(uint32_t indent, int plevel) { // Calls the base-class displayData, then displayData on the fetch sub-components.
+ if (!exist) return;
- if (coredynp.predictionW>0)
- {
- BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access +
- BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic;
+ McPATComponent::displayData(indent, plevel); // base-class call at the caller's indent
- BPT->computeEnergy(is_tdp);
+ if (core_params.predictionW > 0) { // BTB and BPT are only shown when predictionW > 0
+ BTB->displayData(indent + 4, plevel);
+ if (BPT->exist) { // NOTE(review): BPT is dereferenced without a NULL check here; computeEnergy guards it with if (BPT)
+ BPT->displayData(indent + 4, plevel);
}
+ }
+ IB->displayData(indent + 4, plevel); // instruction buffer and the three decoders are always shown, at indent + 4
+ ID_inst->displayData(indent + 4, plevel);
+ ID_operand->displayData(indent + 4, plevel);
+ ID_misc->displayData(indent + 4, plevel);
+}
- if (is_tdp)
- {
-// icache.power = icache.power_t +
-// (icache.caches->local_result.power)*pppm_lkg +
-// (icache.missb->local_result.power +
-// icache.ifb->local_result.power +
-// icache.prefetchb->local_result.power)*pppm_Isub;
- icache.power = icache.power_t +
- (icache.caches->local_result.power +
- icache.missb->local_result.power +
- icache.ifb->local_result.power +
- icache.prefetchb->local_result.power)*pppm_lkg;
-
- IB->power = IB->power_t + IB->local_result.power*pppm_lkg;
- power = power + icache.power + IB->power;
- if (coredynp.predictionW>0)
- {
- BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg;
- power = power + BTB->power + BPT->power;
- }
+void RENAMINGU::computeEnergy() {
+ if (!exist) return;
+
+ idcl->tdp_stats.reset();
+ idcl->rtp_stats.reset();
+ idcl->power_t.reset();
+ idcl->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ idcl->tdp_stats.readAc.access = core_params.decodeW;
+ idcl->rtp_stats.readAc.access = 3 * core_params.decodeW *
+ core_params.decodeW * core_stats.rename_reads;
+ } else if (core_params.issueW > 1) {
+ idcl->tdp_stats.readAc.access = core_params.decodeW;
+ idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions;
+ }
+ idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access *
+ idcl->power.readOp.dynamic;
+ idcl->power_t.readOp.leakage = idcl->power.readOp.leakage *
+ core_params.num_hthreads;
+ idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage *
+ core_params.num_hthreads;
+ idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access *
+ idcl->power.readOp.dynamic;
+
+ fdcl->tdp_stats.reset();
+ fdcl->rtp_stats.reset();
+ fdcl->power_t.reset();
+ fdcl->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ fdcl->tdp_stats.readAc.access = core_params.decodeW;
+ fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW *
+ core_params.fp_issueW * core_stats.fp_rename_writes;
+ } else if (core_params.issueW > 1) {
+ fdcl->tdp_stats.readAc.access = core_params.decodeW;
+ fdcl->rtp_stats.readAc.access = core_stats.fp_instructions;
+ }
+ fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access *
+ fdcl->power.readOp.dynamic;
+ fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage *
+ core_params.num_hthreads;
+ fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage *
+ core_params.num_hthreads;
+ fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access *
+ fdcl->power.readOp.dynamic;
+
+ if (iRRAT) {
+ iRRAT->tdp_stats.reset();
+ iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports;
+ iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports;
+ iRRAT->rtp_stats.reset();
+ iRRAT->rtp_stats.readAc.access = core_stats.rename_writes;
+ iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
+ iRRAT->power_t.reset();
+ iRRAT->power_t.readOp.dynamic +=
+ iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic +
+ iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
+ iRRAT->rt_power.reset();
+ iRRAT->rt_power.readOp.dynamic +=
+ iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic +
+ iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
+ iRRAT->power_t.readOp.leakage =
+ iRRAT->power.readOp.leakage * core_params.num_hthreads;
+ iRRAT->power_t.readOp.gate_leakage =
+ iRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic;
- ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic;
- ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic;
-
- ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access;
- ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access;
- ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access;
-
- power = power + (ID_inst->power +
- ID_operand->power +
- ID_misc->power);
- }
- else
- {
-// icache.rt_power = icache.power_t +
-// (icache.caches->local_result.power)*pppm_lkg +
-// (icache.missb->local_result.power +
-// icache.ifb->local_result.power +
-// icache.prefetchb->local_result.power)*pppm_Isub;
-
- icache.rt_power = icache.power_t +
- (icache.caches->local_result.power +
- icache.missb->local_result.power +
- icache.ifb->local_result.power +
- icache.prefetchb->local_result.power)*pppm_lkg;
-
- IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg;
- rt_power = rt_power + icache.rt_power + IB->rt_power;
- if (coredynp.predictionW>0)
- {
- BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg;
- rt_power = rt_power + BTB->rt_power + BPT->rt_power;
- }
+ if (ifreeL) {
+ ifreeL->tdp_stats.reset();
+ ifreeL->tdp_stats.readAc.access = core_params.decodeW;
+ ifreeL->tdp_stats.writeAc.access = core_params.decodeW;
+ ifreeL->rtp_stats.reset();
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ ifreeL->rtp_stats.readAc.access = core_stats.rename_reads;
+ ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes;
+ } else if (core_params.scheu_ty == ReservationStation) {
+ ifreeL->rtp_stats.readAc.access =
+ core_stats.rename_reads + core_stats.fp_rename_reads;
+ ifreeL->rtp_stats.writeAc.access =
+ 2 * (core_stats.rename_writes + core_stats.fp_rename_writes);
+ }
+ ifreeL->power_t.reset();
+ ifreeL->power_t.readOp.dynamic +=
+ ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic +
+ ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
+ ifreeL->rt_power.reset();
+ ifreeL->rt_power.readOp.dynamic +=
+ ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic +
+ ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
+ ifreeL->power_t.readOp.leakage =
+ ifreeL->power.readOp.leakage * core_params.num_hthreads;
+ ifreeL->power_t.readOp.gate_leakage =
+ ifreeL->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access;
- ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access;
- ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access;
+ if (fRRAT) {
+ fRRAT->tdp_stats.reset();
+ fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports;
+ fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports;
+ fRRAT->rtp_stats.reset();
+ fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes;
+ fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
+ fRRAT->power_t.reset();
+ fRRAT->power_t.readOp.dynamic +=
+ fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic +
+ fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
+ fRRAT->rt_power.reset();
+ fRRAT->rt_power.readOp.dynamic +=
+ fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic +
+ fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
+ fRRAT->power_t.readOp.leakage =
+ fRRAT->power.readOp.leakage * core_params.num_hthreads;
+ fRRAT->power_t.readOp.gate_leakage =
+ fRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- rt_power = rt_power + (ID_inst->rt_power +
- ID_operand->rt_power +
- ID_misc->rt_power);
+ if (ffreeL) {
+ ffreeL->tdp_stats.reset();
+ ffreeL->tdp_stats.readAc.access = core_params.decodeW;
+ ffreeL->tdp_stats.writeAc.access = core_params.decodeW;
+ ffreeL->rtp_stats.reset();
+ ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads;
+ ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes;
+ ffreeL->power_t.reset();
+ ffreeL->power_t.readOp.dynamic +=
+ ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic +
+ ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
+ ffreeL->rt_power.reset();
+ ffreeL->rt_power.readOp.dynamic +=
+ ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic +
+ ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
+ ffreeL->power_t.readOp.leakage =
+ ffreeL->power.readOp.leakage * core_params.num_hthreads;
+ ffreeL->power_t.readOp.gate_leakage =
+ ffreeL->power.readOp.gate_leakage * core_params.num_hthreads;
}
-}
-void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
-
- cout << indent_str<< "Instruction Cache:" << endl;
- cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.predictionW>0)
- {
- cout << indent_str<< "Branch Target Buffer:" << endl;
- cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (BPT->exist)
- {
- cout << indent_str<< "Branch Predictor:" << endl;
- cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3)
- {
- BPT->displayEnergy(indent+4, plevel, is_tdp);
- }
- }
- }
- cout << indent_str<< "Instruction Buffer:" << endl;
- cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Instruction Decoder:" << endl;
- cout << indent_str_next << "Area = " << (ID_inst->area.get_area() +
- ID_operand->area.get_area() +
- ID_misc->area.get_area())*coredynp.decodeW*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << (ID_inst->power.readOp.dynamic +
- ID_operand->power.readOp.dynamic +
- ID_misc->power.readOp.dynamic)*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? (ID_inst->power.readOp.longer_channel_leakage +
- ID_operand->power.readOp.longer_channel_leakage +
- ID_misc->power.readOp.longer_channel_leakage):
- (ID_inst->power.readOp.leakage +
- ID_operand->power.readOp.leakage +
- ID_misc->power.readOp.leakage)) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << (ID_inst->power.readOp.gate_leakage +
- ID_operand->power.readOp.gate_leakage +
- ID_misc->power.readOp.gate_leakage) << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << (ID_inst->rt_power.readOp.dynamic +
- ID_operand->rt_power.readOp.dynamic +
- ID_misc->rt_power.readOp.dynamic)/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
-// cout << indent_str_next << "Instruction Cache Peak Dynamic = " << icache.rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " << icache.rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Instruction Cache Gate Leakage = " << icache.rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Peak Dynamic = " << IB->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Subthreshold Leakage = " << IB->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Gate Leakage = " << IB->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Subthreshold Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Gate Leakage = " << BTB->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Peak Dynamic = " << BPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Subthreshold Leakage = " << BPT->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Gate Leakage = " << BPT->rt_power.readOp.gate_leakage << " W" << endl;
+ if (iFRAT) {
+ tdp_stats.reset();
+ if (core_params.rm_ty == RAMbased) {
+ iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports;
+ iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
+ iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports;
+ } else if ((core_params.rm_ty == CAMbased)) {
+ iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports;
+ iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
+ }
+ rtp_stats.reset();
+ iFRAT->rtp_stats.readAc.access = core_stats.rename_reads;
+ iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
+ if (core_params.scheu_ty == ReservationStation &&
+ core_params.rm_ty == RAMbased) {
+ iFRAT->rtp_stats.searchAc.access =
+ core_stats.committed_int_instructions;
+ }
+ iFRAT->power_t.reset();
+ iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access
+ * (iFRAT->local_result.power.readOp.dynamic
+ + idcl->power.readOp.dynamic)
+ + iFRAT->tdp_stats.writeAc.access
+ * iFRAT->local_result.power.writeOp.dynamic
+ + iFRAT->tdp_stats.searchAc.access
+ * iFRAT->local_result.power.searchOp.dynamic;
+ iFRAT->power_t.readOp.leakage =
+ iFRAT->power.readOp.leakage * core_params.num_hthreads;
+ iFRAT->power_t.readOp.gate_leakage =
+ iFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ iFRAT->rt_power.reset();
+ iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access
+ * (iFRAT->local_result.power.readOp.dynamic
+ + idcl->power.readOp.dynamic)
+ + iFRAT->rtp_stats.writeAc.access
+ * iFRAT->local_result.power.writeOp.dynamic
+ + iFRAT->rtp_stats.searchAc.access
+ * iFRAT->local_result.power.searchOp.dynamic;
+ }
+
+ if (fFRAT) {
+ tdp_stats.reset();
+ fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports;
+ if ((core_params.rm_ty == CAMbased)) {
+ fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports;
+ } else if (core_params.rm_ty == RAMbased) {
+ fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports;
+ if (core_params.scheu_ty == ReservationStation) {
+ fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports;
+ }
}
+ rtp_stats.reset();
+ fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads;
+ fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
+ if (core_params.scheu_ty == ReservationStation &&
+ core_params.rm_ty == RAMbased) {
+ fFRAT->rtp_stats.searchAc.access =
+ core_stats.committed_fp_instructions;
+ }
+ fFRAT->power_t.reset();
+ fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access
+ * (fFRAT->local_result.power.readOp.dynamic
+ + fdcl->power.readOp.dynamic)
+ + fFRAT->tdp_stats.writeAc.access
+ * fFRAT->local_result.power.writeOp.dynamic
+ + fFRAT->tdp_stats.searchAc.access
+ * fFRAT->local_result.power.searchOp.dynamic;
+ fFRAT->power_t.readOp.leakage =
+ fFRAT->power.readOp.leakage * core_params.num_hthreads;
+ fFRAT->power_t.readOp.gate_leakage =
+ fFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ fFRAT->rt_power.reset();
+ fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access
+ * (fFRAT->local_result.power.readOp.dynamic
+ + fdcl->power.readOp.dynamic)
+ + fFRAT->rtp_stats.writeAc.access
+ * fFRAT->local_result.power.writeOp.dynamic
+ + fFRAT->rtp_stats.searchAc.access
+ * fFRAT->local_result.power.searchOp.dynamic;
+ }
+ output_data.reset();
+ if (iFRAT) {
+ iFRAT->output_data.peak_dynamic_power =
+ iFRAT->power_t.readOp.dynamic * clockRate;
+ iFRAT->output_data.subthreshold_leakage_power =
+ iFRAT->power_t.readOp.leakage;
+ iFRAT->output_data.gate_leakage_power =
+ iFRAT->power_t.readOp.gate_leakage;
+ iFRAT->output_data.runtime_dynamic_energy =
+ iFRAT->rt_power.readOp.dynamic;
+ output_data += iFRAT->output_data;
+ }
+ if (fFRAT) {
+ fFRAT->output_data.peak_dynamic_power =
+ fFRAT->power_t.readOp.dynamic * clockRate;
+ fFRAT->output_data.subthreshold_leakage_power =
+ fFRAT->power_t.readOp.leakage;
+ fFRAT->output_data.gate_leakage_power =
+ fFRAT->power_t.readOp.gate_leakage;
+ fFRAT->output_data.runtime_dynamic_energy =
+ fFRAT->rt_power.readOp.dynamic;
+ output_data += fFRAT->output_data;
+ }
+ if (iRRAT) {
+ iRRAT->output_data.peak_dynamic_power =
+ iRRAT->power_t.readOp.dynamic * clockRate;
+ iRRAT->output_data.subthreshold_leakage_power =
+ iRRAT->power_t.readOp.leakage;
+ iRRAT->output_data.gate_leakage_power =
+ iRRAT->power_t.readOp.gate_leakage;
+ iRRAT->output_data.runtime_dynamic_energy =
+ iRRAT->rt_power.readOp.dynamic;
+ output_data += iRRAT->output_data;
+ }
+ if (fRRAT) {
+ fRRAT->output_data.peak_dynamic_power =
+ fRRAT->power_t.readOp.dynamic * clockRate;
+ fRRAT->output_data.subthreshold_leakage_power =
+ fRRAT->power_t.readOp.leakage;
+ fRRAT->output_data.gate_leakage_power =
+ fRRAT->power_t.readOp.gate_leakage;
+ fRRAT->output_data.runtime_dynamic_energy =
+ fRRAT->rt_power.readOp.dynamic;
+ output_data += fRRAT->output_data;
+ }
+ if (ifreeL) {
+ ifreeL->output_data.peak_dynamic_power =
+ ifreeL->power_t.readOp.dynamic * clockRate;
+ ifreeL->output_data.subthreshold_leakage_power =
+ ifreeL->power_t.readOp.leakage;
+ ifreeL->output_data.gate_leakage_power =
+ ifreeL->power_t.readOp.gate_leakage;
+ ifreeL->output_data.runtime_dynamic_energy =
+ ifreeL->rt_power.readOp.dynamic;
+ output_data += ifreeL->output_data;
+ }
+ if (ffreeL) {
+ ffreeL->output_data.peak_dynamic_power =
+ ffreeL->power_t.readOp.dynamic * clockRate;
+ ffreeL->output_data.subthreshold_leakage_power =
+ ffreeL->power_t.readOp.leakage;
+ ffreeL->output_data.gate_leakage_power =
+ ffreeL->power_t.readOp.gate_leakage;
+ ffreeL->output_data.runtime_dynamic_energy =
+ ffreeL->rt_power.readOp.dynamic;
+ output_data += ffreeL->output_data;
+ }
+ if (idcl) {
+ idcl->output_data.peak_dynamic_power =
+ idcl->power_t.readOp.dynamic * clockRate;
+ idcl->output_data.subthreshold_leakage_power =
+ idcl->power_t.readOp.leakage;
+ idcl->output_data.gate_leakage_power =
+ idcl->power_t.readOp.gate_leakage;
+ idcl->output_data.runtime_dynamic_energy =
+ idcl->rt_power.readOp.dynamic;
+ output_data += idcl->output_data;
+ }
+ if (fdcl) {
+ fdcl->output_data.peak_dynamic_power =
+ fdcl->power_t.readOp.dynamic * clockRate;
+ fdcl->output_data.subthreshold_leakage_power =
+ fdcl->power_t.readOp.leakage;
+ fdcl->output_data.gate_leakage_power =
+ fdcl->power_t.readOp.gate_leakage;
+ fdcl->output_data.runtime_dynamic_energy =
+ fdcl->rt_power.readOp.dynamic;
+ output_data += fdcl->output_data;
+ }
+ if (RAHT) {
+ output_data += RAHT->output_data;
+ }
}
-void RENAMINGU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double pppm_t[4] = {1,1,1,1};
- if (is_tdp)
- {//init stats for Peak
- if (coredynp.core_ty==OOO){
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
- }
-
- iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports;
- iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports;
- iRRAT->tdp_stats = iRRAT->stats_t;
-
- fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports;
- fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports;
- fRRAT->tdp_stats = fRRAT->stats_t;
-
- ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;;
- ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
- ifreeL->tdp_stats = ifreeL->stats_t;
-
- ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports;
- ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports;
- ffreeL->tdp_stats = ffreeL->stats_t;
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->stats_t.searchAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->stats_t.searchAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
- }
- //Unified free list for both int and fp
- ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;
- ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
- ifreeL->tdp_stats = ifreeL->stats_t;
- }
- idcl->stats_t.readAc.access = coredynp.decodeW;
- fdcl->stats_t.readAc.access = coredynp.decodeW;
- idcl->tdp_stats = idcl->stats_t;
- fdcl->tdp_stats = fdcl->stats_t;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->stats_t.readAc.access = coredynp.decodeW;
- fdcl->stats_t.readAc.access = coredynp.decodeW;
- idcl->tdp_stats = idcl->stats_t;
- fdcl->tdp_stats = fdcl->stats_t;
- }
- }
+void RENAMINGU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
- }
- else
- {//init stats for Runtime Dynamic (RTP)
- if (coredynp.core_ty==OOO){
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
-
- iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16
- iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iRRAT->rtp_stats = iRRAT->stats_t;
-
- fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16
- fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fRRAT->rtp_stats = fRRAT->stats_t;
-
- ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes;
- ifreeL->rtp_stats = ifreeL->stats_t;
-
- ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- ffreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].fp_rename_writes;
- ffreeL->rtp_stats = ffreeL->stats_t;
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs.
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- //Unified free list for both int and fp since the ROB act as physcial registers
- ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads +
- XML->sys.core[ithCore].fp_rename_reads;
- ifreeL->stats_t.writeAc.access = 2*(XML->sys.core[ithCore].rename_writes +
- XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group
- //are terminated early
- ifreeL->rtp_stats = ifreeL->stats_t;
- }
- idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads;
- fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes;
- idcl->rtp_stats = idcl->stats_t;
- fdcl->rtp_stats = fdcl->stats_t;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions;
- fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions;
- idcl->rtp_stats = idcl->stats_t;
- fdcl->rtp_stats = fdcl->stats_t;
- }
- }
+ McPATComponent::displayData(indent, plevel);
+ if (core_params.core_ty == OOO) {
+ iFRAT->displayData(indent + 4, plevel);
+ fFRAT->displayData(indent + 4, plevel);
+ ifreeL->displayData(indent + 4, plevel);
+
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ iRRAT->displayData(indent + 4, plevel);
+ fRRAT->displayData(indent + 4, plevel);
+ ffreeL->displayData(indent + 4, plevel);
}
- /* Compute engine */
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
-
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
-
- iRRAT->power_t.reset();
- fRRAT->power_t.reset();
- ifreeL->power_t.reset();
- ffreeL->power_t.reset();
-
- iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic
- +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic);
- fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic
- +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic);
- ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
- +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
- ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic
- +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic);
+ }
+ idcl->displayData(indent + 4, plevel);
+ fdcl->displayData(indent + 4, plevel);
+}
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
-
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic
- +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic
- +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic);
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
- ifreeL->power_t.reset();
- ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
- +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
- }
+void SchedulerU::computeEnergy() {
+ if (!exist) return;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->power_t.reset();
- fdcl->power_t.reset();
- set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
- idcl->power_t = idcl->power * pppm_t;
- set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
- fdcl->power_t = fdcl->power * pppm_t;
- }
+ double ROB_duty_cycle;
+ ROB_duty_cycle = 1;
- }
+ if (int_instruction_selection) {
+ int_instruction_selection->computeEnergy();
+ }
- //assign value to tpd and rtp
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- iRRAT->power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- fRRAT->power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- ffreeL->power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
- power = power + (iFRAT->power + fFRAT->power)
- + (iRRAT->power + fRRAT->power)
- + (ifreeL->power + ffreeL->power);
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- power = power + (iFRAT->power + fFRAT->power)
- + ifreeL->power;
- }
- }
- else
- {
- power = power + idcl->power_t + fdcl->power_t;
- }
+ if (fp_instruction_selection) {
+ fp_instruction_selection->computeEnergy();
+ }
+ if (int_inst_window) {
+ int_inst_window->tdp_stats.reset();
+ int_inst_window->rtp_stats.reset();
+ int_inst_window->power_t.reset();
+ int_inst_window->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ int_inst_window->tdp_stats.readAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.writeAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.searchAc.access =
+ core_params.issueW * core_params.num_pipelines;
+
+ int_inst_window->power_t.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->tdp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->tdp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->tdp_stats.writeAc.access;
+
+ int_inst_window->rtp_stats.readAc.access =
+ core_stats.inst_window_reads;
+ int_inst_window->rtp_stats.writeAc.access =
+ core_stats.inst_window_writes;
+ int_inst_window->rtp_stats.searchAc.access =
+ core_stats.inst_window_wakeup_accesses;
+
+ int_inst_window->rt_power.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->rtp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->rtp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->rtp_stats.writeAc.access;
+ } else if (core_params.multithreaded) {
+ int_inst_window->tdp_stats.readAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.writeAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.searchAc.access =
+ core_params.issueW * core_params.num_pipelines;
+
+ int_inst_window->power_t.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->tdp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->tdp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->tdp_stats.writeAc.access;
+
+ int_inst_window->rtp_stats.readAc.access =
+ core_stats.int_instructions + core_stats.fp_instructions;
+ int_inst_window->rtp_stats.writeAc.access =
+ core_stats.int_instructions + core_stats.fp_instructions;
+ int_inst_window->rtp_stats.searchAc.access =
+ 2 * (core_stats.int_instructions + core_stats.fp_instructions);
+
+ int_inst_window->rt_power.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->rtp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->rtp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->rtp_stats.writeAc.access;
}
- else
- {
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power)
- + (iRRAT->rt_power + fRRAT->rt_power)
- + (ifreeL->rt_power + ffreeL->rt_power);
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power)
- + ifreeL->rt_power;
- }
- }
- else
- {
- rt_power = rt_power + idcl->power_t + fdcl->power_t;
- }
+ }
- }
-}
+ if (fp_inst_window) {
+ fp_inst_window->tdp_stats.reset();
+ fp_inst_window->tdp_stats.readAc.access =
+ fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines;
+ fp_inst_window->tdp_stats.writeAc.access =
+ fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines;
+ fp_inst_window->tdp_stats.searchAc.access =
+ fp_inst_window->l_ip.num_search_ports *
+ core_params.num_fp_pipelines;
+
+ fp_inst_window->rtp_stats.reset();
+ fp_inst_window->rtp_stats.readAc.access =
+ core_stats.fp_inst_window_reads;
+ fp_inst_window->rtp_stats.writeAc.access =
+ core_stats.fp_inst_window_writes;
+ fp_inst_window->rtp_stats.searchAc.access =
+ core_stats.fp_inst_window_wakeup_accesses;
+
+ fp_inst_window->power_t.reset();
+ fp_inst_window->power_t.readOp.dynamic +=
+ fp_inst_window->power.readOp.dynamic *
+ fp_inst_window->tdp_stats.readAc.access +
+ fp_inst_window->power.searchOp.dynamic *
+ fp_inst_window->tdp_stats.searchAc.access +
+ fp_inst_window->power.writeOp.dynamic *
+ fp_inst_window->tdp_stats.writeAc.access;
+
+ fp_inst_window->rt_power.reset();
+ fp_inst_window->rt_power.readOp.dynamic +=
+ fp_inst_window->power.readOp.dynamic *
+ fp_inst_window->rtp_stats.readAc.access +
+ fp_inst_window->power.searchOp.dynamic *
+ fp_inst_window->rtp_stats.searchAc.access +
+ fp_inst_window->power.writeOp.dynamic *
+ fp_inst_window->rtp_stats.writeAc.access;
+ }
-void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
-
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str<< "Int Front End RAT:" << endl;
- cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Front End RAT:" << endl;
- cout << indent_str_next << "Area = " << fFRAT->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fFRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fFRAT->power.readOp.longer_channel_leakage:fFRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<<"Free List:" << endl;
- cout << indent_str_next << "Area = " << ifreeL->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ifreeL->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ifreeL->power.readOp.longer_channel_leakage:ifreeL->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ifreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
-
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- cout << indent_str<< "Int Retire RAT: " << endl;
- cout << indent_str_next << "Area = " << iRRAT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << iRRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? iRRAT->power.readOp.longer_channel_leakage:iRRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << iRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Retire RAT:" << endl;
- cout << indent_str_next << "Area = " << fRRAT->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fRRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fRRAT->power.readOp.longer_channel_leakage:fRRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Free List:" << endl;
- cout << indent_str_next << "Area = " << ffreeL->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ffreeL->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ffreeL->power.readOp.longer_channel_leakage:ffreeL->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ffreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str<< "Int DCL:" << endl;
- cout << indent_str_next << "Peak Dynamic = " << idcl->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? idcl->power.readOp.longer_channel_leakage:idcl->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << idcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout << indent_str<<"FP DCL:" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fdcl->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fdcl->power.readOp.longer_channel_leakage:fdcl->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fdcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
- }
- }
- else
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str_next << "Int Front End RAT Peak Dynamic = " << iFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " << iFRAT->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Int Front End RAT Gate Leakage = " << iFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Peak Dynamic = " << fFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " << fFRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Free List Peak Dynamic = " << ifreeL->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Free List Subthreshold Leakage = " << ifreeL->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- cout << indent_str_next << "Int Retire RAT Peak Dynamic = " << iRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " << iRRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Int Retire RAT Gate Leakage = " << iRRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Peak Dynamic = " << fRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " << fRRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Gate Leakage = " << fRRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Free List Peak Dynamic = " << ffreeL->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Free List Subthreshold Leakage = " << ffreeL->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
- else
- {
- cout << indent_str_next << "Int DCL Peak Dynamic = " << idcl->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int DCL Subthreshold Leakage = " << idcl->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP DCL Peak Dynamic = " << fdcl->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP DCL Subthreshold Leakage = " << fdcl->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+ if (ROB) {
+ ROB->tdp_stats.reset();
+ ROB->tdp_stats.readAc.access = core_params.commitW *
+ core_params.num_pipelines * ROB_duty_cycle;
+ ROB->tdp_stats.writeAc.access = core_params.issueW *
+ core_params.num_pipelines * ROB_duty_cycle;
+ ROB->rtp_stats.reset();
+ ROB->rtp_stats.readAc.access = core_stats.ROB_reads;
+ ROB->rtp_stats.writeAc.access = core_stats.ROB_writes;
+ ROB->power_t.reset();
+ ROB->power_t.readOp.dynamic +=
+ ROB->local_result.power.readOp.dynamic *
+ ROB->tdp_stats.readAc.access +
+ ROB->local_result.power.writeOp.dynamic *
+ ROB->tdp_stats.writeAc.access;
+ ROB->rt_power.reset();
+ ROB->rt_power.readOp.dynamic +=
+ ROB->local_result.power.readOp.dynamic *
+ ROB->rtp_stats.readAc.access +
+ ROB->local_result.power.writeOp.dynamic *
+ ROB->rtp_stats.writeAc.access;
+ }
+
+ output_data.reset();
+ if (int_inst_window) {
+ int_inst_window->output_data.subthreshold_leakage_power =
+ int_inst_window->power_t.readOp.leakage;
+ int_inst_window->output_data.gate_leakage_power =
+ int_inst_window->power_t.readOp.gate_leakage;
+ int_inst_window->output_data.peak_dynamic_power =
+ int_inst_window->power_t.readOp.dynamic * clockRate;
+ int_inst_window->output_data.runtime_dynamic_energy =
+ int_inst_window->rt_power.readOp.dynamic;
+ output_data += int_inst_window->output_data;
+ }
+ if (fp_inst_window) {
+ fp_inst_window->output_data.subthreshold_leakage_power =
+ fp_inst_window->power_t.readOp.leakage;
+ fp_inst_window->output_data.gate_leakage_power =
+ fp_inst_window->power_t.readOp.gate_leakage;
+ fp_inst_window->output_data.peak_dynamic_power =
+ fp_inst_window->power_t.readOp.dynamic * clockRate;
+ fp_inst_window->output_data.runtime_dynamic_energy =
+ fp_inst_window->rt_power.readOp.dynamic;
+ output_data += fp_inst_window->output_data;
+ }
+ if (ROB) {
+ ROB->output_data.peak_dynamic_power =
+ ROB->power_t.readOp.dynamic * clockRate;
+ ROB->output_data.runtime_dynamic_energy =
+ ROB->rt_power.readOp.dynamic;
+ output_data += ROB->output_data;
+ }
+ // Integer and FP instruction selection logic is not included in the
+ // roll-up due to the uninitialized area
+ /*
+ if (int_instruction_selection) {
+ output_data += int_instruction_selection->output_data;
+ }
+ if (fp_instruction_selection) {
+ output_data += fp_instruction_selection->output_data;
+ }
+ */
}
+void SchedulerU::displayData(uint32_t indent, int plevel) { // Shows this unit via McPATComponent::displayData, then its sub-arrays at indent + 4.
+ if (!exist) return; // nothing is shown when exist is false
-void SchedulerU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double ROB_duty_cycle;
-// ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
-// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
-// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1;
- ROB_duty_cycle = 1;
- //init stats
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
- int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
- int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
- int_inst_window->tdp_stats = int_inst_window->stats_t;
- fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines;
- fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines;
- fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines;
- fp_inst_window->tdp_stats = fp_inst_window->stats_t;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle;
- ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle;
- ROB->tdp_stats = ROB->stats_t;
-
- /*
- * When inst commits, ROB must be read.
- * Because for Physcial register based cores, physical register tag in ROB
- * need to be read out and write into RRAT/CAM based RAT.
- * For RS based cores, register content that stored in ROB must be
- * read out and stored in architectural registers.
- *
- * if no-register is involved, the ROB read out operation when instruction commits can be ignored.
- * assuming 20% insts. belong this type.
- * TODO: ROB duty_cycle need to be revisited
- */
- }
+ McPATComponent::displayData(indent, plevel); // this component's own entry comes first
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
- int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
- int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
- int_inst_window->tdp_stats = int_inst_window->stats_t;
- }
+ if (core_params.core_ty == OOO) { // OOO core: both instruction windows, plus the ROB when ROB_size > 0
+ int_inst_window->displayData(indent + 4, plevel); // NOTE(review): pointer used without a null check — confirm it is always allocated for OOO cores
+ fp_inst_window->displayData(indent + 4, plevel); // NOTE(review): same unguarded use as the integer window
+ if (core_params.ROB_size > 0) {
+ ROB->displayData(indent + 4, plevel);
+ }
+ } else if (core_params.multithreaded) { // non-OOO but multithreaded: only the integer window is shown
+ int_inst_window->displayData(indent + 4, plevel);
+ }
- }
- else
- {//rtp
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads;
- int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes;
- int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses;
- int_inst_window->rtp_stats = int_inst_window->stats_t;
- fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads;
- fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes;
- fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses;
- fp_inst_window->rtp_stats = fp_inst_window->stats_t;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
-
- ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads;
- ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes;
- /* ROB need to be updated in RS based OOO when new values are produced,
- * this update may happen before the commit stage when ROB entry is released
- * 1. ROB write at instruction inserted in
- * 2. ROB write as results produced (for RS based OOO only)
- * 3. ROB read as instruction committed. For RS based OOO, data values are read out and sent to ARF
- * For Physical reg based OOO, no data stored in ROB, but register tags need to be
- * read out and used to set the RRAT and to recycle the register tag to free list buffer
- */
- ROB->rtp_stats = ROB->stats_t;
- }
+ // Integer and FP instruction selection logic is not displayed here,
+ // matching its exclusion from the roll-up due to the uninitialized area
+ /*
+ if (int_instruction_selection) {
+ int_instruction_selection->displayData(indent + 4, plevel);
+ }
+ if (fp_instruction_selection) {
+ fp_instruction_selection->displayData(indent + 4, plevel);
+ }
+ */
+}
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
- int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
- int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions);
- int_inst_window->rtp_stats = int_inst_window->stats_t;
- }
+void LoadStoreU::computeEnergy() { // Derives TDP and runtime access counts and dynamic energy for LSQ (and LoadQ when present), then rebuilds output_data.
+ if (!exist) return;
+
+ LSQ->tdp_stats.reset(); // NOTE(review): LSQ is used unguarded here yet null-checked in the roll-up further down — confirm it is never null
+ LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle; // the write count uses the same num_search_ports expression as the read count
+ LSQ->rtp_stats.reset();
+ // Flush overhead considered
+ LSQ->rtp_stats.readAc.access = (core_stats.load_instructions +
+ core_stats.store_instructions) * 2;
+ LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions +
+ core_stats.store_instructions) * 2;
+ LSQ->power_t.reset();
+ //every memory access involves at least two operations on LSQ
+ LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access *
+ (LSQ->local_result.power.searchOp.dynamic +
+ LSQ->local_result.power.readOp.dynamic) + // each read access is charged one search plus one read
+ LSQ->tdp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic;
+ LSQ->rt_power.reset();
+ //every memory access involves at least two operations on LSQ
+ LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access *
+ (LSQ->local_result.power.searchOp.dynamic +
+ LSQ->local_result.power.readOp.dynamic) +
+ LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic;
+
+ if (LoadQ) { // LoadQ is optional; same formulas as LSQ but without the x2 factor on runtime counts
+ LoadQ->tdp_stats.reset();
+ LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LoadQ->rtp_stats.reset();
+ LoadQ->rtp_stats.readAc.access = core_stats.load_instructions +
+ core_stats.store_instructions;
+ LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions +
+ core_stats.store_instructions;
+ LoadQ->power_t.reset();
+ //every memory access involves at least two operations on LoadQ
+ LoadQ->power_t.readOp.dynamic +=
+ LoadQ->tdp_stats.readAc.access *
+ (LoadQ->local_result.power.searchOp.dynamic +
+ LoadQ->local_result.power.readOp.dynamic) +
+ LoadQ->tdp_stats.writeAc.access *
+ LoadQ->local_result.power.writeOp.dynamic;
+ LoadQ->rt_power.reset();
+ //every memory access involves at least two operations on LoadQ
+ LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access *
+ (LoadQ->local_result.power.searchOp.dynamic +
+ LoadQ->local_result.power.readOp.dynamic) +
+ LoadQ->rtp_stats.writeAc.access *
+ LoadQ->local_result.power.writeOp.dynamic;
}
- //computation engine
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->power_t.reset();
- fp_inst_window->power_t.reset();
-
- /* each instruction needs to write to scheduler, read out when all resources and source operands are ready
- * two search ops with one for each source operand
- *
- */
- int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
- + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
- + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access
- + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic;
-
- fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access
- + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access
- + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access
- + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->power_t.reset();
- ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access +
- ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic;
- }
+ McPATComponent::computeEnergy(); // base-class pass; presumably computes child components such as dcache — TODO confirm
+
+ output_data.reset(); // this unit's totals are rebuilt from dcache, LSQ and LoadQ below
+ if (dcache) {
+ output_data += dcache->output_data;
+ }
+ if (LSQ) {
+ LSQ->output_data.peak_dynamic_power =
+ LSQ->power_t.readOp.dynamic * clockRate;
+ LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic;
+ output_data += LSQ->output_data;
+ }
+ if (LoadQ) {
+ LoadQ->output_data.peak_dynamic_power =
+ LoadQ->power_t.readOp.dynamic * clockRate;
+ LoadQ->output_data.runtime_dynamic_energy =
+ LoadQ->rt_power.readOp.dynamic;
+ output_data += LoadQ->output_data;
+ }
+}
+void LoadStoreU::displayData(uint32_t indent, int plevel) { // Shows this unit via McPATComponent::displayData, then LoadQ (if present) and LSQ at indent + 4.
+ if (!exist) return;
+ McPATComponent::displayData(indent, plevel);
+ if (LoadQ) {
+ LoadQ->displayData(indent + 4, plevel);
+ }
+ LSQ->displayData(indent + 4, plevel); // NOTE(review): LSQ is not null-checked, unlike LoadQ — confirm it always exists
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->power_t.reset();
- int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
- + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
- + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access
- + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
- }
+}
- //assign values
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- power = power + int_inst_window->power + fp_inst_window->power;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg;
- power = power + ROB->power;
- }
+void MemManU::computeEnergy() { // Derives TDP and runtime stats and dynamic energy for itlb and dtlb, then sums both into output_data.
+ if (!exist) return;
- }
- else if (coredynp.multithreaded)
- {
- // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
- int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- power = power + int_inst_window->power;
- }
+ itlb->tdp_stats.reset(); // NOTE(review): itlb is used unguarded here yet null-checked in the roll-up further down — confirm it is never null
+ itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports;
+ itlb->tdp_stats.readAc.miss = 0; // TDP case sets misses to zero, so the write term of power_t below contributes nothing
+ itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access -
+ itlb->tdp_stats.readAc.miss;
+ itlb->rtp_stats.reset();
+ itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses;
+ itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses; // each miss is counted as one write access
- }
- else
- {//rtp
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg;
- rt_power = rt_power + ROB->rt_power;
- }
+ itlb->power_t.reset();
+ //FA spent most power in tag, so use total access not hits
+ itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access *
+ itlb->local_result.power.searchOp.dynamic +
+ itlb->tdp_stats.readAc.miss *
+ itlb->local_result.power.writeOp.dynamic;
+ itlb->rt_power.reset();
+ //FA spent most power in tag, so use total access not hits
+ itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access *
+ itlb->local_result.power.searchOp.dynamic +
+ itlb->rtp_stats.writeAc.access *
+ itlb->local_result.power.writeOp.dynamic;
+
+ dtlb->tdp_stats.reset(); // NOTE(review): dtlb is likewise dereferenced before its null check in the roll-up
+ dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ dtlb->tdp_stats.readAc.miss = 0;
+ dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access -
+ dtlb->tdp_stats.readAc.miss;
+ dtlb->rtp_stats.reset();
+ dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses +
+ mem_man_stats.dtlb_write_misses; // NOTE(review): read accesses + write misses looks crossed; itlb uses total accesses here — confirm intent
+ dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses +
+ mem_man_stats.dtlb_read_misses; // NOTE(review): write accesses + read misses; itlb uses total misses here — confirm intent
- }
- else if (coredynp.multithreaded)
- {
- // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
- int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- rt_power = rt_power + int_inst_window->rt_power;
- }
+ dtlb->power_t.reset();
+ //FA spent most power in tag, so use total access not hits
+ dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access *
+ dtlb->local_result.power.searchOp.dynamic +
+ dtlb->tdp_stats.readAc.miss *
+ dtlb->local_result.power.writeOp.dynamic;
+ dtlb->rt_power.reset();
+ //FA spent most power in tag, so use total access not hits
+ dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access *
+ dtlb->local_result.power.searchOp.dynamic +
+ dtlb->rtp_stats.writeAc.access *
+ dtlb->local_result.power.writeOp.dynamic;
+
+ output_data.reset(); // this unit's totals are rebuilt from itlb and dtlb below
+ if (itlb) {
+ itlb->output_data.peak_dynamic_power = itlb->power_t.readOp.dynamic *
+ clockRate;
+ itlb->output_data.runtime_dynamic_energy =
+ itlb->rt_power.readOp.dynamic;
+ output_data += itlb->output_data;
+ }
+ if (dtlb) {
+ dtlb->output_data.peak_dynamic_power =
+ dtlb->power_t.readOp.dynamic * clockRate;
+ dtlb->output_data.runtime_dynamic_energy =
+ dtlb->rt_power.readOp.dynamic;
+ output_data += dtlb->output_data;
}
-// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
-// cout<<"Scheduler power="<<power.readOp.dynamic<<"leakage="<<power.readOp.leakage<<endl;
-// cout<<"IW="<<int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.readAc.access +
-// + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access<<"leakage="<<int_inst_window->local_result.power.readOp.leakage<<endl;
-// cout<<"selection"<<instruction_selection->power.readOp.dynamic<<"leakage"<<instruction_selection->power.readOp.leakage<<endl;
}
-void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str << "Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "FP Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << fp_inst_window->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fp_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fp_inst_window->power.readOp.longer_channel_leakage:fp_inst_window->power.readOp.leakage ) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fp_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- cout << indent_str<<"ROB:" << endl;
- cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else if (coredynp.multithreaded)
- {
- cout << indent_str << "Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- cout << indent_str_next << "ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
- else if (coredynp.multithreaded)
- {
- cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+// Prints this unit's own summary via McPATComponent::displayData(), then the
+// ITLB and DTLB summaries with four extra columns of indent. Does nothing
+// when `exist` is false.
+void MemManU::displayData(uint32_t indent, int plevel) {
+    if (!exist) return;
+
+    McPATComponent::displayData(indent, plevel);
+    // MemManU::computeEnergy() null-checks dtlb before using it (and appears
+    // to do the same for itlb), so treat both pointers as optional here too
+    // instead of dereferencing them unconditionally.
+    if (itlb) {
+        itlb->displayData(indent + 4, plevel);
+    }
+    if (dtlb) {
+        dtlb->displayData(indent + 4, plevel);
+    }
}
-void LoadStoreU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
- dcache.caches->stats_t.readAc.miss = 0;
- dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
- dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
- dcache.caches->stats_t.writeAc.miss = 0;
- dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss;
- dcache.caches->tdp_stats = dcache.caches->stats_t;
-
- dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports;
- dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports;
- dcache.missb->tdp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.ifb->tdp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports;
- dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t;
- if (cache_p==Write_back)
- {
- dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports;
- dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports;
- dcache.wbb->tdp_stats = dcache.wbb->stats_t;
- }
+void RegFU::computeEnergy() { // Sets TDP and runtime access counts plus dynamic values for IRF, FRF and (with register windowing) RFWIN, then sums their output_data into this unit's output_data.
+ /*
+ * Architecture RF and physical RF cannot be present at the same time.
+ * Therefore, the RF stats can only refer to either ARF or PRF;
+ * And the same stats can be used for both.
+ */
+ if (!exist) return;
+
+ IRF->tdp_stats.reset(); // ---- integer register file: peak (TDP) access counts ----
+ IRF->tdp_stats.readAc.access =
+ core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS *
+ (core_stats.ALU_duty_cycle * 1.1 +
+ (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) *
+ core_params.num_pipelines;
+ IRF->tdp_stats.writeAc.access =
+ core_params.issueW *
+ (core_stats.ALU_duty_cycle * 1.1 +
+ (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) *
+ core_params.num_pipelines;
+ IRF->rtp_stats.reset(); // runtime access counts are taken directly from core_stats
+ IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads;
+ IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes;
+ if (core_params.regWindowing) { // windowing adds function_calls * RFWIN_ACCESS_MULTIPLIER reads and writes
+ IRF->rtp_stats.readAc.access += core_stats.function_calls *
+ RFWIN_ACCESS_MULTIPLIER;
+ IRF->rtp_stats.writeAc.access += core_stats.function_calls *
+ RFWIN_ACCESS_MULTIPLIER;
+ }
+ IRF->power_t.reset(); // TDP dynamic value: TDP access counts weighted by local_result read/write dynamic values
+ IRF->power_t.readOp.dynamic += IRF->tdp_stats.readAc.access *
+ IRF->local_result.power.readOp.dynamic +
+ IRF->tdp_stats.writeAc.access *
+ IRF->local_result.power.writeOp.dynamic;
+ IRF->rt_power.reset(); // runtime dynamic value: same weighting using the runtime access counts
+ IRF->rt_power.readOp.dynamic +=
+ IRF->rtp_stats.readAc.access * IRF->local_result.power.readOp.dynamic +
+ IRF->rtp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic;
+
+ FRF->tdp_stats.reset(); // ---- floating-point register file: TDP counts are derived from its port counts ----
+ FRF->tdp_stats.readAc.access =
+ FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 *
+ core_params.num_fp_pipelines;
+ FRF->tdp_stats.writeAc.access =
+ FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 *
+ core_params.num_fp_pipelines;
+ FRF->rtp_stats.reset();
+ FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads;
+ FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes;
+ if (core_params.regWindowing) { // same windowing adjustment as applied to IRF
+ FRF->rtp_stats.readAc.access += core_stats.function_calls *
+ RFWIN_ACCESS_MULTIPLIER;
+ FRF->rtp_stats.writeAc.access += core_stats.function_calls *
+ RFWIN_ACCESS_MULTIPLIER;
+ }
+ FRF->power_t.reset();
+ FRF->power_t.readOp.dynamic +=
+ FRF->tdp_stats.readAc.access * FRF->local_result.power.readOp.dynamic +
+ FRF->tdp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic;
+ FRF->rt_power.reset();
+ FRF->rt_power.readOp.dynamic +=
+ FRF->rtp_stats.readAc.access * FRF->local_result.power.readOp.dynamic +
+ FRF->rtp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic;
+
+ if (core_params.regWindowing) { // ---- RFWIN is only evaluated when register windowing is enabled ----
+ RFWIN->tdp_stats.reset();
+ RFWIN->tdp_stats.readAc.access = 0; // TDP accesses are zero, so power_t below stays at zero
+ RFWIN->tdp_stats.writeAc.access = 0;
+ RFWIN->rtp_stats.reset();
+ RFWIN->rtp_stats.readAc.access =
+ core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
+ RFWIN->rtp_stats.writeAc.access =
+ core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
+ RFWIN->power_t.reset();
+ RFWIN->power_t.readOp.dynamic +=
+ RFWIN->tdp_stats.readAc.access *
+ RFWIN->local_result.power.readOp.dynamic +
+ RFWIN->tdp_stats.writeAc.access *
+ RFWIN->local_result.power.writeOp.dynamic;
+ RFWIN->rt_power.reset();
+ RFWIN->rt_power.readOp.dynamic +=
+ RFWIN->rtp_stats.readAc.access *
+ RFWIN->local_result.power.readOp.dynamic +
+ RFWIN->rtp_stats.writeAc.access *
+ RFWIN->local_result.power.writeOp.dynamic;
+ }
- LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- LSQ->tdp_stats = LSQ->stats_t;
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- LoadQ->tdp_stats = LoadQ->stats_t;
- }
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses;
- dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses;
- dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
- dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses;
- dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses;
- dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss;
- dcache.caches->rtp_stats = dcache.caches->stats_t;
-
- if (cache_p==Write_back)
- {
- dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.missb->rtp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.ifb->rtp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
-
- dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.wbb->rtp_stats = dcache.wbb->stats_t;
- }
- else
- {
- dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.missb->rtp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.ifb->rtp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
- }
+ output_data.reset(); // rebuild this unit's totals from the register files below
+ if (IRF) { // NOTE(review): IRF and FRF were already dereferenced unguarded above, so these null checks cannot protect that code - confirm intent
+ IRF->output_data.peak_dynamic_power =
+ IRF->power_t.readOp.dynamic * clockRate;
+ IRF->output_data.subthreshold_leakage_power *= // NOTE(review): in-place scaling would compound if computeEnergy() runs more than once - confirm it is called only once
+ core_params.num_hthreads;
+ IRF->output_data.gate_leakage_power *= core_params.num_hthreads;
+ IRF->output_data.runtime_dynamic_energy = IRF->rt_power.readOp.dynamic;
+ output_data += IRF->output_data;
+ }
+ if (FRF) { // same treatment as IRF, including the num_hthreads leakage scaling
+ FRF->output_data.peak_dynamic_power =
+ FRF->power_t.readOp.dynamic * clockRate;
+ FRF->output_data.subthreshold_leakage_power *=
+ core_params.num_hthreads;
+ FRF->output_data.gate_leakage_power *= core_params.num_hthreads;
+ FRF->output_data.runtime_dynamic_energy = FRF->rt_power.readOp.dynamic;
+ output_data += FRF->output_data;
+ }
+ if (RFWIN) { // no num_hthreads leakage scaling is applied to RFWIN
+ RFWIN->output_data.peak_dynamic_power =
+ RFWIN->power_t.readOp.dynamic * clockRate;
+ RFWIN->output_data.runtime_dynamic_energy =
+ RFWIN->rt_power.readOp.dynamic;
+ output_data += RFWIN->output_data;
+ }
+}
- LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered
- LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;
- LSQ->rtp_stats = LSQ->stats_t;
+void RegFU::displayData(uint32_t indent, int plevel) { // Prints this unit's summary, then each register file's summary with indent + 4.
+ if (!exist) return; // skip all output when exist is false
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
- LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
- LoadQ->rtp_stats = LoadQ->stats_t;
- }
+ McPATComponent::displayData(indent, plevel); // base-class print for this unit itself
- }
+ IRF->displayData(indent + 4, plevel);
+ FRF->displayData(indent + 4, plevel);
+ if (core_params.regWindowing) { // RFWIN is only printed when register windowing is enabled
+ RFWIN->displayData(indent + 4, plevel);
+ }
+}
- dcache.power_t.reset();
- LSQ->power_t.reset();
- dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+
- dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+
- dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+
- dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic);
+void EXECU::computeEnergy() { // Passes parameters/stats to the bypass components, runs the base-class computeEnergy(), then computes each present sub-unit and adds its output_data to this unit's output_data.
+ if (!exist) return;
- if (cache_p==Write_back)
- {//write miss will generate a write later
- dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic;
- }
+ int_bypass->set_params_stats(core_params.execu_int_bypass_ports, // integer data bypass: port count, ALU CDB duty cycle, ALU CDB access count
+ core_stats.ALU_cdb_duty_cycle,
+ core_stats.cdb_alu_accesses);
- dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic +
- dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
- dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic +
- dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic;
- dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic +
- dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic;
- if (cache_p==Write_back)
- {
- dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic
- + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic;
- }
+ intTagBypass->set_params_stats(core_params.execu_int_bypass_ports, // integer tag bypass receives the same three values as int_bypass
+ core_stats.ALU_cdb_duty_cycle,
+ core_stats.cdb_alu_accesses);
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->power_t.reset();
- LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+
- LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ
-
- LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
- + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
-
- }
- else
- {
- LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
- + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
-
- }
-
- if (is_tdp)
- {
-// dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg +
-// (dcache.missb->local_result.power +
-// dcache.ifb->local_result.power +
-// dcache.prefetchb->local_result.power +
-// dcache.wbb->local_result.power)*pppm_Isub;
- dcache.power = dcache.power_t + (dcache.caches->local_result.power +
- dcache.missb->local_result.power +
- dcache.ifb->local_result.power +
- dcache.prefetchb->local_result.power) *pppm_lkg;
- if (cache_p==Write_back)
- {
- dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg;
- }
+ if (core_params.num_muls > 0) { // multiplier bypass components are only configured when the core has multipliers
+ int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports,
+ core_stats.MUL_cdb_duty_cycle,
+ core_stats.cdb_mul_accesses);
- LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
- power = power + dcache.power + LSQ->power;
+ intTag_mul_Bypass->set_params_stats(core_params.execu_mul_bypass_ports,
+ core_stats.MUL_cdb_duty_cycle,
+ core_stats.cdb_mul_accesses);
+ }
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
- power = power + LoadQ->power;
- }
+ if (core_params.num_fpus > 0) { // FP bypass components are only configured when the core has FPUs
+ fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports,
+ core_stats.FPU_cdb_duty_cycle,
+ core_stats.cdb_fpu_accesses);
+
+ fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports,
+ core_stats.FPU_cdb_duty_cycle,
+ core_stats.cdb_fpu_accesses);
}
- else
- {
-// dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
-// dcache.missb->local_result.power +
-// dcache.ifb->local_result.power +
-// dcache.prefetchb->local_result.power +
-// dcache.wbb->local_result.power)*pppm_lkg;
- dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
- dcache.missb->local_result.power +
- dcache.ifb->local_result.power +
- dcache.prefetchb->local_result.power )*pppm_lkg;
-
- if (cache_p==Write_back)
- {
- dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg;
- }
- LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
- rt_power = rt_power + dcache.rt_power + LSQ->rt_power;
+ McPATComponent::computeEnergy(); // NOTE(review): presumably evaluates the bypass components as children and initializes output_data (no explicit reset here) - confirm in McPATComponent
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
- rt_power = rt_power + LoadQ->rt_power;
- }
+ if (rfu) { // each sub-unit below is null-checked, computed, then added to this unit's totals
+ rfu->computeEnergy();
+ output_data += rfu->output_data;
+ }
+ if (scheu) {
+ scheu->computeEnergy();
+ output_data += scheu->output_data;
+ }
+ if (fp_u) {
+ fp_u->computeEnergy();
+ output_data += fp_u->output_data;
+ }
+ if (exeu) {
+ exeu->computeEnergy();
+ output_data += exeu->output_data;
+ }
+ if (mul) {
+ mul->computeEnergy();
+ output_data += mul->output_data;
}
}
+// Prints this execution unit's own summary via McPATComponent::displayData(),
+// then the summaries of its sub-units (rfu, scheu, exeu, fp_u, mul, in that
+// order) with four extra columns of indent. Does nothing when `exist` is false.
+void EXECU::displayData(uint32_t indent, int plevel) {
+    if (!exist) return;
-void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
- cout << indent_str << "Data Cache:" << endl;
- cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.core_ty==Inorder)
- {
- cout << indent_str << "Load/Store Queue:" << endl;
- cout << indent_str_next << "Area = " << LSQ->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
-
- {
- if (XML->sys.core[ithCore].load_buffer_size >0)
- {
- cout << indent_str << "LoadQ:" << endl;
- cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- cout << indent_str<< "StoreQ:" << endl;
- cout << indent_str_next << "Area = " << LSQ->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime<< " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str_next << "Data Cache Peak Dynamic = " << dcache.rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Data Cache Subthreshold Leakage = " << dcache.rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Data Cache Gate Leakage = " << dcache.rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.core_ty==Inorder)
- {
- cout << indent_str_next << "Load/Store Queue Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Load/Store Queue Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl;
- }
- else
- {
- cout << indent_str_next << "LoadQ Peak Dynamic = " << LoadQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "LoadQ Subthreshold Leakage = " << LoadQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "StoreQ Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+    McPATComponent::displayData(indent, plevel);
+    // EXECU::computeEnergy() null-checks every sub-unit pointer before use;
+    // apply the same guards here so printing cannot dereference a sub-unit
+    // that was never created. The num_fpus/num_muls conditions are kept so
+    // the set of printed units is unchanged for valid configurations.
+    if (rfu) {
+        rfu->displayData(indent + 4, plevel);
+    }
+    if (scheu) {
+        scheu->displayData(indent + 4, plevel);
+    }
+    if (exeu) {
+        exeu->displayData(indent + 4, plevel);
+    }
+    if (fp_u && core_params.num_fpus > 0) {
+        fp_u->displayData(indent + 4, plevel);
+    }
+    if (mul && core_params.num_muls > 0) {
+        mul->displayData(indent + 4, plevel);
+    }
}
-void MemManU::computeEnergy(bool is_tdp)
-{
-
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports;
- itlb->stats_t.readAc.miss = 0;
- itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
- itlb->tdp_stats = itlb->stats_t;
-
- dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- dtlb->stats_t.readAc.miss = 0;
- dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
- dtlb->tdp_stats = dtlb->stats_t;
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses;
- itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses;
- itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
- itlb->rtp_stats = itlb->stats_t;
-
- dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses;
- dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses;
- dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
- dtlb->rtp_stats = dtlb->stats_t;
+void Core::computeEnergy() { // Runs computeEnergy() on the core's units, then rebuilds the core's output_data as the sum of its sub-components' output_data.
+ ifu->computeEnergy();
+ lsu->computeEnergy();
+ mmu->computeEnergy();
+ exu->computeEnergy();
+ if (core_params.core_ty == OOO) { // rnu is only evaluated for OOO cores
+ rnu->computeEnergy();
}
- itlb->power_t.reset();
- dtlb->power_t.reset();
- itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits
- +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic;
- dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits
- +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic;
-
- if (is_tdp)
- {
- itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg;
- dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
- power = power + itlb->power + dtlb->power;
- }
- else
- {
- itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg;
- dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
- rt_power = rt_power + itlb->rt_power + dtlb->rt_power;
- }
+ output_data.reset(); // start the core totals from zero before accumulating
+ if (ifu) { // NOTE(review): ifu/lsu/mmu/exu were dereferenced unguarded above, so these null checks cannot protect those calls - confirm intent
+ output_data += ifu->output_data;
+ }
+ if (lsu) {
+ output_data += lsu->output_data;
+ }
+ if (mmu) {
+ output_data += mmu->output_data;
+ }
+ if (exu) {
+ output_data += exu->output_data;
+ }
+ if (rnu) { // NOTE(review): guarded by pointer here but by core_ty == OOO above - confirm rnu is null for non-OOO cores
+ output_data += rnu->output_data;
+ }
+ if (corepipe) { // corepipe, undiffCore and l2cache are summed without a computeEnergy() call in this function
+ output_data += corepipe->output_data;
+ }
+ if (undiffCore) {
+ output_data += undiffCore->output_data;
+ }
+ if (l2cache) {
+ output_data += l2cache->output_data;
+ }
}
-void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
-
-
- if (is_tdp)
- {
- cout << indent_str << "Itlb:" << endl;
- cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Dtlb:" << endl;
- cout << indent_str_next << "Area = " << dtlb->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << dtlb->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? dtlb->power.readOp.longer_channel_leakage:dtlb->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << dtlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- cout << indent_str_next << "Itlb Peak Dynamic = " << itlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Dtlb Peak Dynamic = " << dtlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage << " W" << endl;
- }
-
-}
+InstFetchU ::~InstFetchU() { // Deletes the sub-components this unit holds through raw pointers: IB, the three ID_* members, BTB/BPT and icache.
-void RegFU::computeEnergy(bool is_tdp)
-{
-/*
- * Architecture RF and physical RF cannot be present at the same time.
- * Therefore, the RF stats can only refer to either ARF or PRF;
- * And the same stats can be used for both.
- */
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- IRF->stats_t.readAc.access = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+
- (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
- IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+
- (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
- //Rule of Thumb: about 10% RF related instructions do not need to access ALUs
- IRF->tdp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
- FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
- FRF->tdp_stats = FRF->stats_t;
- if (coredynp.regWindowing)
- {
- RFWIN->stats_t.readAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports;
- RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports;
- RFWIN->tdp_stats = RFWIN->stats_t;
- }
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy
- IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes;
- IRF->rtp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads;
- FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes;
- FRF->rtp_stats = FRF->stats_t;
- if (coredynp.regWindowing)
- {
- RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls*16;
- RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16;
- RFWIN->rtp_stats = RFWIN->stats_t;
-
- IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads +
- XML->sys.core[ithCore].function_calls*16;
- IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes +
- XML->sys.core[ithCore].function_calls*16;
- IRF->rtp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads +
- XML->sys.core[ithCore].function_calls*16;;
- FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+
- XML->sys.core[ithCore].function_calls*16;;
- FRF->rtp_stats = FRF->stats_t;
- }
+ if (!exist) return; // NOTE(review): returns before any delete when exist is false - confirm nothing is allocated in that case
+ if (IB) { // each member is deleted if non-null and then reset to NULL
+ delete IB;
+ IB = NULL;
}
- IRF->power_t.reset();
- FRF->power_t.reset();
- IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic
- +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic);
- FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic
- +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic);
- if (coredynp.regWindowing)
- {
- RFWIN->power_t.reset();
- RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic +
- RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic);
- }
-
- if (is_tdp)
- {
- IRF->power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
- FRF->power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
- power = power + (IRF->power + FRF->power);
- if (coredynp.regWindowing)
- {
- RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
- power = power + RFWIN->power;
- }
+ if (ID_inst) {
+ delete ID_inst;
+ ID_inst = NULL;
+ }
+ if (ID_operand) {
+ delete ID_operand;
+ ID_operand = NULL;
+ }
+ if (ID_misc) {
+ delete ID_misc;
+ ID_misc = NULL;
+ }
+ if (core_params.predictionW > 0) { // BTB and BPT are only released when predictionW > 0
+ if (BTB) {
+ delete BTB;
+ BTB = NULL;
}
- else
- {
- IRF->rt_power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
- FRF->rt_power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (IRF->power_t + FRF->power_t);
- if (coredynp.regWindowing)
- {
- RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
- rt_power = rt_power + RFWIN->rt_power;
- }
+ if (BPT) {
+ delete BPT;
+ BPT = NULL;
}
+ }
+ if (icache) {
+ delete icache; // unlike the other members, icache is not reset to NULL afterwards
+ }
}
+BranchPredictor ::~BranchPredictor() {
-void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- { cout << indent_str << "Integer RF:" << endl;
- cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Floating Point RF:" << endl;
- cout << indent_str_next << "Area = " << FRF->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << FRF->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? FRF->power.readOp.longer_channel_leakage:FRF->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.regWindowing)
- {
- cout << indent_str << "Register Windows:" << endl;
- cout << indent_str_next << "Area = " << RFWIN->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << RFWIN->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? RFWIN->power.readOp.longer_channel_leakage:RFWIN->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << RFWIN->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str_next << "Integer RF Peak Dynamic = " << IRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Integer RF Subthreshold Leakage = " << IRF->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Floating Point RF Peak Dynamic = " << FRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " << FRF->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Floating Point RF Gate Leakage = " << FRF->rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.regWindowing)
- {
- cout << indent_str_next << "Register Windows Peak Dynamic = " << RFWIN->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Register Windows Subthreshold Leakage = " << RFWIN->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Register Windows Gate Leakage = " << RFWIN->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+    // Skip all cleanup when this component is flagged as non-existent.
+    if (!exist) return;
+
+    // Release each predictor structure in turn. delete on a null pointer is
+    // a no-op, so no per-member guard is required; every pointer is reset
+    // to NULL after being released.
+    delete globalBPT;
+    globalBPT = NULL;
+
+    delete localBPT;
+    localBPT = NULL;
+
+    delete L1_localBPT;
+    L1_localBPT = NULL;
+
+    delete L2_localBPT;
+    L2_localBPT = NULL;
+
+    delete chooser;
+    chooser = NULL;
+
+    delete RAS;
+    RAS = NULL;
}
+RENAMINGU ::~RENAMINGU() {
-void EXECU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double pppm_t[4] = {1,1,1,1};
-// rfu->power.reset();
-// rfu->rt_power.reset();
-// scheu->power.reset();
-// scheu->rt_power.reset();
-// exeu->power.reset();
-// exeu->rt_power.reset();
-
- rfu->computeEnergy(is_tdp);
- scheu->computeEnergy(is_tdp);
- exeu->computeEnergy(is_tdp);
- if (coredynp.num_fpus >0)
- {
- fp_u->computeEnergy(is_tdp);
- }
- if (coredynp.num_muls >0)
- {
- mul->computeEnergy(is_tdp);
- }
-
- if (is_tdp)
- {
- set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction.
- bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t;
- if (coredynp.num_muls >0)
- {
- set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction.
- bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t;
- power = power + mul->power;
- }
- if (coredynp.num_fpus>0)
- {
- set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction.
- bypass.power = bypass.power + fp_bypass->power*pppm_t + fpTagBypass->power*pppm_t ;
- power = power + fp_u->power;
- }
-
- power = power + rfu->power + exeu->power + bypass.power + scheu->power;
- }
- else
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses);
- bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t;
- bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t;
-
- if (coredynp.num_muls >0)
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction.
- bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t;
- rt_power = rt_power + mul->rt_power;
- }
-
- if (coredynp.num_fpus>0)
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses);
- bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t;
- bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t;
- rt_power = rt_power + fp_u->rt_power;
- }
- rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power;
- }
+    // Skip all cleanup when this component is flagged as non-existent.
+    if (!exist) return;
+
+    // Front-end register alias tables (integer, floating point).
+    if (iFRAT) {
+        delete iFRAT;
+        iFRAT = NULL;
+    }
+    if (fFRAT) {
+        delete fFRAT;
+        fFRAT = NULL;
+    }
+
+    // Retirement register alias tables (integer, floating point).
+    if (iRRAT) {
+        delete iRRAT;
+        iRRAT = NULL;
+    }
+    // This branch previously tested and deleted iFRAT a second time, which
+    // could never fire and left fRRAT unreleased.
+    if (fRRAT) {
+        delete fRRAT;
+        fRRAT = NULL;
+    }
+
+    // Free lists.
+    if (ifreeL) {
+        delete ifreeL;
+        ifreeL = NULL;
+    }
+    if (ffreeL) {
+        delete ffreeL;
+        ffreeL = NULL;
+    }
+
+    // Dependency check logic.
+    if (idcl) {
+        delete idcl;
+        idcl = NULL;
+    }
+    if (fdcl) {
+        delete fdcl;
+        fdcl = NULL;
+    }
+
+    if (RAHT) {
+        delete RAHT;
+        RAHT = NULL;
+    }
}
-void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
-// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl;
- if (is_tdp)
- {
- cout << indent_str << "Register Files:" << endl;
- cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3){
- rfu->displayEnergy(indent+4,is_tdp);
- }
- cout << indent_str << "Instruction Scheduler:" << endl;
- cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3){
- scheu->displayEnergy(indent+4,is_tdp);
- }
- exeu->displayEnergy(indent,is_tdp);
- if (coredynp.num_fpus>0)
- {
- fp_u->displayEnergy(indent,is_tdp);
- }
- if (coredynp.num_muls >0)
- {
- mul->displayEnergy(indent,is_tdp);
- }
- cout << indent_str << "Results Broadcast Bus:" << endl;
- cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- cout << indent_str_next << "Register Files Peak Dynamic = " << rfu->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Register Files Subthreshold Leakage = " << rfu->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Register Files Gate Leakage = " << rfu->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " << scheu->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " << scheu->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Gate Leakage = " << scheu->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " << bypass.rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Subthreshold Leakage = " << bypass.rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Gate Leakage = " << bypass.rt_power.readOp.gate_leakage << " W" << endl;
- }
+// Releases the load/store queue and the data cache owned by this unit.
+LoadStoreU ::~LoadStoreU() {
+    // Skip all cleanup when this component is flagged as non-existent.
+    if (!exist) return;
+
+    // delete on a null pointer is a no-op, so the pointers are released
+    // unconditionally and then reset.
+    delete LSQ;
+    LSQ = NULL;
+
+    delete dcache;
+    dcache = NULL;
}
-void Core::computeEnergy(bool is_tdp)
-{
- //power_point_product_masks
- double pppm_t[4] = {1,1,1,1};
- double rtp_pipeline_coe;
- double num_units = 4.0;
- if (is_tdp)
- {
- ifu->computeEnergy(is_tdp);
- lsu->computeEnergy(is_tdp);
- mmu->computeEnergy(is_tdp);
- exu->computeEnergy(is_tdp);
-
- if (coredynp.core_ty==OOO)
- {
- num_units = 5.0;
- rnu->computeEnergy(is_tdp);
- set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- if (rnu->exist)
- {
- rnu->power = rnu->power + corepipe->power*pppm_t;
- power = power + rnu->power;
- }
- }
-
- if (ifu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
-// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
- ifu->power = ifu->power + corepipe->power*pppm_t;
-// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
-// cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl;
- power = power + ifu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (lsu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- lsu->power = lsu->power + corepipe->power*pppm_t;
-// cout << "LSU = " << lsu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + lsu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (exu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- exu->power = exu->power + corepipe->power*pppm_t;
-// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + exu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (mmu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- mmu->power = mmu->power + corepipe->power*pppm_t;
-// cout << "MMU = " << mmu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + mmu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
+MemManU ::~MemManU() {
- power = power + undiffCore->power;
+    // Skip all cleanup when this component is flagged as non-existent.
+    if (!exist) return;
+
+    // Release the instruction and data TLBs; delete tolerates null
+    // pointers, so no guard is needed before each release.
+    delete itlb;
+    itlb = NULL;
+
+    delete dtlb;
+    dtlb = NULL;
+}
- if (XML->sys.Private_L2)
- {
+RegFU ::~RegFU() {
- l2cache->computeEnergy(is_tdp);
- set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1);
- //l2cache->power = l2cache->power*pppm_t;
- power = power + l2cache->power*pppm_t;
- }
- }
- else
- {
- ifu->computeEnergy(is_tdp);
- lsu->computeEnergy(is_tdp);
- mmu->computeEnergy(is_tdp);
- exu->computeEnergy(is_tdp);
- if (coredynp.core_ty==OOO)
- {
- num_units = 5.0;
- rnu->computeEnergy(is_tdp);
- set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- if (rnu->exist)
- {
- rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t;
-
- rt_power = rt_power + rnu->rt_power;
- }
- }
- else
- {
- if (XML->sys.homogeneous_cores==1)
- {
- rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores;
- }
- else
- {
- rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles;
- }
- set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- }
+    // Skip all cleanup when this component is flagged as non-existent.
+    if (!exist) return;
+
+    // Release the integer register file, the floating-point register file
+    // and the register-window structure. delete on a null pointer is a
+    // no-op, so each pointer is released unconditionally and then reset.
+    delete IRF;
+    IRF = NULL;
+
+    delete FRF;
+    FRF = NULL;
+
+    delete RFWIN;
+    RFWIN = NULL;
+}
- if (ifu->exist)
- {
- ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + ifu->rt_power ;
- }
- if (lsu->exist)
- {
- lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + lsu->rt_power;
- }
- if (exu->exist)
- {
- exu->rt_power = exu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + exu->rt_power;
- }
- if (mmu->exist)
- {
- mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + mmu->rt_power ;
- }
+SchedulerU ::~SchedulerU() {
- rt_power = rt_power + undiffCore->power;
-// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- if (XML->sys.Private_L2)
- {
+    // Skip all cleanup when this component is flagged as non-existent.
+    if (!exist) return;
+
+    // Instruction windows.
+    if (int_inst_window) {
+        delete int_inst_window;
+        int_inst_window = NULL;
+    }
+    // This branch previously deleted and cleared int_inst_window again
+    // (already NULL at this point), so the floating-point window was never
+    // released and its pointer was left unchanged.
+    if (fp_inst_window) {
+        delete fp_inst_window;
+        fp_inst_window = NULL;
+    }
+
+    if (ROB) {
+        delete ROB;
+        ROB = NULL;
+    }
+
+    // Instruction selection logic.
+    if (int_instruction_selection) {
+        delete int_instruction_selection;
+        int_instruction_selection = NULL;
+    }
+    if (fp_instruction_selection) {
+        delete fp_instruction_selection;
+        fp_instruction_selection = NULL;
+    }
+}
- l2cache->computeEnergy(is_tdp);
- //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1);
- //l2cache->rt_power = l2cache->rt_power*pppm_t;
- rt_power = rt_power + l2cache->rt_power;
- }
- }
+// Releases the bypass structures and the sub-units owned by the execution
+// unit, in the same order as before.
+EXECU ::~EXECU() {
+    // Skip all cleanup when this component is flagged as non-existent.
+    if (!exist) return;
+
+    // Bypass structures. delete on a null pointer is a no-op, so each
+    // pointer is released unconditionally and then reset.
+    delete int_bypass;
+    int_bypass = NULL;
+    delete intTagBypass;
+    intTagBypass = NULL;
+    delete int_mul_bypass;
+    int_mul_bypass = NULL;
+    delete intTag_mul_Bypass;
+    intTag_mul_Bypass = NULL;
+    delete fp_bypass;
+    fp_bypass = NULL;
+    delete fpTagBypass;
+    fpTagBypass = NULL;
+
+    // Sub-units.
+    delete fp_u;
+    fp_u = NULL;
+    delete exeu;
+    exeu = NULL;
+    delete mul;
+    mul = NULL;
+    delete rfu;
+    rfu = NULL;
+    delete scheu;
+    scheu = NULL;
}
-void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
- if (is_tdp)
- {
- cout << "Core:" << endl;
- cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str << "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout<<endl;
- if (ifu->exist)
- {
- cout << indent_str << "Instruction Fetch Unit:" << endl;
- cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- ifu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (coredynp.core_ty==OOO)
- {
- if (rnu->exist)
- {
- cout << indent_str<< "Renaming Unit:" << endl;
- cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- rnu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
+Core::~Core() {
- }
- if (lsu->exist)
- {
- cout << indent_str<< "Load Store Unit:" << endl;
- cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- lsu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (mmu->exist)
- {
- cout << indent_str<< "Memory Management Unit:" << endl;
- cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- mmu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (exu->exist)
- {
- cout << indent_str<< "Execution Unit:" << endl;
- cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- exu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
-// if (plevel >2)
-// {
-// if (undiffCore->exist)
-// {
-// cout << indent_str << "Undifferentiated Core" << endl;
-// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl;
-// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl;
-//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Subthreshold Leakage = "
-// << (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl;
-// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl;
-// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl;
-// cout <<endl;
-// }
-// }
- if (XML->sys.Private_L2)
- {
-
- l2cache->displayEnergy(4,is_tdp);
- }
-
- }
- else
- {
-// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
- }
-}
-InstFetchU ::~InstFetchU(){
-
- if (!exist) return;
- if(IB) {delete IB; IB = 0;}
- if(ID_inst) {delete ID_inst; ID_inst = 0;}
- if(ID_operand) {delete ID_operand; ID_operand = 0;}
- if(ID_misc) {delete ID_misc; ID_misc = 0;}
- if (coredynp.predictionW>0)
- {
- if(BTB) {delete BTB; BTB = 0;}
- if(BPT) {delete BPT; BPT = 0;}
- }
+    // Release every sub-component pointer held by the core. delete on a
+    // null pointer is a no-op, so each pointer is released unconditionally
+    // and then reset to NULL.
+    delete ifu;
+    ifu = NULL;
+
+    delete lsu;
+    lsu = NULL;
+
+    delete rnu;
+    rnu = NULL;
+
+    delete mmu;
+    mmu = NULL;
+
+    delete exu;
+    exu = NULL;
+
+    delete corepipe;
+    corepipe = NULL;
+
+    delete undiffCore;
+    undiffCore = NULL;
+
+    delete l2cache;
+    l2cache = NULL;
}
-BranchPredictor ::~BranchPredictor(){
+// Resets core_params to all-zero bytes, then sets the two peak widths to -1.
+void Core::initialize_params() {
+    memset(&core_params, 0, sizeof(CoreParameters));
+
+    // NOTE(review): -1 is presumably a "not provided" marker for the peak
+    // widths -- confirm against the code that consumes them.
+    core_params.peak_commitW = -1;
+    core_params.peak_issueW = -1;
+}
- if (!exist) return;
- if(globalBPT) {delete globalBPT; globalBPT = 0;}
- if(localBPT) {delete localBPT; localBPT = 0;}
- if(L1_localBPT) {delete L1_localBPT; L1_localBPT = 0;}
- if(L2_localBPT) {delete L2_localBPT; L2_localBPT = 0;}
- if(chooser) {delete chooser; chooser = 0;}
- if(RAS) {delete RAS; RAS = 0;}
- }
+// Resets core_stats to all-zero bytes, then sets every duty-cycle field
+// listed below to 1.0.
+void Core::initialize_stats() {
+    memset(&core_stats, 0, sizeof(CoreStatistics));
+
+    // Fetch, pipeline and load/store duty cycles.
+    core_stats.IFU_duty_cycle = 1.0;
+    core_stats.pipeline_duty_cycle = 1.0;
+    core_stats.LSU_duty_cycle = 1.0;
+
+    // Functional-unit duty cycles.
+    core_stats.ALU_duty_cycle = 1.0;
+    core_stats.FPU_duty_cycle = 1.0;
+    core_stats.MUL_duty_cycle = 1.0;
+
+    // Common-data-bus duty cycles.
+    core_stats.ALU_cdb_duty_cycle = 1.0;
+    core_stats.FPU_cdb_duty_cycle = 1.0;
+    core_stats.MUL_cdb_duty_cycle = 1.0;
+
+    // Memory-management-unit duty cycles.
+    core_stats.MemManU_D_duty_cycle = 1.0;
+    core_stats.MemManU_I_duty_cycle = 1.0;
+}
-RENAMINGU ::~RENAMINGU(){
-
- if (!exist) return;
- if(iFRAT ) {delete iFRAT; iFRAT = 0;}
- if(fFRAT ) {delete fFRAT; fFRAT =0;}
- if(iRRAT) {delete iRRAT; iRRAT = 0;}
- if(iFRAT) {delete iFRAT; iFRAT = 0;}
- if(ifreeL) {delete ifreeL;ifreeL= 0;}
- if(ffreeL) {delete ffreeL;ffreeL= 0;}
- if(idcl) {delete idcl; idcl = 0;}
- if(fdcl) {delete fdcl; fdcl = 0;}
- if(RAHT) {delete RAHT; RAHT = 0;}
+void Core::set_core_param() {
+ initialize_params();
+ initialize_stats();
+
+ int num_children = xml_data->nChildNode("param");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_STR_IF("name", name);
+ ASSIGN_INT_IF("opt_local", core_params.opt_local);
+ ASSIGN_FP_IF("clock_rate", core_params.clockRate);
+ ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
+ ASSIGN_INT_IF("opcode_width", core_params.opcode_width);
+ ASSIGN_INT_IF("x86", core_params.x86);
+ ASSIGN_INT_IF("Embedded", core_params.Embedded);
+ ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type);
+ ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length);
+ ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads);
+ ASSIGN_INT_IF("fetch_width", core_params.fetchW);
+ ASSIGN_INT_IF("decode_width", core_params.decodeW);
+ ASSIGN_INT_IF("issue_width", core_params.issueW);
+ ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW);
+ ASSIGN_INT_IF("commit_width", core_params.commitW);
+ ASSIGN_INT_IF("prediction_width", core_params.predictionW);
+ ASSIGN_INT_IF("ALU_per_core", core_params.num_alus);
+ ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus);
+ ASSIGN_INT_IF("MUL_per_core", core_params.num_muls);
+ ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW);
+ ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty,
+ Scheduler_type);
+ ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type);
+ ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size);
+ ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size);
+ ASSIGN_INT_IF("ROB_size", core_params.ROB_size);
+ ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc);
+ ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks);
+ ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width);
+ ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc);
+ ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks);
+ ASSIGN_INT_IF("register_window_size",
+ core_params.register_window_size);
+ ASSIGN_INT_IF("register_window_throughput",
+ core_params.register_window_throughput);
+ ASSIGN_INT_IF("register_window_latency",
+ core_params.register_window_latency);
+ ASSIGN_INT_IF("register_window_assoc",
+ core_params.register_window_assoc);
+ ASSIGN_INT_IF("register_window_nbanks",
+ core_params.register_window_nbanks);
+ ASSIGN_INT_IF("register_window_tag_width",
+ core_params.register_window_tag_width);
+ ASSIGN_INT_IF("register_window_rw_ports",
+ core_params.register_window_rw_ports);
+ ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size);
+ ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc);
+ ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks);
+ ASSIGN_INT_IF("phy_Regs_IRF_tag_width",
+ core_params.phy_Regs_IRF_tag_width);
+ ASSIGN_INT_IF("phy_Regs_IRF_rd_ports",
+ core_params.phy_Regs_IRF_rd_ports);
+ ASSIGN_INT_IF("phy_Regs_IRF_wr_ports",
+ core_params.phy_Regs_IRF_wr_ports);
+ ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size);
+ ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc);
+ ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks);
+ ASSIGN_INT_IF("phy_Regs_FRF_tag_width",
+ core_params.phy_Regs_FRF_tag_width);
+ ASSIGN_INT_IF("phy_Regs_FRF_rd_ports",
+ core_params.phy_Regs_FRF_rd_ports);
+ ASSIGN_INT_IF("phy_Regs_FRF_wr_ports",
+ core_params.phy_Regs_FRF_wr_ports);
+ ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks);
+ ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports);
+ ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks);
+ ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports);
+ ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks);
+ ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports);
+ ASSIGN_INT_IF("memory_ports", core_params.memory_ports);
+ ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size);
+ ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc);
+ ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks);
+ ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size);
+ ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc);
+ ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks);
+ ASSIGN_INT_IF("instruction_window_size",
+ core_params.instruction_window_size);
+ ASSIGN_INT_IF("fp_instruction_window_size",
+ core_params.fp_instruction_window_size);
+ ASSIGN_INT_IF("instruction_buffer_size",
+ core_params.instruction_buffer_size);
+ ASSIGN_INT_IF("instruction_buffer_assoc",
+ core_params.instruction_buffer_assoc);
+ ASSIGN_INT_IF("instruction_buffer_nbanks",
+ core_params.instruction_buffer_nbanks);
+ ASSIGN_INT_IF("instruction_buffer_tag_width",
+ core_params.instruction_buffer_tag_width);
+ ASSIGN_INT_IF("number_instruction_fetch_ports",
+ core_params.number_instruction_fetch_ports);
+ ASSIGN_INT_IF("RAS_size", core_params.RAS_size);
+ ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt,
+ Wire_type);
+ ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type);
+ ASSIGN_INT_IF("execu_int_bypass_ports",
+ core_params.execu_int_bypass_ports);
+ ASSIGN_INT_IF("execu_mul_bypass_ports",
+ core_params.execu_mul_bypass_ports);
+ ASSIGN_INT_IF("execu_fp_bypass_ports",
+ core_params.execu_fp_bypass_ports);
+ ASSIGN_ENUM_IF("execu_bypass_wire_type",
+ core_params.execu_bypass_wire_type, Wire_type);
+ ASSIGN_FP_IF("execu_bypass_base_width",
+ core_params.execu_bypass_base_width);
+ ASSIGN_FP_IF("execu_bypass_base_height",
+ core_params.execu_bypass_base_height);
+ ASSIGN_INT_IF("execu_bypass_start_wiring_level",
+ core_params.execu_bypass_start_wiring_level);
+ ASSIGN_FP_IF("execu_bypass_route_over_perc",
+ core_params.execu_bypass_route_over_perc);
+ ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator);
+ ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages);
+ ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages);
+ ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines);
+ ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines);
+ ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint);
+ ASSIGN_INT_IF("perThreadState", core_params.perThreadState);
+ ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
+
+ else {
+ warnUnrecognizedParam(node_name);
}
+ }
-LoadStoreU ::~LoadStoreU(){
-
- if (!exist) return;
- if(LSQ) {delete LSQ; LSQ = 0;}
+ // Change from MHz to Hz
+ core_params.clockRate *= 1e6;
+ clockRate = core_params.clockRate;
+
+ core_params.peak_commitW = core_params.peak_issueW;
+ core_params.fp_decodeW = core_params.fp_issueW;
+
+
+ num_children = xml_data->nChildNode("stat"); // walk every "stat" child of this component's XML node
+ for (i = 0; i < num_children; i++) { // NOTE(review): i is also handed to getChildNodePtr by address below
+ XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); // NOTE(review): if getChildNodePtr advances *i, the loop's i++ would skip entries - confirm
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value"); // not referenced directly here; presumably consumed by the ASSIGN_* macros - confirm
+
+ if (!node_name) // a missing name only produces a warning
+ warnMissingStatName(statNode->getAttribute("id")); // NOTE(review): no continue follows, so a null node_name still reaches the ASSIGN_FP_IF chain - confirm the macros tolerate it
+
+ ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle);
+ ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle);
+ ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle);
+ ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle);
+ ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle);
+ ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle);
+ ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle);
+ ASSIGN_FP_IF("total_cycles", core_stats.total_cycles);
+ ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles);
+ ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles);
+ ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle);
+ ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle);
+ ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle);
+ ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle);
+ ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle);
+ ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses);
+ ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses);
+ ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses);
+ ASSIGN_FP_IF("function_calls", core_stats.function_calls);
+ ASSIGN_FP_IF("total_instructions", core_stats.total_instructions);
+ ASSIGN_FP_IF("int_instructions", core_stats.int_instructions);
+ ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions);
+ ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions);
+ ASSIGN_FP_IF("branch_mispredictions",
+ core_stats.branch_mispredictions);
+ ASSIGN_FP_IF("load_instructions", core_stats.load_instructions);
+ ASSIGN_FP_IF("store_instructions", core_stats.store_instructions);
+ ASSIGN_FP_IF("committed_instructions",
+ core_stats.committed_instructions);
+ ASSIGN_FP_IF("committed_int_instructions",
+ core_stats.committed_int_instructions);
+ ASSIGN_FP_IF("committed_fp_instructions",
+ core_stats.committed_fp_instructions);
+ ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads);
+ ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes);
+ ASSIGN_FP_IF("rename_reads", core_stats.rename_reads);
+ ASSIGN_FP_IF("rename_writes", core_stats.rename_writes);
+ ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads);
+ ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes);
+ ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads);
+ ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes);
+ ASSIGN_FP_IF("inst_window_wakeup_accesses",
+ core_stats.inst_window_wakeup_accesses);
+ ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads);
+ ASSIGN_FP_IF("fp_inst_window_writes",
+ core_stats.fp_inst_window_writes);
+ ASSIGN_FP_IF("fp_inst_window_wakeup_accesses",
+ core_stats.fp_inst_window_wakeup_accesses);
+ ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads);
+ ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads);
+ ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes);
+ ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes);
+ ASSIGN_FP_IF("context_switches", core_stats.context_switches);
+ ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses);
+ ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses);
+ ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses);
+
+ else { // fallback when none of the names above matched (assumes the ASSIGN_* macros end in a dangling if - TODO confirm)
+ warnUnrecognizedStat(node_name);
}
+ }
-MemManU ::~MemManU(){
+ // Initialize a few variables
+ core_params.multithreaded = core_params.num_hthreads > 1 ? true : false;
+ core_params.pc_width = virtual_address_width;
+ core_params.v_address_width = virtual_address_width;
+ core_params.p_address_width = physical_address_width;
+ core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32;
+ core_params.fp_data_width = core_params.int_data_width;
+ core_params.arch_ireg_width =
+ int(ceil(log2(core_params.archi_Regs_IRF_size)));
+ core_params.arch_freg_width
+ = int(ceil(log2(core_params.archi_Regs_FRF_size)));
+ core_params.num_IRF_entry = core_params.archi_Regs_IRF_size;
+ core_params.num_FRF_entry = core_params.archi_Regs_FRF_size;
+
+ if (core_params.instruction_length <= 0) {
+ errorNonPositiveParam("instruction_length");
+ }
- if (!exist) return;
- if(itlb) {delete itlb; itlb = 0;}
- if(dtlb) {delete dtlb; dtlb = 0;}
- }
+ if (core_params.num_hthreads <= 0) {
+ errorNonPositiveParam("number_hardware_threads");
+ }
-RegFU ::~RegFU(){
+ if (core_params.opcode_width <= 0) {
+ errorNonPositiveParam("opcode_width");
+ }
- if (!exist) return;
- if(IRF) {delete IRF; IRF = 0;}
- if(FRF) {delete FRF; FRF = 0;}
- if(RFWIN) {delete RFWIN; RFWIN = 0;}
- }
+ if (core_params.instruction_buffer_size <= 0) {
+ errorNonPositiveParam("instruction_buffer_size");
+ }
-SchedulerU ::~SchedulerU(){ // (removed by this patch) deletes and nulls each non-null member pointer listed below
+ if (core_params.number_instruction_fetch_ports <= 0) {
+ errorNonPositiveParam("number_instruction_fetch_ports");
+ }
- if (!exist) return; // nothing is released when exist is false
- if(int_inst_window) {delete int_inst_window; int_inst_window = 0;}
- if(fp_inst_window) {delete int_inst_window; int_inst_window = 0;} // NOTE(review): guards on fp_inst_window but deletes/clears int_inst_window; fp_inst_window itself is never deleted here
- if(ROB) {delete ROB; ROB = 0;}
- if(instruction_selection) {delete instruction_selection;instruction_selection = 0;}
- }
+ if (core_params.peak_issueW <= 0) {
+ errorNonPositiveParam("peak_issue_width");
+ } else {
+ assert(core_params.peak_commitW > 0);
+ }
-EXECU ::~EXECU(){
-
- if (!exist) return;
- if(int_bypass) {delete int_bypass; int_bypass = 0;}
- if(intTagBypass) {delete intTagBypass; intTagBypass =0;}
- if(int_mul_bypass) {delete int_mul_bypass; int_mul_bypass = 0;}
- if(intTag_mul_Bypass) {delete intTag_mul_Bypass; intTag_mul_Bypass =0;}
- if(fp_bypass) {delete fp_bypass;fp_bypass = 0;}
- if(fpTagBypass) {delete fpTagBypass;fpTagBypass = 0;}
- if(fp_u) {delete fp_u;fp_u = 0;}
- if(exeu) {delete exeu;exeu = 0;}
- if(mul) {delete mul;mul = 0;}
- if(rfu) {delete rfu;rfu = 0;}
- if(scheu) {delete scheu; scheu = 0;}
+ if (core_params.core_ty == OOO) {
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ core_params.phy_ireg_width =
+ int(ceil(log2(core_params.phy_Regs_IRF_size)));
+ core_params.phy_freg_width =
+ int(ceil(log2(core_params.phy_Regs_FRF_size)));
+ core_params.num_ifreelist_entries =
+ core_params.num_IRF_entry = core_params.phy_Regs_IRF_size;
+ core_params.num_ffreelist_entries =
+ core_params.num_FRF_entry = core_params.phy_Regs_FRF_size;
+ } else if (core_params.scheu_ty == ReservationStation) {
+ core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size)));
+ core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size)));
+ core_params.num_ifreelist_entries = core_params.ROB_size;
+ core_params.num_ffreelist_entries = core_params.ROB_size;
}
+ }
-Core ::~Core(){
+ core_params.regWindowing =
+ (core_params.register_window_size > 0 &&
+ core_params.core_ty == Inorder) ? true : false;
- if(ifu) {delete ifu; ifu = 0;}
- if(lsu) {delete lsu; lsu = 0;}
- if(rnu) {delete rnu; rnu = 0;}
- if(mmu) {delete mmu; mmu = 0;}
- if(exu) {delete exu; exu = 0;}
- if(corepipe) {delete corepipe; corepipe = 0;}
- if(undiffCore) {delete undiffCore;undiffCore = 0;}
- if(l2cache) {delete l2cache;l2cache = 0;}
+ if (core_params.regWindowing) {
+ if (core_params.register_window_throughput <= 0) {
+ errorNonPositiveParam("register_window_throughput");
+ } else if (core_params.register_window_latency <= 0) {
+ errorNonPositiveParam("register_window_latency");
}
+ }
-void Core::set_core_param() // (removed by this patch) copied settings from XML->sys and XML->sys.core[ithCore] into coredynp
-{
- coredynp.opt_local = XML->sys.core[ithCore].opt_local;
- coredynp.x86 = XML->sys.core[ithCore].x86;
- coredynp.Embedded = XML->sys.Embedded;
- coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type;
- coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme;
- coredynp.fetchW = XML->sys.core[ithCore].fetch_width;
- coredynp.decodeW = XML->sys.core[ithCore].decode_width;
- coredynp.issueW = XML->sys.core[ithCore].issue_width;
- coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width;
- coredynp.commitW = XML->sys.core[ithCore].commit_width;
- coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width; // peak commit width is taken from peak_issue_width
- coredynp.predictionW = XML->sys.core[ithCore].prediction_width;
- coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width;
- coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; // FP decode width is taken from fp_issue_width
- coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core;
- coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core;
- coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core;
-
-
- coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads;
- coredynp.multithreaded = coredynp.num_hthreads>1? true:false;
- coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; // NOTE(review): overwritten with the constant 32 near the end of this function
- coredynp.pc_width = XML->sys.virtual_address_width;
-
- coredynp.opcode_length = XML->sys.core[ithCore].opcode_width;
- coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width;
- coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0];
- coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0];
- coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1];
- coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1];
- coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32; // machine_bits rounded up to a multiple of 32
- coredynp.fp_data_width = coredynp.int_data_width;
- coredynp.v_address_width = XML->sys.virtual_address_width;
- coredynp.p_address_width = XML->sys.physical_address_width;
-
- coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme;
- coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size)));
- coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size)));
- coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size;
- coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size;
- coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle;
- coredynp.total_cycles = XML->sys.core[ithCore].total_cycles;
- coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles;
- coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles;
-
- //Max power duty cycle for peak power estimation
-// if (coredynp.core_ty==OOO)
-// {
-// coredynp.IFU_duty_cycle = 1;
-// coredynp.LSU_duty_cycle = 1;
-// coredynp.MemManU_I_duty_cycle =1;
-// coredynp.MemManU_D_duty_cycle =1;
-// coredynp.ALU_duty_cycle =1;
-// coredynp.MUL_duty_cycle =1;
-// coredynp.FPU_duty_cycle =1;
-// coredynp.ALU_cdb_duty_cycle =1;
-// coredynp.MUL_cdb_duty_cycle =1;
-// coredynp.FPU_cdb_duty_cycle =1;
-// }
-// else
-// {
- coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle;
- coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle;
- coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle;
- coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle;
- coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle;
- coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle;
- coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle;
- coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle;
- coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle;
- coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle;
- coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle;
-// }
-
-
- if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder)))
- {
- cout<<"Invalid Core Type"<<endl;
- exit(0); // NOTE(review): exit status 0 on an error path
- }
-// if (coredynp.core_ty==OOO)
-// {
-// cout<<"OOO processor models are being updated and will be available in next release"<<endl;
-// exit(0);
-// }
- if (!((coredynp.scheu_ty==PhysicalRegFile)||(coredynp.scheu_ty==ReservationStation)))
- {
- cout<<"Invalid OOO Scheduler Type"<<endl;
- exit(0);
- }
+ set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads,
+ core_params.num_hthreads, 0);
- if (!((coredynp.rm_ty ==RAMbased)||(coredynp.rm_ty ==CAMbased)))
- {
- cout<<"Invalid OOO Renaming Type"<<endl;
- exit(0);
- }
+ if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) { // NOTE(review): core_ty has already been branched on earlier in the added code, before this check runs
+ cout << "Invalid Core Type" << endl;
+ exit(0); // NOTE(review): exit status 0 on an error path
+ }
-if (coredynp.core_ty==OOO)
-{
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_IRF_size)));
- coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size)));
- coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size;
- coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = XML->sys.core[ithCore].phy_Regs_FRF_size;
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {//ROB serves as Phy RF in RS based OOO
- coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
- coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
- coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size;
- coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size;
+ if (!((core_params.scheu_ty == PhysicalRegFile) || // this check is not nested under a core_ty test, although its message mentions OOO
+ (core_params.scheu_ty == ReservationStation))) {
+ cout << "Invalid OOO Scheduler Type" << endl;
+ exit(0);
+ }
- }
+ if (!((core_params.rm_ty == RAMbased) ||
+ (core_params.rm_ty == CAMbased))) {
+ cout << "Invalid OOO Renaming Type" << endl;
+ exit(0);
+ }
}
- coredynp.globalCheckpoint = 32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See TR for reference.
- coredynp.perThreadState = 8;
- coredynp.instruction_length = 32; // replaces the instruction_length value read from XML earlier in this function
- coredynp.clockRate = XML->sys.core[ithCore].clock_rate;
- coredynp.clockRate *= 1e6;
- coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false;
- coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate; // system total_cycles divided by the already-scaled clock rate
- set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0);
-}