summary | refs | log | tree | commit | diff
path: root/ext/mcpat/core.cc
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mcpat/core.cc')
-rw-r--r-- ext/mcpat/core.cc 7640
1 files changed, 3757 insertions, 3883 deletions
diff --git a/ext/mcpat/core.cc b/ext/mcpat/core.cc
index ba9106061..b25c23cac 100644
--- a/ext/mcpat/core.cc
+++ b/ext/mcpat/core.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -33,491 +34,570 @@
#include <cassert>
#include <cmath>
#include <iostream>
+#include <sstream>
#include <string>
-#include "XML_Parse.h"
#include "basic_circuit.h"
+#include "basic_components.h"
+#include "common.h"
#include "const.h"
#include "core.h"
#include "io.h"
#include "parameter.h"
-//#include "globalvar.h"
-
-InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- IB (0),
- BTB (0),
- ID_inst (0),
- ID_operand (0),
- ID_misc (0),
- exist(exist_)
-{
- if (!exist) return;
- int idx, tag, data, size, line, assoc, banks;
- bool debug= false, is_default = true;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7];
- //Assuming all L1 caches are virtually idxed physically tagged.
- //cache
-
- size = (int)XML->sys.core[ithCore].icache.icache_config[0];
- line = (int)XML->sys.core[ithCore].icache.icache_config[1];
- assoc = (int)XML->sys.core[ithCore].icache.icache_config[2];
- banks = (int)XML->sys.core[ithCore].icache.icache_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0];
- interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1];
- interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2];
- interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- // interface_ip.obj_func_dyn_energy = 0;
- // interface_ip.obj_func_dyn_power = 0;
- // interface_ip.obj_func_leak_power = 0;
- // interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty);
- scktRatio = g_tp.sckt_co_eff;
- chip_PR_overhead = g_tp.chip_layout_overhead;
- macro_PR_overhead = g_tp.macro_layout_overhead;
- icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area);
- area.set_area(area.get_area()+ icache.caches->local_result.area);
- //output_data_csv(icache.caches.local_result);
-
-
- /*
- *iCache controllers
- *miss buffer Each MSHR contains enough state
- *to handle one or more accesses of any type to a single memory line.
- *Due to the generality of the MSHR mechanism,
- *the amount of state involved is non-trivial:
- *including the address, pointers to the cache entry and destination register,
- *written data, and various other pieces of state.
- */
- interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area);
- area.set_area(area.get_area()+ icache.missb->local_result.area);
- //output_data_csv(icache.missb.local_result);
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = icache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area);
- area.set_area(area.get_area()+ icache.ifb->local_result.area);
- //output_data_csv(icache.ifb.local_result);
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ icache.prefetchb->local_result.area);
- //output_data_csv(icache.prefetchb.local_result);
-
- //Instruction buffer
- data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer.
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
- interface_ip.pure_cam = false;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64?
- XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions.
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- IB->area.set_area(IB->area.get_area()+ IB->local_result.area);
- area.set_area(area.get_area()+ IB->local_result.area);
- //output_data_csv(IB.IB.local_result);
-
- // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width;
- // inst_decoder.init_decoder(is_default, &interface_ip);
- // inst_decoder.full_decoder_power();
-
- if (coredynp.predictionW>0)
- {
- /*
- * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
- * It is only a cache without all the buffers in the cache controller since it is more like a
- * look up table than a cache with cache controller. When access miss, no load from other places
- * such as main memory (not actively fill the misses), it is passively updated under two circumstances:
- * 1) when BPT@ID stage finds out current is a taken branch while BTB missed
- * 2) When BPT@ID stage predicts differently than BTB
- * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
- * 4) when EXEU find out wrong target has been provided from BTB.
- *
- */
- size = XML->sys.core[ithCore].BTB.BTB_config[0];
- line = XML->sys.core[ithCore].BTB.BTB_config[1];
- assoc = XML->sys.core[ithCore].BTB.BTB_config[2];
- banks = XML->sys.core[ithCore].BTB.BTB_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
-// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
- tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- interface_ip.pure_cam = false;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:size;
- interface_ip.line_sz = debug?64:line;
- interface_ip.assoc = debug?8:assoc;
- interface_ip.nbanks = debug?1:banks;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area);
- area.set_area(area.get_area()+ BTB->local_result.area);
- ///cout<<"area="<<area<<endl;
-
- BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp);
- area.set_area(area.get_area()+ BPT->area.get_area());
- }
-
- ID_inst = new inst_decoder(is_default, &interface_ip,
- coredynp.opcode_length, 1/*Decoder should not know how many by itself*/,
- coredynp.x86,
- Core_device, coredynp.core_ty);
-
- ID_operand = new inst_decoder(is_default, &interface_ip,
- coredynp.arch_ireg_width, 1,
- coredynp.x86,
- Core_device, coredynp.core_ty);
-
- ID_misc = new inst_decoder(is_default, &interface_ip,
- 8/* Prefix field etc upto 14B*/, 1,
- coredynp.x86,
- Core_device, coredynp.core_ty);
- //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer.
- //So the dynamic power should be multiplied by a few times.
- area.set_area(area.get_area()+ (ID_inst->area.get_area()
- +ID_operand->area.get_area()
- +ID_misc->area.get_area())*coredynp.decodeW);
-}
+// Class-wide constant of RegFU, defined here with value 16.
+// NOTE(review): presumably a scaling factor applied to register-window
+// accesses; its use is outside this view -- confirm in RegFU.
+int RegFU::RFWIN_ACCESS_MULTIPLIER = 16;
+
+// Number of status bits kept per ROB entry.
+// The five bits are: busy, Issued, Finished, speculative, valid
+int SchedulerU::ROB_STATUS_BITS = 5;
+
+/**
+ * Build the instruction fetch unit model from its XML description.
+ *
+ * Construction order: adopt an instruction-cache child component if one is
+ * declared, parse the branch target buffer settings via set_params_stats(),
+ * then instantiate the instruction buffer, the BTB and branch predictor
+ * (only when core_params.predictionW > 0), and the three instruction
+ * decoders. The area of the arrays and decoders built here is accumulated
+ * into this unit's area as each one is created.
+ *
+ * @param _xml_data      XML node describing this unit and its children
+ * @param interface_ip_  Array parameter template; copied into interface_ip
+ * @param _core_params   Core parameters, stored in core_params
+ * @param _core_stats    Core statistics, stored in core_stats
+ * @param exist_         When false the constructor returns immediately and
+ *                       every sub-component pointer stays NULL
+ */
+InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,
+                       const CoreParameters & _core_params,
+                       const CoreStatistics & _core_stats, bool exist_)
+    : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL),
+      BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL),
+      interface_ip(*interface_ip_),
+      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+    if (!exist) return;
+    int tag, data, size, line, assoc, banks;
+    bool is_default = true;
+
+    clockRate = core_params.clockRate;
+    name = "Instruction Fetch Unit";
+    // Check if there is an icache child:
+    int i;
+    icache = NULL;
+    for (i = 0; i < xml_data->nChildNode("component"); i++) {
+        XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
+        XMLCSTR type = childXML->getAttribute("type");
+
+        if (!type) {
+            warnMissingComponentType(childXML->getAttribute("id"));
+            // If the warning helper returns, a NULL type must not reach the
+            // string comparison below.
+            continue;
+        }
+
+        STRCMP(type, "CacheUnit") {
+            // Named child_name so the member 'name' set above is not
+            // shadowed; a component without a name attribute is not the
+            // instruction cache.
+            XMLCSTR child_name = childXML->getAttribute("name");
+            if (child_name &&
+                (strcmp(child_name, "Instruction Cache") == 0 ||
+                 strcmp(child_name, "icache") == 0)) {
+                icache = new CacheUnit(childXML, &interface_ip);
+                children.push_back(icache);
+            }
+        }
+    }
+    // Fills inst_fetch_params / inst_fetch_stats used by the BTB below.
+    set_params_stats();
-BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- globalBPT(0),
- localBPT(0),
- L1_localBPT(0),
- L2_localBPT(0),
- chooser(0),
- RAS(0),
- exist(exist_)
-{
+    //Instruction buffer
+    // One line holds peak_issueW instructions, rounded up to whole bytes.
+    // NOTE(review): the rounding only works if BITS_PER_BYTE is a
+    // floating-point constant (the removed code divided by 8.0) -- confirm.
+    data = core_params.instruction_length * core_params.peak_issueW;
+    line = int(ceil(data / BITS_PER_BYTE));
+    size = core_params.num_hthreads * core_params.instruction_buffer_size *
+        line;
+    if (size < MIN_BUFFER_SIZE) {
+        size = MIN_BUFFER_SIZE;
+    }
+
+    interface_ip.cache_sz = size;
+    interface_ip.line_sz = line;
+    interface_ip.assoc = core_params.instruction_buffer_assoc;
+    interface_ip.nbanks = core_params.instruction_buffer_nbanks;
+    interface_ip.out_w = line * BITS_PER_BYTE;
+    interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0;
+    interface_ip.tag_w = core_params.instruction_buffer_tag_width;
+    interface_ip.access_mode = Normal;
+    interface_ip.obj_func_dyn_energy = 0;
+    interface_ip.obj_func_dyn_power = 0;
+    interface_ip.obj_func_leak_power = 0;
+    interface_ip.obj_func_cycle_t = 1;
+    interface_ip.num_rw_ports =
+        core_params.number_instruction_fetch_ports;
+    interface_ip.num_rd_ports = 0;
+    interface_ip.num_wr_ports = 0;
+    interface_ip.num_se_rd_ports = 0;
+    interface_ip.num_search_ports = 0;
+    interface_ip.is_cache = false;
+    interface_ip.pure_ram = true;
+    interface_ip.pure_cam = false;
+    interface_ip.throughput = 1.0 / clockRate;
+    interface_ip.latency = 1.0 / clockRate;
+
+    IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer",
+                     Core_device, clockRate, core_params.opt_local,
+                     core_params.core_ty);
+    IB->area.set_area(IB->area.get_area() + IB->local_result.area);
+    area.set_area(area.get_area() + IB->local_result.area);
+
+    if (core_params.predictionW > 0) {
/*
- * Branch Predictor, accessed during ID stage.
- * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264,
- * including global predictor, local two level predictor, and Chooser.
- * The Branch predictor also includes a RAS (return address stack) for function calls
- * Branch predictors are tagged by thread ID and modeled as 1-way associative $
- * However RAS return address stacks are duplicated for each thread.
- * TODO:Data Width need to be computed more precisely *
+         * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
+         * It is only a cache without all the buffers in the cache controller since it is more like a
+         * look up table than a cache with cache controller. When access miss, no load from other places
+         * such as main memory (not actively fill the misses), it is passively updated under the following circumstances:
+         * 1) when BPT@ID stage finds out current is a taken branch while BTB missed
+         * 2) When BPT@ID stage predicts differently than BTB
+         * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
+         * 4) when EXEU find out wrong target has been provided from BTB.
+         *
*/
- if (!exist) return;
- int tag, data;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- interface_ip.assoc = 1;
- interface_ip.pure_cam = false;
- if (coredynp.multithreaded)
- {
-
- tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS);
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
-
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- }
- else
- {
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
-
- }
- //Global predictor
- data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+        size = inst_fetch_params.btb_size;
+        line = inst_fetch_params.btb_block_size;
+        assoc = inst_fetch_params.btb_assoc;
+        banks = inst_fetch_params.btb_num_banks;
+        // The tag covers the whole virtual address plus a thread id; no
+        // index bits are subtracted, so no index width is computed here.
+        tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads)))
+            + EXTRA_TAG_BITS;
+
+        interface_ip.cache_sz = size;
+        interface_ip.line_sz = line;
+        interface_ip.assoc = assoc;
+        interface_ip.nbanks = banks;
+        interface_ip.out_w = line * BITS_PER_BYTE;
+        interface_ip.specific_tag = tag > 0;
+        interface_ip.tag_w = tag;
+        interface_ip.access_mode = Normal;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+        interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
+        interface_ip.obj_func_cycle_t = 1;
+        interface_ip.num_rw_ports = 1;
+        interface_ip.num_rd_ports = core_params.predictionW;
+        interface_ip.num_wr_ports = core_params.predictionW;
interface_ip.num_se_rd_ports = 0;
- globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area);
- area.set_area(area.get_area()+ globalBPT->local_result.area);
-
- //Local BPT (Level 1)
- data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area);
- area.set_area(area.get_area()+ L1_localBPT->local_result.area);
-
- //Local BPT (Level 2)
- data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area);
- area.set_area(area.get_area()+ L2_localBPT->local_result.area);
-
- //Chooser
- data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty);
- chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area);
- area.set_area(area.get_area()+ chooser->local_result.area);
-
- //RAS return address stacks are Duplicated for each thread.
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
- data = int(ceil(coredynp.pc_width/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+        interface_ip.num_search_ports = 0;
+        interface_ip.is_cache = true;
+        interface_ip.pure_ram = false;
+        interface_ip.pure_cam = false;
+        // BTB latency/throughput are divided by the clock rate, matching
+        // the 1.0 / clockRate form used for the instruction buffer above.
+        interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate;
+        interface_ip.latency = inst_fetch_params.btb_latency / clockRate;
+
+        BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer",
+                          Core_device, clockRate, core_params.opt_local,
+                          core_params.core_ty);
+        area.set_area(area.get_area() + BTB->local_result.area);
+
+        BPT = new BranchPredictor(xml_data, &interface_ip,
+                                  core_params, core_stats);
+        area.set_area(area.get_area() + BPT->area.get_area());
+    }
+
+    ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder",
+                                     is_default, &interface_ip,
+                                     core_params.opcode_width,
+                                     core_params.decodeW,
+                                     core_params.x86, clockRate,
+                                     Core_device, core_params.core_ty);
+
+    ID_operand = new InstructionDecoder(xml_data,
+                                        "Instruction Operand Decoder",
+                                        is_default, &interface_ip,
+                                        core_params.arch_ireg_width,
+                                        core_params.decodeW,
+                                        core_params.x86, clockRate,
+                                        Core_device, core_params.core_ty);
+
+    ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder",
+                                     is_default, &interface_ip,
+                                     core_params.micro_opcode_length,
+                                     core_params.decodeW,
+                                     core_params.x86, clockRate,
+                                     Core_device, core_params.core_ty);
+    // NOTE(review): decodeW is both passed to each decoder and used as a
+    // multiplier here -- confirm the decoder area is per-decoder so the
+    // width is not counted twice.
+    area.set_area(area.get_area()+ (ID_inst->area.get_area()
+                                    + ID_operand->area.get_area()
+                                    + ID_misc->area.get_area())
+                  * core_params.decodeW);
+}
+
+/**
+ * Read the branch target buffer configuration and access counts from XML.
+ *
+ * Scans the "component" children of xml_data for nodes whose type is
+ * "BranchTargetBuffer". Their "param" entries populate inst_fetch_params
+ * and their "stat" entries populate inst_fetch_stats. Afterwards the BTB
+ * size, block size, associativity and bank count are each reported through
+ * errorNonPositiveParam() if they are not positive.
+ */
+void
+InstFetchU::set_params_stats() {
+    int num_children = xml_data->nChildNode("component");
+    int i;
+    memset(&inst_fetch_params, 0, sizeof(InstFetchParameters));
+    // The BTB counters are only written when the XML supplies them; start
+    // them at zero so a missing stat is not read back as garbage.
+    inst_fetch_stats.btb_read_accesses = 0;
+    inst_fetch_stats.btb_write_accesses = 0;
+    for (i = 0; i < num_children; i++) {
+        XMLNode* child = xml_data->getChildNodePtr("component", &i);
+        XMLCSTR type = child->getAttribute("type");
+
+        if (!type) {
+            warnMissingComponentType(child->getAttribute("id"));
+            // If the warning helper returns, skip the node rather than
+            // comparing against a NULL string.
+            continue;
+        }
+
+        STRCMP(type, "BranchTargetBuffer") {
+            int sub_num_children = child->nChildNode("param");
+            int j;
+            for (j = 0; j < sub_num_children; j++) {
+                XMLNode* paramNode = child->getChildNodePtr("param", &j);
+                XMLCSTR node_name = paramNode->getAttribute("name");
+                XMLCSTR value = paramNode->getAttribute("value");
+
+                // NOTE(review): the ASSIGN_* macros presumably compare
+                // node_name and convert value -- confirm in their definition.
+                if (!node_name) {
+                    warnMissingParamName(paramNode->getAttribute("id"));
+                    continue;
+                }
+
+                ASSIGN_INT_IF("size", inst_fetch_params.btb_size);
+                ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size);
+                ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc);
+                ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks);
+                ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency);
+                ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput);
+                ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports);
+
+                else {
+                    warnUnrecognizedParam(node_name);
+                }
+            }
+
+            sub_num_children = child->nChildNode("stat");
+            for (j = 0; j < sub_num_children; j++) {
+                XMLNode* statNode = child->getChildNodePtr("stat", &j);
+                XMLCSTR node_name = statNode->getAttribute("name");
+                XMLCSTR value = statNode->getAttribute("value");
+
+                if (!node_name) {
+                    warnMissingStatName(statNode->getAttribute("id"));
+                    continue;
+                }
+
+                ASSIGN_FP_IF("read_accesses",
+                             inst_fetch_stats.btb_read_accesses);
+                ASSIGN_FP_IF("write_accesses",
+                             inst_fetch_stats.btb_write_accesses);
+                else {
+                    warnUnrecognizedStat(node_name);
+                }
+            }
+        }
+    }
+
+    // Parameter sanity check
+    if (inst_fetch_params.btb_size <= 0) {
+        errorNonPositiveParam("size");
+    }
+
+    if (inst_fetch_params.btb_block_size <= 0) {
+        errorNonPositiveParam("block_size");
+    }
+
+    if (inst_fetch_params.btb_assoc <= 0) {
+        errorNonPositiveParam("assoc");
+    }
+
+    if (inst_fetch_params.btb_num_banks <= 0) {
+        errorNonPositiveParam("num_banks");
+    }
+}
+
+BranchPredictor::BranchPredictor(XMLNode* _xml_data,
+ InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats,
+ bool exist_)
+ : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL),
+ L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ int tag;
+ int data;
+ int size;
+
+ clockRate = core_params.clockRate;
+ name = "Branch Predictor";
+
+ // Common interface parameters for the branch predictor structures
+ interface_ip.pure_cam = false;
+
+ if (core_params.multithreaded) {
+ tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS);
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ } else {
+ interface_ip.specific_tag = 0;
+ interface_ip.tag_w = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_ram = true;
+ }
+
+ // Parse params and stats from XML
+ set_params_stats();
+
+ // Common interface parameters for the branch predictor structures
+ interface_ip.assoc = branch_pred_params.assoc;
+ interface_ip.nbanks = branch_pred_params.nbanks;
+
+ //Global predictor
+ data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE));
+ size = data * branch_pred_params.global_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + globalBPT->local_result.area);
+
+ //Local BPT (Level 1)
+ data = int(ceil(branch_pred_params.local_l1_predictor_size /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.local_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ L1_localBPT = new ArrayST(xml_data, &interface_ip,
+ "Local Predictor, Level 1",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ L1_localBPT->area.set_area(L1_localBPT->area.get_area() +
+ L1_localBPT->local_result.area);
+ area.set_area(area.get_area()+ L1_localBPT->local_result.area);
+
+ //Local BPT (Level 2)
+ data = int(ceil(branch_pred_params.local_l2_predictor_size /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.local_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ L2_localBPT = new ArrayST(xml_data, &interface_ip,
+ "Local Predictor, Level 2",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + L2_localBPT->local_result.area);
+
+ //Chooser
+ data = int(ceil(branch_pred_params.chooser_predictor_bits /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.chooser_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + chooser->local_result.area);
+
+ //RAS return address stacks are Duplicated for each thread.
+ data = int(ceil(core_params.pc_width / BITS_PER_BYTE));
+ size = data * core_params.RAS_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate,
+ core_params.opt_local, core_params.core_ty);
+ RAS->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + RAS->local_result.area *
+ core_params.num_hthreads);
+
+}
+
+void
+BranchPredictor::set_params_stats() { // Reads the "param" entries of every BranchPredictor "component" child of xml_data into branch_pred_params.
+ int num_children = xml_data->nChildNode("component");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* child = xml_data->getChildNodePtr("component", &i); // index is passed by pointer; whether the callee updates it is not visible here
+ XMLCSTR type = child->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(child->getAttribute("id"));
+
+ STRCMP(type, "BranchPredictor") { // NOTE(review): type may still be null here unless the warning above aborts -- confirm
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value"); // not referenced directly below; presumably consumed inside ASSIGN_INT_IF -- confirm
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("assoc", branch_pred_params.assoc); // presumably matches node_name against the key and stores the integer value; the trailing else below implies an if/else-if chain -- confirm against the macro
+ ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks);
+ ASSIGN_INT_IF("local_l1_predictor_size",
+ branch_pred_params.local_l1_predictor_size);
+ ASSIGN_INT_IF("local_l2_predictor_size",
+ branch_pred_params.local_l2_predictor_size);
+ ASSIGN_INT_IF("local_predictor_entries",
+ branch_pred_params.local_predictor_entries);
+ ASSIGN_INT_IF("global_predictor_entries",
+ branch_pred_params.global_predictor_entries);
+ ASSIGN_INT_IF("global_predictor_bits",
+ branch_pred_params.global_predictor_bits);
+ ASSIGN_INT_IF("chooser_predictor_entries",
+ branch_pred_params.chooser_predictor_entries);
+ ASSIGN_INT_IF("chooser_predictor_bits",
+ branch_pred_params.chooser_predictor_bits);
+
+ else { // reached when none of the keys above matched
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ // The core reads in the number of branches and the number of
+ // function calls, and these values are passed through the
+ // core_stats variable, so no statistics need to be read in here.
+ }
+ }
+}
+
+SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), int_inst_window(NULL),
+ fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL),
+ fp_instruction_selection(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return; // Builds the scheduler arrays (instruction windows, ROB) and selection logic; nothing is built when the unit does not exist.
+ int tag;
+ int data;
+ int size;
+ int line;
+ bool is_default = true;
+ string tmp_name;
+
+ clockRate = core_params.clockRate;
+ name = "Instruction Scheduler";
+ if ((core_params.core_ty == Inorder && core_params.multithreaded)) {
+ //Instruction issue queue, in-order multi-issue or multithreaded
+ //processor also has this structure. Unified window for Inorder
+ //processors
+ //This tag width is the normal thread state bits based on
+ //Niagara Design
+ tag = int(log2(core_params.num_hthreads) * core_params.perThreadState);
+ data = core_params.instruction_length;
+ line = int(ceil(data / BITS_PER_BYTE)); // NOTE(review): ceil only rounds up if BITS_PER_BYTE is a floating-point constant -- confirm its definition
+ size = core_params.instruction_window_size * line;
+ if (size < MIN_BUFFER_SIZE) { // clamp the array size to the MIN_BUFFER_SIZE floor
+ size = MIN_BUFFER_SIZE;
+ }
+
+ //NOTE: x86 inst can be very lengthy, up to 15B.
+ //Source: Intel® 64 and IA-32 Architectures
+ //Software Developer’s Manual
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0; // a tag width is only supplied when it is non-zero
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_issueW;
+ interface_ip.num_wr_ports = core_params.peak_issueW;
interface_ip.num_se_rd_ports = 0;
- RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty);
- RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
- area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
+ interface_ip.num_search_ports = core_params.peak_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ int_inst_window = new ArrayST(xml_data, &interface_ip,
+ "InstFetchQueue", Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ int_inst_window->output_data.area *= core_params.num_pipelines; // scale the array's own reported area by the pipeline count
+ area.set_area(area.get_area() + int_inst_window->local_result.area *
+ core_params.num_pipelines);
+ Iw_height = int_inst_window->local_result.cache_ht;
-}
+ /*
+ * selection logic
+ * In a single-issue in-order multithreaded processor like Niagara, issue width = 1 * number_of_threads, since the processor does need to pick
+ * instructions from multiple ready ones (although these ready ones are from different threads), whereas SMT processors do not distinguish which
+ * thread an instruction belongs to at the issue stage.
+ */
-SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- int_inst_window(0),
- fp_inst_window(0),
- ROB(0),
- instruction_selection(0),
- exist(exist_)
- {
- if (!exist) return;
- int tag, data;
- bool is_default=true;
- string tmp_name;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- if ((coredynp.core_ty==Inorder && coredynp.multithreaded))
- {
- //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors
- tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design
- data = XML->sys.core[ithCore].instruction_length;
- //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures
- //Software Developer’s Manual
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.peak_issueW;
- int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
- int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- //output_data_csv(iRS.RS.local_result);
- Iw_height =int_inst_window->local_result.cache_ht;
-
- /*
- * selection logic
- * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
- * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
- * at the issue stage.
- */
-
- instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
- coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads,
- &interface_ip, Core_device, coredynp.core_ty);
+ int_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.instruction_window_size,
+ core_params.peak_issueW *
+ core_params.num_hthreads,
+ &interface_ip,
+ "Int Instruction Selection Logic",
+ core_stats.inst_window_wakeup_accesses,
+ clockRate, Core_device, core_params.core_ty);
+
+ if (core_params.fp_instruction_window_size > 0) { // FP selection logic is only built when an FP window size is configured
+ fp_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.fp_instruction_window_size,
+ core_params.fp_issueW *
+ core_params.num_hthreads,
+ &interface_ip,
+ "FP Instruction Selection Logic",
+ core_stats.fp_inst_window_wakeup_accesses,
+ clockRate, Core_device,
+ core_params.core_ty);
}
+ }
- if (coredynp.core_ty==OOO)
- {
+ if (core_params.core_ty == OOO) {
/*
* CAM based instruction window
* For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored
@@ -525,3611 +605,3405 @@ SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* in
* It is written once and read twice(two operands) before an instruction can be issued.
* X86 instruction can be very long up to 15B. add instruction length in XML
*/
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- tag = coredynp.phy_ireg_width;
- // Each time only half of the tag is compared, but two tag should be stored.
- // This underestimate the search power
- data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0));
- //Data width being divided by 2 means only after both operands available the whole data will be read out.
- //This is modeled using two equivalent readouts with half of the data width
- tmp_name = "InstIssueQueue";
- }
- else
- {
- tag = coredynp.phy_ireg_width;
- // Each time only half of the tag is compared, but two tag should be stored.
- // This underestimate the search power
- data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+
- 2*coredynp.int_data_width)/2.0)/8.0));
- //Data width being divided by 2 means only after both operands available the whole data will be read out.
- //This is modeled using two equivalent readouts with half of the data width
-
- tmp_name = "IntReservationStation";
- }
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 2*1.0/clockRate;
- interface_ip.latency = 2*1.0/clockRate;
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ tag = core_params.phy_ireg_width;
+ data = int((ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_ireg_width -
+ core_params.arch_ireg_width)) /
+ (double)NUM_SOURCE_OPERANDS) /
+ BITS_PER_BYTE));
+ tmp_name = "Integer Instruction Window";
+ } else {
+ tag = core_params.phy_ireg_width;
+ data = int(ceil(((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_ireg_width -
+ core_params.arch_ireg_width) +
+ 2 * core_params.int_data_width) /
+ (double)NUM_SOURCE_OPERANDS) /
+ BITS_PER_BYTE));
+ tmp_name = "Integer Reservation Station";
+ }
+
+ size = data * core_params.instruction_window_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_issueW;
+ interface_ip.num_wr_ports = core_params.peak_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.peak_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate; // data width above was divided by NUM_SOURCE_OPERANDS; timing is scaled by the same factor
+ interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate;
+ int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name,
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ int_inst_window->output_data.area *= core_params.num_pipelines;
+ area.set_area(area.get_area() + int_inst_window->local_result.area *
+ core_params.num_pipelines);
+ Iw_height = int_inst_window->local_result.cache_ht;
+
+ //FU inst window
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width;
+ data = int(ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_freg_width -
+ core_params.arch_freg_width)) / BITS_PER_BYTE));
+ tmp_name = "FP Instruction Window";
+ } else {
+ tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width; // NOTE(review): uses the integer register width while the branch above uses phy_freg_width -- confirm this is intended
+ data = int(ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_freg_width -
+ core_params.arch_freg_width) +
+ NUM_SOURCE_OPERANDS * core_params.fp_data_width) /
+ BITS_PER_BYTE));
+ tmp_name = "FP Reservation Station";
+ }
+
+ size = data * core_params.fp_instruction_window_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.peak_issueW;
- int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
- int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- Iw_height =int_inst_window->local_result.cache_ht;
- //FU inst window
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared
- data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0));
- tmp_name = "FPIssueQueue";
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.fp_issueW;
+ interface_ip.num_wr_ports = core_params.fp_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.fp_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fp_inst_window =
+ new ArrayST(xml_data, &interface_ip, tmp_name, Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ fp_inst_window->output_data.area *= core_params.num_fp_pipelines;
+ area.set_area(area.get_area() + fp_inst_window->local_result.area
+ *core_params.num_fp_pipelines);
+ fp_Iw_height = fp_inst_window->local_result.cache_ht;
+
+ if (core_params.ROB_size > 0) {
+ /*
+ * if ROB_size = 0, then the target processor does not support hardware-based
+ * speculation, i.e., the processor allows OOO issue as well as OOO completion, which
+ * means branches must be resolved before instructions are issued into the instruction window, since
+ * there is no chance to flush a mispredicted branch path after instructions are issued in this situation.
+ *
+ * ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
+ * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
+ * However, this approach is abandoned due to its high power and poor scalability.
+ * McPAT uses current implementation of ROB as circular buffer.
+ * ROB is written once when instruction is issued and read once when the instruction is committed.
+ */
+ int robExtra = int(ceil(ROB_STATUS_BITS +
+ log2(core_params.num_hthreads)));
+
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ //PC is to id the instruction for recover exception.
+ //inst is used to map the renamed dest. registers. so that
+ //commit stage can know which reg/RRAT to update
+ data = int(ceil((robExtra + core_params.pc_width +
+ core_params.phy_ireg_width) / BITS_PER_BYTE));
+ } else {
+ //in RS based OOO, ROB also contains value of destination reg
+ data = int(ceil((robExtra + core_params.pc_width +
+ core_params.phy_ireg_width +
+ core_params.fp_data_width) / BITS_PER_BYTE));
+ }
+
+ interface_ip.cache_sz = data * core_params.ROB_size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.ROB_assoc;
+ interface_ip.nbanks = core_params.ROB_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.ROB_tag_width > 0;
+ interface_ip.tag_w = core_params.ROB_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_commitW; // read ports follow commit width, write ports follow issue width
+ interface_ip.num_wr_ports = core_params.peak_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ROB->output_data.area *= core_params.num_pipelines;
+ area.set_area(area.get_area() + ROB->local_result.area *
+ core_params.num_pipelines);
+ ROB_height = ROB->local_result.cache_ht;
+ }
+
+ int_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.instruction_window_size,
+ core_params.peak_issueW, &interface_ip,
+ "Int Instruction Selection Logic",
+ core_stats.inst_window_wakeup_accesses,
+ clockRate, Core_device, core_params.core_ty);
+
+ if (core_params.fp_instruction_window_size > 0) {
+ fp_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.fp_instruction_window_size,
+ core_params.fp_issueW, &interface_ip,
+ "FP Instruction Selection Logic",
+ core_stats.fp_inst_window_wakeup_accesses,
+ clockRate, Core_device,
+ core_params.core_ty);
}
- else
- {
- tag = 2*coredynp.phy_ireg_width;
- data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+
- 2*coredynp.fp_data_width)/8.0));
- tmp_name = "FPReservationStation";
+
+ }
+}
+
+LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return; // Builds the optional data cache child, the store queue and (OOO only) the load queue; nothing is built when the unit does not exist.
+ int tag;
+ int line;
+ int size;
+ int ldst_opcode = core_params.opcode_width;
+
+ clockRate = core_params.clockRate;
+ name = "Load/Store Unit";
+
+ // Look for a data cache among the "component" children:
+ int i;
+ dcache = NULL;
+ for( i = 0; i < xml_data->nChildNode("component"); i++ ) {
+ XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = childXML->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(childXML->getAttribute("id"));
+
+ STRCMP(type, "CacheUnit") {
+ XMLCSTR name = childXML->getAttribute("name"); // NOTE(review): this local hides the outer `name` assigned above, and may be null if the attribute is absent -- strcmp below would then be undefined; confirm
+ if (strcmp(name, "Data Cache") == 0 ||
+ strcmp(name, "dcache") == 0) {
+ dcache = new CacheUnit(childXML, &interface_ip);
+ children.push_back(dcache);
+ }
}
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ }
+
+ /*
+ * LSU: in-order processors do not have a separate load queue; they use a
+ * unified LSQ partitioned among threads.
+ * This array is actually the store queue, but for in-order processors it serves as both LoadQ and StoreQ.
+ */
+ tag = ldst_opcode + virtual_address_width + // virtual_address_width, data_path_width and cdb_overhead are defined outside this block
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
+ line = int(ceil(data_path_width / BITS_PER_BYTE));
+ size = core_params.store_buffer_size * line * core_params.num_hthreads;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.store_buffer_assoc;
+ interface_ip.nbanks = core_params.store_buffer_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.memory_ports;
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + LSQ->local_result.area);
+ area.set_area(area.get_area()*cdb_overhead); // unit area is scaled by cdb_overhead; the height below uses its square root
+ lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead);
+
+ if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) { // separate load queue only for OOO cores with a configured load buffer
+ tag = ldst_opcode + virtual_address_width +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
+ line = int(ceil(data_path_width / BITS_PER_BYTE));
+ size = core_params.load_buffer_size * line * core_params.num_hthreads;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.load_buffer_assoc;
+ interface_ip.nbanks = core_params.load_buffer_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_issueW;
- interface_ip.num_wr_ports = coredynp.fp_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.fp_issueW;
- fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
- fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
- area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
- fp_Iw_height =fp_inst_window->local_result.cache_ht;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- /*
- * if ROB_size = 0, then the target processor does not support hardware-based
- * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which
- * means branch must be resolved before instruction issued into instruction window, since
- * there is no change to flush miss-predict branch path after instructions are issued in this situation.
- *
- * ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
- * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
- * However, this approach is abandoned due to its high power and poor scalablility.
- * McPAT uses current implementation of ROB as circular buffer.
- * ROB is written once when instruction is issued and read once when the instruction is committed. *
- */
- int robExtra = int(ceil(5 + log2(coredynp.num_hthreads)));
- //5 bits are: busy, Issued, Finished, speculative, valid
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- //PC is to id the instruction for recover exception.
- //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update
-// data = int(ceil((robExtra+coredynp.pc_width +
-// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0));
- data = int(ceil((robExtra+coredynp.pc_width +
- coredynp.phy_ireg_width)/8.0));
- }
- else
- {
- //in RS based OOO, ROB also contains value of destination reg
-// data = int(ceil((robExtra+coredynp.pc_width +
-// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
- data = int(ceil((robExtra + coredynp.pc_width +
- coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
- }
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_commitW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = 0;
- ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
- ROB_height =ROB->local_result.cache_ht;
- }
-
- instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
- coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty);
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.memory_ports;
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device,
+ clockRate, core_params.opt_local,
+ core_params.core_ty);
+ LoadQ->area.set_area(LoadQ->area.get_area() +
+ LoadQ->local_result.area);
+ area.set_area(area.get_area()*cdb_overhead); // NOTE(review): LoadQ's area is not added to the unit area here, and cdb_overhead is applied a second time -- confirm this is intended
+ lsq_height = (LSQ->local_result.cache_ht +
+ LoadQ->local_result.cache_ht) * sqrt(cdb_overhead);
}
+
}
-LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- LSQ(0),
- exist(exist_)
-{
- if (!exist) return;
- int idx, tag, data, size, line, assoc, banks;
- bool debug= false;
- int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7];
-
- interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- //Dcache
- size = (int)XML->sys.core[ithCore].dcache.dcache_config[0];
- line = (int)XML->sys.core[ithCore].dcache.dcache_config[1];
- assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2];
- banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0];
- interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1];
- interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2];
- interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area);
- area.set_area(area.get_area()+ dcache.caches->local_result.area);
- //output_data_csv(dcache.caches.local_result);
-
- //dCache controllers
- //miss buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area);
- area.set_area(area.get_area()+ dcache.missb->local_result.area);
- //output_data_csv(dcache.missb.local_result);
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = dcache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area);
- area.set_area(area.get_area()+ dcache.ifb->local_result.area);
- //output_data_csv(dcache.ifb.local_result);
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ dcache.prefetchb->local_result.area);
- //output_data_csv(dcache.prefetchb.local_result);
-
- //WBB
-
- if (cache_p==Write_back)
- {
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = dcache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area);
- area.set_area(area.get_area()+ dcache.wbb->local_result.area);
- //output_data_csv(dcache.wbb.local_result);
- }
-
- /*
- * LSU--in-order processors do not have separate load queue: unified lsq
- * partitioned among threads
- * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
- */
- tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS;
- data = XML->sys.machine_bits;
- interface_ip.is_cache = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports;
- LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty);
- LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area);
- area.set_area(area.get_area()+ LSQ->local_result.area);
- area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(LSQ.LSQ.local_result);
- lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
-
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports;
- LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
- LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area);
- area.set_area(area.get_area()+ LoadQ->local_result.area);
- area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(LoadQ.LoadQ.local_result);
- lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
- }
+MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, // Builds the ITLB and DTLB array models of the memory management unit.
+ const CoreParameters & _core_params, // read here for clockRate, num_hthreads, port counts, opt_local, core_ty
+ const CoreStatistics & _core_stats, bool exist_) // _core_stats is only stored here; exist_ == false builds nothing
+ : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL),
+ interface_ip(*interface_ip_), // initialized from the caller's object (presumably a by-value copy -- confirm member type)
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return; // unit absent: itlb and dtlb stay NULL, nothing below runs
+ int tag; // tag width handed to the array model
+ int data; // entry payload: physical address bits above the page offset
+ int line; // entry size after dividing data by BITS_PER_BYTE
+
+ clockRate = core_params.clockRate;
+ name = "Memory Management Unit";
+
+ set_params_stats(); // fills mem_man_params / mem_man_stats from the XML before they are read below
+
+ // These three flags are set once and left unchanged for both the ITLB and the DTLB
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ // ITLB. TLBs are partitioned among threads according to Niagara and Nehalem
+ tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + // virtual address bits above the page offset ...
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; // ... plus ceil(log2(#hardware threads)) bits and EXTRA_TAG_BITS
+ data = physical_address_width - int(floor(log2(virtual_memory_page_size)));
+ line = int(ceil(data / BITS_PER_BYTE)); // NOTE(review): rounds up only if BITS_PER_BYTE is floating point (the replaced code divided by 8.0) -- confirm
+
+ interface_ip.cache_sz = mem_man_params.itlb_number_entries * line; // total capacity = entries * entry size
+ interface_ip.line_sz = line;
+ interface_ip.assoc = mem_man_params.itlb_assoc;
+ interface_ip.nbanks = mem_man_params.itlb_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0; // true whenever the computed tag width is positive
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports; // ITLB: one read/write port per instruction fetch port
+ interface_ip.num_rd_ports = 0;
+ interface_ip.num_wr_ports = 0;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.number_instruction_fetch_ports;
+ interface_ip.throughput = mem_man_params.itlb_throughput / clockRate; // XML value divided by clockRate (presumably cycles -> seconds; confirm units)
+ interface_ip.latency = mem_man_params.itlb_latency / clockRate;
+ itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + itlb->local_result.area); // add the ITLB array area to this unit's total
+
+ // DTLB: tag, data and line are recomputed with the same formulas as for the ITLB
+ tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
+ data = physical_address_width - int(floor(log2(virtual_memory_page_size)));
+ line = int(ceil(data / BITS_PER_BYTE)); // NOTE(review): same rounding caveat as for the ITLB line size
+
+ interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = mem_man_params.dtlb_assoc;
+ interface_ip.nbanks = mem_man_params.dtlb_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0; // DTLB: no shared read/write ports ...
+ interface_ip.num_rd_ports = core_params.memory_ports; // ... separate read and write ports, one of each per memory port
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate; // XML value divided by clockRate (presumably cycles -> seconds; confirm units)
+ interface_ip.latency = mem_man_params.dtlb_latency / clockRate;
+ dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + dtlb->local_result.area); // add the DTLB array area to this unit's total
 }
-MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- itlb(0),
- dtlb(0),
- exist(exist_)
-{
- if (!exist) return;
- int tag, data;
- bool debug= false;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.specific_tag = 1;
- //Itlb TLBs are partioned among threads according to Nigara and Nehalem
- tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
- data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty);
- itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area);
- area.set_area(area.get_area()+ itlb->local_result.area);
- //output_data_csv(itlb.tlb.local_result);
-
- //dtlb
- tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
- data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
- dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty);
- dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area);
- area.set_area(area.get_area()+ dtlb->local_result.area);
- //output_data_csv(dtlb.tlb.local_result);
+void
+MemManU::set_params_stats() { // Loads ITLB/DTLB parameters and statistics from this component's XML children.
+ memset(&mem_man_params, 0, sizeof(MemoryManagementParams)); // zero both structs first; only names matched below are assigned
+ memset(&mem_man_stats, 0, sizeof(MemoryManagementStats));
+ int num_children = xml_data->nChildNode("component");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* child = xml_data->getChildNodePtr("component", &i); // NOTE(review): index is passed by pointer -- confirm the callee does not advance it
+ XMLCSTR type = child->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(child->getAttribute("id")); // NOTE(review): no `continue` here, so a NULL type still reaches the STRCMP checks below -- confirm that is safe
+
+ STRCMP(type, "InstructionTLB") { // STRCMP is a macro defined elsewhere; presumably an `if` on string equality -- confirm
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) { // ITLB <param> children
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value"); // not referenced by name below; presumably consumed inside the ASSIGN_*_IF macros
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id")); // NOTE(review): execution continues with a NULL node_name -- confirm the macros tolerate it
+
+ ASSIGN_INT_IF("number_entries",
+ mem_man_params.itlb_number_entries); // ASSIGN_*_IF are macros defined elsewhere; presumably they store `value` when node_name matches the key
+ ASSIGN_FP_IF("latency", mem_man_params.itlb_latency);
+ ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput);
+ ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc); // NOTE(review): assoc and nbanks use the FP macro although number_entries uses the INT one -- confirm field types
+ ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks);
+
+ else { // no visible `if`: this else must bind to code expanded from the preceding macro
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ sub_num_children = child->nChildNode("stat");
+ for (j = 0; j < sub_num_children; j++) { // ITLB <stat> children
+ XMLNode* statNode = child->getChildNodePtr("stat", &j);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("total_accesses",
+ mem_man_stats.itlb_total_accesses);
+ ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses);
+ ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts);
+ else { // binds to the macro expansion on the previous line
+ warnUnrecognizedStat(node_name);
+ }
+ }
+ } STRCMP(type, "DataTLB") { // no else branch is visible after this block: other component types produce no warning here
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) { // DTLB <param> children, same key names as the ITLB branch
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("number_entries",
+ mem_man_params.dtlb_number_entries);
+ ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency);
+ ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput);
+ ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc); // NOTE(review): FP macro used for assoc/nbanks -- confirm field types
+ ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks);
+
+ else { // binds to the macro expansion above
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ sub_num_children = child->nChildNode("stat");
+ for (j = 0; j < sub_num_children; j++) { // DTLB <stat> children: reads and writes are tracked separately
+ XMLNode* statNode = child->getChildNodePtr("stat", &j);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("read_accesses",
+ mem_man_stats.dtlb_read_accesses);
+ ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses);
+ ASSIGN_FP_IF("write_accesses",
+ mem_man_stats.dtlb_write_accesses);
+ ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses);
+ ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts);
+
+ else { // binds to the macro expansion above
+ warnUnrecognizedStat(node_name);
+ }
+ }
+ }
+ }
 }
-RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- IRF (0),
- FRF (0),
- RFWIN (0),
- exist(exist_)
- {
- /*
- * processors have separate architectural register files for each thread.
- * therefore, the bypass buses need to travel across all the register files.
- */
- if (!exist) return;
- int data;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- //**********************************IRF***************************************
- data = coredynp.int_data_width;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 2*coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
- IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
- area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
- //area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(IRF.RF.local_result);
-
- //**********************************FRF***************************************
- data = coredynp.fp_data_width;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_, // Builds the integer and FP register file arrays and, optionally, the register-window array.
+ const CoreParameters & _core_params, // every size, port count and scaling factor below is read from here
+ const CoreStatistics & _core_stats, bool exist_) // _core_stats is only stored here; exist_ == false builds nothing
+ : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL),
+ interface_ip(*interface_ip_), // initialized from the caller's object (presumably a by-value copy -- confirm member type)
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ /*
+ * Processors have separate architectural register files for each thread;
+ * therefore, the bypass buses need to travel across all the register files.
+ */
+ if (!exist) return; // unit absent: IRF, FRF and RFWIN stay NULL
+ int data; // register width taken from core_params
+ int line; // register size after dividing data by BITS_PER_BYTE
+
+ clockRate = core_params.clockRate;
+ name = "Register File Unit";
+
+ //**********************************IRF************************************
+ data = core_params.int_data_width;
+ line = int(ceil(data / BITS_PER_BYTE)); // NOTE(review): rounds up only if BITS_PER_BYTE is floating point; the replaced code used int(ceil(data/32.0))*4 -- confirm the change is intended
+
+ interface_ip.cache_sz = core_params.num_IRF_entry * line; // total capacity = entries * register size
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.phy_Regs_IRF_assoc;
+ interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0; // true whenever a positive tag width is configured
+ interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0; // only dedicated read and write ports are configured for the IRF
+ interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports;
+ interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false; // flags select a plain RAM: not a cache, not a CAM
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate; // fixed numerator of 1.0 for both throughput and latency
+ interface_ip.latency = 1.0 / clockRate;
+ IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ IRF->output_data.area *= core_params.num_hthreads * // scale the array's reported area by thread count, pipeline count and cdb_overhead
+ core_params.num_pipelines * cdb_overhead;
+ area.set_area(area.get_area() + IRF->local_result.area * // add the same scaled area to this unit's total
+ core_params.num_hthreads * core_params.num_pipelines *
+ cdb_overhead);
+
+ //**********************************FRF************************************
+ data = core_params.fp_data_width;
+ line = int(ceil(data / BITS_PER_BYTE)); // NOTE(review): same rounding caveat as for the IRF line size
+
+ interface_ip.cache_sz = core_params.num_FRF_entry * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.phy_Regs_FRF_assoc;
+ interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0;
+ interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports;
+ interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ FRF->output_data.area *= core_params.num_hthreads * // FRF area is scaled by num_fp_pipelines rather than num_pipelines
+ core_params.num_fp_pipelines * cdb_overhead;
+ area.set_area(area.get_area() + FRF->local_result.area *
+ core_params.num_hthreads * core_params.num_fp_pipelines *
+ cdb_overhead);
+ int_regfile_height = IRF->local_result.cache_ht * // heights scale with thread count and sqrt(cdb_overhead), not with pipeline count
+ core_params.num_hthreads * sqrt(cdb_overhead);
+ fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads *
+ sqrt(cdb_overhead);
+ // Since an EXU is associated with each pipeline, the cdb should not have
+ // a longer length.
+
+ if (core_params.regWindowing) { // register-window array is built only when the core enables it
+ //*********************************REG_WIN*****************************
+ // ECC, and usually 2 regs are transferred together during window
+ // shifting. Niagara Mega cell
+ data = core_params.int_data_width;
+ line = int(ceil(data / BITS_PER_BYTE)); // NOTE(review): same rounding caveat as for the IRF line size
+
+ interface_ip.cache_sz = core_params.register_window_size * // capacity is derived from the IRF's cache_sz as stored in IRF->l_ip
+ IRF->l_ip.cache_sz * core_params.num_hthreads;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.register_window_assoc;
+ interface_ip.nbanks = core_params.register_window_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.register_window_tag_width > 0;
+ interface_ip.tag_w = core_params.register_window_tag_width;
+ interface_ip.access_mode = Sequential;
 interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
 interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.register_window_rw_ports; // windows use read/write ports only; dedicated rd/wr ports are zero
+ interface_ip.num_rd_ports = 0;
+ interface_ip.num_wr_ports = 0;
 interface_ip.num_se_rd_ports = 0;
- FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
- FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
- area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
- //area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(FRF.RF.local_result);
- int_regfile_height= IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
- fp_regfile_height = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
- //since a EXU is associated with each pipeline, the cdb should not have longer length.
- if (coredynp.regWindowing)
- {
- //*********************************REG_WIN************************************
- data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 4.0/clockRate;
- interface_ip.latency = 4.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty);
- RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
- //output_data_csv(RFWIN.RF.local_result);
- }
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput =
+ core_params.register_window_throughput / clockRate; // configurable here; the replaced code hard-coded 4.0 / clockRate
+ interface_ip.latency =
+ core_params.register_window_latency / clockRate;
+ RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device,
+ clockRate, core_params.opt_local,
+ core_params.core_ty);
+ RFWIN->output_data.area *= core_params.num_pipelines; // scaled by pipeline count only; no thread or cdb_overhead factor here
+ area.set_area(area.get_area() + RFWIN->local_result.area *
+ core_params.num_pipelines);
+ }
+}
+EXECU::EXECU(XMLNode* _xml_data,
+ InputParameter* interface_ip_, double lsq_height_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL),
+ exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL),
+ int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL),
+ fpTagBypass(NULL), interface_ip(*interface_ip_),
+ lsq_height(lsq_height_), core_params(_core_params),
+ core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ double fu_height = 0.0;
+ clockRate = core_params.clockRate;
+ name = "Execution Unit";
+ rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats);
+ if (core_params.core_ty == OOO ||
+ (core_params.core_ty == Inorder && core_params.multithreaded)) {
+ scheu = new SchedulerU(xml_data, &interface_ip, core_params,
+ core_stats);
+ area.set_area(area.get_area() + scheu->area.get_area() );
+ }
+ exeu = new FunctionalUnit(xml_data, &interface_ip, core_params,
+ core_stats, ALU);
+ area.set_area(area.get_area() + exeu->area.get_area() +
+ rfu->area.get_area());
+ fu_height = exeu->FU_height;
+ if (core_params.num_fpus > 0) {
+ fp_u = new FunctionalUnit(xml_data, &interface_ip,
+ core_params, core_stats, FPU);
+ area.set_area(area.get_area() + fp_u->area.get_area());
+ }
+ if (core_params.num_muls > 0) {
+ mul = new FunctionalUnit(xml_data, &interface_ip,
+ core_params, core_stats, MUL);
+ area.set_area(area.get_area() + mul->area.get_area());
+ fu_height += mul->FU_height;
+ }
+ /*
+ * broadcast logic, including int-broadcast; int_tag-broadcast;
+ * fp-broadcast; fp_tag-broadcast
+ * integer bypass has two paths and fp has 3 paths.
+ * on the same bus there are multiple tri-state drivers and muxes that go
+ * to different components on the same bus
+ */
+ interface_ip.wt = core_params.execu_broadcast_wt;
+ interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type;
+ interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type;
+ interface_ip.throughput = core_params.broadcast_numerator / clockRate;
+ interface_ip.latency = core_params.broadcast_numerator / clockRate;
+ double scheu_Iw_height = 0.0;
+ double scheu_ROB_height = 0.0;
+ double scheu_fp_Iw_height = 0.0;
+ if (scheu) {
+ scheu_Iw_height = scheu->Iw_height;
+ scheu_ROB_height = scheu->ROB_height;
+ scheu_fp_Iw_height = scheu->fp_Iw_height;
+ }
- }
-
-EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- lsq_height(lsq_height_),
- coredynp(dyn_p_),
- rfu(0),
- scheu(0),
- fp_u(0),
- exeu(0),
- mul(0),
- int_bypass(0),
- intTagBypass(0),
- int_mul_bypass(0),
- intTag_mul_Bypass(0),
- fp_bypass(0),
- fpTagBypass(0),
- exist(exist_)
-{
- if (!exist) return;
- double fu_height = 0.0;
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- rfu = new RegFU(XML, ithCore, &interface_ip,coredynp);
- scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp);
- exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU);
- area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() );
- fu_height = exeu->FU_height;
- if (coredynp.num_fpus >0)
- {
- fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU);
- area.set_area(area.get_area()+ fp_u->area.get_area());
- }
- if (coredynp.num_muls >0)
- {
- mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL);
- area.set_area(area.get_area()+ mul->area.get_area());
- fu_height += mul->FU_height;
- }
- /*
- * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast
- * integer by pass has two paths and fp has 3 paths.
- * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus
- */
- if (XML->sys.Embedded)
- {
- interface_ip.wt =Global_30;
- interface_ip.wire_is_mat_type = 0;
- interface_ip.wire_os_mat_type = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- }
- else
- {
- interface_ip.wt =Global;
- interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used
- interface_ip.wire_os_mat_type = 2;
- interface_ip.throughput = 10.0/clockRate; //Do not care
- interface_ip.latency = 10.0/clockRate;
- }
-
- if (coredynp.core_ty==Inorder)
- {
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32),
- rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area());
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
-
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
- rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
- rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
- else
- {//OOO
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- /* For physical register based OOO,
- * data broadcast interconnects cover across functional units, lsq, inst windows and register files,
- * while tag broadcast interconnects also cover across ROB
- */
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
-
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
- rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
- else
- {
- /*
- * In RS based processor both data and tag are broadcast together,
- * covering functional units, lsq, nst windows, register files, and ROBs
- */
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
-
-
- }
- area.set_area(area.get_area()+ bypass.area.get_area());
-}
+ // Common bypass logic parameters
+ double base_w = core_params.execu_bypass_base_width;
+ double base_h = core_params.execu_bypass_base_height;
+ int level = core_params.execu_bypass_start_wiring_level;
+ double route_over_perc = core_params.execu_bypass_route_over_perc;
+ Wire_type wire_type = core_params.execu_bypass_wire_type;
+ int data_w;
+ double len;
+
+ if (core_params.core_ty == Inorder) {
+ data_w = int(ceil(data_path_width / 32.0)*32);
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height;
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate, false,
+ route_over_perc, core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height;
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate, false,
+ route_over_perc, core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ if (core_params.num_muls > 0) {
+ data_w = int(ceil(data_path_width / 32.0)*32*1.5);
+ len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height +
+ lsq_height;
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height +
+ lsq_height + scheu_Iw_height;
+ intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
+
+ if (core_params.num_fpus > 0) {
+ data_w = int(ceil(data_path_width / 32.0)*32*1.5);
+ len = rfu->fp_regfile_height + fp_u->FU_height;
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
+ scheu_Iw_height;
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ } else {//OOO
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ /* For physical register based OOO,
+ * data broadcast interconnects cover across functional units, lsq,
+ * inst windows and register files,
+ * while tag broadcast interconnects also cover across ROB
+ */
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height;
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height + scheu_ROB_height;
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ if (core_params.num_muls > 0) {
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height +
+ mul->FU_height + lsq_height;
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height +
+ mul->FU_height + lsq_height + scheu_Iw_height +
+ scheu_ROB_height;
+ intTag_mul_Bypass = new Interconnect(xml_data,
+ "Mul Bypass Tag",
+ Core_device, base_w,
+ base_h, data_w, len,
+ &interface_ip, level,
+ clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
-RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- iFRAT(0),
- fFRAT(0),
- iRRAT(0),
- fRRAT(0),
- ifreeL(0),
- ffreeL(0),
- idcl(0),
- fdcl(0),
- RAHT(0),
- exist(exist_)
- {
- /*
- * Although renaming logic maybe be used in in-order processors,
- * McPAT assumes no renaming logic is used since the performance gain is very limited and
- * the only major inorder processor with renaming logic is Itainium
- * that is a VLIW processor and different from current McPAT's model.
- * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT;
- * i,f prefix mean int and fp
- * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires.
- * FRAT will be read twice and written once per instruction;
- * RRAT will be write once per instruction when committing and reads out all when context switch
- * checkpointing is implicit
- * Renaming logic is duplicated for each different hardware threads
- *
- * No Dual-RAT is needed in RS-based OOO processors,
- * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
- * to make sure all the renamings associated with the ROB to be released are updated at the same time.
- * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag,
- * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag,
- *
- * Both RAM and CAM have same DCL
- */
- if (!exist) return;
- int tag, data, out_w;
-// interface_ip.wire_is_mat_type = 0;
-// interface_ip.wire_os_mat_type = 0;
-// interface_ip.wt = Global_30;
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- if (coredynp.core_ty==OOO)
- {
- //integer pipeline
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions
- data = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
-// data = int(ceil(coredynp.phy_ireg_width/8.0));
- out_w = 1;//int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
-// //RAHT According to Intel, combine GC with FRAT is very costly.
-// data = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry);
-// out_w = data;
-// interface_ip.is_cache = false;
-// interface_ip.pure_cam = false;
-// interface_ip.pure_ram = true;
-// interface_ip.line_sz = data;
-// interface_ip.cache_sz = data*coredynp.globalCheckpoint;
-// interface_ip.assoc = 1;
-// interface_ip.nbanks = 1;
-// interface_ip.out_w = out_w*8;
-// interface_ip.access_mode = 0;
-// interface_ip.throughput = 1.0/clockRate;
-// interface_ip.latency = 1.0/clockRate;
-// interface_ip.obj_func_dyn_energy = 0;
-// interface_ip.obj_func_dyn_power = 0;
-// interface_ip.obj_func_leak_power = 0;
-// interface_ip.obj_func_cycle_t = 1;
-// interface_ip.num_rw_ports = 1;//the extra one port is for GCs
-// interface_ip.num_rd_ports = 2*coredynp.decodeW;
-// interface_ip.num_wr_ports = coredynp.decodeW;
-// interface_ip.num_se_rd_ports = 0;
-// iFRAT = new ArrayST(&interface_ip, "Int FrontRAT");
-// iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
-// area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT floating point
- data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ if (core_params.num_fpus > 0) {
+ data_w = int(ceil(core_params.fp_data_width));
+ len = rfu->fp_regfile_height + fp_u->FU_height;
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_freg_width;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
+ scheu_fp_Iw_height + scheu_ROB_height;
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ } else {
+ /*
+ * In RS based processor both data and tag are broadcast together,
+ * covering functional units, lsq, inst windows, register files, and ROBs
+ */
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height + scheu_ROB_height;
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height + scheu_ROB_height;
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ if (core_params.num_muls > 0) {
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height +
+ mul->FU_height + lsq_height + scheu_Iw_height +
+ scheu_ROB_height;
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height +
+ mul->FU_height + lsq_height + scheu_Iw_height +
+ scheu_ROB_height;
+ intTag_mul_Bypass = new Interconnect(xml_data,
+ "Mul Bypass Tag",
+ Core_device, base_w,
+ base_h, data_w, len,
+ &interface_ip, level,
+ clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- //FRAT
- tag = coredynp.arch_ireg_width;
- data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.decodeW;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT for FP
- tag = coredynp.arch_freg_width;
- data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ if (core_params.num_fpus > 0) {
+ data_w = int(ceil(core_params.fp_data_width));
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
+ scheu_fp_Iw_height + scheu_ROB_height;
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_freg_width;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
+ scheu_fp_Iw_height + scheu_ROB_height;
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ }
+ }
+ if (int_bypass) {
+ children.push_back(int_bypass);
+ }
+ if (intTagBypass) {
+ children.push_back(intTagBypass);
+ }
+ if (int_mul_bypass) {
+ children.push_back(int_mul_bypass);
+ }
+ if (intTag_mul_Bypass) {
+ children.push_back(intTag_mul_Bypass);
+ }
+ if (fp_bypass) {
+ children.push_back(fp_bypass);
+ }
+ if (fpTagBypass) {
+ children.push_back(fpTagBypass);
+ }
- }
+ area.set_area(area.get_area() + int_bypass->area.get_area() +
+ intTagBypass->area.get_area());
+ if (core_params.num_muls > 0) {
+ area.set_area(area.get_area() + int_mul_bypass->area.get_area() +
+ intTag_mul_Bypass->area.get_area());
+ }
+ if (core_params.num_fpus > 0) {
+ area.set_area(area.get_area() + fp_bypass->area.get_area() +
+ fpTagBypass->area.get_area());
+ }
+}
- //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL),
+ fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL),
+ RAHT(NULL), interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ int tag;
+ int data;
+ int out_w;
+ int size;
+
+ // Assumption:
+ // We make an implicit design assumption based on the specific structure
+ // that is being modeled.
+ // 1. RAM-based RATs are direct mapped. However, if the associated
+ // scheduler is a reservation station style, the RATs are fully
+ // associative.
+ // 2. Non-CAM based RATs and free lists do not have tags.
+ // 3. Free lists are direct mapped.
+
+ const int RAM_BASED_RAT_ASSOC = 1;
+ const int RS_RAT_ASSOC = 0;
+ const int NON_CAM_BASED_TAG_WIDTH = 0;
+ const int FREELIST_ASSOC = 1;
+
+ clockRate = core_params.clockRate;
+ name = "Rename Unit";
+ if (core_params.core_ty == OOO) {
+ //integer pipeline
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ if (core_params.rm_ty == RAMbased) {
+ //FRAT with global checkpointing (GCs); please see paper tech
+ //report for detailed explanations
+
+ data = int(ceil(core_params.phy_ireg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+
+ size = data * core_params.archi_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
- iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iRRAT->area.get_area());
-
- //RRAT for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT floating point
+ data = int(ceil(core_params.phy_freg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fRRAT->area.get_area());
-
- //Freelist of renaming unit always RAM based
- //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist
- // 2)When instruction commits the Phyregisters/ROB needed to be recycled.
- //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+
+ } else if ((core_params.rm_ty == CAMbased)) {
+ //IRAT
+ tag = core_params.arch_ireg_width;
+ //the address of CAM needed to be sent out
+ data = int(ceil((core_params.arch_ireg_width + 1 *
+ core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//TODO
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width;
- //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
- ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ifreeL->area.get_area());
-
- //freelist for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ffreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT for FP
+ tag = core_params.arch_freg_width;
+ //the address of CAM needed to be sent out
+ data = int(ceil((core_params.arch_freg_width + 1 *
+ core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ffreeL->area.get_area());
-
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
-
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased){
- /*
- * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
- * to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time.
- * RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than
- * CAM based RAT so that it is more scalable as number of ROB/physical regs increases.
- */
- tag = coredynp.phy_ireg_width;
- data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= coredynp.commitW;//TODO
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->local_result.adjust_area();
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FP
- tag = coredynp.phy_freg_width;
- data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->local_result.adjust_area();
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- //FRAT
- tag = coredynp.arch_ireg_width;
- data = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO
- interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT
- tag = coredynp.arch_freg_width;
- data = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+ }
- }
- //No RRAT for RS based OOO
- //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ //RRAT is always RAM based, does not have GCs, and is used only to
+ //record the latest non-speculative mapping
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_IRF_size *
+ NUM_SOURCE_OPERANDS;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.retire_rat_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.commitW;
+ interface_ip.num_wr_ports = core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ iRRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iRRAT->area.get_area());
+
+ //RRAT for FP
+ data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size *
+ NUM_SOURCE_OPERANDS;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.retire_rat_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ fRRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fRRAT->area.get_area());
+
+ //Freelist of renaming unit always RAM based
+ //Recycling happens in two places: 1) when the DCL detects a WAW, the Phyregisters/ROB entry is recycled directly into the freelist;
+ // 2) when an instruction commits, the Phyregisters/ROB entry needs to be recycled.
+ //Therefore num_wr port = decode - 1 (-1 means at least one phy reg will be used for the current renaming group) + commit width
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ifreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports =
+ core_params.decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ifreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ifreeL->area.get_area());
+
+ //freelist for FP
+ data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ffreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports =
+ core_params.fp_decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ffreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ffreeL->area.get_area());
+
+ } else if (core_params.scheu_ty == ReservationStation) {
+ if (core_params.rm_ty == RAMbased) {
+ tag = core_params.phy_ireg_width;
+ data = int(ceil(core_params.phy_ireg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RS_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.commitW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->local_result.adjust_area();
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FP
+ tag = core_params.phy_freg_width;
+ data = int(ceil(core_params.phy_freg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RS_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.fp_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->local_result.adjust_area();
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+
+ } else if ((core_params.rm_ty == CAMbased)) {
+ //FRAT
+ //the address of CAM needed to be sent out
+ tag = core_params.arch_ireg_width;
+ data = int(ceil (core_params.arch_ireg_width +
+ 1 * core_params.globalCheckpoint /
+ BITS_PER_BYTE));
+ out_w = int(ceil (core_params.arch_ireg_width /
+ BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT for FP
+ tag = core_params.arch_freg_width;
+ //the address of CAM needed to be sent out
+ data = int(ceil(core_params.arch_freg_width +
+ 1 * core_params.globalCheckpoint /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//TODO
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ifreeL->area.get_area());
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
+ }
+ //No RRAT for RS based OOO
+ //Freelist of renaming unit of RS based OOO is unified for both int and fp renaming unit since the ROB is unified
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ifreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports =
+ core_params.decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ifreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ifreeL->area.get_area());
}
-}
- if (coredynp.core_ty==Inorder&& coredynp.issueW>1)
- {
- /* Dependency check logic will only present when decode(issue) width>1.
- * Multiple issue in order processor can do without renaming, but dcl is a must.
- */
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
}
+ idcl =
+ new dep_resource_conflict_check(xml_data,
+ "Instruction Dependency Check?",
+ &interface_ip, core_params,
+ core_params.phy_ireg_width,
+ clockRate);
+ fdcl =
+ new dep_resource_conflict_check(xml_data,
+ "FP Dependency Check?", &interface_ip,
+ core_params,
+ core_params.phy_freg_width, clockRate);
}
-Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- ifu (0),
- lsu (0),
- mmu (0),
- exu (0),
- rnu (0),
- corepipe (0),
- undiffCore (0),
- l2cache (0)
-{
- /*
- * initialize, compute and optimize individual components.
- */
-
- double pipeline_area_per_unit;
- if (XML->sys.Private_L2)
- {
- l2cache = new SharedCache(XML,ithCore, &interface_ip);
-
- }
-// interface_ip.wire_is_mat_type = 2;
-// interface_ip.wire_os_mat_type = 2;
-// interface_ip.wt =Global_30;
- set_core_param();
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp);
- lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp);
- mmu = new MemManU (XML, ithCore, &interface_ip,coredynp);
- exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp);
- undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp);
- if (coredynp.core_ty==OOO)
- {
- rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp);
- }
- corepipe = new Pipeline(&interface_ip,coredynp);
-
- if (coredynp.core_ty==OOO)
- {
- pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0;
- if (rnu->exist)
- {
- rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
- }
- }
- else {
- pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0;
- }
-
- //area.set_area(area.get_area()+ corepipe->area.get_area());
- if (ifu->exist)
- {
- ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area() + ifu->area.get_area());
- }
- if (lsu->exist)
- {
- lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area() + lsu->area.get_area());
- }
- if (exu->exist)
- {
- exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area()+exu->area.get_area());
- }
- if (mmu->exist)
- {
- mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area()+mmu->area.get_area());
- }
-
- if (coredynp.core_ty==OOO)
- {
- if (rnu->exist)
- {
-
- area.set_area(area.get_area() + rnu->area.get_area());
- }
- }
-
- if (undiffCore->exist)
- {
- area.set_area(area.get_area() + undiffCore->area.get_area());
- }
-
- if (XML->sys.Private_L2)
- {
- area.set_area(area.get_area() + l2cache->area.get_area());
-
- }
-// //clock power
-// clockNetwork.init_wire_external(is_default, &interface_ip);
-// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
-// clockNetwork.end_wiring_level =5;//toplevel metal
-// clockNetwork.start_wiring_level =5;//toplevel metal
-// clockNetwork.num_regs = corepipe.tot_stage_vector;
-// clockNetwork.optimize_wire();
-}
+Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_)
+ : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL),
+ exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL),
+ ithCore(_ithCore), interface_ip(*interface_ip_) { // Builds one core: optional L2 child, the sub-units, and the summed core area
+
+ ostringstream os;
+ os << ithCore;
+ name = "Core " + os.str(); // component name is "Core <ithCore>"
+
+ int i = 0;
+ XMLNode* childXML;
+ for (i = 0; i < xml_data->nChildNode("component"); i++) {
+ childXML = xml_data->getChildNodePtr("component", &i); // NOTE(review): i is passed by pointer - confirm the callee does not also advance it
+ XMLCSTR type = childXML->getAttribute("type");
+ if (!type)
+ warnMissingComponentType(childXML->getAttribute("id")); // NOTE(review): type is still NULL at the STRCMP below unless this call does not return - confirm
+
+ STRCMP(type, "CacheUnit") {
+ XMLCSTR comp_name = childXML->getAttribute("id");
+ if (!comp_name)
+ continue;
+
+ STRCMP(comp_name, "system.L20") { // only the CacheUnit child with id "system.L20" is attached as this core's L2
+ l2cache = new CacheUnit(childXML, &interface_ip);
+ children.push_back(l2cache);
+ }
+ }
+ }
+ set_core_param();
+ clockRate = core_params.clockRate;
+
+ ifu = new InstFetchU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(ifu);
+ lsu = new LoadStoreU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(lsu);
+ mmu = new MemManU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(mmu);
+ exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height,
+ core_params, core_stats);
+ children.push_back(exu);
+ undiffCore = new UndiffCore(xml_data, &interface_ip, core_params);
+ children.push_back(undiffCore);
+ if (core_params.core_ty == OOO) { // the renaming unit is only built for OOO cores; rnu stays NULL otherwise
+ rnu = new RENAMINGU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(rnu);
+ }
+ corepipe = new Pipeline(xml_data, &interface_ip, core_params);
+ children.push_back(corepipe);
+
+ double pipeline_area_per_unit; // share of pipeline area added to each existing unit below
+ if (core_params.core_ty == OOO) { // OOO: divisor is 5 and rnu also receives a share
+ pipeline_area_per_unit = (corepipe->area.get_area() *
+ core_params.num_pipelines) / 5.0;
+ if (rnu->exist) {
+ rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
+ }
+ } else { // non-OOO: divisor is 4 (ifu, lsu, exu, mmu receive shares below)
+ pipeline_area_per_unit = (corepipe->area.get_area() *
+ core_params.num_pipelines) / 4.0;
+ }
-void BranchPredictor::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double r_access;
- double w_access;
- if (is_tdp)
- {
- r_access = coredynp.predictionW*coredynp.BR_duty_cycle;
- w_access = 0*coredynp.BR_duty_cycle;
- globalBPT->stats_t.readAc.access = r_access;
- globalBPT->stats_t.writeAc.access = w_access;
- globalBPT->tdp_stats = globalBPT->stats_t;
-
- L1_localBPT->stats_t.readAc.access = r_access;
- L1_localBPT->stats_t.writeAc.access = w_access;
- L1_localBPT->tdp_stats = L1_localBPT->stats_t;
-
- L2_localBPT->stats_t.readAc.access = r_access;
- L2_localBPT->stats_t.writeAc.access = w_access;
- L2_localBPT->tdp_stats = L2_localBPT->stats_t;
-
- chooser->stats_t.readAc.access = r_access;
- chooser->stats_t.writeAc.access = w_access;
- chooser->tdp_stats = chooser->stats_t;
-
- RAS->stats_t.readAc.access = r_access;
- RAS->stats_t.writeAc.access = w_access;
- RAS->tdp_stats = RAS->stats_t;
- }
- else
- {
- //The resolution of BPT accesses is coarse, but this is
- //because most simulators cannot track finer grained details
- r_access = XML->sys.core[ithCore].branch_instructions;
- w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0
- globalBPT->stats_t.readAc.access = r_access;
- globalBPT->stats_t.writeAc.access = w_access;
- globalBPT->rtp_stats = globalBPT->stats_t;
-
- L1_localBPT->stats_t.readAc.access = r_access;
- L1_localBPT->stats_t.writeAc.access = w_access;
- L1_localBPT->rtp_stats = L1_localBPT->stats_t;
-
- L2_localBPT->stats_t.readAc.access = r_access;
- L2_localBPT->stats_t.writeAc.access = w_access;
- L2_localBPT->rtp_stats = L2_localBPT->stats_t;
-
- chooser->stats_t.readAc.access = r_access;
- chooser->stats_t.writeAc.access = w_access;
- chooser->rtp_stats = chooser->stats_t;
-
- RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls;
- RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls;
- RAS->rtp_stats = RAS->stats_t;
- }
-
- globalBPT->power_t.reset();
- L1_localBPT->power_t.reset();
- L2_localBPT->power_t.reset();
- chooser->power_t.reset();
- RAS->power_t.reset();
-
- globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access +
- globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic;
- L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access +
- L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic;
-
- L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access +
- L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic;
-
- chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access +
- chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic;
- RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access +
- RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic;
-
- if (is_tdp)
- {
- globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
- L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
- L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
- chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg;
- RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
-
- power = power + globalBPT->power + L1_localBPT->power + chooser->power + RAS->power;
- }
- else
- {
- globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
- L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
- L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
- chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg;
- RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
- rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + chooser->rt_power + RAS->rt_power;
+ // TODO: move all of this area accounting to computeArea
+ //area.set_area(area.get_area()+ corepipe->area.get_area());
+ if (ifu->exist) {
+ ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + ifu->area.get_area());
+ }
+ if (lsu->exist) {
+ lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + lsu->area.get_area());
+ }
+ if (exu->exist) {
+ exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + exu->area.get_area());
+ }
+ if (mmu->exist) {
+ mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + mmu->area.get_area());
}
-}
-void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
- if (is_tdp)
- {
- cout << indent_str<< "Global Predictor:" << endl;
- cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "Local Predictor:" << endl;
- cout << indent_str << "L1_Local Predictor:" << endl;
- cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? L1_localBPT->power.readOp.longer_channel_leakage:L1_localBPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << L1_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "L2_Local Predictor:" << endl;
- cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? L2_localBPT->power.readOp.longer_channel_leakage:L2_localBPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << L2_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
-
- cout << indent_str << "Chooser:" << endl;
- cout << indent_str_next << "Area = " << chooser->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << chooser->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? chooser->power.readOp.longer_channel_leakage:chooser->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << chooser->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "RAS:" << endl;
- cout << indent_str_next << "Area = " << RAS->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << RAS->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? RAS->power.readOp.longer_channel_leakage:RAS->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
-// cout << indent_str_next << "Global Predictor Peak Dynamic = " << globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Global Predictor Subthreshold Leakage = " << globalBPT->rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Global Predictor Gate Leakage = " << globalBPT->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Local Predictor Peak Dynamic = " << L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Local Predictor Subthreshold Leakage = " << L1_localBPT->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Local Predictor Gate Leakage = " << L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Chooser Peak Dynamic = " << chooser->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Chooser Subthreshold Leakage = " << chooser->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "RAS Peak Dynamic = " << RAS->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "RAS Subthreshold Leakage = " << RAS->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" << endl;
- }
+ if (core_params.core_ty == OOO) {
+ if (rnu->exist) {
-}
-
-void InstFetchU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- icache.caches->stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle;
- icache.caches->stats_t.readAc.miss = 0;
- icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
- icache.caches->tdp_stats = icache.caches->stats_t;
-
- icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports;
- icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports;
- icache.missb->tdp_stats = icache.missb->stats_t;
-
- icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.ifb->tdp_stats = icache.ifb->stats_t;
-
- icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports;
- icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.prefetchb->tdp_stats = icache.prefetchb->stats_t;
-
- IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width;
- IB->tdp_stats = IB->stats_t;
-
- if (coredynp.predictionW>0)
- {
- BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses;
- BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses;
+ area.set_area(area.get_area() + rnu->area.get_area());
}
+ }
- ID_inst->stats_t.readAc.access = coredynp.decodeW;
- ID_operand->stats_t.readAc.access = coredynp.decodeW;
- ID_misc->stats_t.readAc.access = coredynp.decodeW;
- ID_inst->tdp_stats = ID_inst->stats_t;
- ID_operand->tdp_stats = ID_operand->stats_t;
- ID_misc->tdp_stats = ID_misc->stats_t;
-
+ if (undiffCore->exist) {
+ area.set_area(area.get_area() + undiffCore->area.get_area());
+ }
+ if (l2cache) { // non-NULL only when a "system.L20" CacheUnit child was found in the loop above
+ area.set_area(area.get_area() + l2cache->area.get_area());
}
- else
- {
- //init stats for Runtime Dynamic (RTP)
- icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses;
- icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses;
- icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
- icache.caches->rtp_stats = icache.caches->stats_t;
+}
- icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.missb->rtp_stats = icache.missb->stats_t;
- icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.ifb->rtp_stats = icache.ifb->stats_t;
+void BranchPredictor::computeEnergy() { // Sets TDP and runtime access counts and dynamic energy per predictor table, then sums output_data
+ if (!exist) return;
+
+ // ASSUMPTION: All instructions access the branch predictors at Fetch and
+ // only branch instructions update the predictors regardless
+ // of the correctness of the prediction.
+ double tdp_read_accesses =
+ core_params.predictionW * core_stats.BR_duty_cycle; // shared TDP read count used for every table below
+ globalBPT->tdp_stats.reset();
+ globalBPT->tdp_stats.readAc.access = tdp_read_accesses;
+ globalBPT->tdp_stats.writeAc.access = 0; // no writes are counted for TDP
+ globalBPT->rtp_stats.reset();
+ globalBPT->rtp_stats.readAc.access = core_stats.total_instructions;
+ globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
+ globalBPT->power_t.reset();
+ globalBPT->power_t.readOp.dynamic +=
+ globalBPT->local_result.power.readOp.dynamic *
+ globalBPT->tdp_stats.readAc.access +
+ globalBPT->local_result.power.writeOp.dynamic *
+ globalBPT->tdp_stats.writeAc.access;
+ globalBPT->power_t = globalBPT->power_t +
+ globalBPT->local_result.power * pppm_lkg; // presumably pppm_lkg keeps only the leakage terms - confirm
+ globalBPT->rt_power.reset();
+ globalBPT->rt_power.readOp.dynamic +=
+ globalBPT->local_result.power.readOp.dynamic *
+ globalBPT->rtp_stats.readAc.access +
+ globalBPT->local_result.power.writeOp.dynamic *
+ globalBPT->rtp_stats.writeAc.access;
+
+ L1_localBPT->tdp_stats.reset();
+ L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses;
+ L1_localBPT->tdp_stats.writeAc.access = 0;
+ L1_localBPT->rtp_stats.reset();
+ L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions;
+ L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
+ L1_localBPT->power_t.reset();
+ L1_localBPT->power_t.readOp.dynamic +=
+ L1_localBPT->local_result.power.readOp.dynamic *
+ L1_localBPT->tdp_stats.readAc.access +
+ L1_localBPT->local_result.power.writeOp.dynamic *
+ L1_localBPT->tdp_stats.writeAc.access;
+ L1_localBPT->power_t = L1_localBPT->power_t +
+ L1_localBPT->local_result.power * pppm_lkg;
+ L1_localBPT->rt_power.reset();
+ L1_localBPT->rt_power.readOp.dynamic +=
+ L1_localBPT->local_result.power.readOp.dynamic *
+ L1_localBPT->rtp_stats.readAc.access +
+ L1_localBPT->local_result.power.writeOp.dynamic *
+ L1_localBPT->rtp_stats.writeAc.access;
+
+ L2_localBPT->tdp_stats.reset();
+ L2_localBPT->tdp_stats.readAc.access = tdp_read_accesses;
+ L2_localBPT->tdp_stats.writeAc.access = 0;
+ L2_localBPT->rtp_stats.reset();
+ L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions; // NOTE(review): the other tables use total_instructions for runtime reads - confirm this is intended
+ L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
+ L2_localBPT->power_t.reset();
+ L2_localBPT->power_t.readOp.dynamic +=
+ L2_localBPT->local_result.power.readOp.dynamic *
+ L2_localBPT->tdp_stats.readAc.access +
+ L2_localBPT->local_result.power.writeOp.dynamic *
+ L2_localBPT->tdp_stats.writeAc.access;
+ L2_localBPT->power_t = L2_localBPT->power_t +
+ L2_localBPT->local_result.power * pppm_lkg;
+ L2_localBPT->rt_power.reset();
+ L2_localBPT->rt_power.readOp.dynamic +=
+ L2_localBPT->local_result.power.readOp.dynamic *
+ L2_localBPT->rtp_stats.readAc.access +
+ L2_localBPT->local_result.power.writeOp.dynamic *
+ L2_localBPT->rtp_stats.writeAc.access;
+
+ chooser->tdp_stats.reset();
+ chooser->tdp_stats.readAc.access = tdp_read_accesses;
+ chooser->tdp_stats.writeAc.access = 0;
+ chooser->rtp_stats.reset();
+ chooser->rtp_stats.readAc.access = core_stats.total_instructions;
+ chooser->rtp_stats.writeAc.access = core_stats.branch_instructions;
+ chooser->power_t.reset();
+ chooser->power_t.readOp.dynamic +=
+ chooser->local_result.power.readOp.dynamic *
+ chooser->tdp_stats.readAc.access +
+ chooser->local_result.power.writeOp.dynamic *
+ chooser->tdp_stats.writeAc.access;
+ chooser->power_t =
+ chooser->power_t + chooser->local_result.power * pppm_lkg;
+ chooser->rt_power.reset();
+ chooser->rt_power.readOp.dynamic +=
+ chooser->local_result.power.readOp.dynamic *
+ chooser->rtp_stats.readAc.access +
+ chooser->local_result.power.writeOp.dynamic *
+ chooser->rtp_stats.writeAc.access;
+
+ RAS->tdp_stats.reset();
+ RAS->tdp_stats.readAc.access = tdp_read_accesses;
+ RAS->tdp_stats.writeAc.access = 0;
+ RAS->rtp_stats.reset();
+ RAS->rtp_stats.readAc.access = core_stats.function_calls; // RAS runtime reads and writes both use function_calls
+ RAS->rtp_stats.writeAc.access = core_stats.function_calls;
+ RAS->power_t.reset();
+ RAS->power_t.readOp.dynamic +=
+ RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access +
+ RAS->local_result.power.writeOp.dynamic *
+ RAS->tdp_stats.writeAc.access;
+ RAS->power_t = RAS->power_t + RAS->local_result.power *
+ core_params.pppm_lkg_multhread; // RAS uses pppm_lkg_multhread where the other tables use pppm_lkg
+ RAS->rt_power.reset();
+ RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic *
+ RAS->rtp_stats.readAc.access +
+ RAS->local_result.power.writeOp.dynamic *
+ RAS->rtp_stats.writeAc.access;
+
+ output_data.reset();
+ if (globalBPT) { // NOTE(review): every table pointer was already dereferenced above, so these NULL checks cannot protect anything
+ globalBPT->output_data.peak_dynamic_power =
+ globalBPT->power_t.readOp.dynamic * clockRate;
+ globalBPT->output_data.runtime_dynamic_energy =
+ globalBPT->rt_power.readOp.dynamic;
+ output_data += globalBPT->output_data;
+ }
+ if (L1_localBPT) {
+ L1_localBPT->output_data.peak_dynamic_power =
+ L1_localBPT->power_t.readOp.dynamic * clockRate;
+ L1_localBPT->output_data.runtime_dynamic_energy =
+ L1_localBPT->rt_power.readOp.dynamic;
+ output_data += L1_localBPT->output_data;
+ }
+ if (L2_localBPT) {
+ L2_localBPT->output_data.peak_dynamic_power =
+ L2_localBPT->power_t.readOp.dynamic * clockRate;
+ L2_localBPT->output_data.runtime_dynamic_energy =
+ L2_localBPT->rt_power.readOp.dynamic;
+ output_data += L2_localBPT->output_data;
+ }
+ if (chooser) {
+ chooser->output_data.peak_dynamic_power =
+ chooser->power_t.readOp.dynamic * clockRate;
+ chooser->output_data.runtime_dynamic_energy =
+ chooser->rt_power.readOp.dynamic;
+ output_data += chooser->output_data;
+ }
+ if (RAS) {
+ RAS->output_data.peak_dynamic_power =
+ RAS->power_t.readOp.dynamic * clockRate;
+ RAS->output_data.subthreshold_leakage_power = // RAS is the only table whose leakage outputs are assigned in this function
+ RAS->power_t.readOp.leakage * core_params.num_hthreads; // NOTE(review): power_t was already scaled by pppm_lkg_multhread above - confirm threads are not counted twice
+ RAS->output_data.gate_leakage_power =
+ RAS->power_t.readOp.gate_leakage * core_params.num_hthreads;
+ RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic;
+ output_data += RAS->output_data;
+ }
+}
- icache.prefetchb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.prefetchb->rtp_stats = icache.prefetchb->stats_t;
+void BranchPredictor::displayData(uint32_t indent, int plevel) { // Calls the base displayData, then each predictor table's displayData at indent + 4
+ if (!exist) return;
- IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions;
- IB->rtp_stats = IB->stats_t;
+ McPATComponent::displayData(indent, plevel);
- if (coredynp.predictionW>0)
- {
- BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions;
- BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions;
- BTB->rtp_stats = BTB->stats_t;
- }
+ globalBPT->displayData(indent + 4, plevel); // tables are dereferenced without NULL checks here
+ L1_localBPT->displayData(indent + 4, plevel);
+ L2_localBPT->displayData(indent + 4, plevel);
+ chooser->displayData(indent + 4, plevel);
+ RAS->displayData(indent + 4, plevel);
+}
- ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_inst->rtp_stats = ID_inst->stats_t;
- ID_operand->rtp_stats = ID_operand->stats_t;
- ID_misc->rtp_stats = ID_misc->stats_t;
+void InstFetchU::computeEnergy() { // Sets TDP/runtime stats and dynamic energy for IB, BTB and the three decoders, then sums output_data
+ if (!exist) return;
+ if (BPT) {
+ BPT->computeEnergy();
}
- icache.power_t.reset();
+ IB->tdp_stats.reset();
+ IB->tdp_stats.readAc.access = core_params.peak_issueW;
+ IB->tdp_stats.writeAc.access = core_params.peak_issueW;
+ IB->rtp_stats.reset();
+ IB->rtp_stats.readAc.access = core_stats.total_instructions;
+ IB->rtp_stats.writeAc.access = core_stats.total_instructions;
IB->power_t.reset();
-// ID_inst->power_t.reset();
-// ID_operand->power_t.reset();
-// ID_misc->power_t.reset();
- if (coredynp.predictionW>0)
- {
+ IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic *
+ IB->tdp_stats.readAc.access +
+ IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access;
+ IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg;
+ IB->rt_power.reset();
+ IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic *
+ IB->rtp_stats.readAc.access +
+ IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access;
+
+ if (core_params.predictionW > 0) { // BTB stats and energy are only computed when predictionW > 0
+ BTB->tdp_stats.reset();
+ BTB->tdp_stats.readAc.access = core_params.predictionW;
+ BTB->tdp_stats.writeAc.access = 0;
+ BTB->rtp_stats.reset();
+ BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses;
+ BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses;
BTB->power_t.reset();
+ BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic *
+ BTB->tdp_stats.readAc.access +
+ BTB->local_result.power.writeOp.dynamic *
+ BTB->tdp_stats.writeAc.access; // NOTE(review): unlike IB, no local_result.power * pppm_lkg term is added for BTB - confirm
+ BTB->rt_power.reset();
+ BTB->rt_power.readOp.dynamic +=
+ BTB->local_result.power.readOp.dynamic *
+ BTB->rtp_stats.readAc.access +
+ BTB->local_result.power.writeOp.dynamic *
+ BTB->rtp_stats.writeAc.access;
}
- icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+
- //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+
- icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel
- icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache
- icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic +
- icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
- icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic +
- icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic;
- icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic +
- icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic;
+ ID_inst->tdp_stats.reset();
+ ID_inst->tdp_stats.readAc.access = core_params.decodeW;
+ ID_inst->power_t.reset(); // this reset is overwritten by the assignment on the next line
+ ID_inst->power_t = ID_misc->power; // NOTE(review): copies ID_misc->power, not ID_inst->power - possible copy-paste slip; confirm
+ ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic *
+ ID_inst->tdp_stats.readAc.access;
+ ID_inst->rtp_stats.reset();
+ ID_inst->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_inst->rt_power.reset();
+ ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic *
+ ID_inst->rtp_stats.readAc.access;
+
+ ID_operand->tdp_stats.reset();
+ ID_operand->tdp_stats.readAc.access = core_params.decodeW;
+ ID_operand->power_t.reset();
+ ID_operand->power_t = ID_misc->power; // NOTE(review): copies ID_misc->power, not ID_operand->power - possible copy-paste slip; confirm
+ ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic *
+ ID_operand->tdp_stats.readAc.access;
+ ID_operand->rtp_stats.reset();
+ ID_operand->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_operand->rt_power.reset();
+ ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic *
+ ID_operand->rtp_stats.readAc.access;
+
+ ID_misc->tdp_stats.reset();
+ ID_misc->tdp_stats.readAc.access = core_params.decodeW;
+ ID_misc->power_t.reset();
+ ID_misc->power_t = ID_misc->power;
+ ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic *
+ ID_misc->tdp_stats.readAc.access;
+ ID_misc->rtp_stats.reset();
+ ID_misc->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_misc->rt_power.reset();
+ ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic *
+ ID_misc->rtp_stats.readAc.access;
+
+ power.reset();
+ rt_power.reset();
+ McPATComponent::computeEnergy(); // base-class pass; presumably computes registered children such as icache - confirm
+
+ output_data.reset();
+ if (icache) {
+ output_data += icache->output_data;
+ }
+ if (IB) { // NOTE(review): IB and the ID_* decoders were already dereferenced above, so these NULL checks cannot protect anything
+ IB->output_data.peak_dynamic_power =
+ IB->power_t.readOp.dynamic * clockRate;
+ IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic;
+ output_data += IB->output_data;
+ }
+ if (BTB) { // NOTE(review): BTB power_t/rt_power are only updated above when predictionW > 0; this tests the pointer alone
+ BTB->output_data.peak_dynamic_power =
+ BTB->power_t.readOp.dynamic * clockRate;
+ BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic;
+ output_data += BTB->output_data;
+ }
+ if (BPT) {
+ output_data += BPT->output_data;
+ }
+ if (ID_inst) {
+ ID_inst->output_data.peak_dynamic_power =
+ ID_inst->power_t.readOp.dynamic * clockRate;
+ ID_inst->output_data.runtime_dynamic_energy =
+ ID_inst->rt_power.readOp.dynamic;
+ output_data += ID_inst->output_data;
+ }
+ if (ID_operand) {
+ ID_operand->output_data.peak_dynamic_power =
+ ID_operand->power_t.readOp.dynamic * clockRate;
+ ID_operand->output_data.runtime_dynamic_energy =
+ ID_operand->rt_power.readOp.dynamic;
+ output_data += ID_operand->output_data;
+ }
+ if (ID_misc) {
+ ID_misc->output_data.peak_dynamic_power =
+ ID_misc->power_t.readOp.dynamic * clockRate;
+ ID_misc->output_data.runtime_dynamic_energy =
+ ID_misc->rt_power.readOp.dynamic;
+ output_data += ID_misc->output_data;
+ }
+}
- IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access +
- IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic;
+void InstFetchU::displayData(uint32_t indent, int plevel) { // Calls the base displayData, then the sub-components' displayData at indent + 4
+ if (!exist) return;
- if (coredynp.predictionW>0)
- {
- BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access +
- BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic;
+ McPATComponent::displayData(indent, plevel);
- BPT->computeEnergy(is_tdp);
+ if (core_params.predictionW > 0) { // BTB and BPT are only displayed when predictionW > 0
+ BTB->displayData(indent + 4, plevel);
+ if (BPT->exist) { // NOTE(review): BPT is dereferenced without the NULL check used in computeEnergy - confirm it is always set here
+ BPT->displayData(indent + 4, plevel);
}
+ }
+ IB->displayData(indent + 4, plevel);
+ ID_inst->displayData(indent + 4, plevel);
+ ID_operand->displayData(indent + 4, plevel);
+ ID_misc->displayData(indent + 4, plevel);
+}
- if (is_tdp)
- {
-// icache.power = icache.power_t +
-// (icache.caches->local_result.power)*pppm_lkg +
-// (icache.missb->local_result.power +
-// icache.ifb->local_result.power +
-// icache.prefetchb->local_result.power)*pppm_Isub;
- icache.power = icache.power_t +
- (icache.caches->local_result.power +
- icache.missb->local_result.power +
- icache.ifb->local_result.power +
- icache.prefetchb->local_result.power)*pppm_lkg;
-
- IB->power = IB->power_t + IB->local_result.power*pppm_lkg;
- power = power + icache.power + IB->power;
- if (coredynp.predictionW>0)
- {
- BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg;
- power = power + BTB->power + BPT->power;
- }
+void RENAMINGU::computeEnergy() {
+ if (!exist) return;
+
+ idcl->tdp_stats.reset();
+ idcl->rtp_stats.reset();
+ idcl->power_t.reset();
+ idcl->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ idcl->tdp_stats.readAc.access = core_params.decodeW;
+ idcl->rtp_stats.readAc.access = 3 * core_params.decodeW *
+ core_params.decodeW * core_stats.rename_reads;
+ } else if (core_params.issueW > 1) {
+ idcl->tdp_stats.readAc.access = core_params.decodeW;
+ idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions;
+ }
+ idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access *
+ idcl->power.readOp.dynamic;
+ idcl->power_t.readOp.leakage = idcl->power.readOp.leakage *
+ core_params.num_hthreads;
+ idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage *
+ core_params.num_hthreads;
+ idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access *
+ idcl->power.readOp.dynamic;
+
+ fdcl->tdp_stats.reset();
+ fdcl->rtp_stats.reset();
+ fdcl->power_t.reset();
+ fdcl->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ fdcl->tdp_stats.readAc.access = core_params.decodeW;
+ fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW *
+ core_params.fp_issueW * core_stats.fp_rename_writes;
+ } else if (core_params.issueW > 1) {
+ fdcl->tdp_stats.readAc.access = core_params.decodeW;
+ fdcl->rtp_stats.readAc.access = core_stats.fp_instructions;
+ }
+ fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access *
+ fdcl->power.readOp.dynamic;
+ fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage *
+ core_params.num_hthreads;
+ fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage *
+ core_params.num_hthreads;
+ fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access *
+ fdcl->power.readOp.dynamic;
+
+ if (iRRAT) {
+ iRRAT->tdp_stats.reset();
+ iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports;
+ iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports;
+ iRRAT->rtp_stats.reset();
+ iRRAT->rtp_stats.readAc.access = core_stats.rename_writes;
+ iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
+ iRRAT->power_t.reset();
+ iRRAT->power_t.readOp.dynamic +=
+ iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic +
+ iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
+ iRRAT->rt_power.reset();
+ iRRAT->rt_power.readOp.dynamic +=
+ iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic +
+ iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
+ iRRAT->power_t.readOp.leakage =
+ iRRAT->power.readOp.leakage * core_params.num_hthreads;
+ iRRAT->power_t.readOp.gate_leakage =
+ iRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic;
- ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic;
- ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic;
-
- ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access;
- ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access;
- ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access;
-
- power = power + (ID_inst->power +
- ID_operand->power +
- ID_misc->power);
- }
- else
- {
-// icache.rt_power = icache.power_t +
-// (icache.caches->local_result.power)*pppm_lkg +
-// (icache.missb->local_result.power +
-// icache.ifb->local_result.power +
-// icache.prefetchb->local_result.power)*pppm_Isub;
-
- icache.rt_power = icache.power_t +
- (icache.caches->local_result.power +
- icache.missb->local_result.power +
- icache.ifb->local_result.power +
- icache.prefetchb->local_result.power)*pppm_lkg;
-
- IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg;
- rt_power = rt_power + icache.rt_power + IB->rt_power;
- if (coredynp.predictionW>0)
- {
- BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg;
- rt_power = rt_power + BTB->rt_power + BPT->rt_power;
- }
+ if (ifreeL) {
+ ifreeL->tdp_stats.reset();
+ ifreeL->tdp_stats.readAc.access = core_params.decodeW;
+ ifreeL->tdp_stats.writeAc.access = core_params.decodeW;
+ ifreeL->rtp_stats.reset();
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ ifreeL->rtp_stats.readAc.access = core_stats.rename_reads;
+ ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes;
+ } else if (core_params.scheu_ty == ReservationStation) {
+ ifreeL->rtp_stats.readAc.access =
+ core_stats.rename_reads + core_stats.fp_rename_reads;
+ ifreeL->rtp_stats.writeAc.access =
+ 2 * (core_stats.rename_writes + core_stats.fp_rename_writes);
+ }
+ ifreeL->power_t.reset();
+ ifreeL->power_t.readOp.dynamic +=
+ ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic +
+ ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
+ ifreeL->rt_power.reset();
+ ifreeL->rt_power.readOp.dynamic +=
+ ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic +
+ ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
+ ifreeL->power_t.readOp.leakage =
+ ifreeL->power.readOp.leakage * core_params.num_hthreads;
+ ifreeL->power_t.readOp.gate_leakage =
+ ifreeL->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access;
- ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access;
- ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access;
+ if (fRRAT) {
+ fRRAT->tdp_stats.reset();
+ fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports;
+ fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports;
+ fRRAT->rtp_stats.reset();
+ fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes;
+ fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
+ fRRAT->power_t.reset();
+ fRRAT->power_t.readOp.dynamic +=
+ fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic +
+ fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
+ fRRAT->rt_power.reset();
+ fRRAT->rt_power.readOp.dynamic +=
+ fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic +
+ fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
+ fRRAT->power_t.readOp.leakage =
+ fRRAT->power.readOp.leakage * core_params.num_hthreads;
+ fRRAT->power_t.readOp.gate_leakage =
+ fRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- rt_power = rt_power + (ID_inst->rt_power +
- ID_operand->rt_power +
- ID_misc->rt_power);
+ if (ffreeL) {
+ ffreeL->tdp_stats.reset();
+ ffreeL->tdp_stats.readAc.access = core_params.decodeW;
+ ffreeL->tdp_stats.writeAc.access = core_params.decodeW;
+ ffreeL->rtp_stats.reset();
+ ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads;
+ ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes;
+ ffreeL->power_t.reset();
+ ffreeL->power_t.readOp.dynamic +=
+ ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic +
+ ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
+ ffreeL->rt_power.reset();
+ ffreeL->rt_power.readOp.dynamic +=
+ ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic +
+ ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
+ ffreeL->power_t.readOp.leakage =
+ ffreeL->power.readOp.leakage * core_params.num_hthreads;
+ ffreeL->power_t.readOp.gate_leakage =
+ ffreeL->power.readOp.gate_leakage * core_params.num_hthreads;
}
-}
-void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
-
- cout << indent_str<< "Instruction Cache:" << endl;
- cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.predictionW>0)
- {
- cout << indent_str<< "Branch Target Buffer:" << endl;
- cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (BPT->exist)
- {
- cout << indent_str<< "Branch Predictor:" << endl;
- cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3)
- {
- BPT->displayEnergy(indent+4, plevel, is_tdp);
- }
- }
- }
- cout << indent_str<< "Instruction Buffer:" << endl;
- cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Instruction Decoder:" << endl;
- cout << indent_str_next << "Area = " << (ID_inst->area.get_area() +
- ID_operand->area.get_area() +
- ID_misc->area.get_area())*coredynp.decodeW*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << (ID_inst->power.readOp.dynamic +
- ID_operand->power.readOp.dynamic +
- ID_misc->power.readOp.dynamic)*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? (ID_inst->power.readOp.longer_channel_leakage +
- ID_operand->power.readOp.longer_channel_leakage +
- ID_misc->power.readOp.longer_channel_leakage):
- (ID_inst->power.readOp.leakage +
- ID_operand->power.readOp.leakage +
- ID_misc->power.readOp.leakage)) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << (ID_inst->power.readOp.gate_leakage +
- ID_operand->power.readOp.gate_leakage +
- ID_misc->power.readOp.gate_leakage) << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << (ID_inst->rt_power.readOp.dynamic +
- ID_operand->rt_power.readOp.dynamic +
- ID_misc->rt_power.readOp.dynamic)/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
-// cout << indent_str_next << "Instruction Cache Peak Dynamic = " << icache.rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " << icache.rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Instruction Cache Gate Leakage = " << icache.rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Peak Dynamic = " << IB->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Subthreshold Leakage = " << IB->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Gate Leakage = " << IB->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Subthreshold Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Gate Leakage = " << BTB->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Peak Dynamic = " << BPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Subthreshold Leakage = " << BPT->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Gate Leakage = " << BPT->rt_power.readOp.gate_leakage << " W" << endl;
+ if (iFRAT) {
+ tdp_stats.reset();
+ if (core_params.rm_ty == RAMbased) {
+ iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports;
+ iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
+ iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports;
+ } else if ((core_params.rm_ty == CAMbased)) {
+ iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports;
+ iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
+ }
+ rtp_stats.reset();
+ iFRAT->rtp_stats.readAc.access = core_stats.rename_reads;
+ iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
+ if (core_params.scheu_ty == ReservationStation &&
+ core_params.rm_ty == RAMbased) {
+ iFRAT->rtp_stats.searchAc.access =
+ core_stats.committed_int_instructions;
+ }
+ iFRAT->power_t.reset();
+ iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access
+ * (iFRAT->local_result.power.readOp.dynamic
+ + idcl->power.readOp.dynamic)
+ + iFRAT->tdp_stats.writeAc.access
+ * iFRAT->local_result.power.writeOp.dynamic
+ + iFRAT->tdp_stats.searchAc.access
+ * iFRAT->local_result.power.searchOp.dynamic;
+ iFRAT->power_t.readOp.leakage =
+ iFRAT->power.readOp.leakage * core_params.num_hthreads;
+ iFRAT->power_t.readOp.gate_leakage =
+ iFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ iFRAT->rt_power.reset();
+ iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access
+ * (iFRAT->local_result.power.readOp.dynamic
+ + idcl->power.readOp.dynamic)
+ + iFRAT->rtp_stats.writeAc.access
+ * iFRAT->local_result.power.writeOp.dynamic
+ + iFRAT->rtp_stats.searchAc.access
+ * iFRAT->local_result.power.searchOp.dynamic;
+ }
+
+ if (fFRAT) {
+ tdp_stats.reset();
+ fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports;
+ if ((core_params.rm_ty == CAMbased)) {
+ fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports;
+ } else if (core_params.rm_ty == RAMbased) {
+ fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports;
+ if (core_params.scheu_ty == ReservationStation) {
+ fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports;
+ }
}
+ rtp_stats.reset();
+ fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads;
+ fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
+ if (core_params.scheu_ty == ReservationStation &&
+ core_params.rm_ty == RAMbased) {
+ fFRAT->rtp_stats.searchAc.access =
+ core_stats.committed_fp_instructions;
+ }
+ fFRAT->power_t.reset();
+ fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access
+ * (fFRAT->local_result.power.readOp.dynamic
+ + fdcl->power.readOp.dynamic)
+ + fFRAT->tdp_stats.writeAc.access
+ * fFRAT->local_result.power.writeOp.dynamic
+ + fFRAT->tdp_stats.searchAc.access
+ * fFRAT->local_result.power.searchOp.dynamic;
+ fFRAT->power_t.readOp.leakage =
+ fFRAT->power.readOp.leakage * core_params.num_hthreads;
+ fFRAT->power_t.readOp.gate_leakage =
+ fFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ fFRAT->rt_power.reset();
+ fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access
+ * (fFRAT->local_result.power.readOp.dynamic
+ + fdcl->power.readOp.dynamic)
+ + fFRAT->rtp_stats.writeAc.access
+ * fFRAT->local_result.power.writeOp.dynamic
+ + fFRAT->rtp_stats.searchAc.access
+ * fFRAT->local_result.power.searchOp.dynamic;
+ }
+ output_data.reset();
+ if (iFRAT) {
+ iFRAT->output_data.peak_dynamic_power =
+ iFRAT->power_t.readOp.dynamic * clockRate;
+ iFRAT->output_data.subthreshold_leakage_power =
+ iFRAT->power_t.readOp.leakage;
+ iFRAT->output_data.gate_leakage_power =
+ iFRAT->power_t.readOp.gate_leakage;
+ iFRAT->output_data.runtime_dynamic_energy =
+ iFRAT->rt_power.readOp.dynamic;
+ output_data += iFRAT->output_data;
+ }
+ if (fFRAT) {
+ fFRAT->output_data.peak_dynamic_power =
+ fFRAT->power_t.readOp.dynamic * clockRate;
+ fFRAT->output_data.subthreshold_leakage_power =
+ fFRAT->power_t.readOp.leakage;
+ fFRAT->output_data.gate_leakage_power =
+ fFRAT->power_t.readOp.gate_leakage;
+ fFRAT->output_data.runtime_dynamic_energy =
+ fFRAT->rt_power.readOp.dynamic;
+ output_data += fFRAT->output_data;
+ }
+ if (iRRAT) {
+ iRRAT->output_data.peak_dynamic_power =
+ iRRAT->power_t.readOp.dynamic * clockRate;
+ iRRAT->output_data.subthreshold_leakage_power =
+ iRRAT->power_t.readOp.leakage;
+ iRRAT->output_data.gate_leakage_power =
+ iRRAT->power_t.readOp.gate_leakage;
+ iRRAT->output_data.runtime_dynamic_energy =
+ iRRAT->rt_power.readOp.dynamic;
+ output_data += iRRAT->output_data;
+ }
+ if (fRRAT) {
+ fRRAT->output_data.peak_dynamic_power =
+ fRRAT->power_t.readOp.dynamic * clockRate;
+ fRRAT->output_data.subthreshold_leakage_power =
+ fRRAT->power_t.readOp.leakage;
+ fRRAT->output_data.gate_leakage_power =
+ fRRAT->power_t.readOp.gate_leakage;
+ fRRAT->output_data.runtime_dynamic_energy =
+ fRRAT->rt_power.readOp.dynamic;
+ output_data += fRRAT->output_data;
+ }
+ if (ifreeL) {
+ ifreeL->output_data.peak_dynamic_power =
+ ifreeL->power_t.readOp.dynamic * clockRate;
+ ifreeL->output_data.subthreshold_leakage_power =
+ ifreeL->power_t.readOp.leakage;
+ ifreeL->output_data.gate_leakage_power =
+ ifreeL->power_t.readOp.gate_leakage;
+ ifreeL->output_data.runtime_dynamic_energy =
+ ifreeL->rt_power.readOp.dynamic;
+ output_data += ifreeL->output_data;
+ }
+ if (ffreeL) {
+ ffreeL->output_data.peak_dynamic_power =
+ ffreeL->power_t.readOp.dynamic * clockRate;
+ ffreeL->output_data.subthreshold_leakage_power =
+ ffreeL->power_t.readOp.leakage;
+ ffreeL->output_data.gate_leakage_power =
+ ffreeL->power_t.readOp.gate_leakage;
+ ffreeL->output_data.runtime_dynamic_energy =
+ ffreeL->rt_power.readOp.dynamic;
+ output_data += ffreeL->output_data;
+ }
+ if (idcl) {
+ idcl->output_data.peak_dynamic_power =
+ idcl->power_t.readOp.dynamic * clockRate;
+ idcl->output_data.subthreshold_leakage_power =
+ idcl->power_t.readOp.leakage;
+ idcl->output_data.gate_leakage_power =
+ idcl->power_t.readOp.gate_leakage;
+ idcl->output_data.runtime_dynamic_energy =
+ idcl->rt_power.readOp.dynamic;
+ output_data += idcl->output_data;
+ }
+ if (fdcl) {
+ fdcl->output_data.peak_dynamic_power =
+ fdcl->power_t.readOp.dynamic * clockRate;
+ fdcl->output_data.subthreshold_leakage_power =
+ fdcl->power_t.readOp.leakage;
+ fdcl->output_data.gate_leakage_power =
+ fdcl->power_t.readOp.gate_leakage;
+ fdcl->output_data.runtime_dynamic_energy =
+ fdcl->rt_power.readOp.dynamic;
+ output_data += fdcl->output_data;
+ }
+ if (RAHT) {
+ output_data += RAHT->output_data;
+ }
}
-void RENAMINGU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double pppm_t[4] = {1,1,1,1};
- if (is_tdp)
- {//init stats for Peak
- if (coredynp.core_ty==OOO){
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
- }
-
- iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports;
- iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports;
- iRRAT->tdp_stats = iRRAT->stats_t;
-
- fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports;
- fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports;
- fRRAT->tdp_stats = fRRAT->stats_t;
-
- ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;;
- ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
- ifreeL->tdp_stats = ifreeL->stats_t;
-
- ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports;
- ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports;
- ffreeL->tdp_stats = ffreeL->stats_t;
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->stats_t.searchAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->stats_t.searchAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
- }
- //Unified free list for both int and fp
- ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;
- ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
- ifreeL->tdp_stats = ifreeL->stats_t;
- }
- idcl->stats_t.readAc.access = coredynp.decodeW;
- fdcl->stats_t.readAc.access = coredynp.decodeW;
- idcl->tdp_stats = idcl->stats_t;
- fdcl->tdp_stats = fdcl->stats_t;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->stats_t.readAc.access = coredynp.decodeW;
- fdcl->stats_t.readAc.access = coredynp.decodeW;
- idcl->tdp_stats = idcl->stats_t;
- fdcl->tdp_stats = fdcl->stats_t;
- }
- }
+void RENAMINGU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
- }
- else
- {//init stats for Runtime Dynamic (RTP)
- if (coredynp.core_ty==OOO){
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
-
- iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16
- iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iRRAT->rtp_stats = iRRAT->stats_t;
-
- fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16
- fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fRRAT->rtp_stats = fRRAT->stats_t;
-
- ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes;
- ifreeL->rtp_stats = ifreeL->stats_t;
-
- ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- ffreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].fp_rename_writes;
- ffreeL->rtp_stats = ffreeL->stats_t;
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs.
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- //Unified free list for both int and fp since the ROB act as physcial registers
- ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads +
- XML->sys.core[ithCore].fp_rename_reads;
- ifreeL->stats_t.writeAc.access = 2*(XML->sys.core[ithCore].rename_writes +
- XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group
- //are terminated early
- ifreeL->rtp_stats = ifreeL->stats_t;
- }
- idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads;
- fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes;
- idcl->rtp_stats = idcl->stats_t;
- fdcl->rtp_stats = fdcl->stats_t;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions;
- fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions;
- idcl->rtp_stats = idcl->stats_t;
- fdcl->rtp_stats = fdcl->stats_t;
- }
- }
+ McPATComponent::displayData(indent, plevel);
+ if (core_params.core_ty == OOO) {
+ iFRAT->displayData(indent + 4, plevel);
+ fFRAT->displayData(indent + 4, plevel);
+ ifreeL->displayData(indent + 4, plevel);
+
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ iRRAT->displayData(indent + 4, plevel);
+ fRRAT->displayData(indent + 4, plevel);
+ ffreeL->displayData(indent + 4, plevel);
}
- /* Compute engine */
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
-
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
-
- iRRAT->power_t.reset();
- fRRAT->power_t.reset();
- ifreeL->power_t.reset();
- ffreeL->power_t.reset();
-
- iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic
- +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic);
- fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic
- +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic);
- ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
- +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
- ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic
- +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic);
+ }
+ idcl->displayData(indent + 4, plevel);
+ fdcl->displayData(indent + 4, plevel);
+}
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
-
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic
- +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic
- +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic);
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
- ifreeL->power_t.reset();
- ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
- +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
- }
+void SchedulerU::computeEnergy() {
+ if (!exist) return;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->power_t.reset();
- fdcl->power_t.reset();
- set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
- idcl->power_t = idcl->power * pppm_t;
- set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
- fdcl->power_t = fdcl->power * pppm_t;
- }
+ double ROB_duty_cycle;
+ ROB_duty_cycle = 1;
- }
+ if (int_instruction_selection) {
+ int_instruction_selection->computeEnergy();
+ }
- //assign value to tpd and rtp
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- iRRAT->power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- fRRAT->power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- ffreeL->power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
- power = power + (iFRAT->power + fFRAT->power)
- + (iRRAT->power + fRRAT->power)
- + (ifreeL->power + ffreeL->power);
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- power = power + (iFRAT->power + fFRAT->power)
- + ifreeL->power;
- }
- }
- else
- {
- power = power + idcl->power_t + fdcl->power_t;
- }
+ if (fp_instruction_selection) {
+ fp_instruction_selection->computeEnergy();
+ }
+ if (int_inst_window) {
+ int_inst_window->tdp_stats.reset();
+ int_inst_window->rtp_stats.reset();
+ int_inst_window->power_t.reset();
+ int_inst_window->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ int_inst_window->tdp_stats.readAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.writeAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.searchAc.access =
+ core_params.issueW * core_params.num_pipelines;
+
+ int_inst_window->power_t.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->tdp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->tdp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->tdp_stats.writeAc.access;
+
+ int_inst_window->rtp_stats.readAc.access =
+ core_stats.inst_window_reads;
+ int_inst_window->rtp_stats.writeAc.access =
+ core_stats.inst_window_writes;
+ int_inst_window->rtp_stats.searchAc.access =
+ core_stats.inst_window_wakeup_accesses;
+
+ int_inst_window->rt_power.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->rtp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->rtp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->rtp_stats.writeAc.access;
+ } else if (core_params.multithreaded) {
+ int_inst_window->tdp_stats.readAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.writeAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.searchAc.access =
+ core_params.issueW * core_params.num_pipelines;
+
+ int_inst_window->power_t.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->tdp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->tdp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->tdp_stats.writeAc.access;
+
+ int_inst_window->rtp_stats.readAc.access =
+ core_stats.int_instructions + core_stats.fp_instructions;
+ int_inst_window->rtp_stats.writeAc.access =
+ core_stats.int_instructions + core_stats.fp_instructions;
+ int_inst_window->rtp_stats.searchAc.access =
+ 2 * (core_stats.int_instructions + core_stats.fp_instructions);
+
+ int_inst_window->rt_power.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->rtp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->rtp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->rtp_stats.writeAc.access;
}
- else
- {
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power)
- + (iRRAT->rt_power + fRRAT->rt_power)
- + (ifreeL->rt_power + ffreeL->rt_power);
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power)
- + ifreeL->rt_power;
- }
- }
- else
- {
- rt_power = rt_power + idcl->power_t + fdcl->power_t;
- }
+ }
- }
-}
+ if (fp_inst_window) {
+ fp_inst_window->tdp_stats.reset();
+ fp_inst_window->tdp_stats.readAc.access =
+ fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines;
+ fp_inst_window->tdp_stats.writeAc.access =
+ fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines;
+ fp_inst_window->tdp_stats.searchAc.access =
+ fp_inst_window->l_ip.num_search_ports *
+ core_params.num_fp_pipelines;
+
+ fp_inst_window->rtp_stats.reset();
+ fp_inst_window->rtp_stats.readAc.access =
+ core_stats.fp_inst_window_reads;
+ fp_inst_window->rtp_stats.writeAc.access =
+ core_stats.fp_inst_window_writes;
+ fp_inst_window->rtp_stats.searchAc.access =
+ core_stats.fp_inst_window_wakeup_accesses;
+
+ fp_inst_window->power_t.reset();
+ fp_inst_window->power_t.readOp.dynamic +=
+ fp_inst_window->power.readOp.dynamic *
+ fp_inst_window->tdp_stats.readAc.access +
+ fp_inst_window->power.searchOp.dynamic *
+ fp_inst_window->tdp_stats.searchAc.access +
+ fp_inst_window->power.writeOp.dynamic *
+ fp_inst_window->tdp_stats.writeAc.access;
+
+ fp_inst_window->rt_power.reset();
+ fp_inst_window->rt_power.readOp.dynamic +=
+ fp_inst_window->power.readOp.dynamic *
+ fp_inst_window->rtp_stats.readAc.access +
+ fp_inst_window->power.searchOp.dynamic *
+ fp_inst_window->rtp_stats.searchAc.access +
+ fp_inst_window->power.writeOp.dynamic *
+ fp_inst_window->rtp_stats.writeAc.access;
+ }
-void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
-
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str<< "Int Front End RAT:" << endl;
- cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Front End RAT:" << endl;
- cout << indent_str_next << "Area = " << fFRAT->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fFRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fFRAT->power.readOp.longer_channel_leakage:fFRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<<"Free List:" << endl;
- cout << indent_str_next << "Area = " << ifreeL->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ifreeL->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ifreeL->power.readOp.longer_channel_leakage:ifreeL->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ifreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
-
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- cout << indent_str<< "Int Retire RAT: " << endl;
- cout << indent_str_next << "Area = " << iRRAT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << iRRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? iRRAT->power.readOp.longer_channel_leakage:iRRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << iRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Retire RAT:" << endl;
- cout << indent_str_next << "Area = " << fRRAT->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fRRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fRRAT->power.readOp.longer_channel_leakage:fRRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Free List:" << endl;
- cout << indent_str_next << "Area = " << ffreeL->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ffreeL->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ffreeL->power.readOp.longer_channel_leakage:ffreeL->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ffreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str<< "Int DCL:" << endl;
- cout << indent_str_next << "Peak Dynamic = " << idcl->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? idcl->power.readOp.longer_channel_leakage:idcl->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << idcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout << indent_str<<"FP DCL:" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fdcl->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fdcl->power.readOp.longer_channel_leakage:fdcl->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fdcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
- }
- }
- else
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str_next << "Int Front End RAT Peak Dynamic = " << iFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " << iFRAT->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Int Front End RAT Gate Leakage = " << iFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Peak Dynamic = " << fFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " << fFRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Free List Peak Dynamic = " << ifreeL->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Free List Subthreshold Leakage = " << ifreeL->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- cout << indent_str_next << "Int Retire RAT Peak Dynamic = " << iRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " << iRRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Int Retire RAT Gate Leakage = " << iRRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Peak Dynamic = " << fRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " << fRRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Gate Leakage = " << fRRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Free List Peak Dynamic = " << ffreeL->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Free List Subthreshold Leakage = " << ffreeL->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
- else
- {
- cout << indent_str_next << "Int DCL Peak Dynamic = " << idcl->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int DCL Subthreshold Leakage = " << idcl->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP DCL Peak Dynamic = " << fdcl->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP DCL Subthreshold Leakage = " << fdcl->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+ if (ROB) {
+ ROB->tdp_stats.reset();
+ ROB->tdp_stats.readAc.access = core_params.commitW *
+ core_params.num_pipelines * ROB_duty_cycle;
+ ROB->tdp_stats.writeAc.access = core_params.issueW *
+ core_params.num_pipelines * ROB_duty_cycle;
+ ROB->rtp_stats.reset();
+ ROB->rtp_stats.readAc.access = core_stats.ROB_reads;
+ ROB->rtp_stats.writeAc.access = core_stats.ROB_writes;
+ ROB->power_t.reset();
+ ROB->power_t.readOp.dynamic +=
+ ROB->local_result.power.readOp.dynamic *
+ ROB->tdp_stats.readAc.access +
+ ROB->local_result.power.writeOp.dynamic *
+ ROB->tdp_stats.writeAc.access;
+ ROB->rt_power.reset();
+ ROB->rt_power.readOp.dynamic +=
+ ROB->local_result.power.readOp.dynamic *
+ ROB->rtp_stats.readAc.access +
+ ROB->local_result.power.writeOp.dynamic *
+ ROB->rtp_stats.writeAc.access;
+ }
+
+ output_data.reset();
+ if (int_inst_window) {
+ int_inst_window->output_data.subthreshold_leakage_power =
+ int_inst_window->power_t.readOp.leakage;
+ int_inst_window->output_data.gate_leakage_power =
+ int_inst_window->power_t.readOp.gate_leakage;
+ int_inst_window->output_data.peak_dynamic_power =
+ int_inst_window->power_t.readOp.dynamic * clockRate;
+ int_inst_window->output_data.runtime_dynamic_energy =
+ int_inst_window->rt_power.readOp.dynamic;
+ output_data += int_inst_window->output_data;
+ }
+ if (fp_inst_window) {
+ fp_inst_window->output_data.subthreshold_leakage_power =
+ fp_inst_window->power_t.readOp.leakage;
+ fp_inst_window->output_data.gate_leakage_power =
+ fp_inst_window->power_t.readOp.gate_leakage;
+ fp_inst_window->output_data.peak_dynamic_power =
+ fp_inst_window->power_t.readOp.dynamic * clockRate;
+ fp_inst_window->output_data.runtime_dynamic_energy =
+ fp_inst_window->rt_power.readOp.dynamic;
+ output_data += fp_inst_window->output_data;
+ }
+ if (ROB) {
+ ROB->output_data.peak_dynamic_power =
+ ROB->power_t.readOp.dynamic * clockRate;
+ ROB->output_data.runtime_dynamic_energy =
+ ROB->rt_power.readOp.dynamic;
+ output_data += ROB->output_data;
+ }
+ // Integer and FP instruction selection logic is not included in the
+ // roll-up due to the uninitialized area
+ /*
+ if (int_instruction_selection) {
+ output_data += int_instruction_selection->output_data;
+ }
+ if (fp_instruction_selection) {
+ output_data += fp_instruction_selection->output_data;
+ }
+ */
}
+void SchedulerU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
-void SchedulerU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double ROB_duty_cycle;
-// ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
-// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
-// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1;
- ROB_duty_cycle = 1;
- //init stats
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
- int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
- int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
- int_inst_window->tdp_stats = int_inst_window->stats_t;
- fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines;
- fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines;
- fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines;
- fp_inst_window->tdp_stats = fp_inst_window->stats_t;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle;
- ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle;
- ROB->tdp_stats = ROB->stats_t;
-
- /*
- * When inst commits, ROB must be read.
- * Because for Physcial register based cores, physical register tag in ROB
- * need to be read out and write into RRAT/CAM based RAT.
- * For RS based cores, register content that stored in ROB must be
- * read out and stored in architectural registers.
- *
- * if no-register is involved, the ROB read out operation when instruction commits can be ignored.
- * assuming 20% insts. belong this type.
- * TODO: ROB duty_cycle need to be revisited
- */
- }
+ McPATComponent::displayData(indent, plevel);
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
- int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
- int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
- int_inst_window->tdp_stats = int_inst_window->stats_t;
- }
+ if (core_params.core_ty == OOO) {
+ int_inst_window->displayData(indent + 4, plevel);
+ fp_inst_window->displayData(indent + 4, plevel);
+ if (core_params.ROB_size > 0) {
+ ROB->displayData(indent + 4, plevel);
+ }
+ } else if (core_params.multithreaded) {
+ int_inst_window->displayData(indent + 4, plevel);
+ }
- }
- else
- {//rtp
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads;
- int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes;
- int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses;
- int_inst_window->rtp_stats = int_inst_window->stats_t;
- fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads;
- fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes;
- fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses;
- fp_inst_window->rtp_stats = fp_inst_window->stats_t;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
-
- ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads;
- ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes;
- /* ROB need to be updated in RS based OOO when new values are produced,
- * this update may happen before the commit stage when ROB entry is released
- * 1. ROB write at instruction inserted in
- * 2. ROB write as results produced (for RS based OOO only)
- * 3. ROB read as instruction committed. For RS based OOO, data values are read out and sent to ARF
- * For Physical reg based OOO, no data stored in ROB, but register tags need to be
- * read out and used to set the RRAT and to recycle the register tag to free list buffer
- */
- ROB->rtp_stats = ROB->stats_t;
- }
+ // Integer and FP instruction selection logic is not included in the
+ // roll-up due to the uninitialized area
+ /*
+ if (int_instruction_selection) {
+ int_instruction_selection->displayData(indent + 4, plevel);
+ }
+ if (fp_instruction_selection) {
+ fp_instruction_selection->displayData(indent + 4, plevel);
+ }
+ */
+}
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
- int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
- int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions);
- int_inst_window->rtp_stats = int_inst_window->stats_t;
- }
+void LoadStoreU::computeEnergy() {
+ if (!exist) return;
+
+ LSQ->tdp_stats.reset();
+ LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LSQ->rtp_stats.reset();
+ // Flush overhead considered
+ LSQ->rtp_stats.readAc.access = (core_stats.load_instructions +
+ core_stats.store_instructions) * 2;
+ LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions +
+ core_stats.store_instructions) * 2;
+ LSQ->power_t.reset();
+ // Every memory access involves at least two operations on LSQ
+ LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access *
+ (LSQ->local_result.power.searchOp.dynamic +
+ LSQ->local_result.power.readOp.dynamic) +
+ LSQ->tdp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic;
+ LSQ->rt_power.reset();
+ // Every memory access involves at least two operations on LSQ
+ LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access *
+ (LSQ->local_result.power.searchOp.dynamic +
+ LSQ->local_result.power.readOp.dynamic) +
+ LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic;
+
+ if (LoadQ) {
+ LoadQ->tdp_stats.reset();
+ LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LoadQ->rtp_stats.reset();
+ LoadQ->rtp_stats.readAc.access = core_stats.load_instructions +
+ core_stats.store_instructions;
+ LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions +
+ core_stats.store_instructions;
+ LoadQ->power_t.reset();
+ // Every memory access involves at least two operations on LoadQ
+ LoadQ->power_t.readOp.dynamic +=
+ LoadQ->tdp_stats.readAc.access *
+ (LoadQ->local_result.power.searchOp.dynamic +
+ LoadQ->local_result.power.readOp.dynamic) +
+ LoadQ->tdp_stats.writeAc.access *
+ LoadQ->local_result.power.writeOp.dynamic;
+ LoadQ->rt_power.reset();
+ // Every memory access involves at least two operations on LoadQ
+ LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access *
+ (LoadQ->local_result.power.searchOp.dynamic +
+ LoadQ->local_result.power.readOp.dynamic) +
+ LoadQ->rtp_stats.writeAc.access *
+ LoadQ->local_result.power.writeOp.dynamic;
}
- //computation engine
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->power_t.reset();
- fp_inst_window->power_t.reset();
-
- /* each instruction needs to write to scheduler, read out when all resources and source operands are ready
- * two search ops with one for each source operand
- *
- */
- int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
- + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
- + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access
- + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic;
-
- fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access
- + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access
- + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access
- + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->power_t.reset();
- ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access +
- ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic;
- }
+ McPATComponent::computeEnergy();
+
+ output_data.reset();
+ if (dcache) {
+ output_data += dcache->output_data;
+ }
+ if (LSQ) {
+ LSQ->output_data.peak_dynamic_power =
+ LSQ->power_t.readOp.dynamic * clockRate;
+ LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic;
+ output_data += LSQ->output_data;
+ }
+ if (LoadQ) {
+ LoadQ->output_data.peak_dynamic_power =
+ LoadQ->power_t.readOp.dynamic * clockRate;
+ LoadQ->output_data.runtime_dynamic_energy =
+ LoadQ->rt_power.readOp.dynamic;
+ output_data += LoadQ->output_data;
+ }
+}
+void LoadStoreU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
+ McPATComponent::displayData(indent, plevel);
+ if (LoadQ) {
+ LoadQ->displayData(indent + 4, plevel);
+ }
+ LSQ->displayData(indent + 4, plevel);
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->power_t.reset();
- int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
- + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
- + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access
- + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
- }
+}
- //assign values
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- power = power + int_inst_window->power + fp_inst_window->power;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg;
- power = power + ROB->power;
- }
+void MemManU::computeEnergy() {
+ if (!exist) return;
- }
- else if (coredynp.multithreaded)
- {
- // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
- int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- power = power + int_inst_window->power;
- }
+ itlb->tdp_stats.reset();
+ itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports;
+ itlb->tdp_stats.readAc.miss = 0;
+ itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access -
+ itlb->tdp_stats.readAc.miss;
+ itlb->rtp_stats.reset();
+ itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses;
+ itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses;
- }
- else
- {//rtp
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg;
- rt_power = rt_power + ROB->rt_power;
- }
+ itlb->power_t.reset();
+ // FA spends most power in the tag, so use total accesses, not hits
+ itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access *
+ itlb->local_result.power.searchOp.dynamic +
+ itlb->tdp_stats.readAc.miss *
+ itlb->local_result.power.writeOp.dynamic;
+ itlb->rt_power.reset();
+ // FA spends most power in the tag, so use total accesses, not hits
+ itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access *
+ itlb->local_result.power.searchOp.dynamic +
+ itlb->rtp_stats.writeAc.access *
+ itlb->local_result.power.writeOp.dynamic;
+
+ dtlb->tdp_stats.reset();
+ dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ dtlb->tdp_stats.readAc.miss = 0;
+ dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access -
+ dtlb->tdp_stats.readAc.miss;
+ dtlb->rtp_stats.reset();
+ dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses +
+ mem_man_stats.dtlb_write_misses;
+ dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses +
+ mem_man_stats.dtlb_read_misses;
- }
- else if (coredynp.multithreaded)
- {
- // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
- int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- rt_power = rt_power + int_inst_window->rt_power;
- }
+ dtlb->power_t.reset();
+ // FA spends most power in the tag, so use total accesses, not hits
+ dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access *
+ dtlb->local_result.power.searchOp.dynamic +
+ dtlb->tdp_stats.readAc.miss *
+ dtlb->local_result.power.writeOp.dynamic;
+ dtlb->rt_power.reset();
+ // FA spends most power in the tag, so use total accesses, not hits
+ dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access *
+ dtlb->local_result.power.searchOp.dynamic +
+ dtlb->rtp_stats.writeAc.access *
+ dtlb->local_result.power.writeOp.dynamic;
+
+ output_data.reset();
+ if (itlb) {
+ itlb->output_data.peak_dynamic_power = itlb->power_t.readOp.dynamic *
+ clockRate;
+ itlb->output_data.runtime_dynamic_energy =
+ itlb->rt_power.readOp.dynamic;
+ output_data += itlb->output_data;
+ }
+ if (dtlb) {
+ dtlb->output_data.peak_dynamic_power =
+ dtlb->power_t.readOp.dynamic * clockRate;
+ dtlb->output_data.runtime_dynamic_energy =
+ dtlb->rt_power.readOp.dynamic;
+ output_data += dtlb->output_data;
}
-// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
-// cout<<"Scheduler power="<<power.readOp.dynamic<<"leakage="<<power.readOp.leakage<<endl;
-// cout<<"IW="<<int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.readAc.access +
-// + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access<<"leakage="<<int_inst_window->local_result.power.readOp.leakage<<endl;
-// cout<<"selection"<<instruction_selection->power.readOp.dynamic<<"leakage"<<instruction_selection->power.readOp.leakage<<endl;
}
-void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str << "Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "FP Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << fp_inst_window->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fp_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fp_inst_window->power.readOp.longer_channel_leakage:fp_inst_window->power.readOp.leakage ) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fp_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- cout << indent_str<<"ROB:" << endl;
- cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else if (coredynp.multithreaded)
- {
- cout << indent_str << "Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- cout << indent_str_next << "ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
- else if (coredynp.multithreaded)
- {
- cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+/*
+ * Display this unit's own report, then the ITLB and DTLB reports with the
+ * indent increased by 4. Nothing is displayed when the unit does not exist.
+ */
+void MemManU::displayData(uint32_t indent, int plevel) {
+    if (exist) {
+        const uint32_t tlb_indent = indent + 4;
+
+        McPATComponent::displayData(indent, plevel);
+        itlb->displayData(tlb_indent, plevel);
+        dtlb->displayData(tlb_indent, plevel);
+    }
}
-void LoadStoreU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
- dcache.caches->stats_t.readAc.miss = 0;
- dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
- dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
- dcache.caches->stats_t.writeAc.miss = 0;
- dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss;
- dcache.caches->tdp_stats = dcache.caches->stats_t;
-
- dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports;
- dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports;
- dcache.missb->tdp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.ifb->tdp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports;
- dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t;
- if (cache_p==Write_back)
- {
- dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports;
- dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports;
- dcache.wbb->tdp_stats = dcache.wbb->stats_t;
- }
+/**
+ * Compute peak (TDP) and runtime dynamic power for the register files and
+ * sum their results into this unit's output_data.
+ *
+ * Architecture RF and physical RF cannot be present at the same time.
+ * Therefore, the RF stats can only refer to either ARF or PRF;
+ * And the same stats can be used for both.
+ *
+ * Every register file is dereferenced only behind the same pointer test
+ * that the aggregation pass uses, so an absent file is skipped in both
+ * passes instead of being dereferenced in the first one.
+ */
+void RegFU::computeEnergy() {
+    if (!exist) return;
+
+    if (IRF) {
+        // Peak (TDP) accesses. Rule of thumb: about 10% of RF related
+        // instructions do not need to access ALUs, hence the 1.1 factor.
+        IRF->tdp_stats.reset();
+        IRF->tdp_stats.readAc.access =
+            core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS *
+            (core_stats.ALU_duty_cycle * 1.1 +
+             (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) *
+            core_params.num_pipelines;
+        IRF->tdp_stats.writeAc.access =
+            core_params.issueW *
+            (core_stats.ALU_duty_cycle * 1.1 +
+             (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) *
+            core_params.num_pipelines;
+
+        // Runtime accesses come from the core statistics; with register
+        // windowing every function call adds RFWIN_ACCESS_MULTIPLIER
+        // reads and writes.
+        IRF->rtp_stats.reset();
+        IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads;
+        IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes;
+        if (core_params.regWindowing) {
+            IRF->rtp_stats.readAc.access += core_stats.function_calls *
+                RFWIN_ACCESS_MULTIPLIER;
+            IRF->rtp_stats.writeAc.access += core_stats.function_calls *
+                RFWIN_ACCESS_MULTIPLIER;
+        }
+
+        // Dynamic terms: access counts weighted by the per-access read and
+        // write dynamic values of the array.
+        IRF->power_t.reset();
+        IRF->power_t.readOp.dynamic +=
+            IRF->tdp_stats.readAc.access *
+            IRF->local_result.power.readOp.dynamic +
+            IRF->tdp_stats.writeAc.access *
+            IRF->local_result.power.writeOp.dynamic;
+        IRF->rt_power.reset();
+        IRF->rt_power.readOp.dynamic +=
+            IRF->rtp_stats.readAc.access *
+            IRF->local_result.power.readOp.dynamic +
+            IRF->rtp_stats.writeAc.access *
+            IRF->local_result.power.writeOp.dynamic;
+    }
+
+    if (FRF) {
+        // Peak (TDP) accesses are derived from the port counts, the FPU
+        // duty cycle (scaled by 1.05) and the number of FP pipelines.
+        FRF->tdp_stats.reset();
+        FRF->tdp_stats.readAc.access =
+            FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 *
+            core_params.num_fp_pipelines;
+        FRF->tdp_stats.writeAc.access =
+            FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 *
+            core_params.num_fp_pipelines;
+
+        FRF->rtp_stats.reset();
+        FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads;
+        FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes;
+        if (core_params.regWindowing) {
+            FRF->rtp_stats.readAc.access += core_stats.function_calls *
+                RFWIN_ACCESS_MULTIPLIER;
+            FRF->rtp_stats.writeAc.access += core_stats.function_calls *
+                RFWIN_ACCESS_MULTIPLIER;
+        }
+
+        FRF->power_t.reset();
+        FRF->power_t.readOp.dynamic +=
+            FRF->tdp_stats.readAc.access *
+            FRF->local_result.power.readOp.dynamic +
+            FRF->tdp_stats.writeAc.access *
+            FRF->local_result.power.writeOp.dynamic;
+        FRF->rt_power.reset();
+        FRF->rt_power.readOp.dynamic +=
+            FRF->rtp_stats.readAc.access *
+            FRF->local_result.power.readOp.dynamic +
+            FRF->rtp_stats.writeAc.access *
+            FRF->local_result.power.writeOp.dynamic;
+    }
+
+    if (core_params.regWindowing && RFWIN) {
+        // The register window RF contributes no peak accesses; at runtime
+        // it is accessed RFWIN_ACCESS_MULTIPLIER times per function call.
+        RFWIN->tdp_stats.reset();
+        RFWIN->tdp_stats.readAc.access = 0;
+        RFWIN->tdp_stats.writeAc.access = 0;
+        RFWIN->rtp_stats.reset();
+        RFWIN->rtp_stats.readAc.access =
+            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
+        RFWIN->rtp_stats.writeAc.access =
+            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
+
+        RFWIN->power_t.reset();
+        RFWIN->power_t.readOp.dynamic +=
+            RFWIN->tdp_stats.readAc.access *
+            RFWIN->local_result.power.readOp.dynamic +
+            RFWIN->tdp_stats.writeAc.access *
+            RFWIN->local_result.power.writeOp.dynamic;
+        RFWIN->rt_power.reset();
+        RFWIN->rt_power.readOp.dynamic +=
+            RFWIN->rtp_stats.readAc.access *
+            RFWIN->local_result.power.readOp.dynamic +
+            RFWIN->rtp_stats.writeAc.access *
+            RFWIN->local_result.power.writeOp.dynamic;
+    }
- LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- LSQ->tdp_stats = LSQ->stats_t;
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- LoadQ->tdp_stats = LoadQ->stats_t;
- }
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses;
- dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses;
- dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
- dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses;
- dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses;
- dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss;
- dcache.caches->rtp_stats = dcache.caches->stats_t;
-
- if (cache_p==Write_back)
- {
- dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.missb->rtp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.ifb->rtp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
-
- dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.wbb->rtp_stats = dcache.wbb->stats_t;
- }
- else
- {
- dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.missb->rtp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.ifb->rtp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
- }
+    // Aggregate the per-file results. Peak dynamic power is the TDP dynamic
+    // term times clockRate; the runtime dynamic term is stored as-is.
+    // NOTE(review): the leakage fields are scaled by num_hthreads in place
+    // (*=), so a second call to computeEnergy() would apply the factor
+    // again -- confirm this method runs once per component.
+    output_data.reset();
+    if (IRF) {
+        IRF->output_data.peak_dynamic_power =
+            IRF->power_t.readOp.dynamic * clockRate;
+        IRF->output_data.subthreshold_leakage_power *=
+            core_params.num_hthreads;
+        IRF->output_data.gate_leakage_power *= core_params.num_hthreads;
+        IRF->output_data.runtime_dynamic_energy =
+            IRF->rt_power.readOp.dynamic;
+        output_data += IRF->output_data;
+    }
+    if (FRF) {
+        FRF->output_data.peak_dynamic_power =
+            FRF->power_t.readOp.dynamic * clockRate;
+        FRF->output_data.subthreshold_leakage_power *=
+            core_params.num_hthreads;
+        FRF->output_data.gate_leakage_power *= core_params.num_hthreads;
+        FRF->output_data.runtime_dynamic_energy =
+            FRF->rt_power.readOp.dynamic;
+        output_data += FRF->output_data;
+    }
+    // Unlike IRF/FRF, the RFWIN leakage fields are not scaled here.
+    if (RFWIN) {
+        RFWIN->output_data.peak_dynamic_power =
+            RFWIN->power_t.readOp.dynamic * clockRate;
+        RFWIN->output_data.runtime_dynamic_energy =
+            RFWIN->rt_power.readOp.dynamic;
+        output_data += RFWIN->output_data;
+    }
+}
- LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered
- LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;
- LSQ->rtp_stats = LSQ->stats_t;
+/**
+ * Display this unit's own report, then the report of every register file
+ * that is present, with the indent increased by 4.
+ *
+ * RegFU::computeEnergy() null-tests IRF, FRF and RFWIN before summing their
+ * results, so the same pointers are tested here before being dereferenced.
+ * The register window RF is additionally shown only with regWindowing.
+ */
+void RegFU::displayData(uint32_t indent, int plevel) {
+    if (!exist) return;
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
- LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
- LoadQ->rtp_stats = LoadQ->stats_t;
- }
+    McPATComponent::displayData(indent, plevel);
- }
+    if (IRF) {
+        IRF->displayData(indent + 4, plevel);
+    }
+    if (FRF) {
+        FRF->displayData(indent + 4, plevel);
+    }
+    if (core_params.regWindowing && RFWIN) {
+        RFWIN->displayData(indent + 4, plevel);
+    }
+}
- dcache.power_t.reset();
- LSQ->power_t.reset();
- dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+
- dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+
- dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+
- dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic);
+/**
+ * Hand the bypass networks their port counts and activity statistics,
+ * invoke McPATComponent::computeEnergy(), then compute every sub-unit whose
+ * pointer is set and add its output_data to this unit's output_data.
+ */
+void EXECU::computeEnergy() {
+    if (!exist) return;
- if (cache_p==Write_back)
- {//write miss will generate a write later
- dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic;
- }
+    // Integer data bypass.
+    int_bypass->set_params_stats(
+        core_params.execu_int_bypass_ports,
+        core_stats.ALU_cdb_duty_cycle,
+        core_stats.cdb_alu_accesses);
- dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic +
- dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
- dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic +
- dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic;
- dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic +
- dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic;
- if (cache_p==Write_back)
- {
- dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic
- + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic;
- }
+    // Integer tag bypass: same inputs as the integer data bypass.
+    intTagBypass->set_params_stats(
+        core_params.execu_int_bypass_ports,
+        core_stats.ALU_cdb_duty_cycle,
+        core_stats.cdb_alu_accesses);
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->power_t.reset();
- LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+
- LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ
-
- LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
- + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
-
- }
- else
- {
- LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
- + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
-
- }
-
- if (is_tdp)
- {
-// dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg +
-// (dcache.missb->local_result.power +
-// dcache.ifb->local_result.power +
-// dcache.prefetchb->local_result.power +
-// dcache.wbb->local_result.power)*pppm_Isub;
- dcache.power = dcache.power_t + (dcache.caches->local_result.power +
- dcache.missb->local_result.power +
- dcache.ifb->local_result.power +
- dcache.prefetchb->local_result.power) *pppm_lkg;
- if (cache_p==Write_back)
- {
- dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg;
- }
+    // The multiplier bypasses are only updated when multipliers are
+    // configured.
+    if (core_params.num_muls > 0) {
+        int_mul_bypass->set_params_stats(
+            core_params.execu_mul_bypass_ports,
+            core_stats.MUL_cdb_duty_cycle,
+            core_stats.cdb_mul_accesses);
- LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
- power = power + dcache.power + LSQ->power;
+        intTag_mul_Bypass->set_params_stats(
+            core_params.execu_mul_bypass_ports,
+            core_stats.MUL_cdb_duty_cycle,
+            core_stats.cdb_mul_accesses);
+    }
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
- power = power + LoadQ->power;
- }
+    // Likewise, the FP bypasses are only updated when FPUs are configured.
+    if (core_params.num_fpus > 0) {
+        fp_bypass->set_params_stats(
+            core_params.execu_fp_bypass_ports,
+            core_stats.FPU_cdb_duty_cycle,
+            core_stats.cdb_fpu_accesses);
+
+        fpTagBypass->set_params_stats(
+            core_params.execu_fp_bypass_ports,
+            core_stats.FPU_cdb_duty_cycle,
+            core_stats.cdb_fpu_accesses);
}
- else
- {
-// dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
-// dcache.missb->local_result.power +
-// dcache.ifb->local_result.power +
-// dcache.prefetchb->local_result.power +
-// dcache.wbb->local_result.power)*pppm_lkg;
- dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
- dcache.missb->local_result.power +
- dcache.ifb->local_result.power +
- dcache.prefetchb->local_result.power )*pppm_lkg;
-
- if (cache_p==Write_back)
- {
- dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg;
- }
- LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
- rt_power = rt_power + dcache.rt_power + LSQ->rt_power;
+    McPATComponent::computeEnergy();
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
- rt_power = rt_power + LoadQ->rt_power;
- }
+    // Each sub-unit that is present is computed and then accumulated, in
+    // the order rfu, scheu, fp_u, exeu, mul.
+    if (rfu != NULL) {
+        rfu->computeEnergy();
+        output_data += rfu->output_data;
+    }
+    if (scheu != NULL) {
+        scheu->computeEnergy();
+        output_data += scheu->output_data;
+    }
+    if (fp_u != NULL) {
+        fp_u->computeEnergy();
+        output_data += fp_u->output_data;
+    }
+    if (exeu != NULL) {
+        exeu->computeEnergy();
+        output_data += exeu->output_data;
+    }
+    if (mul != NULL) {
+        mul->computeEnergy();
+        output_data += mul->output_data;
}
}
+/**
+ * Display this unit's own report, then the reports of its sub-units with
+ * the indent increased by 4.
+ *
+ * EXECU::computeEnergy() null-tests rfu, scheu, fp_u, exeu and mul before
+ * using them, so each pointer is tested here as well before it is
+ * dereferenced. The FPU and multiplier reports additionally keep their
+ * num_fpus / num_muls conditions.
+ */
+void EXECU::displayData(uint32_t indent, int plevel) {
+    if (!exist) return;
-void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
- cout << indent_str << "Data Cache:" << endl;
- cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.core_ty==Inorder)
- {
- cout << indent_str << "Load/Store Queue:" << endl;
- cout << indent_str_next << "Area = " << LSQ->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
-
- {
- if (XML->sys.core[ithCore].load_buffer_size >0)
- {
- cout << indent_str << "LoadQ:" << endl;
- cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- cout << indent_str<< "StoreQ:" << endl;
- cout << indent_str_next << "Area = " << LSQ->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime<< " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str_next << "Data Cache Peak Dynamic = " << dcache.rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Data Cache Subthreshold Leakage = " << dcache.rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Data Cache Gate Leakage = " << dcache.rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.core_ty==Inorder)
- {
- cout << indent_str_next << "Load/Store Queue Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Load/Store Queue Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl;
- }
- else
- {
- cout << indent_str_next << "LoadQ Peak Dynamic = " << LoadQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "LoadQ Subthreshold Leakage = " << LoadQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "StoreQ Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+    McPATComponent::displayData(indent, plevel);
+    if (rfu) {
+        rfu->displayData(indent + 4, plevel);
+    }
+    if (scheu) {
+        scheu->displayData(indent + 4, plevel);
+    }
+    if (exeu) {
+        exeu->displayData(indent + 4, plevel);
+    }
+    if (core_params.num_fpus > 0 && fp_u) {
+        fp_u->displayData(indent + 4, plevel);
+    }
+    if (core_params.num_muls > 0 && mul) {
+        mul->displayData(indent + 4, plevel);
+    }
}
-void MemManU::computeEnergy(bool is_tdp)
-{
-
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports;
- itlb->stats_t.readAc.miss = 0;
- itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
- itlb->tdp_stats = itlb->stats_t;
-
- dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- dtlb->stats_t.readAc.miss = 0;
- dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
- dtlb->tdp_stats = dtlb->stats_t;
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses;
- itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses;
- itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
- itlb->rtp_stats = itlb->stats_t;
-
- dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses;
- dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses;
- dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
- dtlb->rtp_stats = dtlb->stats_t;
+/**
+ * Run computeEnergy() on the core's units and sum the output_data of every
+ * sub-component whose pointer is set into the core's output_data.
+ *
+ * The summation below null-tests every pointer, so the computeEnergy()
+ * calls are guarded by the same tests; a missing unit is skipped instead
+ * of being dereferenced before its null check.
+ */
+void Core::computeEnergy() {
+    if (ifu) {
+        ifu->computeEnergy();
+    }
+    if (lsu) {
+        lsu->computeEnergy();
+    }
+    if (mmu) {
+        mmu->computeEnergy();
+    }
+    if (exu) {
+        exu->computeEnergy();
+    }
+    // rnu is only computed for out-of-order (OOO) cores.
+    if (core_params.core_ty == OOO && rnu) {
+        rnu->computeEnergy();
}
- itlb->power_t.reset();
- dtlb->power_t.reset();
- itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits
- +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic;
- dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits
- +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic;
-
- if (is_tdp)
- {
- itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg;
- dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
- power = power + itlb->power + dtlb->power;
- }
- else
- {
- itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg;
- dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
- rt_power = rt_power + itlb->rt_power + dtlb->rt_power;
- }
+    // Start from a clean total and accumulate every component present.
+    // NOTE(review): corepipe, undiffCore and l2cache are summed without a
+    // computeEnergy() call here -- presumably their output_data is filled
+    // in elsewhere; confirm. rnu is summed whenever it is set, regardless
+    // of core type.
+    output_data.reset();
+    if (ifu) {
+        output_data += ifu->output_data;
+    }
+    if (lsu) {
+        output_data += lsu->output_data;
+    }
+    if (mmu) {
+        output_data += mmu->output_data;
+    }
+    if (exu) {
+        output_data += exu->output_data;
+    }
+    if (rnu) {
+        output_data += rnu->output_data;
+    }
+    if (corepipe) {
+        output_data += corepipe->output_data;
+    }
+    if (undiffCore) {
+        output_data += undiffCore->output_data;
+    }
+    if (l2cache) {
+        output_data += l2cache->output_data;
+    }
}
-void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
-
-
- if (is_tdp)
- {
- cout << indent_str << "Itlb:" << endl;
- cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Dtlb:" << endl;
- cout << indent_str_next << "Area = " << dtlb->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << dtlb->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? dtlb->power.readOp.longer_channel_leakage:dtlb->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << dtlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- cout << indent_str_next << "Itlb Peak Dynamic = " << itlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Dtlb Peak Dynamic = " << dtlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage << " W" << endl;
- }
-
-}
+/**
+ * Delete the sub-component objects held by the instruction fetch unit.
+ *
+ * Nothing is deleted when the unit does not exist. BTB and BPT are only
+ * deleted when predictionW > 0. Every pointer except icache is reset to
+ * NULL after deletion.
+ */
+InstFetchU::~InstFetchU() {
-void RegFU::computeEnergy(bool is_tdp)
-{
-/*
- * Architecture RF and physical RF cannot be present at the same time.
- * Therefore, the RF stats can only refer to either ARF or PRF;
- * And the same stats can be used for both.
- */
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- IRF->stats_t.readAc.access = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+
- (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
- IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+
- (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
- //Rule of Thumb: about 10% RF related instructions do not need to access ALUs
- IRF->tdp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
- FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
- FRF->tdp_stats = FRF->stats_t;
- if (coredynp.regWindowing)
- {
- RFWIN->stats_t.readAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports;
- RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports;
- RFWIN->tdp_stats = RFWIN->stats_t;
- }
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy
- IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes;
- IRF->rtp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads;
- FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes;
- FRF->rtp_stats = FRF->stats_t;
- if (coredynp.regWindowing)
- {
- RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls*16;
- RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16;
- RFWIN->rtp_stats = RFWIN->stats_t;
-
- IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads +
- XML->sys.core[ithCore].function_calls*16;
- IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes +
- XML->sys.core[ithCore].function_calls*16;
- IRF->rtp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads +
- XML->sys.core[ithCore].function_calls*16;;
- FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+
- XML->sys.core[ithCore].function_calls*16;;
- FRF->rtp_stats = FRF->stats_t;
- }
+    if (exist) {
+        if (IB != NULL) {
+            delete IB;
+            IB = NULL;
}
- IRF->power_t.reset();
- FRF->power_t.reset();
- IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic
- +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic);
- FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic
- +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic);
- if (coredynp.regWindowing)
- {
- RFWIN->power_t.reset();
- RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic +
- RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic);
- }
-
- if (is_tdp)
- {
- IRF->power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
- FRF->power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
- power = power + (IRF->power + FRF->power);
- if (coredynp.regWindowing)
- {
- RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
- power = power + RFWIN->power;
- }
+        // The three ID_* objects are deleted unconditionally of predictionW.
+        if (ID_inst != NULL) {
+            delete ID_inst;
+            ID_inst = NULL;
+        }
+        if (ID_operand != NULL) {
+            delete ID_operand;
+            ID_operand = NULL;
+        }
+        if (ID_misc != NULL) {
+            delete ID_misc;
+            ID_misc = NULL;
+        }
+        // BTB and BPT are only deleted when predictionW > 0.
+        if (core_params.predictionW > 0) {
+            if (BTB != NULL) {
+                delete BTB;
+                BTB = NULL;
}
- else
- {
- IRF->rt_power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
- FRF->rt_power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (IRF->power_t + FRF->power_t);
- if (coredynp.regWindowing)
- {
- RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
- rt_power = rt_power + RFWIN->rt_power;
- }
+            if (BPT != NULL) {
+                delete BPT;
+                BPT = NULL;
}
+        }
+        // icache is deleted but, unlike the others, not reset to NULL.
+        if (icache != NULL) {
+            delete icache;
+        }
+    }
}
+BranchPredictor ::~BranchPredictor() {
-void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- { cout << indent_str << "Integer RF:" << endl;
- cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Floating Point RF:" << endl;
- cout << indent_str_next << "Area = " << FRF->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << FRF->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? FRF->power.readOp.longer_channel_leakage:FRF->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.regWindowing)
- {
- cout << indent_str << "Register Windows:" << endl;
- cout << indent_str_next << "Area = " << RFWIN->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << RFWIN->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? RFWIN->power.readOp.longer_channel_leakage:RFWIN->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << RFWIN->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str_next << "Integer RF Peak Dynamic = " << IRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Integer RF Subthreshold Leakage = " << IRF->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Floating Point RF Peak Dynamic = " << FRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " << FRF->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Floating Point RF Gate Leakage = " << FRF->rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.regWindowing)
- {
- cout << indent_str_next << "Register Windows Peak Dynamic = " << RFWIN->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Register Windows Subthreshold Leakage = " << RFWIN->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Register Windows Gate Leakage = " << RFWIN->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+    // Skip all cleanup when this predictor is flagged as non-existent.
+    if (!exist) {
+        return;
+    }
+
+    // delete on a null pointer is a no-op, so each table is released
+    // unconditionally and its pointer is cleared afterwards.
+    delete globalBPT;
+    globalBPT = NULL;
+    delete localBPT;
+    localBPT = NULL;
+    delete L1_localBPT;
+    L1_localBPT = NULL;
+    delete L2_localBPT;
+    L2_localBPT = NULL;
+    delete chooser;
+    chooser = NULL;
+    delete RAS;
+    RAS = NULL;
}
+RENAMINGU ::~RENAMINGU() {
-void EXECU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double pppm_t[4] = {1,1,1,1};
-// rfu->power.reset();
-// rfu->rt_power.reset();
-// scheu->power.reset();
-// scheu->rt_power.reset();
-// exeu->power.reset();
-// exeu->rt_power.reset();
-
- rfu->computeEnergy(is_tdp);
- scheu->computeEnergy(is_tdp);
- exeu->computeEnergy(is_tdp);
- if (coredynp.num_fpus >0)
- {
- fp_u->computeEnergy(is_tdp);
- }
- if (coredynp.num_muls >0)
- {
- mul->computeEnergy(is_tdp);
- }
-
- if (is_tdp)
- {
- set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction.
- bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t;
- if (coredynp.num_muls >0)
- {
- set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction.
- bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t;
- power = power + mul->power;
- }
- if (coredynp.num_fpus>0)
- {
- set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction.
- bypass.power = bypass.power + fp_bypass->power*pppm_t + fpTagBypass->power*pppm_t ;
- power = power + fp_u->power;
- }
-
- power = power + rfu->power + exeu->power + bypass.power + scheu->power;
- }
- else
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses);
- bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t;
- bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t;
-
- if (coredynp.num_muls >0)
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction.
- bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t;
- rt_power = rt_power + mul->rt_power;
- }
-
- if (coredynp.num_fpus>0)
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses);
- bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t;
- bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t;
- rt_power = rt_power + fp_u->rt_power;
- }
- rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power;
- }
+    // Skip all cleanup when the renaming unit is flagged as non-existent.
+    if (!exist) return;
+    if (iFRAT) {
+        delete iFRAT;
+        iFRAT = NULL;
+    }
+    if (fFRAT) {
+        delete fFRAT;
+        fFRAT = NULL;
+    }
+    if (iRRAT) {
+        delete iRRAT;
+        iRRAT = NULL;
+    }
+    // Fix: this guard used to test iFRAT a second time. iFRAT is already
+    // NULL here, so the branch was dead code and the fourth rename table
+    // was never released.
+    // NOTE(review): fRRAT is assumed to be the member declared next to
+    // iRRAT in core.h -- confirm the name against the class declaration.
+    if (fRRAT) {
+        delete fRRAT;
+        fRRAT = NULL;
+    }
+    if (ifreeL) {
+        delete ifreeL;
+        ifreeL = NULL;
+    }
+    if (ffreeL) {
+        delete ffreeL;
+        ffreeL = NULL;
+    }
+    if (idcl) {
+        delete idcl;
+        idcl = NULL;
+    }
+    if (fdcl) {
+        delete fdcl;
+        fdcl = NULL;
+    }
+    if (RAHT) {
+        delete RAHT;
+        RAHT = NULL;
+    }
}
-void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
-// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl;
- if (is_tdp)
- {
- cout << indent_str << "Register Files:" << endl;
- cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3){
- rfu->displayEnergy(indent+4,is_tdp);
- }
- cout << indent_str << "Instruction Scheduler:" << endl;
- cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3){
- scheu->displayEnergy(indent+4,is_tdp);
- }
- exeu->displayEnergy(indent,is_tdp);
- if (coredynp.num_fpus>0)
- {
- fp_u->displayEnergy(indent,is_tdp);
- }
- if (coredynp.num_muls >0)
- {
- mul->displayEnergy(indent,is_tdp);
- }
- cout << indent_str << "Results Broadcast Bus:" << endl;
- cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- cout << indent_str_next << "Register Files Peak Dynamic = " << rfu->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Register Files Subthreshold Leakage = " << rfu->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Register Files Gate Leakage = " << rfu->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " << scheu->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " << scheu->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Gate Leakage = " << scheu->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " << bypass.rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Subthreshold Leakage = " << bypass.rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Gate Leakage = " << bypass.rt_power.readOp.gate_leakage << " W" << endl;
- }
+LoadStoreU ::~LoadStoreU() {
+    // Skip all cleanup when the load/store unit is flagged as non-existent.
+    if (!exist) {
+        return;
+    }
+
+    // delete on a null pointer is a no-op, so no per-member guard is needed.
+    delete LSQ;
+    LSQ = NULL;
+    delete dcache;
+    dcache = NULL;
}
-void Core::computeEnergy(bool is_tdp)
-{
- //power_point_product_masks
- double pppm_t[4] = {1,1,1,1};
- double rtp_pipeline_coe;
- double num_units = 4.0;
- if (is_tdp)
- {
- ifu->computeEnergy(is_tdp);
- lsu->computeEnergy(is_tdp);
- mmu->computeEnergy(is_tdp);
- exu->computeEnergy(is_tdp);
-
- if (coredynp.core_ty==OOO)
- {
- num_units = 5.0;
- rnu->computeEnergy(is_tdp);
- set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- if (rnu->exist)
- {
- rnu->power = rnu->power + corepipe->power*pppm_t;
- power = power + rnu->power;
- }
- }
-
- if (ifu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
-// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
- ifu->power = ifu->power + corepipe->power*pppm_t;
-// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
-// cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl;
- power = power + ifu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (lsu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- lsu->power = lsu->power + corepipe->power*pppm_t;
-// cout << "LSU = " << lsu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + lsu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (exu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- exu->power = exu->power + corepipe->power*pppm_t;
-// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + exu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (mmu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- mmu->power = mmu->power + corepipe->power*pppm_t;
-// cout << "MMU = " << mmu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + mmu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
+MemManU ::~MemManU() {
- power = power + undiffCore->power;
+    // Skip all cleanup when the memory management unit is flagged as
+    // non-existent.
+    if (!exist) {
+        return;
+    }
+
+    // Release both TLB objects; delete on a null pointer is a no-op.
+    delete itlb;
+    itlb = NULL;
+    delete dtlb;
+    dtlb = NULL;
+}
- if (XML->sys.Private_L2)
- {
+RegFU ::~RegFU() {
- l2cache->computeEnergy(is_tdp);
- set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1);
- //l2cache->power = l2cache->power*pppm_t;
- power = power + l2cache->power*pppm_t;
- }
- }
- else
- {
- ifu->computeEnergy(is_tdp);
- lsu->computeEnergy(is_tdp);
- mmu->computeEnergy(is_tdp);
- exu->computeEnergy(is_tdp);
- if (coredynp.core_ty==OOO)
- {
- num_units = 5.0;
- rnu->computeEnergy(is_tdp);
- set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- if (rnu->exist)
- {
- rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t;
-
- rt_power = rt_power + rnu->rt_power;
- }
- }
- else
- {
- if (XML->sys.homogeneous_cores==1)
- {
- rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores;
- }
- else
- {
- rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles;
- }
- set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- }
+    // Skip all cleanup when the register file unit is flagged as
+    // non-existent.
+    if (!exist) {
+        return;
+    }
+
+    // Release IRF, FRF and RFWIN in that order; delete on a null pointer is
+    // a no-op, so members that were never allocated are handled as well.
+    delete IRF;
+    IRF = NULL;
+    delete FRF;
+    FRF = NULL;
+    delete RFWIN;
+    RFWIN = NULL;
+}
- if (ifu->exist)
- {
- ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + ifu->rt_power ;
- }
- if (lsu->exist)
- {
- lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + lsu->rt_power;
- }
- if (exu->exist)
- {
- exu->rt_power = exu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + exu->rt_power;
- }
- if (mmu->exist)
- {
- mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + mmu->rt_power ;
- }
+SchedulerU ::~SchedulerU() {
- rt_power = rt_power + undiffCore->power;
-// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- if (XML->sys.Private_L2)
- {
+    // Skip all cleanup when the scheduler is flagged as non-existent.
+    if (!exist) return;
+    if (int_inst_window) {
+        delete int_inst_window;
+        int_inst_window = NULL;
+    }
+    // Fix: this branch used to delete int_inst_window, which is already
+    // NULL at this point, so fp_inst_window was never released.
+    if (fp_inst_window) {
+        delete fp_inst_window;
+        fp_inst_window = NULL;
+    }
+    if (ROB) {
+        delete ROB;
+        ROB = NULL;
+    }
+    if (int_instruction_selection) {
+        delete int_instruction_selection;
+        int_instruction_selection = NULL;
+    }
+    if (fp_instruction_selection) {
+        delete fp_instruction_selection;
+        fp_instruction_selection = NULL;
+    }
+}
- l2cache->computeEnergy(is_tdp);
- //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1);
- //l2cache->rt_power = l2cache->rt_power*pppm_t;
- rt_power = rt_power + l2cache->rt_power;
- }
- }
+EXECU ::~EXECU() {
+    // Skip all cleanup when the execution unit is flagged as non-existent.
+    if (!exist) {
+        return;
+    }
+
+    // delete on a null pointer is a no-op, so every member is released
+    // unconditionally, in the same order as before, and then cleared.
+
+    // Bypass objects.
+    delete int_bypass;
+    int_bypass = NULL;
+    delete intTagBypass;
+    intTagBypass = NULL;
+    delete int_mul_bypass;
+    int_mul_bypass = NULL;
+    delete intTag_mul_Bypass;
+    intTag_mul_Bypass = NULL;
+    delete fp_bypass;
+    fp_bypass = NULL;
+    delete fpTagBypass;
+    fpTagBypass = NULL;
+
+    // Remaining owned sub-components.
+    delete fp_u;
+    fp_u = NULL;
+    delete exeu;
+    exeu = NULL;
+    delete mul;
+    mul = NULL;
+    delete rfu;
+    rfu = NULL;
+    delete scheu;
+    scheu = NULL;
}
-void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
- if (is_tdp)
- {
- cout << "Core:" << endl;
- cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str << "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout<<endl;
- if (ifu->exist)
- {
- cout << indent_str << "Instruction Fetch Unit:" << endl;
- cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- ifu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (coredynp.core_ty==OOO)
- {
- if (rnu->exist)
- {
- cout << indent_str<< "Renaming Unit:" << endl;
- cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- rnu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
+Core::~Core() {
- }
- if (lsu->exist)
- {
- cout << indent_str<< "Load Store Unit:" << endl;
- cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- lsu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (mmu->exist)
- {
- cout << indent_str<< "Memory Management Unit:" << endl;
- cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- mmu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (exu->exist)
- {
- cout << indent_str<< "Execution Unit:" << endl;
- cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- exu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
-// if (plevel >2)
-// {
-// if (undiffCore->exist)
-// {
-// cout << indent_str << "Undifferentiated Core" << endl;
-// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl;
-// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl;
-//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Subthreshold Leakage = "
-// << (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl;
-// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl;
-// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl;
-// cout <<endl;
-// }
-// }
- if (XML->sys.Private_L2)
- {
-
- l2cache->displayEnergy(4,is_tdp);
- }
-
- }
- else
- {
-// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
- }
-}
-InstFetchU ::~InstFetchU(){
-
- if (!exist) return;
- if(IB) {delete IB; IB = 0;}
- if(ID_inst) {delete ID_inst; ID_inst = 0;}
- if(ID_operand) {delete ID_operand; ID_operand = 0;}
- if(ID_misc) {delete ID_misc; ID_misc = 0;}
- if (coredynp.predictionW>0)
- {
- if(BTB) {delete BTB; BTB = 0;}
- if(BPT) {delete BPT; BPT = 0;}
- }
+    // Release every owned sub-component and clear its pointer. delete on a
+    // null pointer is a no-op, so members that were never created need no
+    // guard.
+    delete ifu;
+    ifu = NULL;
+    delete lsu;
+    lsu = NULL;
+    delete rnu;
+    rnu = NULL;
+    delete mmu;
+    mmu = NULL;
+    delete exu;
+    exu = NULL;
+    delete corepipe;
+    corepipe = NULL;
+    delete undiffCore;
+    undiffCore = NULL;
+    delete l2cache;
+    l2cache = NULL;
}
-BranchPredictor ::~BranchPredictor(){
+void Core::initialize_params() {
+    // Zero the whole parameter struct first; the assignments below must come
+    // after this so they are not wiped out again.
+    memset(&core_params, 0, sizeof(CoreParameters));
+
+    // The two peak widths start at -1 rather than 0.
+    core_params.peak_commitW = -1;
+    core_params.peak_issueW = -1;
+}
- if (!exist) return;
- if(globalBPT) {delete globalBPT; globalBPT = 0;}
- if(localBPT) {delete localBPT; localBPT = 0;}
- if(L1_localBPT) {delete L1_localBPT; L1_localBPT = 0;}
- if(L2_localBPT) {delete L2_localBPT; L2_localBPT = 0;}
- if(chooser) {delete chooser; chooser = 0;}
- if(RAS) {delete RAS; RAS = 0;}
- }
+void Core::initialize_stats() {
+    // Zero the whole statistics struct, then give the duty-cycle fields
+    // listed below a default of 1.0 in place of 0.
+    memset(&core_stats, 0, sizeof(CoreStatistics));
+    core_stats.IFU_duty_cycle = 1.0;
+    core_stats.ALU_duty_cycle = 1.0;
+    core_stats.FPU_duty_cycle = 1.0;
+    core_stats.MUL_duty_cycle = 1.0;
+    core_stats.ALU_cdb_duty_cycle = 1.0;
+    core_stats.FPU_cdb_duty_cycle = 1.0;
+    core_stats.MUL_cdb_duty_cycle = 1.0;
+    core_stats.pipeline_duty_cycle = 1.0;
+    // A second, redundant IFU_duty_cycle assignment was removed from here.
+    // NOTE(review): BR_duty_cycle keeps its zero default -- confirm that the
+    // removed duplicate was not meant to set it.
+    core_stats.LSU_duty_cycle = 1.0;
+    core_stats.MemManU_D_duty_cycle = 1.0;
+    core_stats.MemManU_I_duty_cycle = 1.0;
+}
-RENAMINGU ::~RENAMINGU(){
-
- if (!exist) return;
- if(iFRAT ) {delete iFRAT; iFRAT = 0;}
- if(fFRAT ) {delete fFRAT; fFRAT =0;}
- if(iRRAT) {delete iRRAT; iRRAT = 0;}
- if(iFRAT) {delete iFRAT; iFRAT = 0;}
- if(ifreeL) {delete ifreeL;ifreeL= 0;}
- if(ffreeL) {delete ffreeL;ffreeL= 0;}
- if(idcl) {delete idcl; idcl = 0;}
- if(fdcl) {delete fdcl; fdcl = 0;}
- if(RAHT) {delete RAHT; RAHT = 0;}
+void Core::set_core_param() {
+ initialize_params();
+ initialize_stats();
+
+ int num_children = xml_data->nChildNode("param");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_STR_IF("name", name);
+ ASSIGN_INT_IF("opt_local", core_params.opt_local);
+ ASSIGN_FP_IF("clock_rate", core_params.clockRate);
+ ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
+ ASSIGN_INT_IF("opcode_width", core_params.opcode_width);
+ ASSIGN_INT_IF("x86", core_params.x86);
+ ASSIGN_INT_IF("Embedded", core_params.Embedded);
+ ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type);
+ ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length);
+ ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads);
+ ASSIGN_INT_IF("fetch_width", core_params.fetchW);
+ ASSIGN_INT_IF("decode_width", core_params.decodeW);
+ ASSIGN_INT_IF("issue_width", core_params.issueW);
+ ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW);
+ ASSIGN_INT_IF("commit_width", core_params.commitW);
+ ASSIGN_INT_IF("prediction_width", core_params.predictionW);
+ ASSIGN_INT_IF("ALU_per_core", core_params.num_alus);
+ ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus);
+ ASSIGN_INT_IF("MUL_per_core", core_params.num_muls);
+ ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW);
+ ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty,
+ Scheduler_type);
+ ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type);
+ ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size);
+ ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size);
+ ASSIGN_INT_IF("ROB_size", core_params.ROB_size);
+ ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc);
+ ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks);
+ ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width);
+ ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc);
+ ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks);
+ ASSIGN_INT_IF("register_window_size",
+ core_params.register_window_size);
+ ASSIGN_INT_IF("register_window_throughput",
+ core_params.register_window_throughput);
+ ASSIGN_INT_IF("register_window_latency",
+ core_params.register_window_latency);
+ ASSIGN_INT_IF("register_window_assoc",
+ core_params.register_window_assoc);
+ ASSIGN_INT_IF("register_window_nbanks",
+ core_params.register_window_nbanks);
+ ASSIGN_INT_IF("register_window_tag_width",
+ core_params.register_window_tag_width);
+ ASSIGN_INT_IF("register_window_rw_ports",
+ core_params.register_window_rw_ports);
+ ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size);
+ ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc);
+ ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks);
+ ASSIGN_INT_IF("phy_Regs_IRF_tag_width",
+ core_params.phy_Regs_IRF_tag_width);
+ ASSIGN_INT_IF("phy_Regs_IRF_rd_ports",
+ core_params.phy_Regs_IRF_rd_ports);
+ ASSIGN_INT_IF("phy_Regs_IRF_wr_ports",
+ core_params.phy_Regs_IRF_wr_ports);
+ ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size);
+ ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc);
+ ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks);
+ ASSIGN_INT_IF("phy_Regs_FRF_tag_width",
+ core_params.phy_Regs_FRF_tag_width);
+ ASSIGN_INT_IF("phy_Regs_FRF_rd_ports",
+ core_params.phy_Regs_FRF_rd_ports);
+ ASSIGN_INT_IF("phy_Regs_FRF_wr_ports",
+ core_params.phy_Regs_FRF_wr_ports);
+ ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks);
+ ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports);
+ ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks);
+ ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports);
+ ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks);
+ ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports);
+ ASSIGN_INT_IF("memory_ports", core_params.memory_ports);
+ ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size);
+ ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc);
+ ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks);
+ ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size);
+ ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc);
+ ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks);
+ ASSIGN_INT_IF("instruction_window_size",
+ core_params.instruction_window_size);
+ ASSIGN_INT_IF("fp_instruction_window_size",
+ core_params.fp_instruction_window_size);
+ ASSIGN_INT_IF("instruction_buffer_size",
+ core_params.instruction_buffer_size);
+ ASSIGN_INT_IF("instruction_buffer_assoc",
+ core_params.instruction_buffer_assoc);
+ ASSIGN_INT_IF("instruction_buffer_nbanks",
+ core_params.instruction_buffer_nbanks);
+ ASSIGN_INT_IF("instruction_buffer_tag_width",
+ core_params.instruction_buffer_tag_width);
+ ASSIGN_INT_IF("number_instruction_fetch_ports",
+ core_params.number_instruction_fetch_ports);
+ ASSIGN_INT_IF("RAS_size", core_params.RAS_size);
+ ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt,
+ Wire_type);
+ ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type);
+ ASSIGN_INT_IF("execu_int_bypass_ports",
+ core_params.execu_int_bypass_ports);
+ ASSIGN_INT_IF("execu_mul_bypass_ports",
+ core_params.execu_mul_bypass_ports);
+ ASSIGN_INT_IF("execu_fp_bypass_ports",
+ core_params.execu_fp_bypass_ports);
+ ASSIGN_ENUM_IF("execu_bypass_wire_type",
+ core_params.execu_bypass_wire_type, Wire_type);
+ ASSIGN_FP_IF("execu_bypass_base_width",
+ core_params.execu_bypass_base_width);
+ ASSIGN_FP_IF("execu_bypass_base_height",
+ core_params.execu_bypass_base_height);
+ ASSIGN_INT_IF("execu_bypass_start_wiring_level",
+ core_params.execu_bypass_start_wiring_level);
+ ASSIGN_FP_IF("execu_bypass_route_over_perc",
+ core_params.execu_bypass_route_over_perc);
+ ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator);
+ ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages);
+ ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages);
+ ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines);
+ ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines);
+ ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint);
+ ASSIGN_INT_IF("perThreadState", core_params.perThreadState);
+ ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
+
+ else {
+ warnUnrecognizedParam(node_name);
}
+ }
-LoadStoreU ::~LoadStoreU(){
-
- if (!exist) return;
- if(LSQ) {delete LSQ; LSQ = 0;}
+ // Change from MHz to Hz
+ core_params.clockRate *= 1e6;
+ clockRate = core_params.clockRate;
+
+ core_params.peak_commitW = core_params.peak_issueW;
+ core_params.fp_decodeW = core_params.fp_issueW;
+
+
+ num_children = xml_data->nChildNode("stat");
+ for (i = 0; i < num_children; i++) {
+ XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle);
+ ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle);
+ ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle);
+ ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle);
+ ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle);
+ ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle);
+ ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle);
+ ASSIGN_FP_IF("total_cycles", core_stats.total_cycles);
+ ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles);
+ ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles);
+ ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle);
+ ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle);
+ ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle);
+ ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle);
+ ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle);
+ ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses);
+ ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses);
+ ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses);
+ ASSIGN_FP_IF("function_calls", core_stats.function_calls);
+ ASSIGN_FP_IF("total_instructions", core_stats.total_instructions);
+ ASSIGN_FP_IF("int_instructions", core_stats.int_instructions);
+ ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions);
+ ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions);
+ ASSIGN_FP_IF("branch_mispredictions",
+ core_stats.branch_mispredictions);
+ ASSIGN_FP_IF("load_instructions", core_stats.load_instructions);
+ ASSIGN_FP_IF("store_instructions", core_stats.store_instructions);
+ ASSIGN_FP_IF("committed_instructions",
+ core_stats.committed_instructions);
+ ASSIGN_FP_IF("committed_int_instructions",
+ core_stats.committed_int_instructions);
+ ASSIGN_FP_IF("committed_fp_instructions",
+ core_stats.committed_fp_instructions);
+ ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads);
+ ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes);
+ ASSIGN_FP_IF("rename_reads", core_stats.rename_reads);
+ ASSIGN_FP_IF("rename_writes", core_stats.rename_writes);
+ ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads);
+ ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes);
+ ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads);
+ ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes);
+ ASSIGN_FP_IF("inst_window_wakeup_accesses",
+ core_stats.inst_window_wakeup_accesses);
+ ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads);
+ ASSIGN_FP_IF("fp_inst_window_writes",
+ core_stats.fp_inst_window_writes);
+ ASSIGN_FP_IF("fp_inst_window_wakeup_accesses",
+ core_stats.fp_inst_window_wakeup_accesses);
+ ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads);
+ ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads);
+ ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes);
+ ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes);
+ ASSIGN_FP_IF("context_switches", core_stats.context_switches);
+ ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses);
+ ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses);
+ ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses);
+
+ else {
+ warnUnrecognizedStat(node_name);
}
+ }
-MemManU ::~MemManU(){
+ // Initialize a few variables
+ core_params.multithreaded = core_params.num_hthreads > 1 ? true : false;
+ core_params.pc_width = virtual_address_width;
+ core_params.v_address_width = virtual_address_width;
+ core_params.p_address_width = physical_address_width;
+ core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32;
+ core_params.fp_data_width = core_params.int_data_width;
+ core_params.arch_ireg_width =
+ int(ceil(log2(core_params.archi_Regs_IRF_size)));
+ core_params.arch_freg_width
+ = int(ceil(log2(core_params.archi_Regs_FRF_size)));
+ core_params.num_IRF_entry = core_params.archi_Regs_IRF_size;
+ core_params.num_FRF_entry = core_params.archi_Regs_FRF_size;
+
+ if (core_params.instruction_length <= 0) {
+ errorNonPositiveParam("instruction_length");
+ }
- if (!exist) return;
- if(itlb) {delete itlb; itlb = 0;}
- if(dtlb) {delete dtlb; dtlb = 0;}
- }
+ if (core_params.num_hthreads <= 0) {
+ errorNonPositiveParam("number_hardware_threads");
+ }
-RegFU ::~RegFU(){
+ if (core_params.opcode_width <= 0) {
+ errorNonPositiveParam("opcode_width");
+ }
- if (!exist) return;
- if(IRF) {delete IRF; IRF = 0;}
- if(FRF) {delete FRF; FRF = 0;}
- if(RFWIN) {delete RFWIN; RFWIN = 0;}
- }
+ if (core_params.instruction_buffer_size <= 0) {
+ errorNonPositiveParam("instruction_buffer_size");
+ }
-SchedulerU ::~SchedulerU(){
+ if (core_params.number_instruction_fetch_ports <= 0) {
+ errorNonPositiveParam("number_instruction_fetch_ports");
+ }
- if (!exist) return;
- if(int_inst_window) {delete int_inst_window; int_inst_window = 0;}
- if(fp_inst_window) {delete int_inst_window; int_inst_window = 0;}
- if(ROB) {delete ROB; ROB = 0;}
- if(instruction_selection) {delete instruction_selection;instruction_selection = 0;}
- }
+ if (core_params.peak_issueW <= 0) {
+ errorNonPositiveParam("peak_issue_width");
+ } else {
+ assert(core_params.peak_commitW > 0);
+ }
-EXECU ::~EXECU(){
-
- if (!exist) return;
- if(int_bypass) {delete int_bypass; int_bypass = 0;}
- if(intTagBypass) {delete intTagBypass; intTagBypass =0;}
- if(int_mul_bypass) {delete int_mul_bypass; int_mul_bypass = 0;}
- if(intTag_mul_Bypass) {delete intTag_mul_Bypass; intTag_mul_Bypass =0;}
- if(fp_bypass) {delete fp_bypass;fp_bypass = 0;}
- if(fpTagBypass) {delete fpTagBypass;fpTagBypass = 0;}
- if(fp_u) {delete fp_u;fp_u = 0;}
- if(exeu) {delete exeu;exeu = 0;}
- if(mul) {delete mul;mul = 0;}
- if(rfu) {delete rfu;rfu = 0;}
- if(scheu) {delete scheu; scheu = 0;}
+ if (core_params.core_ty == OOO) {
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ core_params.phy_ireg_width =
+ int(ceil(log2(core_params.phy_Regs_IRF_size)));
+ core_params.phy_freg_width =
+ int(ceil(log2(core_params.phy_Regs_FRF_size)));
+ core_params.num_ifreelist_entries =
+ core_params.num_IRF_entry = core_params.phy_Regs_IRF_size;
+ core_params.num_ffreelist_entries =
+ core_params.num_FRF_entry = core_params.phy_Regs_FRF_size;
+ } else if (core_params.scheu_ty == ReservationStation) {
+ core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size)));
+ core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size)));
+ core_params.num_ifreelist_entries = core_params.ROB_size;
+ core_params.num_ffreelist_entries = core_params.ROB_size;
}
+ }
-Core ::~Core(){
+ core_params.regWindowing =
+ (core_params.register_window_size > 0 &&
+ core_params.core_ty == Inorder) ? true : false;
- if(ifu) {delete ifu; ifu = 0;}
- if(lsu) {delete lsu; lsu = 0;}
- if(rnu) {delete rnu; rnu = 0;}
- if(mmu) {delete mmu; mmu = 0;}
- if(exu) {delete exu; exu = 0;}
- if(corepipe) {delete corepipe; corepipe = 0;}
- if(undiffCore) {delete undiffCore;undiffCore = 0;}
- if(l2cache) {delete l2cache;l2cache = 0;}
+ if (core_params.regWindowing) {
+ if (core_params.register_window_throughput <= 0) {
+ errorNonPositiveParam("register_window_throughput");
+ } else if (core_params.register_window_latency <= 0) {
+ errorNonPositiveParam("register_window_latency");
}
+ }
-void Core::set_core_param()
-{
- coredynp.opt_local = XML->sys.core[ithCore].opt_local;
- coredynp.x86 = XML->sys.core[ithCore].x86;
- coredynp.Embedded = XML->sys.Embedded;
- coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type;
- coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme;
- coredynp.fetchW = XML->sys.core[ithCore].fetch_width;
- coredynp.decodeW = XML->sys.core[ithCore].decode_width;
- coredynp.issueW = XML->sys.core[ithCore].issue_width;
- coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width;
- coredynp.commitW = XML->sys.core[ithCore].commit_width;
- coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width;
- coredynp.predictionW = XML->sys.core[ithCore].prediction_width;
- coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width;
- coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width;
- coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core;
- coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core;
- coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core;
-
-
- coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads;
- coredynp.multithreaded = coredynp.num_hthreads>1? true:false;
- coredynp.instruction_length = XML->sys.core[ithCore].instruction_length;
- coredynp.pc_width = XML->sys.virtual_address_width;
-
- coredynp.opcode_length = XML->sys.core[ithCore].opcode_width;
- coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width;
- coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0];
- coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0];
- coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1];
- coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1];
- coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32;
- coredynp.fp_data_width = coredynp.int_data_width;
- coredynp.v_address_width = XML->sys.virtual_address_width;
- coredynp.p_address_width = XML->sys.physical_address_width;
-
- coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme;
- coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size)));
- coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size)));
- coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size;
- coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size;
- coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle;
- coredynp.total_cycles = XML->sys.core[ithCore].total_cycles;
- coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles;
- coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles;
-
- //Max power duty cycle for peak power estimation
-// if (coredynp.core_ty==OOO)
-// {
-// coredynp.IFU_duty_cycle = 1;
-// coredynp.LSU_duty_cycle = 1;
-// coredynp.MemManU_I_duty_cycle =1;
-// coredynp.MemManU_D_duty_cycle =1;
-// coredynp.ALU_duty_cycle =1;
-// coredynp.MUL_duty_cycle =1;
-// coredynp.FPU_duty_cycle =1;
-// coredynp.ALU_cdb_duty_cycle =1;
-// coredynp.MUL_cdb_duty_cycle =1;
-// coredynp.FPU_cdb_duty_cycle =1;
-// }
-// else
-// {
- coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle;
- coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle;
- coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle;
- coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle;
- coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle;
- coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle;
- coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle;
- coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle;
- coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle;
- coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle;
- coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle;
-// }
-
-
- if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder)))
- {
- cout<<"Invalid Core Type"<<endl;
- exit(0);
- }
-// if (coredynp.core_ty==OOO)
-// {
-// cout<<"OOO processor models are being updated and will be available in next release"<<endl;
-// exit(0);
-// }
- if (!((coredynp.scheu_ty==PhysicalRegFile)||(coredynp.scheu_ty==ReservationStation)))
- {
- cout<<"Invalid OOO Scheduler Type"<<endl;
- exit(0);
- }
+ set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads,
+ core_params.num_hthreads, 0);
- if (!((coredynp.rm_ty ==RAMbased)||(coredynp.rm_ty ==CAMbased)))
- {
- cout<<"Invalid OOO Renaming Type"<<endl;
- exit(0);
- }
+ if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) {
+ cout << "Invalid Core Type" << endl;
+ exit(0);
+ }
-if (coredynp.core_ty==OOO)
-{
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_IRF_size)));
- coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size)));
- coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size;
- coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = XML->sys.core[ithCore].phy_Regs_FRF_size;
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {//ROB serves as Phy RF in RS based OOO
- coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
- coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
- coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size;
- coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size;
+ if (!((core_params.scheu_ty == PhysicalRegFile) ||
+ (core_params.scheu_ty == ReservationStation))) {
+ cout << "Invalid OOO Scheduler Type" << endl;
+ exit(0);
+ }
- }
+ if (!((core_params.rm_ty == RAMbased) ||
+ (core_params.rm_ty == CAMbased))) {
+ cout << "Invalid OOO Renaming Type" << endl;
+ exit(0);
+ }
}
- coredynp.globalCheckpoint = 32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See TR for reference.
- coredynp.perThreadState = 8;
- coredynp.instruction_length = 32;
- coredynp.clockRate = XML->sys.core[ithCore].clock_rate;
- coredynp.clockRate *= 1e6;
- coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false;
- coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate;
- set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0);
-}