summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r--ext/mcpat/XML_Parse.cc1798
-rw-r--r--ext/mcpat/XML_Parse.h591
-rw-r--r--ext/mcpat/array.cc386
-rw-r--r--ext/mcpat/array.h86
-rw-r--r--ext/mcpat/basic_components.cc374
-rw-r--r--ext/mcpat/basic_components.h522
-rw-r--r--ext/mcpat/bus_interconnect.cc179
-rw-r--r--ext/mcpat/bus_interconnect.h (renamed from ext/mcpat/sharedcache.h)100
-rw-r--r--ext/mcpat/cachearray.cc321
-rw-r--r--ext/mcpat/cachearray.h117
-rw-r--r--ext/mcpat/cachecontroller.cc42
-rw-r--r--ext/mcpat/cachecontroller.h (renamed from ext/mcpat/globalvar.h)29
-rw-r--r--ext/mcpat/cacheunit.cc647
-rw-r--r--ext/mcpat/cacheunit.h167
-rw-r--r--ext/mcpat/cacti/Ucache.cc1404
-rw-r--r--ext/mcpat/cacti/Ucache.h52
-rw-r--r--ext/mcpat/cacti/arbiter.cc119
-rw-r--r--[-rwxr-xr-x]ext/mcpat/cacti/bank.cc274
-rwxr-xr-xext/mcpat/cacti/bank.h6
-rw-r--r--ext/mcpat/cacti/basic_circuit.cc1001
-rw-r--r--ext/mcpat/cacti/basic_circuit.h51
-rw-r--r--ext/mcpat/cacti/cacti_interface.cc183
-rw-r--r--ext/mcpat/cacti/cacti_interface.h641
-rw-r--r--ext/mcpat/cacti/component.cc253
-rw-r--r--ext/mcpat/cacti/component.h44
-rw-r--r--ext/mcpat/cacti/const.h28
-rw-r--r--ext/mcpat/cacti/crossbar.cc220
-rw-r--r--ext/mcpat/cacti/crossbar.h40
-rw-r--r--ext/mcpat/cacti/decoder.cc2241
-rw-r--r--ext/mcpat/cacti/decoder.h260
-rw-r--r--ext/mcpat/cacti/htree2.cc1077
-rw-r--r--ext/mcpat/cacti/htree2.h27
-rw-r--r--ext/mcpat/cacti/io.cc3274
-rw-r--r--[-rwxr-xr-x]ext/mcpat/cacti/mat.cc3282
-rwxr-xr-xext/mcpat/cacti/mat.h14
-rw-r--r--ext/mcpat/cacti/nuca.cc1007
-rw-r--r--ext/mcpat/cacti/nuca.h16
-rw-r--r--ext/mcpat/cacti/parameter.cc1162
-rw-r--r--ext/mcpat/cacti/parameter.h450
-rw-r--r--ext/mcpat/cacti/router.cc386
-rw-r--r--ext/mcpat/cacti/router.h14
-rwxr-xr-xext/mcpat/cacti/subarray.cc257
-rwxr-xr-xext/mcpat/cacti/subarray.h10
-rw-r--r--ext/mcpat/cacti/technology.cc5177
-rwxr-xr-xext/mcpat/cacti/uca.cc723
-rwxr-xr-xext/mcpat/cacti/uca.h13
-rw-r--r--ext/mcpat/cacti/wire.cc1368
-rw-r--r--ext/mcpat/cacti/wire.h29
-rw-r--r--ext/mcpat/common.h65
-rw-r--r--ext/mcpat/core.cc7640
-rw-r--r--ext/mcpat/core.h474
-rw-r--r--ext/mcpat/interconnect.cc310
-rw-r--r--ext/mcpat/interconnect.h86
-rw-r--r--ext/mcpat/iocontrollers.cc774
-rw-r--r--ext/mcpat/iocontrollers.h62
-rw-r--r--ext/mcpat/logic.cc1544
-rw-r--r--ext/mcpat/logic.h322
-rw-r--r--ext/mcpat/main.cc114
-rw-r--r--ext/mcpat/mcpat.mk10
-rw-r--r--ext/mcpat/mcpatXeonCore.mk81
-rw-r--r--ext/mcpat/memoryctrl.cc1125
-rw-r--r--ext/mcpat/memoryctrl.h105
-rw-r--r--ext/mcpat/noc.cc504
-rw-r--r--ext/mcpat/noc.h90
-rw-r--r--ext/mcpat/processor.cc839
-rw-r--r--ext/mcpat/sharedcache.cc1162
-rw-r--r--ext/mcpat/system.cc350
-rw-r--r--ext/mcpat/system.h (renamed from ext/mcpat/processor.h)54
-rw-r--r--ext/mcpat/technology_xeon_core.cc2772
-rw-r--r--ext/mcpat/xmlParser.cc3620
-rw-r--r--ext/mcpat/xmlParser.h205
71 files changed, 23728 insertions, 29042 deletions
diff --git a/ext/mcpat/XML_Parse.cc b/ext/mcpat/XML_Parse.cc
deleted file mode 100644
index ae3ee6f17..000000000
--- a/ext/mcpat/XML_Parse.cc
+++ /dev/null
@@ -1,1798 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-#include <cstdio>
-#include <string>
-
-#include "XML_Parse.h"
-#include "xmlParser.h"
-
-using namespace std;
-
-void ParseXML::parse(char* filepath)
-{
- unsigned int i,j,k,m,n;
- unsigned int NumofCom_4;
- unsigned int itmp;
- //Initialize all structures
- ParseXML::initialize();
-
- // this open and parse the XML file:
- XMLNode xMainNode=XMLNode::openFileHelper(filepath,"component"); //the 'component' in the first layer
-
- XMLNode xNode2=xMainNode.getChildNode("component"); // the 'component' in the second layer
- //get all params in the second layer
- itmp=xNode2.nChildNode("param");
- for(i=0; i<itmp; i++)
- {
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"number_of_cores")==0) {sys.number_of_cores=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"number_of_L1Directories")==0) {sys.number_of_L1Directories=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"number_of_L2Directories")==0) {sys.number_of_L2Directories=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"number_of_L2s")==0) {sys.number_of_L2s=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"Private_L2")==0) {sys.Private_L2=(bool)atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"number_of_L3s")==0) {sys.number_of_L3s=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"number_of_NoCs")==0) {sys.number_of_NoCs=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"number_of_dir_levels")==0) {sys.number_of_dir_levels=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"domain_size")==0) {sys.domain_size=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"first_level_dir")==0) {sys.first_level_dir=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"homogeneous_cores")==0) {sys.homogeneous_cores=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"core_tech_node")==0) {sys.core_tech_node=atof(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"target_core_clockrate")==0) {sys.target_core_clockrate=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"target_chip_area")==0) {sys.target_chip_area=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"temperature")==0) {sys.temperature=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"number_cache_levels")==0) {sys.number_cache_levels=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"L1_property")==0) {sys.L1_property =atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"L2_property")==0) {sys.L2_property =atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"homogeneous_L2s")==0) {sys.homogeneous_L2s=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"homogeneous_L1Directories")==0) {sys.homogeneous_L1Directories=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"homogeneous_L2Directories")==0) {sys.homogeneous_L2Directories=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"L3_property")==0) {sys.L3_property =atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"homogeneous_L3s")==0) {sys.homogeneous_L3s=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"homogeneous_ccs")==0) {sys.homogeneous_ccs=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"homogeneous_NoCs")==0) {sys.homogeneous_NoCs=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"Max_area_deviation")==0) {sys.Max_area_deviation=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"Max_power_deviation")==0) {sys.Max_power_deviation=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"device_type")==0) {sys.device_type=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"longer_channel_device")==0) {sys.longer_channel_device=(bool)atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"opt_dynamic_power")==0) {sys.opt_dynamic_power=(bool)atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"opt_lakage_power")==0) {sys.opt_lakage_power=(bool)atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"opt_clockrate")==0) {sys.opt_clockrate=(bool)atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"opt_area")==0) {sys.opt_area=(bool)atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"Embedded")==0) {sys.Embedded=(bool)atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"interconnect_projection_type")==0) {sys.interconnect_projection_type=atoi(xNode2.getChildNode("param",i).getAttribute("value"))==0?0:1;continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"machine_bits")==0) {sys.machine_bits=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"virtual_address_width")==0) {sys.virtual_address_width=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"physical_address_width")==0) {sys.physical_address_width=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- if (strcmp(xNode2.getChildNode("param",i).getAttribute("name"),"virtual_memory_page_size")==0) {sys.virtual_memory_page_size=atoi(xNode2.getChildNode("param",i).getAttribute("value"));continue;}
- }
-
-// if (sys.Private_L2 && sys.number_of_cores!=sys.number_of_L2s)
-// {
-// cout<<"Private L2: Number of L2s must equal to Number of Cores"<<endl;
-// exit(0);
-// }
-
- itmp=xNode2.nChildNode("stat");
- for(i=0; i<itmp; i++)
- {
- if (strcmp(xNode2.getChildNode("stat",i).getAttribute("name"),"total_cycles")==0) {sys.total_cycles=atof(xNode2.getChildNode("stat",i).getAttribute("value"));continue;}
-
- }
-
- //get the number of components within the second layer
- unsigned int NumofCom_3=xNode2.nChildNode("component");
- XMLNode xNode3,xNode4; //define the third-layer(system.core0) and fourth-layer(system.core0.predictor) xnodes
-
- string strtmp;
- char chtmp[60];
- char chtmp1[60];
- chtmp1[0]='\0';
- unsigned int OrderofComponents_3layer=0;
- if (NumofCom_3>OrderofComponents_3layer)
- {
- //___________________________get all system.core0-n________________________________________________
- if (sys.homogeneous_cores==1) OrderofComponents_3layer=0;
- else OrderofComponents_3layer=sys.number_of_cores-1;
- for (i=0; i<=OrderofComponents_3layer; i++)
- {
- xNode3=xNode2.getChildNode("component",i);
- if (xNode3.isEmpty()==1) {
- printf("The value of homogeneous_cores or number_of_cores is not correct!");
- exit(0);
- }
- else{
- if (strstr(xNode3.getAttribute("name"),"core")!=NULL)
- {
- { //For cpu0-cpui
- //Get all params with system.core?
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"clock_rate")==0) {sys.core[i].clock_rate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"opt_local")==0) {sys.core[i].opt_local=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"x86")==0) {sys.core[i].x86=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"machine_bits")==0) {sys.core[i].machine_bits=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"virtual_address_width")==0) {sys.core[i].virtual_address_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"physical_address_width")==0) {sys.core[i].physical_address_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"instruction_length")==0) {sys.core[i].instruction_length=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"opcode_width")==0) {sys.core[i].opcode_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"micro_opcode_width")==0) {sys.core[i].micro_opcode_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"machine_type")==0) {sys.core[i].machine_type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"internal_datapath_width")==0) {sys.core[i].internal_datapath_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_hardware_threads")==0) {sys.core[i].number_hardware_threads=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"fetch_width")==0) {sys.core[i].fetch_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_instruction_fetch_ports")==0) {sys.core[i].number_instruction_fetch_ports=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"decode_width")==0) {sys.core[i].decode_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"issue_width")==0) {sys.core[i].issue_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"peak_issue_width")==0) {sys.core[i].peak_issue_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"commit_width")==0) {sys.core[i].commit_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"fp_issue_width")==0) {sys.core[i].fp_issue_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"prediction_width")==0) {sys.core[i].prediction_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"pipelines_per_core")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.core[i].pipelines_per_core[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.core[i].pipelines_per_core[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"pipeline_depth")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.core[i].pipeline_depth[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.core[i].pipeline_depth[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
-
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"FPU")==0) {strcpy(sys.core[i].FPU,xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"divider_multiplier")==0) {strcpy(sys.core[i].divider_multiplier,xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"ALU_per_core")==0) {sys.core[i].ALU_per_core=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"FPU_per_core")==0) {sys.core[i].FPU_per_core=atof(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"MUL_per_core")==0) {sys.core[i].MUL_per_core=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"instruction_buffer_size")==0) {sys.core[i].instruction_buffer_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"decoded_stream_buffer_size")==0) {sys.core[i].decoded_stream_buffer_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"instruction_window_scheme")==0) {sys.core[i].instruction_window_scheme =atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"instruction_window_size")==0) {sys.core[i].instruction_window_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"fp_instruction_window_size")==0) {sys.core[i].fp_instruction_window_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"ROB_size")==0) {sys.core[i].ROB_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"archi_Regs_IRF_size")==0) {sys.core[i].archi_Regs_IRF_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"archi_Regs_FRF_size")==0) {sys.core[i].archi_Regs_FRF_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"phy_Regs_IRF_size")==0) {sys.core[i].phy_Regs_IRF_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"phy_Regs_FRF_size")==0) {sys.core[i].phy_Regs_FRF_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"rename_scheme")==0) {sys.core[i].rename_scheme=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"register_windows_size")==0) {sys.core[i].register_windows_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"LSU_order")==0) {strcpy(sys.core[i].LSU_order,xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"store_buffer_size")==0) {sys.core[i].store_buffer_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"load_buffer_size")==0) {sys.core[i].load_buffer_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"memory_ports")==0) {sys.core[i].memory_ports=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"Dcache_dual_pump")==0) {strcpy(sys.core[i].Dcache_dual_pump,xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"RAS_size")==0) {sys.core[i].RAS_size=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- }
- //Get all stats with system.core?
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_instructions")==0) {sys.core[i].total_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"int_instructions")==0) {sys.core[i].int_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fp_instructions")==0) {sys.core[i].fp_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"branch_instructions")==0) {sys.core[i].branch_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"branch_mispredictions")==0) {sys.core[i].branch_mispredictions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"committed_instructions")==0) {sys.core[i].committed_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"committed_int_instructions")==0) {sys.core[i].committed_int_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"committed_fp_instructions")==0) {sys.core[i].committed_fp_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"load_instructions")==0) {sys.core[i].load_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"store_instructions")==0) {sys.core[i].store_instructions=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_cycles")==0) {sys.core[i].total_cycles=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"idle_cycles")==0) {sys.core[i].idle_cycles=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"busy_cycles")==0) {sys.core[i].busy_cycles=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"instruction_buffer_reads")==0) {sys.core[i].instruction_buffer_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"instruction_buffer_write")==0) {sys.core[i].instruction_buffer_write=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"ROB_reads")==0) {sys.core[i].ROB_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"ROB_writes")==0) {sys.core[i].ROB_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"rename_reads")==0) {sys.core[i].rename_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"rename_writes")==0) {sys.core[i].rename_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fp_rename_reads")==0) {sys.core[i].fp_rename_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fp_rename_writes")==0) {sys.core[i].fp_rename_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"inst_window_reads")==0) {sys.core[i].inst_window_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"inst_window_writes")==0) {sys.core[i].inst_window_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"inst_window_wakeup_accesses")==0) {sys.core[i].inst_window_wakeup_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"inst_window_selections")==0) {sys.core[i].inst_window_selections=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fp_inst_window_reads")==0) {sys.core[i].fp_inst_window_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fp_inst_window_writes")==0) {sys.core[i].fp_inst_window_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fp_inst_window_wakeup_accesses")==0) {sys.core[i].fp_inst_window_wakeup_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"archi_int_regfile_reads")==0) {sys.core[i].archi_int_regfile_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"archi_float_regfile_reads")==0) {sys.core[i].archi_float_regfile_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"phy_int_regfile_reads")==0) {sys.core[i].phy_int_regfile_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"phy_float_regfile_reads")==0) {sys.core[i].phy_float_regfile_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"phy_int_regfile_writes")==0) {sys.core[i].archi_int_regfile_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"phy_float_regfile_writes")==0) {sys.core[i].archi_float_regfile_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"archi_int_regfile_writes")==0) {sys.core[i].phy_int_regfile_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"archi_float_regfile_writes")==0) {sys.core[i].phy_float_regfile_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"int_regfile_reads")==0) {sys.core[i].int_regfile_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"float_regfile_reads")==0) {sys.core[i].float_regfile_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"int_regfile_writes")==0) {sys.core[i].int_regfile_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"float_regfile_writes")==0) {sys.core[i].float_regfile_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"windowed_reg_accesses")==0) {sys.core[i].windowed_reg_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"windowed_reg_transports")==0) {sys.core[i].windowed_reg_transports=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"function_calls")==0) {sys.core[i].function_calls=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"context_switches")==0) {sys.core[i].context_switches=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"ialu_accesses")==0) {sys.core[i].ialu_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fpu_accesses")==0) {sys.core[i].fpu_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"mul_accesses")==0) {sys.core[i].mul_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"cdb_alu_accesses")==0) {sys.core[i].cdb_alu_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"cdb_mul_accesses")==0) {sys.core[i].cdb_mul_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"cdb_fpu_accesses")==0) {sys.core[i].cdb_fpu_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"load_buffer_reads")==0) {sys.core[i].load_buffer_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"load_buffer_writes")==0) {sys.core[i].load_buffer_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"load_buffer_cams")==0) {sys.core[i].load_buffer_cams=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"store_buffer_reads")==0) {sys.core[i].store_buffer_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"store_buffer_writes")==0) {sys.core[i].store_buffer_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"store_buffer_cams")==0) {sys.core[i].store_buffer_cams=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"store_buffer_forwards")==0) {sys.core[i].store_buffer_forwards=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"main_memory_access")==0) {sys.core[i].main_memory_access=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"main_memory_read")==0) {sys.core[i].main_memory_read=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"main_memory_write")==0) {sys.core[i].main_memory_write=atoi(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"pipeline_duty_cycle")==0) {sys.core[i].pipeline_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"IFU_duty_cycle")==0) {sys.core[i].IFU_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"BR_duty_cycle")==0) {sys.core[i].BR_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"LSU_duty_cycle")==0) {sys.core[i].LSU_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"MemManU_I_duty_cycle")==0) {sys.core[i].MemManU_I_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"MemManU_D_duty_cycle")==0) {sys.core[i].MemManU_D_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"ALU_duty_cycle")==0) {sys.core[i].ALU_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"MUL_duty_cycle")==0) {sys.core[i].MUL_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"FPU_duty_cycle")==0) {sys.core[i].FPU_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"ALU_cdb_duty_cycle")==0) {sys.core[i].ALU_cdb_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"MUL_cdb_duty_cycle")==0) {sys.core[i].MUL_cdb_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"FPU_cdb_duty_cycle")==0) {sys.core[i].FPU_cdb_duty_cycle=atoi(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- }
-
- NumofCom_4=xNode3.nChildNode("component"); //get the number of components within the third layer
- for(j=0; j<NumofCom_4; j++)
- {
- xNode4=xNode3.getChildNode("component",j);
- if (strcmp(xNode4.getAttribute("name"),"PBT")==0)
- { //find PBT
- itmp=xNode4.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.core0.predictor--PBT
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"prediction_width")==0) {sys.core[i].predictor.prediction_width=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"prediction_scheme")==0) {strcpy(sys.core[i].predictor.prediction_scheme,xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"predictor_size")==0) {sys.core[i].predictor.predictor_size=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"predictor_entries")==0) {sys.core[i].predictor.predictor_entries=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"local_predictor_size")==0)
- {
- strtmp.assign(xNode4.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.core[i].predictor.local_predictor_size[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.core[i].predictor.local_predictor_size[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"local_predictor_entries")==0) {sys.core[i].predictor.local_predictor_entries=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"global_predictor_entries")==0) {sys.core[i].predictor.global_predictor_entries=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"global_predictor_bits")==0) {sys.core[i].predictor.global_predictor_bits=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"chooser_predictor_entries")==0) {sys.core[i].predictor.chooser_predictor_entries=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"chooser_predictor_bits")==0) {sys.core[i].predictor.chooser_predictor_bits=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- }
- itmp=xNode4.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.core0.predictor--PBT
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"predictor_accesses")==0) sys.core[i].predictor.predictor_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));
- }
- }
- if (strcmp(xNode4.getAttribute("name"),"itlb")==0)
- {//find system.core0.itlb
- itmp=xNode4.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.core0.itlb--itlb
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"number_entries")==0) sys.core[i].itlb.number_entries=atoi(xNode4.getChildNode("param",k).getAttribute("value"));
- }
- itmp=xNode4.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in itlb
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_hits")==0) {sys.core[i].itlb.total_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.core[i].itlb.total_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_misses")==0) {sys.core[i].itlb.total_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"conflicts")==0) {sys.core[i].itlb.conflicts=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- }
- if (strcmp(xNode4.getAttribute("name"),"icache")==0)
- {//find system.core0.icache
- itmp=xNode4.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.core0.icache--icache
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"icache_config")==0)
- {
- strtmp.assign(xNode4.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.core[i].icache.icache_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.core[i].icache.icache_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"buffer_sizes")==0)
- {
- strtmp.assign(xNode4.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.core[i].icache.buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.core[i].icache.buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- itmp=xNode4.nChildNode("stat");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.core[i].icache.total_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_accesses")==0) {sys.core[i].icache.read_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_misses")==0) {sys.core[i].icache.read_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"replacements")==0) {sys.core[i].icache.replacements=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_hits")==0) {sys.core[i].icache.read_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_hits")==0) {sys.core[i].icache.total_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_misses")==0) {sys.core[i].icache.total_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"miss_buffer_access")==0) {sys.core[i].icache.miss_buffer_access=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"fill_buffer_accesses")==0) {sys.core[i].icache.fill_buffer_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_accesses")==0) {sys.core[i].icache.prefetch_buffer_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_writes")==0) {sys.core[i].icache.prefetch_buffer_writes=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_reads")==0) {sys.core[i].icache.prefetch_buffer_reads=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_hits")==0) {sys.core[i].icache.prefetch_buffer_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"conflicts")==0) {sys.core[i].icache.conflicts=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- }
- if (strcmp(xNode4.getAttribute("name"),"dtlb")==0)
- {//find system.core0.dtlb
- itmp=xNode4.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.core0.dtlb--dtlb
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"number_entries")==0) sys.core[i].dtlb.number_entries=atoi(xNode4.getChildNode("param",k).getAttribute("value"));
- }
- itmp=xNode4.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in dtlb
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.core[i].dtlb.total_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_accesses")==0) {sys.core[i].dtlb.read_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_accesses")==0) {sys.core[i].dtlb.write_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_hits")==0) {sys.core[i].dtlb.read_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_hits")==0) {sys.core[i].dtlb.write_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_misses")==0) {sys.core[i].dtlb.read_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_misses")==0) {sys.core[i].dtlb.write_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_hits")==0) {sys.core[i].dtlb.total_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_misses")==0) {sys.core[i].dtlb.total_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"conflicts")==0) {sys.core[i].dtlb.conflicts=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
-
- }
- }
- if (strcmp(xNode4.getAttribute("name"),"dcache")==0)
- {//find system.core0.dcache
- itmp=xNode4.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.core0.dcache--dcache
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"dcache_config")==0)
- {
- strtmp.assign(xNode4.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.core[i].dcache.dcache_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.core[i].dcache.dcache_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"buffer_sizes")==0)
- {
- strtmp.assign(xNode4.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.core[i].dcache.buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.core[i].dcache.buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- itmp=xNode4.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in dcache
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.core[i].dcache.total_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_accesses")==0) {sys.core[i].dcache.read_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_accesses")==0) {sys.core[i].dcache.write_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_hits")==0) {sys.core[i].dcache.total_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_misses")==0) {sys.core[i].dcache.total_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_hits")==0) {sys.core[i].dcache.read_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_hits")==0) {sys.core[i].dcache.write_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_misses")==0) {sys.core[i].dcache.read_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_misses")==0) {sys.core[i].dcache.write_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"replacements")==0) {sys.core[i].dcache.replacements=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_backs")==0) {sys.core[i].dcache.write_backs=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"miss_buffer_access")==0) {sys.core[i].dcache.miss_buffer_access=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"fill_buffer_accesses")==0) {sys.core[i].dcache.fill_buffer_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_accesses")==0) {sys.core[i].dcache.prefetch_buffer_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_writes")==0) {sys.core[i].dcache.prefetch_buffer_writes=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_reads")==0) {sys.core[i].dcache.prefetch_buffer_reads=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_hits")==0) {sys.core[i].dcache.prefetch_buffer_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"wbb_writes")==0) {sys.core[i].dcache.wbb_writes=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"wbb_reads")==0) {sys.core[i].dcache.wbb_reads=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"conflicts")==0) {sys.core[i].dcache.conflicts=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
-
- }
- }
- if (strcmp(xNode4.getAttribute("name"),"BTB")==0)
- {//find system.core0.BTB
- itmp=xNode4.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.core0.BTB--BTB
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"BTB_config")==0)
- {
- strtmp.assign(xNode4.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.core[i].BTB.BTB_config[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.core[i].BTB.BTB_config[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- itmp=xNode4.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in BTB
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.core[i].BTB.total_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_accesses")==0) {sys.core[i].BTB.read_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_accesses")==0) {sys.core[i].BTB.write_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_hits")==0) {sys.core[i].BTB.total_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"total_misses")==0) {sys.core[i].BTB.total_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_hits")==0) {sys.core[i].BTB.read_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_hits")==0) {sys.core[i].BTB.write_hits=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"read_misses")==0) {sys.core[i].BTB.read_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"write_misses")==0) {sys.core[i].BTB.write_misses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"replacements")==0) {sys.core[i].BTB.replacements=atof(xNode4.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- }
- }
- }
- else {
- printf("The value of homogeneous_cores or number_of_cores is not correct!");
- exit(0);
- }
- }
- }
-
- //__________________________________________Get system.L1Directory0-n____________________________________________
- int w,tmpOrderofComponents_3layer;
- w=OrderofComponents_3layer+1;
- tmpOrderofComponents_3layer=OrderofComponents_3layer;
- if (sys.homogeneous_L1Directories==1) OrderofComponents_3layer=OrderofComponents_3layer+1;
- else OrderofComponents_3layer=OrderofComponents_3layer+sys.number_of_L1Directories;
-
- for (i=0; i<(OrderofComponents_3layer-tmpOrderofComponents_3layer); i++)
- {
- xNode3=xNode2.getChildNode("component",w);
- if (xNode3.isEmpty()==1) {
- printf("The value of homogeneous_L1Directories or number_of_L1Directories is not correct!");
- exit(0);
- }
- else
- {
- if (strstr(xNode3.getAttribute("id"),"L1Directory")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.L1Directory
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"Dir_config")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L1Directory[i].Dir_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L1Directory[i].Dir_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"buffer_sizes")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L1Directory[i].buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L1Directory[i].buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
-
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"clockrate")==0) {sys.L1Directory[i].clockrate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"ports")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L1Directory[i].ports[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L1Directory[i].ports[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"device_type")==0) {sys.L1Directory[i].device_type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"Directory_type")==0) {sys.L1Directory[i].Directory_type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"3D_stack")==0) {strcpy(sys.L1Directory[i].threeD_stack,xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- }
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.L2directorydirectory
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.L1Directory[i].total_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_accesses")==0) {sys.L1Directory[i].read_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_accesses")==0) {sys.L1Directory[i].write_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_misses")==0) {sys.L1Directory[i].read_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_misses")==0) {sys.L1Directory[i].write_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"conflicts")==0) {sys.L1Directory[i].conflicts=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"duty_cycle")==0) {sys.L1Directory[i].duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- w=w+1;
- }
- else {
- printf("The value of homogeneous_L1Directories or number_of_L1Directories is not correct!");
- exit(0);
- }
- }
- }
-
- //__________________________________________Get system.L2Directory0-n____________________________________________
- w=OrderofComponents_3layer+1;
- tmpOrderofComponents_3layer=OrderofComponents_3layer;
- if (sys.homogeneous_L2Directories==1) OrderofComponents_3layer=OrderofComponents_3layer+1;
- else OrderofComponents_3layer=OrderofComponents_3layer+sys.number_of_L2Directories;
-
- for (i=0; i<(OrderofComponents_3layer-tmpOrderofComponents_3layer); i++)
- {
- xNode3=xNode2.getChildNode("component",w);
- if (xNode3.isEmpty()==1) {
- printf("The value of homogeneous_L2Directories or number_of_L2Directories is not correct!");
- exit(0);
- }
- else
- {
- if (strstr(xNode3.getAttribute("id"),"L2Directory")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.L2Directory
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"Dir_config")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L2Directory[i].Dir_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L2Directory[i].Dir_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"buffer_sizes")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L2Directory[i].buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L2Directory[i].buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
-
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"clockrate")==0) {sys.L2Directory[i].clockrate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"Directory_type")==0) {sys.L2Directory[i].Directory_type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"ports")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L2Directory[i].ports[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L2Directory[i].ports[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"device_type")==0) {sys.L2Directory[i].device_type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"3D_stack")==0) {strcpy(sys.L2Directory[i].threeD_stack,xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- }
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.L2directorydirectory
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.L2Directory[i].total_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_accesses")==0) {sys.L2Directory[i].read_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_accesses")==0) {sys.L2Directory[i].write_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_misses")==0) {sys.L2Directory[i].read_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_misses")==0) {sys.L2Directory[i].write_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"conflicts")==0) {sys.L2Directory[i].conflicts=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"duty_cycle")==0) {sys.L2Directory[i].duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-
- }
- w=w+1;
- }
- else {
- printf("The value of homogeneous_L2Directories or number_of_L2Directories is not correct!");
- exit(0);
- }
- }
- }
-
- //__________________________________________Get system.L2[0..n]____________________________________________
- w=OrderofComponents_3layer+1;
- tmpOrderofComponents_3layer=OrderofComponents_3layer;
- if (sys.homogeneous_L2s==1) OrderofComponents_3layer=OrderofComponents_3layer+1;
- else OrderofComponents_3layer=OrderofComponents_3layer+sys.number_of_L2s;
-
- for (i=0; i<(OrderofComponents_3layer-tmpOrderofComponents_3layer); i++)
- {
- xNode3=xNode2.getChildNode("component",w);
- if (xNode3.isEmpty()==1) {
- printf("The value of homogeneous_L2s or number_of_L2s is not correct!");
- exit(0);
- }
- else
- {
- if (strstr(xNode3.getAttribute("name"),"L2")!=NULL)
- {
- { //For L20-L2i
- //Get all params with system.L2?
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"L2_config")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L2[i].L2_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L2[i].L2_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"clockrate")==0) {sys.L2[i].clockrate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"merged_dir")==0) {sys.L2[i].merged_dir=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"ports")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L2[i].ports[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L2[i].ports[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"device_type")==0) {sys.L2[i].device_type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"threeD_stack")==0) {strcpy(sys.L2[i].threeD_stack,(xNode3.getChildNode("param",k).getAttribute("value")));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"buffer_sizes")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L2[i].buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L2[i].buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- }
- //Get all stats with system.L2?
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.L2[i].total_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_accesses")==0) {sys.L2[i].read_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_accesses")==0) {sys.L2[i].write_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_hits")==0) {sys.L2[i].total_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_misses")==0) {sys.L2[i].total_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_hits")==0) {sys.L2[i].read_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_hits")==0) {sys.L2[i].write_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_misses")==0) {sys.L2[i].read_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_misses")==0) {sys.L2[i].write_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"replacements")==0) {sys.L2[i].replacements=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_backs")==0) {sys.L2[i].write_backs=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"miss_buffer_accesses")==0) {sys.L2[i].miss_buffer_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fill_buffer_accesses")==0) {sys.L2[i].fill_buffer_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_accesses")==0) {sys.L2[i].prefetch_buffer_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_writes")==0) {sys.L2[i].prefetch_buffer_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_reads")==0) {sys.L2[i].prefetch_buffer_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_hits")==0) {sys.L2[i].prefetch_buffer_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"wbb_writes")==0) {sys.L2[i].wbb_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"wbb_reads")==0) {sys.L2[i].wbb_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"conflicts")==0) {sys.L2[i].conflicts=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"duty_cycle")==0) {sys.L2[i].duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_read_accesses")==0) {sys.L2[i].homenode_read_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_read_accesses")==0) {sys.L2[i].homenode_read_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_read_hits")==0) {sys.L2[i].homenode_read_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_write_hits")==0) {sys.L2[i].homenode_write_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_read_misses")==0) {sys.L2[i].homenode_read_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_write_misses")==0) {sys.L2[i].homenode_write_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"dir_duty_cycle")==0) {sys.L2[i].dir_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-
- }
- }
- w=w+1;
- }
- else {
- printf("The value of homogeneous_L2s or number_of_L2s is not correct!");
- exit(0);
- }
- }
- }
- //__________________________________________Get system.L3[0..n]____________________________________________
- w=OrderofComponents_3layer+1;
- tmpOrderofComponents_3layer=OrderofComponents_3layer;
- if (sys.homogeneous_L3s==1) OrderofComponents_3layer=OrderofComponents_3layer+1;
- else OrderofComponents_3layer=OrderofComponents_3layer+sys.number_of_L3s;
-
- for (i=0; i<(OrderofComponents_3layer-tmpOrderofComponents_3layer); i++)
- {
- xNode3=xNode2.getChildNode("component",w);
- if (xNode3.isEmpty()==1) {
- printf("The value of homogeneous_L3s or number_of_L3s is not correct!");
- exit(0);
- }
- else
- {
- if (strstr(xNode3.getAttribute("name"),"L3")!=NULL)
- {
- { //For L30-L3i
- //Get all params with system.L3?
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"L3_config")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L3[i].L3_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L3[i].L3_config[m]=atof(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"clockrate")==0) {sys.L3[i].clockrate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"merged_dir")==0) {sys.L3[i].merged_dir=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"ports")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L3[i].ports[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L3[i].ports[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"device_type")==0) {sys.L3[i].device_type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"threeD_stack")==0) {strcpy(sys.L3[i].threeD_stack,(xNode3.getChildNode("param",k).getAttribute("value")));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"buffer_sizes")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.L3[i].buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.L3[i].buffer_sizes[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- }
- //Get all stats with system.L3?
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) {sys.L3[i].total_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_accesses")==0) {sys.L3[i].read_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_accesses")==0) {sys.L3[i].write_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_hits")==0) {sys.L3[i].total_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_misses")==0) {sys.L3[i].total_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_hits")==0) {sys.L3[i].read_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_hits")==0) {sys.L3[i].write_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"read_misses")==0) {sys.L3[i].read_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_misses")==0) {sys.L3[i].write_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"replacements")==0) {sys.L3[i].replacements=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"write_backs")==0) {sys.L3[i].write_backs=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"miss_buffer_accesses")==0) {sys.L3[i].miss_buffer_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"fill_buffer_accesses")==0) {sys.L3[i].fill_buffer_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_accesses")==0) {sys.L3[i].prefetch_buffer_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_writes")==0) {sys.L3[i].prefetch_buffer_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_reads")==0) {sys.L3[i].prefetch_buffer_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"prefetch_buffer_hits")==0) {sys.L3[i].prefetch_buffer_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"wbb_writes")==0) {sys.L3[i].wbb_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"wbb_reads")==0) {sys.L3[i].wbb_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"conflicts")==0) {sys.L3[i].conflicts=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"duty_cycle")==0) {sys.L3[i].duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_read_accesses")==0) {sys.L3[i].homenode_read_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_read_accesses")==0) {sys.L3[i].homenode_read_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_read_hits")==0) {sys.L3[i].homenode_read_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_write_hits")==0) {sys.L3[i].homenode_write_hits=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_read_misses")==0) {sys.L3[i].homenode_read_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"homenode_write_misses")==0) {sys.L3[i].homenode_write_misses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"dir_duty_cycle")==0) {sys.L3[i].dir_duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-
- }
- }
- w=w+1;
- }
- else {
- printf("The value of homogeneous_L3s or number_of_L3s is not correct!");
- exit(0);
- }
- }
- }
- //__________________________________________Get system.NoC[0..n]____________________________________________
- w=OrderofComponents_3layer+1;
- tmpOrderofComponents_3layer=OrderofComponents_3layer;
- if (sys.homogeneous_NoCs==1) OrderofComponents_3layer=OrderofComponents_3layer+1;
- else OrderofComponents_3layer=OrderofComponents_3layer+sys.number_of_NoCs;
-
- for (i=0; i<(OrderofComponents_3layer-tmpOrderofComponents_3layer); i++)
- {
- xNode3=xNode2.getChildNode("component",w);
- if (xNode3.isEmpty()==1) {
- printf("The value of homogeneous_NoCs or number_of_NoCs is not correct!");
- exit(0);
- }
- else
- {
- if (strstr(xNode3.getAttribute("name"),"noc")!=NULL)
- {
- { //For NoC0-NoCi
- //Get all params with system.NoC?
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"clockrate")==0) {sys.NoC[i].clockrate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"type")==0) {sys.NoC[i].type=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"topology")==0) {strcpy(sys.NoC[i].topology,(xNode3.getChildNode("param",k).getAttribute("value")));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"horizontal_nodes")==0) {sys.NoC[i].horizontal_nodes=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"vertical_nodes")==0) {sys.NoC[i].vertical_nodes=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"has_global_link")==0) {sys.NoC[i].has_global_link=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"link_throughput")==0) {sys.NoC[i].link_throughput=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"link_latency")==0) {sys.NoC[i].link_latency=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"input_ports")==0) {sys.NoC[i].input_ports=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"output_ports")==0) {sys.NoC[i].output_ports=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"virtual_channel_per_port")==0) {sys.NoC[i].virtual_channel_per_port=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"flit_bits")==0) {sys.NoC[i].flit_bits=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"input_buffer_entries_per_vc")==0) {sys.NoC[i].input_buffer_entries_per_vc=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"dual_pump")==0) {sys.NoC[i].dual_pump=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"chip_coverage")==0) {sys.NoC[i].chip_coverage=atof(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"link_routing_over_percentage")==0) {sys.NoC[i].route_over_perc=atof(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"ports_of_input_buffer")==0)
- {
- strtmp.assign(xNode3.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.NoC[i].ports_of_input_buffer[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.NoC[i].ports_of_input_buffer[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- continue;
- }
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_of_crossbars")==0) {sys.NoC[i].number_of_crossbars=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"crossbar_type")==0) {strcpy(sys.NoC[i].crossbar_type,(xNode3.getChildNode("param",k).getAttribute("value")));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"crosspoint_type")==0) {strcpy(sys.NoC[i].crosspoint_type,(xNode3.getChildNode("param",k).getAttribute("value")));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"arbiter_type")==0) {sys.NoC[i].arbiter_type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- }
- NumofCom_4=xNode3.nChildNode("component"); //get the number of components within the third layer
- for(j=0; j<NumofCom_4; j++)
- {
- xNode4=xNode3.getChildNode("component",j);
- if (strcmp(xNode4.getAttribute("name"),"xbar0")==0)
- { //find xbar0
- itmp=xNode4.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.NoC0.xbar0
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"number_of_inputs_of_crossbars")==0) {sys.NoC[i].xbar0.number_of_inputs_of_crossbars=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"number_of_outputs_of_crossbars")==0) {sys.NoC[i].xbar0.number_of_outputs_of_crossbars=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"flit_bits")==0) {sys.NoC[i].xbar0.flit_bits=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"input_buffer_entries_per_port")==0) {sys.NoC[i].xbar0.input_buffer_entries_per_port=atoi(xNode4.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode4.getChildNode("param",k).getAttribute("name"),"ports_of_input_buffer")==0)
- {
- strtmp.assign(xNode4.getChildNode("param",k).getAttribute("value"));
- m=0;
- for(n=0; n<strtmp.length(); n++)
- {
- if (strtmp[n]!=',')
- {
- sprintf(chtmp,"%c",strtmp[n]);
- strcat(chtmp1,chtmp);
- }
- else{
- sys.NoC[i].xbar0.ports_of_input_buffer[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- sys.NoC[i].xbar0.ports_of_input_buffer[m]=atoi(chtmp1);
- m++;
- chtmp1[0]='\0';
- }
- }
- itmp=xNode4.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.core0.predictor--PBT
- if (strcmp(xNode4.getChildNode("stat",k).getAttribute("name"),"predictor_accesses")==0) sys.core[i].predictor.predictor_accesses=atof(xNode4.getChildNode("stat",k).getAttribute("value"));
- }
- }
- }
- //Get all stats with system.NoC?
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- {
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_accesses")==0) sys.NoC[i].total_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"duty_cycle")==0) sys.NoC[i].duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));
- }
- }
- w=w+1;
- }
- }
- }
- //__________________________________________Get system.mem____________________________________________
- if (OrderofComponents_3layer>0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.mem")!=NULL)
- {
-
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.mem
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"mem_tech_node")==0) {sys.mem.mem_tech_node=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"device_clock")==0) {sys.mem.device_clock=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"peak_transfer_rate")==0) {sys.mem.peak_transfer_rate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"capacity_per_channel")==0) {sys.mem.capacity_per_channel=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_ranks")==0) {sys.mem.number_ranks=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"num_banks_of_DRAM_chip")==0) {sys.mem.num_banks_of_DRAM_chip=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"Block_width_of_DRAM_chip")==0) {sys.mem.Block_width_of_DRAM_chip=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"output_width_of_DRAM_chip")==0) {sys.mem.output_width_of_DRAM_chip=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"page_size_of_DRAM_chip")==0) {sys.mem.page_size_of_DRAM_chip=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"burstlength_of_DRAM_chip")==0) {sys.mem.burstlength_of_DRAM_chip=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"internal_prefetch_of_DRAM_chip")==0) {sys.mem.internal_prefetch_of_DRAM_chip=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- }
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.mem
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_accesses")==0) {sys.mem.memory_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_reads")==0) {sys.mem.memory_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_writes")==0) {sys.mem.memory_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- }
- else{
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- //__________________________________________Get system.mc____________________________________________
- if (OrderofComponents_3layer>0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.mc")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.mc
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"mc_clock")==0) {sys.mc.mc_clock=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"block_size")==0) {sys.mc.llc_line_length=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_mcs")==0) {sys.mc.number_mcs=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"memory_channels_per_mc")==0) {sys.mc.memory_channels_per_mc=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"req_window_size_per_channel")==0) {sys.mc.req_window_size_per_channel=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"IO_buffer_size_per_channel")==0) {sys.mc.IO_buffer_size_per_channel=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"databus_width")==0) {sys.mc.databus_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"addressbus_width")==0) {sys.mc.addressbus_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"peak_transfer_rate")==0) {sys.mc.peak_transfer_rate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_ranks")==0) {sys.mc.number_ranks=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"LVDS")==0) {sys.mc.LVDS=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"type")==0) {sys.mc.type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"withPHY")==0) {sys.mc.withPHY=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-
- }
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.mc
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_accesses")==0) {sys.mc.memory_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_reads")==0) {sys.mc.memory_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_writes")==0) {sys.mc.memory_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- }
- else{
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- //__________________________________________Get system.niu____________________________________________
- if (OrderofComponents_3layer>0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.niu")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.niu
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"clockrate")==0) {sys.niu.clockrate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_units")==0) {sys.niu.number_units=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"type")==0) {sys.niu.type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- }
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.mendirectory
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"duty_cycle")==0) {sys.niu.duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_load_perc")==0) {sys.niu.total_load_perc=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- }
- else{
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
-
- //__________________________________________Get system.pcie____________________________________________
- if (OrderofComponents_3layer>0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.pcie")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.mem
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"clockrate")==0) {sys.pcie.clockrate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_units")==0) {sys.pcie.number_units=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"num_channels")==0) {sys.pcie.num_channels=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"type")==0) {sys.pcie.type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"withPHY")==0) {sys.pcie.withPHY=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-
- }
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.mendirectory
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"duty_cycle")==0) {sys.pcie.duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_load_perc")==0) {sys.pcie.total_load_perc=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- }
- }
- else{
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- //__________________________________________Get system.flashcontroller____________________________________________
- if (OrderofComponents_3layer>0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.flashc")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k<itmp; k++)
- { //get all items of param in system.mem
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"flashc_clock")==0) {sys.flashc.mc_clock=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"block_size")==0) {sys.flashc.llc_line_length=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_flashcs")==0) {sys.flashc.number_mcs=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"memory_channels_per_flashc")==0) {sys.flashc.memory_channels_per_mc=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"req_window_size_per_channel")==0) {sys.flashc.req_window_size_per_channel=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"IO_buffer_size_per_channel")==0) {sys.flashc.IO_buffer_size_per_channel=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"databus_width")==0) {sys.flashc.databus_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"addressbus_width")==0) {sys.flashc.addressbus_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"peak_transfer_rate")==0) {sys.flashc.peak_transfer_rate=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_ranks")==0) {sys.flashc.number_ranks=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"LVDS")==0) {sys.flashc.LVDS=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"type")==0) {sys.flashc.type=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"withPHY")==0) {sys.flashc.withPHY=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;}
-
- }
- itmp=xNode3.nChildNode("stat");
- for(k=0; k<itmp; k++)
- { //get all items of stat in system.mendirectory
-// if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_accesses")==0) {sys.flashc.memory_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_reads")==0) {sys.flashc.memory_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-// if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_writes")==0) {sys.flashc.memory_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"duty_cycle")==0) {sys.flashc.duty_cycle=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
- if (strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"total_load_perc")==0) {sys.flashc.total_load_perc=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;}
-
- }
- }
- else{
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
-
- }
-}
-void ParseXML::initialize() //Reset the fields of sys listed below to fixed placeholder defaults (mostly 1, "default", or a bool); takes no arguments, returns nothing
-{
- //All number_of_* at the level of 'system' 03/21/2009
- sys.number_of_cores=1;
- sys.number_of_L1Directories=1;
- sys.number_of_L2Directories=1;
- sys.number_of_L2s=1;
- sys.Private_L2 = false;
- sys.number_of_L3s=1;
- sys.number_of_NoCs=1;
- // All params at the level of 'system'
- //strcpy(sys.homogeneous_cores,"default");
- sys.core_tech_node=1;
- sys.target_core_clockrate=1;
- sys.target_chip_area=1;
- sys.temperature=1;
- sys.number_cache_levels=1;
- sys.homogeneous_cores=1;
- sys.homogeneous_L1Directories=1;
- sys.homogeneous_L2Directories=1;
- sys.homogeneous_L2s=1;
- sys.homogeneous_L3s=1;
- sys.homogeneous_NoCs=1;
- sys.homogeneous_ccs=1;
-
- sys.Max_area_deviation=1;
- sys.Max_power_deviation=1;
- sys.device_type=1;
- sys.longer_channel_device =true;
- sys.Embedded =false;
- sys.opt_dynamic_power=false;
- sys.opt_lakage_power=false;
- sys.opt_clockrate=true;
- sys.opt_area=false;
- sys.interconnect_projection_type=1;
- int i,j; //i indexes the per-component arrays (0..63), j indexes the 20-entry config arrays
- for (i=0; i<=63; i++)
- {
- sys.core[i].clock_rate=1;
- sys.core[i].opt_local = true;
- sys.core[i].x86 = false;
- sys.core[i].machine_bits=1;
- sys.core[i].virtual_address_width=1;
- sys.core[i].physical_address_width=1;
- sys.core[i].opcode_width=1;
- sys.core[i].micro_opcode_width=1;
- //strcpy(sys.core[i].machine_type,"default");
- sys.core[i].internal_datapath_width=1;
- sys.core[i].number_hardware_threads=1;
- sys.core[i].fetch_width=1;
- sys.core[i].number_instruction_fetch_ports=1;
- sys.core[i].decode_width=1;
- sys.core[i].issue_width=1;
- sys.core[i].peak_issue_width=1;
- sys.core[i].commit_width=1;
- for (j=0; j<20; j++) sys.core[i].pipelines_per_core[j]=1;
- for (j=0; j<20; j++) sys.core[i].pipeline_depth[j]=1;
- strcpy(sys.core[i].FPU,"default");
- strcpy(sys.core[i]. divider_multiplier,"default");
- sys.core[i].ALU_per_core=1;
- sys.core[i].FPU_per_core=1.0;
- sys.core[i].MUL_per_core=1;
- sys.core[i].instruction_buffer_size=1;
- sys.core[i].decoded_stream_buffer_size=1;
- //strcpy(sys.core[i].instruction_window_scheme,"default");
- sys.core[i].instruction_window_size=1;
- sys.core[i].ROB_size=1;
- sys.core[i].archi_Regs_IRF_size=1;
- sys.core[i].archi_Regs_FRF_size=1;
- sys.core[i].phy_Regs_IRF_size=1;
- sys.core[i].phy_Regs_FRF_size=1;
- //strcpy(sys.core[i].rename_scheme,"default");
- sys.core[i].register_windows_size=1;
- strcpy(sys.core[i].LSU_order,"default");
- sys.core[i].store_buffer_size=1;
- sys.core[i].load_buffer_size=1;
- sys.core[i].memory_ports=1;
- strcpy(sys.core[i].Dcache_dual_pump,"default");
- sys.core[i].RAS_size=1;
- //all stats at the level of system.core(0-n)
- sys.core[i].total_instructions=1;
- sys.core[i].int_instructions=1;
- sys.core[i].fp_instructions=1;
- sys.core[i].branch_instructions=1;
- sys.core[i].branch_mispredictions=1;
- sys.core[i].committed_instructions=1;
- sys.core[i].load_instructions=1;
- sys.core[i].store_instructions=1;
- sys.core[i].total_cycles=1;
- sys.core[i].idle_cycles=1;
- sys.core[i].busy_cycles=1;
- sys.core[i].instruction_buffer_reads=1;
- sys.core[i].instruction_buffer_write=1;
- sys.core[i].ROB_reads=1;
- sys.core[i].ROB_writes=1;
- sys.core[i].rename_accesses=1;
- sys.core[i].inst_window_reads=1;
- sys.core[i].inst_window_writes=1;
- sys.core[i].inst_window_wakeup_accesses=1;
- sys.core[i].inst_window_selections=1;
- sys.core[i].archi_int_regfile_reads=1;
- sys.core[i].archi_float_regfile_reads=1;
- sys.core[i].phy_int_regfile_reads=1;
- sys.core[i].phy_float_regfile_reads=1;
- sys.core[i].windowed_reg_accesses=1;
- sys.core[i].windowed_reg_transports=1;
- sys.core[i].function_calls=1;
- sys.core[i].ialu_accesses=1;
- sys.core[i].fpu_accesses=1;
- sys.core[i].mul_accesses=1;
- sys.core[i].cdb_alu_accesses=1;
- sys.core[i].cdb_mul_accesses=1;
- sys.core[i].cdb_fpu_accesses=1;
- sys.core[i].load_buffer_reads=1;
- sys.core[i].load_buffer_writes=1;
- sys.core[i].load_buffer_cams=1;
- sys.core[i].store_buffer_reads=1;
- sys.core[i].store_buffer_writes=1;
- sys.core[i].store_buffer_cams=1;
- sys.core[i].store_buffer_forwards=1;
- sys.core[i].main_memory_access=1;
- sys.core[i].main_memory_read=1;
- sys.core[i].main_memory_write=1;
- sys.core[i].IFU_duty_cycle = 1;
- sys.core[i].BR_duty_cycle = 1;
- sys.core[i].LSU_duty_cycle = 1;
- sys.core[i].MemManU_I_duty_cycle =1;
- sys.core[i].MemManU_D_duty_cycle =1;
- sys.core[i].ALU_duty_cycle =1;
- sys.core[i].MUL_duty_cycle =1;
- sys.core[i].FPU_duty_cycle =1;
- sys.core[i].ALU_cdb_duty_cycle =1;
- sys.core[i].MUL_cdb_duty_cycle =1;
- sys.core[i].FPU_cdb_duty_cycle =1;
- //system.core?.predictor
- sys.core[i].predictor.prediction_width=1;
- strcpy(sys.core[i].predictor.prediction_scheme,"default");
- sys.core[i].predictor.predictor_size=1;
- sys.core[i].predictor.predictor_entries=1;
- sys.core[i].predictor.local_predictor_entries=1;
- for (j=0; j<20; j++) sys.core[i].predictor.local_predictor_size[j]=1;
- sys.core[i].predictor.global_predictor_entries=1;
- sys.core[i].predictor.global_predictor_bits=1;
- sys.core[i].predictor.chooser_predictor_entries=1;
- sys.core[i].predictor.chooser_predictor_bits=1;
- sys.core[i].predictor.predictor_accesses=1;
- //system.core?.itlb
- sys.core[i].itlb.number_entries=1;
- sys.core[i].itlb.total_hits=1;
- sys.core[i].itlb.total_accesses=1;
- sys.core[i].itlb.total_misses=1;
- //system.core?.icache
- for (j=0; j<20; j++) sys.core[i].icache.icache_config[j]=1;
- //strcpy(sys.core[i].icache.buffer_sizes,"default");
- sys.core[i].icache.total_accesses=1;
- sys.core[i].icache.read_accesses=1;
- sys.core[i].icache.read_misses=1;
- sys.core[i].icache.replacements=1;
- sys.core[i].icache.read_hits=1;
- sys.core[i].icache.total_hits=1;
- sys.core[i].icache.total_misses=1;
- sys.core[i].icache.miss_buffer_access=1;
- sys.core[i].icache.fill_buffer_accesses=1;
- sys.core[i].icache.prefetch_buffer_accesses=1;
- sys.core[i].icache.prefetch_buffer_writes=1;
- sys.core[i].icache.prefetch_buffer_reads=1;
- sys.core[i].icache.prefetch_buffer_hits=1;
- //system.core?.dtlb
- sys.core[i].dtlb.number_entries=1;
- sys.core[i].dtlb.total_accesses=1;
- sys.core[i].dtlb.read_accesses=1;
- sys.core[i].dtlb.write_accesses=1;
- sys.core[i].dtlb.write_hits=1;
- sys.core[i].dtlb.read_hits=1;
- sys.core[i].dtlb.read_misses=1;
- sys.core[i].dtlb.write_misses=1;
- sys.core[i].dtlb.total_hits=1;
- sys.core[i].dtlb.total_misses=1;
- //system.core?.dcache
- for (j=0; j<20; j++) sys.core[i].dcache.dcache_config[j]=1;
- //strcpy(sys.core[i].dcache.buffer_sizes,"default");
- sys.core[i].dcache.total_accesses=1;
- sys.core[i].dcache.read_accesses=1;
- sys.core[i].dcache.write_accesses=1;
- sys.core[i].dcache.total_hits=1;
- sys.core[i].dcache.total_misses=1;
- sys.core[i].dcache.read_hits=1;
- sys.core[i].dcache.write_hits=1;
- sys.core[i].dcache.read_misses=1;
- sys.core[i].dcache.write_misses=1;
- sys.core[i].dcache.replacements=1;
- sys.core[i].dcache.write_backs=1;
- sys.core[i].dcache.miss_buffer_access=1;
- sys.core[i].dcache.fill_buffer_accesses=1;
- sys.core[i].dcache.prefetch_buffer_accesses=1;
- sys.core[i].dcache.prefetch_buffer_writes=1;
- sys.core[i].dcache.prefetch_buffer_reads=1;
- sys.core[i].dcache.prefetch_buffer_hits=1;
- sys.core[i].dcache.wbb_writes=1;
- sys.core[i].dcache.wbb_reads=1;
- //system.core?.BTB
- for (j=0; j<20; j++) sys.core[i].BTB.BTB_config[j]=1;
- sys.core[i].BTB.total_accesses=1;
- sys.core[i].BTB.read_accesses=1;
- sys.core[i].BTB.write_accesses=1;
- sys.core[i].BTB.total_hits=1;
- sys.core[i].BTB.total_misses=1;
- sys.core[i].BTB.read_hits=1;
- sys.core[i].BTB.write_hits=1;
- sys.core[i].BTB.read_misses=1;
- sys.core[i].BTB.write_misses=1;
- sys.core[i].BTB.replacements=1;
- }
-
- //system_L1directory
- for (i=0; i<=63; i++)
- {
- for (j=0; j<20; j++) sys.L1Directory[i].Dir_config[j]=1;
- for (j=0; j<20; j++) sys.L1Directory[i].buffer_sizes[j]=1;
- sys.L1Directory[i].clockrate=1;
- for (j=0; j<20; j++) sys.L1Directory[i].ports[j]=1; //ports has 20 entries (0..19); a single store to ports[20] would be out of bounds
- sys.L1Directory[i].device_type=1;
- strcpy(sys.L1Directory[i].threeD_stack,"default");
- sys.L1Directory[i].total_accesses=1;
- sys.L1Directory[i].read_accesses=1;
- sys.L1Directory[i].write_accesses=1;
- sys.L1Directory[i].duty_cycle =1;
- }
- //system_L2directory
- for (i=0; i<=63; i++)
- {
- for (j=0; j<20; j++) sys.L2Directory[i].Dir_config[j]=1;
- for (j=0; j<20; j++) sys.L2Directory[i].buffer_sizes[j]=1;
- sys.L2Directory[i].clockrate=1;
- for (j=0; j<20; j++) sys.L2Directory[i].ports[j]=1; //ports has 20 entries (0..19); a single store to ports[20] would be out of bounds
- sys.L2Directory[i].device_type=1;
- strcpy(sys.L2Directory[i].threeD_stack,"default");
- sys.L2Directory[i].total_accesses=1;
- sys.L2Directory[i].read_accesses=1;
- sys.L2Directory[i].write_accesses=1;
- sys.L2Directory[i].duty_cycle =1;
- }
- for (i=0; i<=63; i++)
- {
- //system_L2
- for (j=0; j<20; j++) sys.L2[i].L2_config[j]=1;
- sys.L2[i].clockrate=1;
- for (j=0; j<20; j++) sys.L2[i].ports[j]=1;
- sys.L2[i].device_type=1;
- strcpy(sys.L2[i].threeD_stack,"default");
- for (j=0; j<20; j++) sys.L2[i].buffer_sizes[j]=1;
- sys.L2[i].total_accesses=1;
- sys.L2[i].read_accesses=1;
- sys.L2[i].write_accesses=1;
- sys.L2[i].total_hits=1;
- sys.L2[i].total_misses=1;
- sys.L2[i].read_hits=1;
- sys.L2[i].write_hits=1;
- sys.L2[i].read_misses=1;
- sys.L2[i].write_misses=1;
- sys.L2[i].replacements=1;
- sys.L2[i].write_backs=1;
- sys.L2[i].miss_buffer_accesses=1;
- sys.L2[i].fill_buffer_accesses=1;
- sys.L2[i].prefetch_buffer_accesses=1;
- sys.L2[i].prefetch_buffer_writes=1;
- sys.L2[i].prefetch_buffer_reads=1;
- sys.L2[i].prefetch_buffer_hits=1;
- sys.L2[i].wbb_writes=1;
- sys.L2[i].wbb_reads=1;
- sys.L2[i].duty_cycle =1;
- sys.L2[i].merged_dir=false;
- sys.L2[i].homenode_read_accesses =1;
- sys.L2[i].homenode_write_accesses=1;
- sys.L2[i].homenode_read_hits=1;
- sys.L2[i].homenode_write_hits=1;
- sys.L2[i].homenode_read_misses=1;
- sys.L2[i].homenode_write_misses=1;
- sys.L2[i].dir_duty_cycle=1;
- }
- for (i=0; i<=63; i++)
- {
- //system_L3
- for (j=0; j<20; j++) sys.L3[i].L3_config[j]=1;
- sys.L3[i].clockrate=1;
- for (j=0; j<20; j++) sys.L3[i].ports[j]=1;
- sys.L3[i].device_type=1;
- strcpy(sys.L3[i].threeD_stack,"default");
- for (j=0; j<20; j++) sys.L3[i].buffer_sizes[j]=1;
- sys.L3[i].total_accesses=1;
- sys.L3[i].read_accesses=1;
- sys.L3[i].write_accesses=1;
- sys.L3[i].total_hits=1;
- sys.L3[i].total_misses=1;
- sys.L3[i].read_hits=1;
- sys.L3[i].write_hits=1;
- sys.L3[i].read_misses=1;
- sys.L3[i].write_misses=1;
- sys.L3[i].replacements=1;
- sys.L3[i].write_backs=1;
- sys.L3[i].miss_buffer_accesses=1;
- sys.L3[i].fill_buffer_accesses=1;
- sys.L3[i].prefetch_buffer_accesses=1;
- sys.L3[i].prefetch_buffer_writes=1;
- sys.L3[i].prefetch_buffer_reads=1;
- sys.L3[i].prefetch_buffer_hits=1;
- sys.L3[i].wbb_writes=1;
- sys.L3[i].wbb_reads=1;
- sys.L3[i].duty_cycle =1;
- sys.L3[i].merged_dir=false;
- sys.L3[i].homenode_read_accesses =1;
- sys.L3[i].homenode_write_accesses=1;
- sys.L3[i].homenode_read_hits=1;
- sys.L3[i].homenode_write_hits=1;
- sys.L3[i].homenode_read_misses=1;
- sys.L3[i].homenode_write_misses=1;
- sys.L3[i].dir_duty_cycle=1;
- }
- //system_NoC
- for (i=0; i<=63; i++)
- {
- sys.NoC[i].clockrate=1;
- sys.NoC[i].type=true;
- sys.NoC[i].chip_coverage=1;
- sys.NoC[i].has_global_link = true;
- strcpy(sys.NoC[i].topology,"default");
- sys.NoC[i].horizontal_nodes=1;
- sys.NoC[i].vertical_nodes=1;
- sys.NoC[i].input_ports=1;
- sys.NoC[i].output_ports=1;
- sys.NoC[i].virtual_channel_per_port=1;
- sys.NoC[i].flit_bits=1;
- sys.NoC[i].input_buffer_entries_per_vc=1;
- sys.NoC[i].total_accesses=1;
- sys.NoC[i].duty_cycle=1;
- sys.NoC[i].route_over_perc = 0.5;
- for (j=0; j<20; j++) sys.NoC[i].ports_of_input_buffer[j]=1;
- sys.NoC[i].number_of_crossbars=1;
- strcpy(sys.NoC[i].crossbar_type,"default");
- strcpy(sys.NoC[i].crosspoint_type,"default");
- //system.NoC?.xbar0;
- sys.NoC[i].xbar0.number_of_inputs_of_crossbars=1;
- sys.NoC[i].xbar0.number_of_outputs_of_crossbars=1;
- sys.NoC[i].xbar0.flit_bits=1;
- sys.NoC[i].xbar0.input_buffer_entries_per_port=1;
- for (j=0; j<20; j++) sys.NoC[i].xbar0.ports_of_input_buffer[j]=1; //20 entries (0..19); a single store to index 20 would be out of bounds
- sys.NoC[i].xbar0.crossbar_accesses=1;
- }
- //system_mem
- sys.mem.mem_tech_node=1;
- sys.mem.device_clock=1;
- sys.mem.capacity_per_channel=1;
- sys.mem.number_ranks=1;
- sys.mem.peak_transfer_rate =1;
- sys.mem.num_banks_of_DRAM_chip=1;
- sys.mem.Block_width_of_DRAM_chip=1;
- sys.mem.output_width_of_DRAM_chip=1;
- sys.mem.page_size_of_DRAM_chip=1;
- sys.mem.burstlength_of_DRAM_chip=1;
- sys.mem.internal_prefetch_of_DRAM_chip=1;
- sys.mem.memory_accesses=1;
- sys.mem.memory_reads=1;
- sys.mem.memory_writes=1;
- //system_mc
- sys.mc.mc_clock =1;
- sys.mc.number_mcs=1;
- sys.mc.peak_transfer_rate =1;
- sys.mc.memory_channels_per_mc=1;
- sys.mc.number_ranks=1;
- sys.mc.req_window_size_per_channel=1;
- sys.mc.IO_buffer_size_per_channel=1;
- sys.mc.databus_width=1;
- sys.mc.addressbus_width=1;
- sys.mc.memory_accesses=1;
- sys.mc.memory_reads=1;
- sys.mc.memory_writes=1;
- sys.mc.LVDS=true;
- sys.mc.type=1;
- //system_niu
- sys.niu.clockrate =1;
- sys.niu.number_units=1;
- sys.niu.type = 1;
- sys.niu.duty_cycle =1;
- sys.niu.total_load_perc=1;
- //system_pcie
- sys.pcie.clockrate =1;
- sys.pcie.number_units=1;
- sys.pcie.num_channels=1;
- sys.pcie.type = 1;
- sys.pcie.withPHY = false;
- sys.pcie.duty_cycle =1;
- sys.pcie.total_load_perc=1;
- //system_flash_controller
- sys.flashc.mc_clock =1;
- sys.flashc.number_mcs=1;
- sys.flashc.peak_transfer_rate =1;
- sys.flashc.memory_channels_per_mc=1;
- sys.flashc.number_ranks=1;
- sys.flashc.req_window_size_per_channel=1;
- sys.flashc.IO_buffer_size_per_channel=1;
- sys.flashc.databus_width=1;
- sys.flashc.addressbus_width=1;
- sys.flashc.memory_accesses=1;
- sys.flashc.memory_reads=1;
- sys.flashc.memory_writes=1;
- sys.flashc.LVDS=true;
- sys.flashc.withPHY = false;
- sys.flashc.type =1;
- sys.flashc.duty_cycle =1;
- sys.flashc.total_load_perc=1;
-}
diff --git a/ext/mcpat/XML_Parse.h b/ext/mcpat/XML_Parse.h
deleted file mode 100644
index 88fd3dac2..000000000
--- a/ext/mcpat/XML_Parse.h
+++ /dev/null
@@ -1,591 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- ***************************************************************************/
-
-#ifndef XML_PARSE_H_
-#define XML_PARSE_H_
-
-
-//#ifdef WIN32
-//#define _CRT_SECURE_NO_DEPRECATE
-//#endif
-
-#include <stdio.h>
-#include <string.h>
-
-#include <iostream>
-
-#include "xmlParser.h"
-using namespace std;
-
-/*
-void myfree(char *t); // {free(t);}
-ToXMLStringTool tx,tx2;
-*/
-//all subnodes at the level of system.core(0-n)
-//cache_policy is added into cache property arrays;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
-
-typedef struct{ //predictor fields of one core; embedded in system_core as member `predictor`
- int prediction_width;
- char prediction_scheme[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- int predictor_size;
- int predictor_entries;
- int local_predictor_size[20]; //20 entries, valid indices 0..19
- int local_predictor_entries;
- int global_predictor_entries;
- int global_predictor_bits;
- int chooser_predictor_entries;
- int chooser_predictor_bits;
- double predictor_accesses;
-} predictor_systemcore;
-typedef struct{ //itlb fields of one core; embedded in system_core as member `itlb`
- int number_entries;
- int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate (not assigned in ParseXML::initialize())
- double total_hits;
- double total_accesses;
- double total_misses;
- double conflicts; //not assigned in ParseXML::initialize()
-} itlb_systemcore;
-typedef struct{ //icache fields of one core; embedded in system_core as member `icache`
- //params
- double icache_config[20]; //20 entries, valid indices 0..19
- int buffer_sizes[20]; //20 entries; not assigned in ParseXML::initialize() (its strcpy line is commented out)
- int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate (not assigned in ParseXML::initialize())
- //stats
- double total_accesses;
- double read_accesses;
- double read_misses;
- double replacements;
- double read_hits;
- double total_hits;
- double total_misses;
- double miss_buffer_access;
- double fill_buffer_accesses;
- double prefetch_buffer_accesses;
- double prefetch_buffer_writes;
- double prefetch_buffer_reads;
- double prefetch_buffer_hits;
- double conflicts; //not assigned in ParseXML::initialize()
-} icache_systemcore;
-typedef struct{ //dtlb fields of one core; embedded in system_core as member `dtlb`
- //params
- int number_entries;
- int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate (not assigned in ParseXML::initialize())
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double write_hits;
- double read_hits;
- double read_misses;
- double write_misses;
- double total_hits;
- double total_misses;
- double conflicts; //not assigned in ParseXML::initialize()
-} dtlb_systemcore;
-typedef struct{ //dcache fields of one core; embedded in system_core as member `dcache`
- //params
- double dcache_config[20]; //20 entries, valid indices 0..19
- int buffer_sizes[20]; //20 entries; not assigned in ParseXML::initialize() (its strcpy line is commented out)
- int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate (not assigned in ParseXML::initialize())
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double total_hits;
- double total_misses;
- double read_hits;
- double write_hits;
- double read_misses;
- double write_misses;
- double replacements;
- double write_backs;
- double miss_buffer_access;
- double fill_buffer_accesses;
- double prefetch_buffer_accesses;
- double prefetch_buffer_writes;
- double prefetch_buffer_reads;
- double prefetch_buffer_hits;
- double wbb_writes;
- double wbb_reads;
- double conflicts; //not assigned in ParseXML::initialize()
-} dcache_systemcore;
-typedef struct{ //BTB fields of one core; embedded in system_core as member `BTB`
- //params
- int BTB_config[20]; //20 entries, valid indices 0..19
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double total_hits;
- double total_misses;
- double read_hits;
- double write_hits;
- double read_misses;
- double write_misses;
- double replacements;
-} BTB_systemcore;
-typedef struct{ //params, stats and sub-component records of one core; ParseXML::initialize() fills sys.core[0..63] with this layout
- //all params at the level of system.core(0-n)
- int clock_rate;
- bool opt_local;
- bool x86;
- int machine_bits;
- int virtual_address_width;
- int physical_address_width;
- int opcode_width;
- int micro_opcode_width;
- int instruction_length;
- int machine_type; //int here; the strcpy default for it in ParseXML::initialize() is commented out
- int internal_datapath_width;
- int number_hardware_threads;
- int fetch_width;
- int number_instruction_fetch_ports;
- int decode_width;
- int issue_width;
- int peak_issue_width;
- int commit_width;
- int pipelines_per_core[20]; //20 entries, valid indices 0..19
- int pipeline_depth[20]; //20 entries, valid indices 0..19
- char FPU[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- char divider_multiplier[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- int ALU_per_core;
- double FPU_per_core;
- int MUL_per_core;
- int instruction_buffer_size;
- int decoded_stream_buffer_size;
- int instruction_window_scheme; //int here; the strcpy default for it in ParseXML::initialize() is commented out
- int instruction_window_size;
- int fp_instruction_window_size;
- int ROB_size;
- int archi_Regs_IRF_size;
- int archi_Regs_FRF_size;
- int phy_Regs_IRF_size;
- int phy_Regs_FRF_size;
- int rename_scheme; //int here; the strcpy default for it in ParseXML::initialize() is commented out
- int register_windows_size;
- char LSU_order[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- int store_buffer_size;
- int load_buffer_size;
- int memory_ports;
- char Dcache_dual_pump[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- int RAS_size;
- int fp_issue_width;
- int prediction_width;
- int number_of_BTB;
- int number_of_BPT;
-
- //all stats at the level of system.core(0-n)
- double total_instructions;
- double int_instructions;
- double fp_instructions;
- double branch_instructions;
- double branch_mispredictions;
- double committed_instructions;
- double committed_int_instructions;
- double committed_fp_instructions;
- double load_instructions;
- double store_instructions;
- double total_cycles;
- double idle_cycles;
- double busy_cycles;
- double instruction_buffer_reads;
- double instruction_buffer_write;
- double ROB_reads;
- double ROB_writes;
- double rename_accesses;
- double fp_rename_accesses;
- double rename_reads;
- double rename_writes;
- double fp_rename_reads;
- double fp_rename_writes;
- double inst_window_reads;
- double inst_window_writes;
- double inst_window_wakeup_accesses;
- double inst_window_selections;
- double fp_inst_window_reads;
- double fp_inst_window_writes;
- double fp_inst_window_wakeup_accesses;
- double fp_inst_window_selections;
- double archi_int_regfile_reads;
- double archi_float_regfile_reads;
- double phy_int_regfile_reads;
- double phy_float_regfile_reads;
- double phy_int_regfile_writes;
- double phy_float_regfile_writes;
- double archi_int_regfile_writes;
- double archi_float_regfile_writes;
- double int_regfile_reads;
- double float_regfile_reads;
- double int_regfile_writes;
- double float_regfile_writes;
- double windowed_reg_accesses;
- double windowed_reg_transports;
- double function_calls;
- double context_switches;
- double ialu_accesses;
- double fpu_accesses;
- double mul_accesses;
- double cdb_alu_accesses;
- double cdb_mul_accesses;
- double cdb_fpu_accesses;
- double load_buffer_reads;
- double load_buffer_writes;
- double load_buffer_cams;
- double store_buffer_reads;
- double store_buffer_writes;
- double store_buffer_cams;
- double store_buffer_forwards;
- double main_memory_access;
- double main_memory_read;
- double main_memory_write;
- double pipeline_duty_cycle;
-
- double IFU_duty_cycle ;
- double BR_duty_cycle ;
- double LSU_duty_cycle ;
- double MemManU_I_duty_cycle;
- double MemManU_D_duty_cycle ;
- double ALU_duty_cycle ;
- double MUL_duty_cycle ;
- double FPU_duty_cycle ;
- double ALU_cdb_duty_cycle ;
- double MUL_cdb_duty_cycle ;
- double FPU_cdb_duty_cycle ;
-
- //all subnodes at the level of system.core(0-n)
- predictor_systemcore predictor;
- itlb_systemcore itlb;
- icache_systemcore icache;
- dtlb_systemcore dtlb;
- dcache_systemcore dcache;
- BTB_systemcore BTB;
-
-} system_core;
-typedef struct{ //params and stats of one L1 directory; ParseXML::initialize() fills sys.L1Directory[0..63] with this layout
- //params
- int Directory_type;
- double Dir_config[20]; //20 entries, valid indices 0..19
- int buffer_sizes[20]; //20 entries, valid indices 0..19
- int clockrate;
- int ports[20]; //20 entries, valid indices 0..19 (index 20 is past the end)
- int device_type;
- int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
- char threeD_stack[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double read_misses;
- double write_misses;
- double conflicts;
- double duty_cycle;
-} system_L1Directory;
-typedef struct{ //params and stats of one L2 directory; member-for-member the same layout as system_L1Directory
- //params
- int Directory_type;
- double Dir_config[20]; //20 entries, valid indices 0..19
- int buffer_sizes[20]; //20 entries, valid indices 0..19
- int clockrate;
- int ports[20]; //20 entries, valid indices 0..19 (index 20 is past the end)
- int device_type;
- int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
- char threeD_stack[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double read_misses;
- double write_misses;
- double conflicts;
- double duty_cycle;
-} system_L2Directory;
-typedef struct{ //params and stats of one L2 cache; ParseXML::initialize() fills sys.L2[0..63] with this layout
- //params
- double L2_config[20]; //20 entries, valid indices 0..19
- int clockrate;
- int ports[20]; //20 entries, valid indices 0..19
- int device_type;
- int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate (not assigned in ParseXML::initialize())
- char threeD_stack[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- int buffer_sizes[20]; //20 entries, valid indices 0..19
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double total_hits;
- double total_misses;
- double read_hits;
- double write_hits;
- double read_misses;
- double write_misses;
- double replacements;
- double write_backs;
- double miss_buffer_accesses;
- double fill_buffer_accesses;
- double prefetch_buffer_accesses;
- double prefetch_buffer_writes;
- double prefetch_buffer_reads;
- double prefetch_buffer_hits;
- double wbb_writes;
- double wbb_reads;
- double conflicts; //not assigned in ParseXML::initialize()
- double duty_cycle;
-
- bool merged_dir; //ParseXML::initialize() sets this to false
- double homenode_read_accesses;
- double homenode_write_accesses;
- double homenode_read_hits;
- double homenode_write_hits;
- double homenode_read_misses;
- double homenode_write_misses;
- double dir_duty_cycle;
-} system_L2;
-typedef struct{ //params and stats of one L3 cache; same members as system_L2 except the config array is named L3_config
- //params
- double L3_config[20]; //20 entries, valid indices 0..19
- int clockrate;
- int ports[20]; //20 entries, valid indices 0..19
- int device_type;
- int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate (not assigned in ParseXML::initialize())
- char threeD_stack[20]; //20-byte text buffer; ParseXML::initialize() strcpy's "default" into it
- int buffer_sizes[20]; //20 entries, valid indices 0..19
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double total_hits;
- double total_misses;
- double read_hits;
- double write_hits;
- double read_misses;
- double write_misses;
- double replacements;
- double write_backs;
- double miss_buffer_accesses;
- double fill_buffer_accesses;
- double prefetch_buffer_accesses;
- double prefetch_buffer_writes;
- double prefetch_buffer_reads;
- double prefetch_buffer_hits;
- double wbb_writes;
- double wbb_reads;
- double conflicts; //not assigned in ParseXML::initialize()
- double duty_cycle;
-
- bool merged_dir; //ParseXML::initialize() sets this to false
- double homenode_read_accesses;
- double homenode_write_accesses;
- double homenode_read_hits;
- double homenode_write_hits;
- double homenode_read_misses;
- double homenode_write_misses;
- double dir_duty_cycle;
-} system_L3;
-typedef struct{
- //params
- int number_of_inputs_of_crossbars;
- int number_of_outputs_of_crossbars;
- int flit_bits;
- int input_buffer_entries_per_port;
- int ports_of_input_buffer[20];
- //stats
- double crossbar_accesses;
-} xbar0_systemNoC;
-typedef struct{
- //params
- int clockrate;
- bool type;
- bool has_global_link;
- char topology[20];
- int horizontal_nodes;
- int vertical_nodes;
- int link_throughput;
- int link_latency;
- int input_ports;
- int output_ports;
- int virtual_channel_per_port;
- int flit_bits;
- int input_buffer_entries_per_vc;
- int ports_of_input_buffer[20];
- int dual_pump;
- int number_of_crossbars;
- char crossbar_type[20];
- char crosspoint_type[20];
- xbar0_systemNoC xbar0;
- int arbiter_type;
- double chip_coverage;
- //stats
- double total_accesses;
- double duty_cycle;
- double route_over_perc;
-} system_NoC;
-typedef struct{
- //params
- int mem_tech_node;
- int device_clock;
- int peak_transfer_rate;
- int internal_prefetch_of_DRAM_chip;
- int capacity_per_channel;
- int number_ranks;
- int num_banks_of_DRAM_chip;
- int Block_width_of_DRAM_chip;
- int output_width_of_DRAM_chip;
- int page_size_of_DRAM_chip;
- int burstlength_of_DRAM_chip;
- //stats
- double memory_accesses;
- double memory_reads;
- double memory_writes;
-} system_mem;
-typedef struct{
- //params
- //Common Param for mc and fc
- double peak_transfer_rate;
- int number_mcs;
- bool withPHY;
- int type;
-
- //FCParam
- //stats
- double duty_cycle;
- double total_load_perc;
-
- //McParam
- int mc_clock;
- int llc_line_length;
- int memory_channels_per_mc;
- int number_ranks;
- int req_window_size_per_channel;
- int IO_buffer_size_per_channel;
- int databus_width;
- int addressbus_width;
- bool LVDS;
-
- //stats
- double memory_accesses;
- double memory_reads;
- double memory_writes;
-} system_mc;
-
-typedef struct{
- //params
- int clockrate;
- int number_units;
- int type;
- //stats
- double duty_cycle;
- double total_load_perc;
-} system_niu;
-
-typedef struct{
- //params
- int clockrate;
- int number_units;
- int num_channels;
- int type;
- bool withPHY;
- //stats
- double duty_cycle;
- double total_load_perc;
-} system_pcie;
-
-typedef struct{
- //All number_of_* at the level of 'system' Ying 03/21/2009
- int number_of_cores;
- int number_of_L1Directories;
- int number_of_L2Directories;
- int number_of_L2s;
- bool Private_L2;
- int number_of_L3s;
- int number_of_NoCs;
- int number_of_dir_levels;
- int domain_size;
- int first_level_dir;
- // All params at the level of 'system'
- int homogeneous_cores;
- int homogeneous_L1Directories;
- int homogeneous_L2Directories;
- double core_tech_node;
- int target_core_clockrate;
- int target_chip_area;
- int temperature;
- int number_cache_levels;
- int L1_property;
- int L2_property;
- int homogeneous_L2s;
- int L3_property;
- int homogeneous_L3s;
- int homogeneous_NoCs;
- int homogeneous_ccs;
- int Max_area_deviation;
- int Max_power_deviation;
- int device_type;
- bool longer_channel_device;
- bool Embedded;
- bool opt_dynamic_power;
- bool opt_lakage_power;
- bool opt_clockrate;
- bool opt_area;
- int interconnect_projection_type;
- int machine_bits;
- int virtual_address_width;
- int physical_address_width;
- int virtual_memory_page_size;
- double total_cycles;
- //system.core(0-n):3rd level
- system_core core[64];
- system_L1Directory L1Directory[64];
- system_L2Directory L2Directory[64];
- system_L2 L2[64];
- system_L3 L3[64];
- system_NoC NoC[64];
- system_mem mem;
- system_mc mc;
- system_mc flashc;
- system_niu niu;
- system_pcie pcie;
-} root_system;
-
-class ParseXML
-{
-public:
- void parse(char* filepath);
- void initialize();
-public:
- root_system sys;
-};
-
-
-#endif /* XML_PARSE_H_ */
-
-
-
-
diff --git a/ext/mcpat/array.cc b/ext/mcpat/array.cc
index 975f82fad..0e46afe03 100644
--- a/ext/mcpat/array.cc
+++ b/ext/mcpat/array.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,232 +26,242 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
-#define GLOBALVAR
-#include <cassert>
-#include <cmath>
#include <iostream>
+#include <math.h>
#include "area.h"
#include "array.h"
+#include "common.h"
#include "decoder.h"
-#include "globalvar.h"
#include "parameter.h"
using namespace std;
-ArrayST::ArrayST(const InputParameter *configure_interface,
- string _name,
- enum Device_ty device_ty_,
- bool opt_local_,
- enum Core_type core_ty_,
- bool _is_default)
-:l_ip(*configure_interface),
- name(_name),
- device_ty(device_ty_),
- opt_local(opt_local_),
- core_ty(core_ty_),
- is_default(_is_default)
- {
-
- if (l_ip.cache_sz<64) l_ip.cache_sz=64;
- l_ip.error_checking();//not only do the error checking but also fill some missing parameters
- optimize_array();
+double ArrayST::area_efficiency_threshold = 20.0; // Compared against data_array2->area_efficiency in computeEnergy().
+int ArrayST::ed = 0; // Copied into l_ip.ed when computeEnergy() re-runs CACTI to meet timing.
+// Fixed weights, chosen so that the timing constraints can be satisfied.
+int ArrayST::delay_wt = 100;
+int ArrayST::cycle_time_wt = 1000;
+// Fixed weight, used for the exhaustive search over individual components.
+int ArrayST::area_wt = 10;
+// Fixed weights, used for the exhaustive search over individual components.
+int ArrayST::dynamic_power_wt = 10;
+int ArrayST::leakage_power_wt = 10;
+// Fixed deviations, chosen so that the timing constraints can be satisfied.
+int ArrayST::delay_dev = 1000000;
+int ArrayST::cycle_time_dev = 100;
+// Fixed deviation, used for the exhaustive search over individual components.
+int ArrayST::area_dev = 1000000;
+// Fixed deviations, used for the exhaustive search over individual components.
+int ArrayST::dynamic_power_dev = 1000000;
+int ArrayST::leakage_power_dev = 1000000;
+int ArrayST::cycle_time_dev_threshold = 10; // Both the lower bound on l_ip.cycle_time_dev and its per-iteration decrement in computeEnergy().
+
+
+ArrayST::ArrayST(XMLNode* _xml_data,
+ const InputParameter *configure_interface, string _name,
+ enum Device_ty device_ty_, double _clockRate,
+ bool opt_local_, enum Core_type core_ty_, bool _is_default)
+ : McPATComponent(_xml_data), l_ip(*configure_interface),
+ device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
+ is_default(_is_default) { // Copies *configure_interface into l_ip, then evaluates the array immediately.
+ name = _name;
+ clockRate = _clockRate;
+ if (l_ip.cache_sz < MIN_BUFFER_SIZE) // Clamp undersized arrays up to MIN_BUFFER_SIZE.
+ l_ip.cache_sz = MIN_BUFFER_SIZE;
+
+ if (!l_ip.error_checking(name)) { // A rejected configuration terminates the whole process.
+ exit(1);
+ }
-}
+ output_data.reset();
+ computeEnergy(); // Must run before computeArea(): it fills local_result, which computeArea() reads.
+ computeArea();
+}
-void ArrayST::compute_base_power()
- {
- //l_ip.out_w =l_ip.line_sz*8;
- local_result=cacti_interface(&l_ip);
+void ArrayST::compute_base_power() { // Evaluates the current l_ip through cacti_interface().
+ local_result = cacti_interface(&l_ip); // The previous local_result is overwritten, not cleaned up here.
+}
- }
+void ArrayST::computeArea() { // Publishes the area already held in local_result; performs no new evaluation.
+ area.set_area(local_result.area);
+ output_data.area = local_result.area / 1e6; // Scaled by 1e-6 for reporting -- presumably um^2 to mm^2; TODO confirm units.
+}
-void ArrayST::optimize_array()
-{
- list<uca_org_t > candidate_solutions(0);
- list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
+void ArrayST::computeEnergy() { // Evaluates via CACTI, optionally re-optimizes to meet timing, then scales and publishes power.
+ list<uca_org_t > candidate_solutions(0);
+ list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
- uca_org_t * temp_res = 0;
- local_result.valid=false;
+ uca_org_t* temp_res = NULL;
+ local_result.valid = false;
- double throughput=l_ip.throughput, latency=l_ip.latency;
- double area_efficiency_threshold = 20.0;
- bool throughput_overflow=true, latency_overflow=true;
- compute_base_power();
+ double throughput = l_ip.throughput;
+ double latency = l_ip.latency;
+ bool throughput_overflow = true;
+ bool latency_overflow = true;
+ compute_base_power(); // Initial evaluation with l_ip as configured.
- if ((local_result.cycle_time - throughput) <= 1e-10 )
- throughput_overflow=false;
- if ((local_result.access_time - latency)<= 1e-10)
- latency_overflow=false;
+ if ((local_result.cycle_time - throughput) <= 1e-10 ) // 1e-10 is the tolerance used for every timing comparison below.
+ throughput_overflow = false;
+ if ((local_result.access_time - latency) <= 1e-10)
+ latency_overflow = false;
- if (opt_for_clk && opt_local)
- {
- if (throughput_overflow || latency_overflow)
- {
- l_ip.ed=0;
+ if (opt_for_clk && opt_local) {
+ if (throughput_overflow || latency_overflow) {
+ l_ip.ed = ed; // Load the class-wide optimization settings before re-running CACTI.
- l_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied.
- l_ip.cycle_time_wt = 1000;
+ l_ip.delay_wt = delay_wt;
+ l_ip.cycle_time_wt = cycle_time_wt;
- l_ip.area_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
- l_ip.dynamic_power_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
- l_ip.leakage_power_wt = 10;
+ l_ip.area_wt = area_wt;
+ l_ip.dynamic_power_wt = dynamic_power_wt;
+ l_ip.leakage_power_wt = leakage_power_wt;
- l_ip.delay_dev = 1000000;//Fixed number, make sure timing can be satisfied.
- l_ip.cycle_time_dev = 100;
+ l_ip.delay_dev = delay_dev;
+ l_ip.cycle_time_dev = cycle_time_dev;
- l_ip.area_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
- l_ip.dynamic_power_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
- l_ip.leakage_power_dev = 1000000;
+ l_ip.area_dev = area_dev;
+ l_ip.dynamic_power_dev = dynamic_power_dev;
+ l_ip.leakage_power_dev = leakage_power_dev;
- throughput_overflow=true; //Reset overflow flag before start optimization iterations
- latency_overflow=true;
+ //Reset the overflow flags before starting the optimization iterations
+ throughput_overflow = true;
+ latency_overflow = true;
- temp_res = &local_result; //Clean up the result for optimized for ED^2P
- temp_res->cleanup();
- }
+ //Clean up the initial result, which was optimized for ED^2P
+ temp_res = &local_result;
+ temp_res->cleanup();
+ }
- while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10
- {
- compute_base_power();
-
- l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration
-
- // from best area to worst area -->worst timing to best timing
- if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)||
- (local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0))
- { //if no satisfiable solution is found,the most aggressive one is left
- candidate_solutions.push_back(local_result);
- //output_data_csv(candidate_solutions.back());
- if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10))
- //ensure stop opt not because of cam
- {
- throughput_overflow=false;
- latency_overflow=false;
- }
-
- }
- else
- {
- //TODO: whether checking the partial satisfied results too, or just change the mark???
- if ((local_result.cycle_time - throughput) <= 1e-10)
- throughput_overflow=false;
- if ((local_result.access_time - latency)<= 1e-10)
- latency_overflow=false;
-
- if (l_ip.cycle_time_dev > 10)
- { //if not >10 local_result is the last result, it cannot be cleaned up
- temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up
- temp_res->cleanup();
- }
- }
-// l_ip.cycle_time_dev-=10;
-// l_ip.delay_dev-=10;
+ while ((throughput_overflow || latency_overflow) &&
+ l_ip.cycle_time_dev > cycle_time_dev_threshold) {
+ compute_base_power();
+
+ //This is the cycle_time_dev to be used for the next iteration
+ l_ip.cycle_time_dev -= cycle_time_dev_threshold;
+
+ // from best area to worst area -->worst timing to best timing
+ if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
+ (local_result.access_time - latency) <= 1e-10) ||
+ (local_result.data_array2->area_efficiency <
+ area_efficiency_threshold && l_ip.assoc == 0)) {
+ //if no satisfying solution is found, the most aggressive one
+ //is left
+ candidate_solutions.push_back(local_result);
+ if (((local_result.cycle_time - throughput) <= 1e-10) &&
+ ((local_result.access_time - latency) <= 1e-10)) {
+ //only clear the flags when timing is really met, not when
+ //this branch was taken because of the assoc == 0 (CAM) case
+ throughput_overflow = false;
+ latency_overflow = false;
+ }
+ } else {
+ if ((local_result.cycle_time - throughput) <= 1e-10)
+ throughput_overflow = false;
+ if ((local_result.access_time - latency) <= 1e-10)
+ latency_overflow = false;
+
+ //if cycle_time_dev is no longer above the threshold, local_result
+ //is the last result and cannot be cleaned up
+ if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
+ //Only solutions not saved in the list need to be
+ //cleaned up
+ temp_res = &local_result;
+ temp_res->cleanup();
}
+ }
+ }
- if (l_ip.assoc > 0)
- {
- //For array structures except CAM and FA, Give warning but still provide a result with best timing found
- if (throughput_overflow==true)
- cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." << endl;
- if (latency_overflow==true)
- cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl;
+ if (l_ip.assoc > 0) {
+ //For array structures except CAM and FA, give a warning but still
+ //provide a result with the best timing found
+ if (throughput_overflow == true)
+ cout << "Warning: " << name
+ << " array structure cannot satisfy throughput constraint."
+ << endl;
+ if (latency_overflow == true)
+ cout << "Warning: " << name
+ << " array structure cannot satisfy latency constraint."
+ << endl;
}
-// else
-// {
-// /*According to "Content-Addressable Memory (CAM) Circuits and
-// Architectures": A Tutorial and Survey
-// by Kostas Pagiamtzis et al.
-// CAM structures can be heavily pipelined and use look-ahead techniques,
-// therefore timing can be relaxed. But McPAT does not model the advanced
-// techniques. If continue optimizing, the area efficiency will be too low
-// */
-// //For CAM and FA, stop opt if area efficiency is too low
-// if (throughput_overflow==true)
-// cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name
-// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
-// if (latency_overflow==true)
-// cout<< "Warning: " <<" McPAT stopped optimization on latency for "<< name
-// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
-// }
-
- //double min_dynamic_energy, min_dynamic_power, min_leakage_power, min_cycle_time;
- double min_dynamic_energy=BIGNUM;
- if (candidate_solutions.empty()==false)
- {
- local_result.valid=true;
- for (candidate_iter = candidate_solutions.begin(); candidate_iter != candidate_solutions.end(); ++candidate_iter)
-
- {
- if (min_dynamic_energy > (candidate_iter)->power.readOp.dynamic)
- {
- min_dynamic_energy = (candidate_iter)->power.readOp.dynamic;
- min_dynamic_energy_iter = candidate_iter;
- local_result = *(min_dynamic_energy_iter);
- //TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match.
-
- }
- else
- {
- candidate_iter->cleanup() ;
- }
-
- }
+ double min_dynamic_energy = BIGNUM;
+ if (candidate_solutions.empty() == false) { // Select the candidate with the lowest power.readOp.dynamic.
+ local_result.valid = true;
+ for (candidate_iter = candidate_solutions.begin();
+ candidate_iter != candidate_solutions.end();
+ ++candidate_iter) {
+ if (min_dynamic_energy >
+ (candidate_iter)->power.readOp.dynamic) {
+ min_dynamic_energy =
+ (candidate_iter)->power.readOp.dynamic;
+ min_dynamic_energy_iter = candidate_iter;
+ local_result = *(min_dynamic_energy_iter);
+ } else {
+ candidate_iter->cleanup() ; // Only candidates that do not beat the current minimum are cleaned up here.
+ }
+ }
- }
- candidate_solutions.clear();
- }
- double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
-
- double macro_layout_overhead = g_tp.macro_layout_overhead;
- double chip_PR_overhead = g_tp.chip_layout_overhead;
- double total_overhead = macro_layout_overhead*chip_PR_overhead;
- local_result.area *= total_overhead;
-
- //maintain constant power density
- double pppm_t[4] = {total_overhead,1,1,total_overhead};
-
- double sckRation = g_tp.sckt_co_eff;
- local_result.power.readOp.dynamic *= sckRation;
- local_result.power.writeOp.dynamic *= sckRation;
- local_result.power.searchOp.dynamic *= sckRation;
- local_result.power.readOp.leakage *= l_ip.nbanks;
- local_result.power.readOp.longer_channel_leakage =
- local_result.power.readOp.leakage*long_channel_device_reduction;
- local_result.power = local_result.power* pppm_t;
-
- local_result.data_array2->power.readOp.dynamic *= sckRation;
- local_result.data_array2->power.writeOp.dynamic *= sckRation;
- local_result.data_array2->power.searchOp.dynamic *= sckRation;
- local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
- local_result.data_array2->power.readOp.longer_channel_leakage =
- local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
- local_result.data_array2->power = local_result.data_array2->power* pppm_t;
-
-
- if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
- {
- local_result.tag_array2->power.readOp.dynamic *= sckRation;
- local_result.tag_array2->power.writeOp.dynamic *= sckRation;
- local_result.tag_array2->power.searchOp.dynamic *= sckRation;
- local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
- local_result.tag_array2->power.readOp.longer_channel_leakage =
- local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
- local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
}
+ candidate_solutions.clear();
+ }
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(device_ty, core_ty);
+
+ double macro_layout_overhead = g_tp.macro_layout_overhead;
+ double chip_PR_overhead = g_tp.chip_layout_overhead;
+ double total_overhead = macro_layout_overhead * chip_PR_overhead;
+ local_result.area *= total_overhead; // Area is scaled by the product of the macro and chip layout overheads.
+
+ //maintain constant power density
+ double pppm_t[4] = {total_overhead, 1, 1, total_overhead};
+
+ double sckRation = g_tp.sckt_co_eff;
+ local_result.power.readOp.dynamic *= sckRation;
+ local_result.power.writeOp.dynamic *= sckRation;
+ local_result.power.searchOp.dynamic *= sckRation;
+ local_result.power.readOp.leakage *= l_ip.nbanks;
+ local_result.power.readOp.longer_channel_leakage =
+ local_result.power.readOp.leakage * long_channel_device_reduction;
+ local_result.power = local_result.power * pppm_t;
+
+ local_result.data_array2->power.readOp.dynamic *= sckRation;
+ local_result.data_array2->power.writeOp.dynamic *= sckRation;
+ local_result.data_array2->power.searchOp.dynamic *= sckRation;
+ local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.data_array2->power.readOp.longer_channel_leakage =
+ local_result.data_array2->power.readOp.leakage *
+ long_channel_device_reduction;
+ local_result.data_array2->power = local_result.data_array2->power * pppm_t;
+
+
+ if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) { // tag_array2 is only touched for non-fully-associative caches.
+ local_result.tag_array2->power.readOp.dynamic *= sckRation;
+ local_result.tag_array2->power.writeOp.dynamic *= sckRation;
+ local_result.tag_array2->power.searchOp.dynamic *= sckRation;
+ local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.tag_array2->power.readOp.longer_channel_leakage =
+ local_result.tag_array2->power.readOp.leakage *
+ long_channel_device_reduction;
+ local_result.tag_array2->power =
+ local_result.tag_array2->power * pppm_t;
+ }
+ power = local_result.power; // Publish the scaled result as this component's power.
+
+ output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; // NOTE(review): presumably per-read energy times clock rate, i.e. one read per cycle -- confirm.
+ output_data.subthreshold_leakage_power = power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
}
void ArrayST::leakage_feedback(double temperature)
@@ -296,7 +307,6 @@ void ArrayST::leakage_feedback(double temperature)
}
}
-ArrayST:: ~ArrayST()
-{
- local_result.cleanup();
+ArrayST::~ArrayST() {
+ local_result.cleanup(); // Release the retained CACTI result through its own cleanup().
}
diff --git a/ext/mcpat/array.h b/ext/mcpat/array.h
index 8c6124d46..6a4c0b6cb 100644
--- a/ext/mcpat/array.h
+++ b/ext/mcpat/array.h
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -43,59 +44,42 @@
using namespace std;
-class ArrayST :public Component{
- public:
- ArrayST(){};
- ArrayST(const InputParameter *configure_interface, string _name, enum Device_ty device_ty_, bool opt_local_=true, enum Core_type core_ty_=Inorder, bool _is_default=true);
-
- InputParameter l_ip;
- string name;
- enum Device_ty device_ty;
- bool opt_local;
- enum Core_type core_ty;
- bool is_default;
- uca_org_t local_result;
+class ArrayST : public McPATComponent { // A single array structure evaluated through the CACTI interface.
+public:
+ static double area_efficiency_threshold; // Class-wide; compared against the data array's area efficiency in computeEnergy().
- statsDef tdp_stats;
- statsDef rtp_stats;
- statsDef stats_t;
- powerDef power_t;
+ // Class-wide tuning values for the CACTI interface; computeEnergy() copies most of them into l_ip.
+ static int ed;
+ static int delay_wt;
+ static int cycle_time_wt;
+ static int area_wt;
+ static int dynamic_power_wt;
+ static int leakage_power_wt;
+ static int delay_dev;
+ static int cycle_time_dev;
+ static int area_dev;
+ static int dynamic_power_dev;
+ static int leakage_power_dev;
+ static int cycle_time_dev_threshold; // Loop bound and step for l_ip.cycle_time_dev in computeEnergy().
- virtual void optimize_array();
- virtual void compute_base_power();
- virtual ~ArrayST();
+ InputParameter l_ip; // This array's own copy of the configuration handed to the constructor.
+ enum Device_ty device_ty;
+ bool opt_local;
+ enum Core_type core_ty;
+ bool is_default;
+ uca_org_t local_result; // Most recent cacti_interface() result for l_ip.
+ statsDef stats_t;
- void leakage_feedback(double temperature);
-};
+ ArrayST(XMLNode* _xml_data, const InputParameter *configure_interface,
+ string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
+ bool opt_local_ = true,
+ enum Core_type core_ty_ = Inorder, bool _is_default = true);
+ void computeArea(); // Publishes local_result.area; call after computeEnergy().
+ void computeEnergy(); // Runs CACTI (with optional timing re-optimization) and publishes power.
+ void compute_base_power(); // Single CACTI evaluation of l_ip into local_result.
+ ~ArrayST();
-class InstCache :public Component{
-public:
- ArrayST* caches;
- ArrayST* missb;
- ArrayST* ifb;
- ArrayST* prefetchb;
- powerDef power_t;//temp value holder for both (max) power and runtime power
- InstCache(){caches=0;missb=0;ifb=0;prefetchb=0;};
- ~InstCache(){
- if (caches) {//caches->local_result.cleanup();
- delete caches; caches=0;}
- if (missb) {//missb->local_result.cleanup();
- delete missb; missb=0;}
- if (ifb) {//ifb->local_result.cleanup();
- delete ifb; ifb=0;}
- if (prefetchb) {//prefetchb->local_result.cleanup();
- delete prefetchb; prefetchb=0;}
- };
-};
-
-class DataCache :public InstCache{
-public:
- ArrayST* wbb;
- DataCache(){wbb=0;};
- ~DataCache(){
- if (wbb) {//wbb->local_result.cleanup();
- delete wbb; wbb=0;}
- };
+ void leakage_feedback(double temperature);
};
-#endif /* TLB_H_ */
+#endif /* ARRAY_H_ */
diff --git a/ext/mcpat/basic_components.cc b/ext/mcpat/basic_components.cc
index f288d7479..3835460f3 100644
--- a/ext/mcpat/basic_components.cc
+++ b/ext/mcpat/basic_components.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -34,94 +35,327 @@
#include <iostream>
#include "basic_components.h"
+#include "cacheunit.h"
+#include "common.h"
-double longer_channel_device_reduction(
- enum Device_ty device_ty,
- enum Core_type core_ty)
-{
+// Set this to true to get a stderr trace of the McPATComponent method calls
+bool McPATComponent::debug = false;
- double longer_channel_device_percentage_core;
- double longer_channel_device_percentage_uncore;
- double longer_channel_device_percentage_llc;
+bool McPATComponent::opt_for_clk = true;
+int McPATComponent::longer_channel_device = 0;
+// Number of cycles per second, 2GHz = 2e9
+double McPATComponent::target_core_clockrate = 2e9;
+double McPATComponent::total_cycles = 0.0f;
+double McPATComponent::execution_time = 0.0f; // displayData() divides by this value, so it must be set non-zero before reporting.
+int McPATComponent::physical_address_width = 0;
+int McPATComponent::virtual_address_width = 0;
+int McPATComponent::virtual_memory_page_size = 0;
+int McPATComponent::data_path_width = 0;
- double long_channel_device_reduction;
+void McPATOutput::reset() { // Zeroes every reported quantity so the object can be re-accumulated.
+ storage = 0.0;
+ area = 0.0;
+ peak_dynamic_power = 0.0;
+ subthreshold_leakage_power = 0.0;
+ gate_leakage_power = 0.0;
+ runtime_dynamic_energy = 0.0;
+}
- longer_channel_device_percentage_llc = 1.0;
- longer_channel_device_percentage_uncore = 0.82;
- if (core_ty==OOO)
- {
- longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam
- //longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam
+McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs) { // Field-by-field sum; neither operand is modified.
+ McPATOutput to_return;
+ to_return.storage = lhs.storage + rhs.storage;
+ to_return.area = lhs.area + rhs.area;
+ to_return.peak_dynamic_power = lhs.peak_dynamic_power +
+ rhs.peak_dynamic_power;
+ to_return.subthreshold_leakage_power = lhs.subthreshold_leakage_power +
+ rhs.subthreshold_leakage_power;
+ to_return.gate_leakage_power = lhs.gate_leakage_power +
+ rhs.gate_leakage_power;
+ to_return.runtime_dynamic_energy = lhs.runtime_dynamic_energy +
+ rhs.runtime_dynamic_energy;
+ return to_return;
+}
+
+void McPATOutput::operator+=(const McPATOutput &rhs) { // In-place field-by-field accumulation; returns void, so it cannot be chained.
+ storage += rhs.storage;
+ area += rhs.area;
+ peak_dynamic_power += rhs.peak_dynamic_power;
+ subthreshold_leakage_power += rhs.subthreshold_leakage_power;
+ gate_leakage_power += rhs.gate_leakage_power;
+ runtime_dynamic_energy += rhs.runtime_dynamic_energy;
+}
+
+McPATComponent::McPATComponent()
+ : xml_data(NULL), name("") { // No XML node attached: methods that dereference xml_data must not be called on such an object.
+}
+
+McPATComponent::McPATComponent(XMLNode* _xml_data)
+ : xml_data(_xml_data), name("") { // Stores the node pointer only; interface_ip is left default-initialized.
+}
+
+McPATComponent::McPATComponent(XMLNode* _xml_data,
+ InputParameter* _interface_ip)
+ : xml_data(_xml_data), interface_ip(*_interface_ip), name("") { // Copies *_interface_ip, so the pointer must be non-null.
+}
+
+McPATComponent::~McPATComponent() { // NOTE(review): empty -- entries of children are created with new in recursiveInstantiate() and are not freed here; confirm ownership.
+}
+
+// Builds this component's children from the "component" child nodes of
+// xml_data. Only "CacheUnit" is instantiated (with a copy source of
+// interface_ip); the other known types emit a "not yet complete" warning and
+// any other type an "unrecognized" warning.
+// NOTE(review): a child without a "type" attribute is reported first; this
+// relies on STRCMP (defined elsewhere) chaining as else-if so that a null
+// type is never compared -- confirm against the macro definition.
+void McPATComponent::recursiveInstantiate() {
+ if (debug) {
+ // The two literals are adjacent (no comma) so they form ONE format
+ // string; with a comma the second literal was consumed by the first %s.
+ fprintf(stderr, "WARNING: Called recursiveInstantiate from %s, with "
+ "'type' %s\n", name.c_str(), xml_data->getAttribute("type"));
+ }
+ int i;
+ int numChildren = xml_data->nChildNode("component");
+ for (i = 0; i < numChildren; i++ ) {
+ // For each child node of the system,
+ XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = childXML->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(childXML->getAttribute("id"));
+
+ STRCMP(type, "Core")
+ warnIncompleteComponentType(type);
+ STRCMP(type, "CacheUnit")
+ children.push_back(new CacheUnit(childXML, &interface_ip));
+ STRCMP(type, "CacheController")
+ warnIncompleteComponentType(type);
+ STRCMP(type, "MemoryController")
+ warnIncompleteComponentType(type);
+ STRCMP(type, "Memory")
+ warnIncompleteComponentType(type);
+ STRCMP(type, "OnChipNetwork")
+ warnIncompleteComponentType(type);
+ STRCMP(type, "BusInterconnect")
+ warnIncompleteComponentType(type);
+ STRCMP(type, "Directory")
+ warnIncompleteComponentType(type);
- }
- else
- {
- longer_channel_device_percentage_core = 0.8;//0.8;//Niagara
- //longer_channel_device_percentage_uncore = 0.9;//Niagara
- }
-
- if (device_ty==Core_device)
- {
- long_channel_device_reduction = (1- longer_channel_device_percentage_core)
- + longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction;
- }
- else if (device_ty==Uncore_device)
- {
- long_channel_device_reduction = (1- longer_channel_device_percentage_uncore)
- + longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction;
- }
- else if (device_ty==LLC_device)
- {
- long_channel_device_reduction = (1- longer_channel_device_percentage_llc)
- + longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction;
- }
else
- {
- cout<<"unknown device category"<<endl;
- exit(0);
- }
+ warnUnrecognizedComponent(type);
+ }
+}
+
+// Recomputes the area of every child and accumulates the children's reported
+// areas into output_data.area. This component's own `area` is reset to zero
+// and is not accumulated here.
+void McPATComponent::computeArea() {
+ if (debug) {
+ // Adjacent literals (no comma) so both %s conversions get their
+ // intended arguments.
+ fprintf(stderr, "WARNING: Called computeArea from %s, with 'type' "
+ "%s\n", name.c_str(), xml_data->getAttribute("type"));
+ }
+
+ // NOTE: computeEnergy() clears output_data and re-accumulates it from the
+ // children, so a later computeEnergy() call overwrites the sum built here.
+ int i;
+ int numChildren = children.size();
+ area.set_area(0.0);
+ output_data.area = 0.0;
+ for (i = 0; i < numChildren; i++) {
+ children[i]->computeArea();
+ // Add the child's area, not this component's own just-zeroed area.
+ output_data.area += children[i]->output_data.area;
+ }
+}
+
+// Clears this component's power, runtime power and output data, then
+// recomputes every child and sums the children's output_data (all fields,
+// area included) into this component's output_data.
+void McPATComponent::computeEnergy() {
+ if (debug) {
+ // Adjacent literals (no comma) so both %s conversions get their
+ // intended arguments.
+ fprintf(stderr, "WARNING: Called computeEnergy from %s, with 'type' "
+ "%s\n", name.c_str(), xml_data->getAttribute("type"));
+ }
+
+ power.reset();
+ rt_power.reset();
+ // Use the type's own reset() instead of memset()-ing a class object.
+ output_data.reset();
+ int i;
+ int numChildren = children.size();
+ for (i = 0; i < numChildren; i++) {
+ children[i]->computeEnergy();
+ output_data += children[i]->output_data;
+ }
+}
+
+// Prints this component's area, power and energy figures to stdout, indented
+// by `indent` spaces, then prints every child with four more spaces of
+// indentation. `plevel` is only forwarded to the children.
+// NOTE(review): the runtime dynamic power line divides by the static
+// execution_time, which defaults to 0 -- it must be set before calling this.
+void McPATComponent::displayData(uint32_t indent, int plevel) {
+ if (debug) {
+ // Adjacent literals (no comma) so both %s conversions get their
+ // intended arguments.
+ fprintf(stderr, "WARNING: Called displayData from %s, with 'type' "
+ "%s\n", name.c_str(), xml_data->getAttribute("type"));
+ }
+
+ string indent_str(indent, ' ');
+ string indent_str_next(indent + 2, ' ');
+
+ double leakage_power = output_data.subthreshold_leakage_power +
+ output_data.gate_leakage_power;
+ double total_runtime_energy = output_data.runtime_dynamic_energy +
+ leakage_power * execution_time;
+ cout << indent_str << name << ":" << endl;
+ cout << indent_str_next << "Area = " << output_data.area << " mm^2"
+ << endl;
+ cout << indent_str_next << "Peak Dynamic Power = "
+ << output_data.peak_dynamic_power << " W" << endl;
+ cout << indent_str_next << "Subthreshold Leakage Power = "
+ << output_data.subthreshold_leakage_power << " W" << endl;
+ cout << indent_str_next << "Gate Leakage Power = "
+ << output_data.gate_leakage_power << " W" << endl;
+ cout << indent_str_next << "Runtime Dynamic Power = "
+ << (output_data.runtime_dynamic_energy / execution_time) << " W"
+ << endl;
+ cout << indent_str_next << "Runtime Dynamic Energy = "
+ << output_data.runtime_dynamic_energy << " J" << endl;
+ cout << indent_str_next << "Total Runtime Energy = "
+ << total_runtime_energy << " J" << endl;
+ cout << endl;
+
+ // Recursively print children
+ int i;
+ int numChildren = children.size();
+ for (i = 0; i < numChildren; i++) {
+ children[i]->displayData(indent + 4, plevel);
+ }
+}
+
+void McPATComponent::errorUnspecifiedParam(string param) { // Fatal: reports on stderr that `param` was not specified for this component
+ fprintf(stderr, "ERROR: Parameter must be specified in %s: %s\n",
+ name.c_str(), param.c_str());
+ exit(1); // does not return; process exit status is 1
+}
+
+void McPATComponent::errorNonPositiveParam(string param) { // Fatal: reports on stderr that `param` must be positive for this component
+ fprintf(stderr, "ERROR: Parameter must be positive in %s: %s\n",
+ name.c_str(), param.c_str());
+ exit(1); // does not return; process exit status is 1
+}
+
+void McPATComponent::warnUnrecognizedComponent(XMLCSTR component) { // Non-fatal: prints a stderr warning naming this component and the unknown component type
+ fprintf(stderr, "WARNING: Component type not recognized in %s: %s\n",
+ name.c_str(), component);
+}
+
+void McPATComponent::warnUnrecognizedParam(XMLCSTR param) { // Non-fatal: prints a stderr warning naming this component and the unknown parameter
+ fprintf(stderr, "WARNING: Parameter not recognized in %s: %s\n",
+ name.c_str(), param);
+}
+
+void McPATComponent::warnUnrecognizedStat(XMLCSTR stat) { // Non-fatal: prints a stderr warning naming this component and the unknown statistic
+ fprintf(stderr, "WARNING: Statistic not recognized in %s: %s\n",
+ name.c_str(), stat);
+}
+
+void McPATComponent::warnIncompleteComponentType(XMLCSTR type) { // Non-fatal: prints a stderr warning that handling of `type` is not finished; unlike its siblings it does not print this component's name
+ fprintf(stderr, " WARNING: %s handling not yet complete\n", type);
+}
+
+/**
+ * Warn on stderr that a component is being ignored because it has no type.
+ *
+ * @param id Identifier of the offending component, or null. When null, the
+ *           message names this (enclosing) component instead.
+ */
+void McPATComponent::warnMissingComponentType(XMLCSTR id) {
+    if (!id) {
+        fprintf(stderr,
+                "WARNING: Ignoring a component in %s due to the missing type\n",
+                name.c_str());
+        return;
+    }
+    fprintf(stderr,
+            "WARNING: Ignoring a component due to the missing type: %s\n",
+            id);
+}
+
+/**
+ * Warn on stderr that a parameter is being ignored because it has no name.
+ *
+ * @param id Identifier of the offending parameter, or null. When null, the
+ *           message names this (enclosing) component instead.
+ */
+void McPATComponent::warnMissingParamName(XMLCSTR id) {
+    if (!id) {
+        fprintf(stderr,
+                "WARNING: Ignoring a parameter in %s due to the missing name\n",
+                name.c_str());
+        return;
+    }
+    fprintf(stderr,
+            "WARNING: Ignoring a parameter due to the missing name: %s\n",
+            id);
+}
+
+/**
+ * Warn on stderr that a statistic is being ignored because it has no name.
+ *
+ * @param id Identifier of the offending statistic, or null. When null, the
+ *           message names this (enclosing) component instead.
+ */
+void McPATComponent::warnMissingStatName(XMLCSTR id) {
+    if (!id) {
+        fprintf(stderr,
+                "WARNING: Ignoring a statistic in %s due to the missing name\n",
+                name.c_str());
+        return;
+    }
+    fprintf(stderr,
+            "WARNING: Ignoring a statistic due to the missing name: %s\n",
+            id);
+}
+
+/**
+ * Compute the leakage scaling factor obtained by using longer-channel
+ * devices for part of a design.
+ *
+ * With p the assumed fraction of longer-channel devices, the result is
+ *     (1 - p) + p * g_tp.peri_global.long_channel_leakage_reduction
+ *
+ * @param device_ty Device category; selects p (core, uncore or LLC).
+ * @param core_ty   Core type; only consulted for Core_device, where OOO
+ *                  cores use p = 0.56 and all other cores p = 0.8.
+ * @return The scaling factor described above. For an unknown device_ty an
+ *         error is printed and the process exits with status 1.
+ */
+double longer_channel_device_reduction(
+        enum Device_ty device_ty,
+        enum Core_type core_ty) {
+
+    // Fraction of devices assumed to use longer channels.
+    double longer_channel_device_percentage;
+
+    if (device_ty == Core_device) {
+        if (core_ty == OOO) {
+            //0.54 Xeon Tulsa //0.58 Nehelam
+            longer_channel_device_percentage = 0.56;
+        } else {
+            //0.8;//Niagara
+            longer_channel_device_percentage = 0.8;
+        }
+    } else if (device_ty == Uncore_device) {
+        longer_channel_device_percentage = 0.82;
+    } else if (device_ty == LLC_device) {
+        longer_channel_device_percentage = 1.0;
+    } else {
+        cout << "ERROR: Unknown device category: " << device_ty << endl;
+        // This is a failure path: exit non-zero so callers and scripts do
+        // not mistake it for a successful run.
+        exit(1);
+    }
+
+    double long_channel_device_reduction =
+        (1 - longer_channel_device_percentage) +
+        longer_channel_device_percentage *
+        g_tp.peri_global.long_channel_leakage_reduction;
- return long_channel_device_reduction;
+    return long_channel_device_reduction;
}
-statsComponents operator+(const statsComponents & x, const statsComponents & y)
-{
- statsComponents z;
+statsComponents operator+(const statsComponents & x, const statsComponents & y) { // Field-wise sum of two counters (access, hit, miss)
+ statsComponents z;
- z.access = x.access + y.access;
- z.hit = x.hit + y.hit;
- z.miss = x.miss + y.miss;
+ z.access = x.access + y.access;
+ z.hit = x.hit + y.hit;
+ z.miss = x.miss + y.miss;
- return z;
+ return z;
}
-statsComponents operator*(const statsComponents & x, double const * const y)
-{
- statsComponents z;
+statsComponents operator*(const statsComponents & x, double const * const y) { // Scales each field by its own factor; y must point to at least 3 doubles
+ statsComponents z;
- z.access = x.access*y[0];
- z.hit = x.hit*y[1];
- z.miss = x.miss*y[2];
+ z.access = x.access * y[0]; // y[0] scales access
+ z.hit = x.hit * y[1]; // y[1] scales hit
+ z.miss = x.miss * y[2]; // y[2] scales miss
- return z;
+ return z;
}
-statsDef operator+(const statsDef & x, const statsDef & y)
-{
- statsDef z;
+statsDef operator+(const statsDef & x, const statsDef & y) { // Sums readAc, writeAc and searchAc only; every other member of z stays default-constructed
+ statsDef z;
- z.readAc = x.readAc + y.readAc;
- z.writeAc = x.writeAc + y.writeAc;
- z.searchAc = x.searchAc + y.searchAc;
- return z;
+ z.readAc = x.readAc + y.readAc;
+ z.writeAc = x.writeAc + y.writeAc;
+ z.searchAc = x.searchAc + y.searchAc;
+ return z;
}
-statsDef operator*(const statsDef & x, double const * const y)
-{
- statsDef z;
+statsDef operator*(const statsDef & x, double const * const y) { // Applies the same factor array y to readAc, writeAc and searchAc via statsComponents operator*
+ statsDef z;
- z.readAc = x.readAc*y;
- z.writeAc = x.writeAc*y;
- z.searchAc = x.searchAc*y;
- return z;
+ z.readAc = x.readAc * y; // y is a pointer: this calls operator*(statsComponents, double const*)
+ z.writeAc = x.writeAc * y;
+ z.searchAc = x.searchAc * y;
+ return z;
}
diff --git a/ext/mcpat/basic_components.h b/ext/mcpat/basic_components.h
index ce3e639cd..ea07d2779 100644
--- a/ext/mcpat/basic_components.h
+++ b/ext/mcpat/basic_components.h
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -34,9 +35,15 @@
#include <vector>
-#include "XML_Parse.h"
+#include "component.h"
#include "parameter.h"
+#include "xmlParser.h"
+/**
+ * TODO: Since revisions to McPAT aim to make the component hierarchy more
+ * modular, many of the parameter and statistics classes/structs included in
+ * this file should be moved to the files for their respective components.
+ */
const double cdb_overhead = 1.1;
enum FU_type {
@@ -46,21 +53,28 @@ enum FU_type {
};
enum Core_type { // Core class; longer_channel_device_reduction() treats OOO differently from every other value
- OOO,
- Inorder
+ OOO,
+ Inorder
};
enum Renaming_type {
RAMbased,
- CAMbased
+ CAMbased
};
enum Scheduler_type {
PhysicalRegFile,
- ReservationStation
+ ReservationStation
};
-enum cache_level {
+enum Cache_type { // presumably what a cache holds (data, instructions, or both) - confirm against its users
+ DATA_CACHE,
+ INSTRUCTION_CACHE,
+ MIXED
+};
+
+enum CacheLevel { // L1 is the first enumerator, so it has value 0
+ L1,
L2,
L3,
L1Directory,
@@ -68,198 +82,408 @@ enum cache_level {
};
enum MemoryCtrl_type {
- MC, //memory controller
- FLASHC //flash controller
+ MC, //memory controller
+ FLASHC //flash controller
};
enum Dir_type {
- ST,//shadowed tag
- DC,//directory cache
- SBT,//static bank tag
- NonDir
+ ST,//shadowed tag
+ DC,//directory cache
+ SBT,//static bank tag
+ NonDir
};
enum Cache_policy {
- Write_through,
- Write_back
+ Write_through,
+ Write_back
};
enum Device_ty { // Device category accepted by longer_channel_device_reduction()
- Core_device,
- Uncore_device,
- LLC_device
+ Core_device,
+ Uncore_device,
+ LLC_device
};
-class statsComponents
-{
- public:
+enum Access_mode {
+ Normal,
+ Sequential,
+ Fast
+};
+
+class statsComponents { // Three double counters (access, hit, miss) with reset, field-wise + and per-field scaling
+public:
double access;
double hit;
double miss;
statsComponents() : access(0), hit(0), miss(0) {}
- statsComponents(const statsComponents & obj) { *this = obj; }
- statsComponents & operator=(const statsComponents & rhs)
- {
- access = rhs.access;
- hit = rhs.hit;
- miss = rhs.miss;
- return *this;
+ statsComponents(const statsComponents & obj) { // copy construction delegates to operator= below
+ *this = obj;
+ }
+ statsComponents & operator=(const statsComponents & rhs) {
+ access = rhs.access;
+ hit = rhs.hit;
+ miss = rhs.miss;
+ return *this;
+ }
+ void reset() { // sets all three counters back to 0
+ access = 0;
+ hit = 0;
+ miss = 0;
}
- void reset() { access = 0; hit = 0; miss = 0;}
- friend statsComponents operator+(const statsComponents & x, const statsComponents & y);
- friend statsComponents operator*(const statsComponents & x, double const * const y);
+ friend statsComponents operator+(const statsComponents & x,
+ const statsComponents & y);
+ friend statsComponents operator*(const statsComponents & x,
+ double const * const y); // y is an array of 3 per-field factors, not a single scalar
};
-class statsDef
-{
- public:
+class statsDef { // Bundle of statsComponents counters, one per access kind
+public:
statsComponents readAc;
statsComponents writeAc;
statsComponents searchAc;
-
- statsDef() : readAc(), writeAc(),searchAc() { }
- void reset() { readAc.reset(); writeAc.reset();searchAc.reset();}
+ statsComponents dataReadAc;
+ statsComponents dataWriteAc;
+ statsComponents tagReadAc;
+ statsComponents tagWriteAc;
+
+ statsDef() : readAc(), writeAc(), searchAc() { } // members not listed here are still zeroed by statsComponents' default constructor
+ void reset() { // NOTE(review): clears only readAc/writeAc/searchAc; dataReadAc, dataWriteAc, tagReadAc and tagWriteAc keep their values - confirm this is intended
+ readAc.reset();
+ writeAc.reset();
+ searchAc.reset();
+ }
friend statsDef operator+(const statsDef & x, const statsDef & y);
friend statsDef operator*(const statsDef & x, double const * const y);
};
+/**
+ * An object to store the computed data that will be output from McPAT on a
+ * per-component-instance basis. Currently, this includes the amount of storage
+ * that the component comprises, its chip area, and power and energy
+ * calculations.
+ */
+class McPATOutput { // McPATComponent::computeEnergy() zeroes this object with memset, so keep it free of virtuals and non-trivial members
+public:
+ // Storage is in bytes (B)
+ double storage;
+ // Area is in mm^2
+ double area;
+ // Peak Dynamic Power is in W
+ double peak_dynamic_power;
+ // Subthreshold Leakage Power is in W
+ double subthreshold_leakage_power;
+ // Gate Leakage Power is in W
+ double gate_leakage_power;
+ // Runtime Dynamic Energy is in J
+ double runtime_dynamic_energy;
+
+ void reset();
+
+ friend McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs);
+ void operator+=(const McPATOutput &rhs); // returns void, so `a += b` cannot be chained or used as a value
+};
+
+/**
+ * A McPATComponent encompasses all the parts that are common to any component
+ * for which McPAT may compute and print power, area, and timing data. It
+ * includes a pointer to the XML data from which the component gathers its
+ * input parameters, it stores the variables that are commonly used in all
+ * components, and it maintains the hierarchical structure to recursively
+ * compute and print output. This is a base class from which all components
+ * should inherit this functionality (possibly through other descendant
+ * classes).
+ */
+class McPATComponent : public Component {
+public:
+    static bool debug;
+
+    // Variables shared across the system by all McPATComponents
+    static bool opt_for_clk;
+    static int longer_channel_device;
+    static double execution_time;
+    static int physical_address_width;
+    static int virtual_address_width;
+    static int virtual_memory_page_size;
+    static int data_path_width;
+
+    // Although these two variables are static right now, they need to be
+    // modulated on a per-frequency-domain basis eventually.
+    static double target_core_clockrate;
+    static double total_cycles;
+
+    XMLNode* xml_data;
+    InputParameter interface_ip;
+    string name;
+    // Number of cycles per second (consider changing name)
+    double clockRate;
+    // Sub-components; the default computeArea/computeEnergy/displayData
+    // implementations recurse over this list.
+    vector<McPATComponent*> children;
+    // The data structure that is printed in displayData
+    McPATOutput output_data;
+    // Set this to contain the stats to calculate peak dynamic power
+    statsDef tdp_stats;
+    // Set this to contain the stats to calculate runtime dynamic energy/power
+    statsDef rtp_stats;
+    // Holds the peak dynamic power calculation
+    powerDef power_t;
+    // Holds the runtime dynamic power calculation
+    powerDef rt_power;
+
+    McPATComponent();
+    // Which of these is a better way of doing things?!
+    McPATComponent(XMLNode* _xml_data);
+    McPATComponent(XMLNode* _xml_data, InputParameter* _interface_ip);
+    virtual void recursiveInstantiate();
+    virtual void computeArea();
+    // This function should probably be pure virtual, but it's too early in
+    // the modifying process to know for sure. Note that each component has
+    // to calculate its own power consumption
+    virtual void computeEnergy();
+    virtual void displayData(uint32_t indent, int plevel);
+    // Virtual because components are held and may be destroyed through
+    // McPATComponent pointers (see `children`).
+    virtual ~McPATComponent();
+
+protected:
+    // Fatal diagnostics: print to stderr and exit(1).
+    void errorUnspecifiedParam(string param);
+    void errorNonPositiveParam(string param);
+    // Non-fatal diagnostics: print a warning to stderr and return.
+    void warnUnrecognizedComponent(XMLCSTR component);
+    void warnUnrecognizedParam(XMLCSTR param);
+    void warnUnrecognizedStat(XMLCSTR stat);
+    void warnIncompleteComponentType(XMLCSTR type);
+    void warnMissingComponentType(XMLCSTR id);
+    void warnMissingParamName(XMLCSTR id);
+    void warnMissingStatName(XMLCSTR id);
+};
+
double longer_channel_device_reduction(
- enum Device_ty device_ty=Core_device,
- enum Core_type core_ty=Inorder);
+ enum Device_ty device_ty = Core_device,
+ enum Core_type core_ty = Inorder);
-class CoreDynParam {
+class CoreParameters { // Plain holder of per-core configuration values; declares no constructor, so nothing here is initialized by the class itself
public:
- CoreDynParam(){};
- CoreDynParam(ParseXML *XML_interface, int ithCore_);
- // :XML(XML_interface),
- // ithCore(ithCore_)
- // core_ty(inorder),
- // rm_ty(CAMbased),
- // scheu_ty(PhysicalRegFile),
- // clockRate(1e9),//1GHz
- // arch_ireg_width(32),
- // arch_freg_width(32),
- // phy_ireg_width(128),
- // phy_freg_width(128),
- // perThreadState(8),
- // globalCheckpoint(32),
- // instructionLength(32){};
- //ParseXML * XML;
- bool opt_local;
- bool x86;
- bool Embedded;
- enum Core_type core_ty;
- enum Renaming_type rm_ty;
+ bool opt_local;
+ bool x86;
+ bool Embedded;
+ enum Core_type core_ty;
+ enum Renaming_type rm_ty;
enum Scheduler_type scheu_ty;
- double clockRate,executionTime;
- int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width;
- int num_IRF_entry, num_FRF_entry, num_ifreelist_entries, num_ffreelist_entries;
- int fetchW, decodeW,issueW,peak_issueW, commitW,peak_commitW, predictionW, fp_issueW, fp_decodeW;
- int perThreadState, globalCheckpoint, instruction_length, pc_width, opcode_length, micro_opcode_length;
- int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, num_fp_pipelines;
- int num_alus, num_muls;
+ double clockRate;
+ int arch_ireg_width;
+ int arch_freg_width;
+ int archi_Regs_IRF_size;
+ int archi_Regs_FRF_size;
+ int phy_ireg_width;
+ int phy_freg_width;
+ int num_IRF_entry;
+ int num_FRF_entry;
+ int num_ifreelist_entries;
+ int num_ffreelist_entries;
+ int fetchW;
+ int decodeW;
+ int issueW;
+ int peak_issueW;
+ int commitW;
+ int peak_commitW;
+ int predictionW;
+ int fp_issueW;
+ int fp_decodeW;
+ int perThreadState;
+ int globalCheckpoint;
+ int instruction_length;
+ int pc_width;
+ int opcode_width;
+ int micro_opcode_length;
+ int num_hthreads;
+ int pipeline_stages;
+ int fp_pipeline_stages;
+ int num_pipelines;
+ int num_fp_pipelines;
+ int num_alus;
+ int num_muls;
double num_fpus;
- int int_data_width, fp_data_width,v_address_width, p_address_width;
- double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles;
- bool regWindowing,multithreaded;
+ int int_data_width;
+ int fp_data_width;
+ int v_address_width;
+ int p_address_width;
+ bool regWindowing;
+ bool multithreaded;
double pppm_lkg_multhread[4];
- double IFU_duty_cycle,BR_duty_cycle,LSU_duty_cycle,MemManU_I_duty_cycle,
- MemManU_D_duty_cycle, ALU_duty_cycle,MUL_duty_cycle,
- FPU_duty_cycle, ALU_cdb_duty_cycle,MUL_cdb_duty_cycle,
- FPU_cdb_duty_cycle;
- ~CoreDynParam(){};
+ int ROB_size;
+ int ROB_assoc;
+ int ROB_nbanks;
+ int ROB_tag_width;
+ int scheduler_assoc;
+ int scheduler_nbanks;
+ int register_window_size;
+ double register_window_throughput;
+ double register_window_latency;
+ int register_window_assoc;
+ int register_window_nbanks;
+ int register_window_tag_width;
+ int register_window_rw_ports;
+ int phy_Regs_IRF_size;
+ int phy_Regs_IRF_assoc;
+ int phy_Regs_IRF_nbanks;
+ int phy_Regs_IRF_tag_width;
+ int phy_Regs_IRF_rd_ports;
+ int phy_Regs_IRF_wr_ports;
+ int phy_Regs_FRF_size;
+ int phy_Regs_FRF_assoc;
+ int phy_Regs_FRF_nbanks;
+ int phy_Regs_FRF_tag_width;
+ int phy_Regs_FRF_rd_ports;
+ int phy_Regs_FRF_wr_ports;
+ int front_rat_nbanks;
+ int front_rat_rw_ports;
+ int retire_rat_nbanks;
+ int retire_rat_rw_ports;
+ int freelist_nbanks;
+ int freelist_rw_ports;
+ int memory_ports;
+ int load_buffer_size;
+ int load_buffer_assoc;
+ int load_buffer_nbanks;
+ int store_buffer_size;
+ int store_buffer_assoc;
+ int store_buffer_nbanks;
+ int instruction_window_size;
+ int fp_instruction_window_size;
+ int instruction_buffer_size;
+ int instruction_buffer_assoc;
+ int instruction_buffer_nbanks;
+ int instruction_buffer_tag_width;
+ int number_instruction_fetch_ports;
+ int RAS_size;
+ int execu_int_bypass_ports;
+ int execu_mul_bypass_ports;
+ int execu_fp_bypass_ports;
+ Wire_type execu_bypass_wire_type;
+ Wire_type execu_broadcast_wt;
+ int execu_wire_mat_type;
+ double execu_bypass_base_width;
+ double execu_bypass_base_height;
+ int execu_bypass_start_wiring_level;
+ double execu_bypass_route_over_perc;
+ double broadcast_numerator;
};
-class CacheDynParam {
+class CoreStatistics { // Plain holder of per-core activity values, all doubles; declares no constructor, so nothing here is initialized by the class itself
public:
- CacheDynParam(){};
- CacheDynParam(ParseXML *XML_interface, int ithCache_);
- string name;
- enum Dir_type dir_ty;
- double clockRate,executionTime;
- double capacity, blockW, assoc, nbanks;
- double throughput, latency;
- double duty_cycle, dir_duty_cycle;
- //double duty_cycle;
- int missb_size, fu_size, prefetchb_size, wbb_size;
- ~CacheDynParam(){};
+ double pipeline_duty_cycle;
+ double total_cycles;
+ double busy_cycles;
+ double idle_cycles;
+ double IFU_duty_cycle;
+ double BR_duty_cycle;
+ double LSU_duty_cycle;
+ double MemManU_I_duty_cycle;
+ double MemManU_D_duty_cycle;
+ double ALU_duty_cycle;
+ double MUL_duty_cycle;
+ double FPU_duty_cycle;
+ double ALU_cdb_duty_cycle;
+ double MUL_cdb_duty_cycle;
+ double FPU_cdb_duty_cycle;
+ double ROB_reads;
+ double ROB_writes;
+ double total_instructions;
+ double int_instructions;
+ double fp_instructions;
+ double branch_instructions;
+ double branch_mispredictions;
+ double load_instructions;
+ double store_instructions;
+ double committed_instructions;
+ double committed_int_instructions;
+ double committed_fp_instructions;
+ double rename_reads;
+ double rename_writes;
+ double fp_rename_reads;
+ double fp_rename_writes;
+ double inst_window_reads;
+ double inst_window_writes;
+ double inst_window_wakeup_accesses;
+ double fp_inst_window_reads;
+ double fp_inst_window_writes;
+ double fp_inst_window_wakeup_accesses;
+ double int_regfile_reads;
+ double float_regfile_reads;
+ double int_regfile_writes;
+ double float_regfile_writes;
+ double context_switches;
+ double ialu_accesses;
+ double fpu_accesses;
+ double mul_accesses;
+ double cdb_alu_accesses;
+ double cdb_fpu_accesses;
+ double cdb_mul_accesses;
+ double function_calls;
};
-class MCParam {
+class MCParameters { // Plain holder of memory-controller configuration values; declares no constructor
public:
- MCParam(){};
- MCParam(ParseXML *XML_interface, int ithCache_);
- string name;
- double clockRate,num_mcs, peakDataTransferRate, num_channels;
- // double mcTEPowerperGhz;
- // double mcPHYperGbit;
- // double area;
- int llcBlockSize, dataBusWidth, addressBusWidth;
- int opcodeW;
- int memAccesses;
- int memRank;
- int type;
- double frontend_duty_cycle, duty_cycle, perc_load;
- double executionTime, reads, writes;
- bool LVDS, withPHY;
-
- ~MCParam(){};
+ double clockRate;
+ enum MemoryCtrl_type mc_type;
+ double num_mcs;
+ int num_channels;
+ int llcBlockSize; // NOTE(review): llc_line_length below looks like the same quantity - confirm which one users read
+ int dataBusWidth; // NOTE(review): databus_width below looks like the same quantity - confirm which one users read
+ int databus_width;
+ int llc_line_length;
+ int req_window_size_per_channel;
+ int IO_buffer_size_per_channel;
+ int addressbus_width;
+ int opcodeW;
+ int type;
+ bool LVDS;
+ bool withPHY;
+ int peak_transfer_rate;
+ int number_ranks;
+ int reorder_buffer_assoc;
+ int reorder_buffer_nbanks;
+ int read_buffer_assoc;
+ int read_buffer_nbanks;
+ int read_buffer_tag_width;
+ int write_buffer_assoc;
+ int write_buffer_nbanks;
+ int write_buffer_tag_width;
};
-class NoCParam {
+class MCStatistics { // Plain holder of memory-controller activity values; declares no constructor
public:
- NoCParam(){};
- NoCParam(ParseXML *XML_interface, int ithCache_);
- string name;
- double clockRate;
- int flit_size;
- int input_ports, output_ports, min_ports, global_linked_ports;
- int virtual_channel_per_port,input_buffer_entries_per_vc;
- int horizontal_nodes,vertical_nodes, total_nodes;
- double executionTime, total_access, link_throughput,link_latency,
- duty_cycle, chip_coverage, route_over_perc;
- bool has_global_link, type;
-
- ~NoCParam(){};
+ double duty_cycle;
+ double perc_load;
+ double reads;
+ double writes;
};
-class ProcParam {
-public:
- ProcParam(){};
- ProcParam(ParseXML *XML_interface, int ithCache_);
- string name;
- int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir,numMC, numMCChannel;
- bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir;
+class NIUParameters { // Plain holder of NIU configuration values; declares no constructor
+ public:
+ double clockRate;
+ int num_units;
+ int type;
+};
- ~ProcParam(){};
+class NIUStatistics { // Plain holder of NIU activity values; declares no constructor
+ public:
+ double duty_cycle;
+ double perc_load;
};
-class NIUParam {
-public:
- NIUParam(){};
- NIUParam(ParseXML *XML_interface, int ithCache_);
- string name;
- double clockRate;
- int num_units;
- int type;
- double duty_cycle, perc_load;
- ~NIUParam(){};
+class PCIeParameters { // Plain holder of PCIe configuration values; declares no constructor
+ public:
+ double clockRate;
+ int num_channels;
+ int num_units;
+ bool withPHY;
+ int type;
};
-class PCIeParam {
-public:
- PCIeParam(){};
- PCIeParam(ParseXML *XML_interface, int ithCache_);
- string name;
- double clockRate;
- int num_channels, num_units;
- bool withPHY;
- int type;
- double duty_cycle, perc_load;
- ~PCIeParam(){};
+class PCIeStatistics { // Plain holder of PCIe activity values; declares no constructor
+ public:
+ double duty_cycle;
+ double perc_load;
};
#endif /* BASIC_COMPONENTS_H_ */
diff --git a/ext/mcpat/bus_interconnect.cc b/ext/mcpat/bus_interconnect.cc
new file mode 100644
index 000000000..1dee2c338
--- /dev/null
+++ b/ext/mcpat/bus_interconnect.cc
@@ -0,0 +1,179 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Joel Hestness
+ *
+ ***************************************************************************/
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <string>
+
+#include "basic_circuit.h"
+#include "bus_interconnect.h"
+#include "common.h"
+#include "const.h"
+#include "io.h"
+#include "parameter.h"
+
+BusInterconnect::BusInterconnect(XMLNode* _xml_data,
+ InputParameter* interface_ip_)
+ : McPATComponent(_xml_data), link_bus(NULL), interface_ip(*interface_ip_) { // copies *interface_ip_ into this object's own interface_ip member
+ name = "Bus Interconnect";
+ set_param_stats(); // reads bus_params, bus_stats, link_len and wire settings from the XML node
+ local_result = init_interface(&interface_ip, name);
+ scktRatio = g_tp.sckt_co_eff;
+
+ interface_ip.throughput = bus_params.link_throughput / bus_params.clockRate; // clockRate is already in Hz here (set_param_stats scales it by 1e6)
+ interface_ip.latency = bus_params.link_latency / bus_params.clockRate;
+
+ link_len /= bus_params.total_nodes; // NOTE(review): total_nodes is 0 if the XML omits it, making link_len infinite - confirm inputs always supply it
+ if (bus_params.total_nodes > 1) {
+ //All links are shared by neighbors
+ link_len /= 2;
+ }
+
+ link_bus = new Interconnect(xml_data, "Link", Uncore_device,
+ bus_params.link_base_width,
+ bus_params.link_base_height,
+ bus_params.flit_size, link_len, &interface_ip,
+ bus_params.link_start_wiring_level,
+ bus_params.clockRate,
+ bus_params.pipelinable,
+ bus_params.route_over_perc);
+ children.push_back(link_bus); // registers the link so the base class recursion reaches it
+}
+
+/**
+ * Load the bus statistics into this component and its link, then let the
+ * base class compute energy recursively over the children.
+ */
+void BusInterconnect::computeEnergy() {
+    const double duty = bus_stats.duty_cycle;
+    const double accesses = bus_stats.total_access;
+
+    // Peak (TDP) activity: driven by the bus duty cycle.
+    tdp_stats.reset();
+    tdp_stats.readAc.access = duty;
+    link_bus->int_params.active_ports = bus_params.min_ports - 1;
+    link_bus->int_stats.duty_cycle = bus_params.M_traffic_pattern * duty;
+
+    // Runtime activity: driven by the total access count.
+    rtp_stats.reset();
+    rtp_stats.readAc.access = accesses;
+    link_bus->int_stats.accesses = accesses;
+
+    // Hand over to the generic recursion over `children`.
+    McPATComponent::computeEnergy();
+}
+
+/**
+ * Read this bus's <param> and <stat> children from xml_data into bus_params,
+ * bus_stats, link_len and interface_ip, then derive clockRate and min_ports.
+ *
+ * Anything the XML does not supply reads as zero rather than as an
+ * indeterminate value. Unrecognized names produce a warning; entries without
+ * a name are warned about and skipped. Asserts that chip_coverage and
+ * route_over_perc are at most 1 and that link_len is positive.
+ */
+void BusInterconnect::set_param_stats() {
+    memset(&bus_params, 0, sizeof(BusInterconnectParameters));
+    // Statistics missing from the XML must read as zero, not garbage.
+    memset(&bus_stats, 0, sizeof(BusInterconnectStatistics));
+    // The constructor does not initialize link_len; starting from 0 makes
+    // the assert below fire reliably when "link_len" is not supplied.
+    link_len = 0.0;
+
+    int num_children = xml_data->nChildNode("param");
+    int i;
+    // Initialized so that a missing "wire_mat_type" does not copy an
+    // indeterminate value into interface_ip below.
+    int mat_type = 0;
+    for (i = 0; i < num_children; i++) {
+        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+        XMLCSTR node_name = paramNode->getAttribute("name");
+        XMLCSTR value = paramNode->getAttribute("value");
+
+        if (!node_name) {
+            warnMissingParamName(paramNode->getAttribute("id"));
+            // Nothing to match against. The ASSIGN_*_IF macros presumably
+            // compare node_name as a C string (confirm in common.h), so a
+            // null name must not reach them.
+            continue;
+        }
+
+        ASSIGN_FP_IF("clockrate", bus_params.clockRate);
+        ASSIGN_INT_IF("flit_bits", bus_params.flit_size);
+        ASSIGN_FP_IF("link_throughput", bus_params.link_throughput);
+        ASSIGN_FP_IF("link_latency", bus_params.link_latency);
+        ASSIGN_INT_IF("total_nodes", bus_params.total_nodes);
+        ASSIGN_INT_IF("input_ports", bus_params.input_ports);
+        ASSIGN_INT_IF("output_ports", bus_params.output_ports);
+        ASSIGN_INT_IF("global_linked_ports", bus_params.global_linked_ports);
+        ASSIGN_FP_IF("chip_coverage", bus_params.chip_coverage);
+        ASSIGN_INT_IF("pipelinable", bus_params.pipelinable);
+        ASSIGN_FP_IF("link_routing_over_percentage",
+                     bus_params.route_over_perc);
+        ASSIGN_INT_IF("virtual_channel_per_port",
+                      bus_params.virtual_channel_per_port);
+        ASSIGN_FP_IF("M_traffic_pattern", bus_params.M_traffic_pattern);
+        ASSIGN_FP_IF("link_len", link_len);
+        ASSIGN_FP_IF("link_base_width", bus_params.link_base_width);
+        ASSIGN_FP_IF("link_base_height", bus_params.link_base_height);
+        ASSIGN_FP_IF("link_start_wiring_level",
+                     bus_params.link_start_wiring_level);
+        ASSIGN_INT_IF("wire_mat_type", mat_type);
+        ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
+
+        else {
+            warnUnrecognizedParam(node_name);
+        }
+    }
+
+    // Change from MHz to Hz
+    bus_params.clockRate *= 1e6;
+
+    interface_ip.wire_is_mat_type = mat_type;
+    interface_ip.wire_os_mat_type = mat_type;
+
+    num_children = xml_data->nChildNode("stat");
+    for (i = 0; i < num_children; i++) {
+        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+        XMLCSTR node_name = statNode->getAttribute("name");
+        XMLCSTR value = statNode->getAttribute("value");
+
+        if (!node_name) {
+            warnMissingStatName(statNode->getAttribute("id"));
+            // Same reasoning as for nameless parameters above.
+            continue;
+        }
+
+        ASSIGN_FP_IF("duty_cycle", bus_stats.duty_cycle);
+        ASSIGN_FP_IF("total_accesses", bus_stats.total_access);
+
+        else {
+            warnUnrecognizedStat(node_name);
+        }
+    }
+
+    clockRate = bus_params.clockRate;
+    bus_params.min_ports =
+        min(bus_params.input_ports, bus_params.output_ports);
+
+    assert(bus_params.chip_coverage <= 1);
+    assert(bus_params.route_over_perc <= 1);
+    assert(link_len > 0);
+}
+
+void
+BusInterconnect::set_duty_cycle(double duty_cycle) { // Overrides the duty cycle stored in bus_stats; computeEnergy() reads it on its next call
+ bus_stats.duty_cycle = duty_cycle;
+}
+
+void
+BusInterconnect::set_number_of_accesses(double total_accesses) { // Overrides the access count stored in bus_stats; computeEnergy() reads it on its next call
+ bus_stats.total_access = total_accesses;
+}
+
+BusInterconnect::~BusInterconnect() { // Frees the link created in the constructor
+ delete link_bus; // NOTE(review): link_bus is also stored in `children`; confirm ~McPATComponent does not delete its children too (double delete)
+ link_bus = NULL;
+}
diff --git a/ext/mcpat/sharedcache.h b/ext/mcpat/bus_interconnect.h
index 923408482..5c8b00420 100644
--- a/ext/mcpat/sharedcache.h
+++ b/ext/mcpat/bus_interconnect.h
@@ -1,7 +1,7 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,65 +25,71 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Joel Hestness
*
***************************************************************************/
-#ifndef SHAREDCACHE_H_
-#define SHAREDCACHE_H_
-#include <vector>
+#ifndef BUS_INTERCONNECT_H_
+#define BUS_INTERCONNECT_H_
-#include "XML_Parse.h"
-#include "area.h"
#include "array.h"
#include "basic_components.h"
+#include "interconnect.h"
#include "logic.h"
#include "parameter.h"
-class SharedCache :public Component{
- public:
- ParseXML * XML;
- int ithCache;
- InputParameter interface_ip;
- enum cache_level cacheL;
- DataCache unicache;//Shared cache
- CacheDynParam cachep;
- statsDef homenode_tdp_stats;
- statsDef homenode_rtp_stats;
- statsDef homenode_stats_t;
- double dir_overhead;
- // cache_processor llCache,directory, directory1, inv_dir;
-
- //pipeline pipeLogicCache, pipeLogicDirectory;
- //clock_network clockNetwork;
- double scktRatio, executionTime;
- // Component L2Tot, cc, cc1, ccTot;
+class BusInterconnectParameters { // Plain holder of bus configuration; BusInterconnect::set_param_stats() zeroes it with memset before parsing
+public:
+ double clockRate; // parsed from "clockrate" in MHz, then scaled by 1e6 to Hz in set_param_stats()
+ int flit_size; // parsed from the "flit_bits" parameter
+ int input_ports;
+ int output_ports;
+ int min_ports; // derived: min(input_ports, output_ports)
+ int global_linked_ports;
+ int virtual_channel_per_port;
+ int input_buffer_entries_per_vc;
+ int total_nodes;
+ double link_throughput;
+ double link_latency;
+ double chip_coverage; // asserted to be <= 1 after parsing
+ bool pipelinable;
+ double route_over_perc; // parsed from "link_routing_over_percentage"; asserted to be <= 1
+ bool has_global_link;
+ bool type;
+ double M_traffic_pattern;
+ double link_base_width;
+ double link_base_height;
+ int link_start_wiring_level;
+};
- SharedCache(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_,enum cache_level cacheL_ =L2);
- void set_cache_param();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,bool is_tdp=true);
- ~SharedCache(){};
+class BusInterconnectStatistics { // Plain holder of bus activity values; declares no constructor
+public:
+ double duty_cycle; // from the "duty_cycle" stat or set_duty_cycle()
+ double total_access; // from the "total_accesses" stat or set_number_of_accesses()
};
-class CCdir :public Component{
- public:
- ParseXML * XML;
- int ithCache;
- InputParameter interface_ip;
- DataCache dc;//Shared cache
- ArrayST * shadow_dir;
-// cache_processor llCache,directory, directory1, inv_dir;
+class BusInterconnect : public McPATComponent { // Bus modelled as one Interconnect link, configured from XML params/stats
+public:
+ Interconnect* link_bus; // created in the constructor, also registered in `children`, deleted in the destructor
- //pipeline pipeLogicCache, pipeLogicDirectory;
- //clock_network clockNetwork;
- double scktRatio, clockRate, executionTime;
- Component L2Tot, cc, cc1, ccTot;
+ int ithNoC;
+ InputParameter interface_ip; // NOTE(review): hides McPATComponent::interface_ip; the constructor initializes this copy, not the base member - confirm that is intended
+ double link_len; // read from the "link_len" param, then divided per node in the constructor
+ double scktRatio, chip_PR_overhead, macro_PR_overhead;
+ BusInterconnectParameters bus_params;
+ BusInterconnectStatistics bus_stats;
+ uca_org_t local_result;
+ statsDef stats_t;
+ double M_traffic_pattern; // NOTE(review): the visible .cc code uses bus_params.M_traffic_pattern, not this member - confirm it is still needed
- CCdir(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,bool is_tdp=true);
- ~CCdir();
+ BusInterconnect(XMLNode* _xml_data, InputParameter* interface_ip_);
+ void set_param_stats(); // parses <param>/<stat> children of xml_data; called from the constructor
+ void set_duty_cycle(double duty_cycle);
+ void set_number_of_accesses(double total_accesses);
+ void computeEnergy(); // overrides the virtual McPATComponent::computeEnergy()
+ ~BusInterconnect();
};
-#endif /* SHAREDCACHE_H_ */
+#endif /* BUS_INTERCONNECT_H_ */
diff --git a/ext/mcpat/cachearray.cc b/ext/mcpat/cachearray.cc
new file mode 100644
index 000000000..cebea289e
--- /dev/null
+++ b/ext/mcpat/cachearray.cc
@@ -0,0 +1,321 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Joel Hestness
+ * Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#include <cmath>
+#include <iostream>
+
+#include "area.h"
+#include "cachearray.h"
+#include "common.h"
+#include "decoder.h"
+#include "parameter.h"
+
+using namespace std;
+
+// Tuning constants shared by every CacheArray. The constructor copies the
+// ed, *_wt and *_dev values into its CACTI input (l_ip) when the first CACTI
+// solution misses the requested throughput or latency.
+
+// Compared against data_array2->area_efficiency for arrays with assoc == 0.
+double CacheArray::area_efficiency_threshold(20.0);
+
+int CacheArray::ed(0);
+
+// Fixed numbers, chosen so that timing can be satisfied.
+int CacheArray::delay_wt(100);
+int CacheArray::cycle_time_wt(1000);
+// Fixed numbers, used for the exhaustive search of individual components.
+int CacheArray::area_wt(10);
+int CacheArray::dynamic_power_wt(10);
+int CacheArray::leakage_power_wt(10);
+
+// Fixed numbers, chosen so that timing can be satisfied.
+int CacheArray::delay_dev(1000000);
+int CacheArray::cycle_time_dev(100);
+// Fixed numbers, used for the exhaustive search of individual components.
+int CacheArray::area_dev(1000000);
+int CacheArray::dynamic_power_dev(1000000);
+int CacheArray::leakage_power_dev(1000000);
+
+// Step by which the constructor lowers l_ip.cycle_time_dev on every search
+// iteration, and the value at which that search stops.
+int CacheArray::cycle_time_dev_threshold(10);
+
+CacheArray::CacheArray(XMLNode* _xml_data, // Runs CACTI for one array and leaves the scaled result in local_result.
+ const InputParameter *configure_interface, string _name, // *configure_interface is copied into l_ip; the caller's struct is not written to.
+ enum Device_ty device_ty_, double _clockRate,
+ bool opt_local_, enum Core_type core_ty_, bool _is_default)
+ : McPATComponent(_xml_data), l_ip(*configure_interface),
+ device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
+ is_default(_is_default), sbt_dir_overhead(0) {
+ name = _name;
+ clockRate = _clockRate;
+ if (l_ip.cache_sz < MIN_BUFFER_SIZE) { // Sizes below MIN_BUFFER_SIZE are raised to it before CACTI runs.
+ l_ip.cache_sz = MIN_BUFFER_SIZE;
+ }
+
+ if (!l_ip.error_checking(name)) { // Rejected input ends the whole process, not just this component.
+ exit(1);
+ }
+
+ sbt_tdp_stats.reset();
+ sbt_rtp_stats.reset();
+
+ // Compute the initial search point: one CACTI run on the inputs as given.
+ local_result.valid = false;
+ compute_base_power();
+
+ // Set up the cache by searching the design space with CACTI.
+ list<uca_org_t > candidate_solutions(0);
+ list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
+ uca_org_t* temp_res = NULL;
+ double throughput = l_ip.throughput; // Upper bound for local_result.cycle_time.
+ double latency = l_ip.latency; // Upper bound for local_result.access_time.
+ bool throughput_overflow = true;
+ bool latency_overflow = true;
+
+ if ((local_result.cycle_time - throughput) <= 1e-10 ) // 1e-10 is the slack allowed in the comparison.
+ throughput_overflow = false;
+ if ((local_result.access_time - latency) <= 1e-10)
+ latency_overflow = false;
+
+ if (opt_for_clk && opt_local) { // opt_for_clk is declared outside this file.
+ if (throughput_overflow || latency_overflow) { // Initial result missed timing: switch l_ip to the class-wide search settings.
+ l_ip.ed = ed;
+
+ l_ip.delay_wt = delay_wt;
+ l_ip.cycle_time_wt = cycle_time_wt;
+
+ l_ip.area_wt = area_wt;
+ l_ip.dynamic_power_wt = dynamic_power_wt;
+ l_ip.leakage_power_wt = leakage_power_wt;
+
+ l_ip.delay_dev = delay_dev;
+ l_ip.cycle_time_dev = cycle_time_dev;
+
+ l_ip.area_dev = area_dev;
+ l_ip.dynamic_power_dev = dynamic_power_dev;
+ l_ip.leakage_power_dev = leakage_power_dev;
+
+ //Reset the overflow flags before starting the optimization iterations
+ throughput_overflow = true;
+ latency_overflow = true;
+
+ //Clean up the result that was optimized for ED^2P
+ temp_res = &local_result;
+ temp_res->cleanup();
+ }
+
+
+ while ((throughput_overflow || latency_overflow) &&
+ l_ip.cycle_time_dev > cycle_time_dev_threshold) { // Not entered at all when the initial result already met both constraints.
+ compute_base_power();
+
+ //This is the cycle_time_dev to be used for the next iteration
+ l_ip.cycle_time_dev -= cycle_time_dev_threshold;
+
+ // from best area to worst area -->worst timing to best timing
+ if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
+ (local_result.access_time - latency) <= 1e-10) ||
+ (local_result.data_array2->area_efficiency <
+ area_efficiency_threshold && l_ip.assoc == 0)) {
+ //if no satisfiable solution is found, the most aggressive one
+ //is left
+ candidate_solutions.push_back(local_result); // The list entry is a copy of local_result; it must not be cleaned up while still a candidate.
+ if (((local_result.cycle_time - throughput) <= 1e-10) &&
+ ((local_result.access_time - latency) <= 1e-10)) {
+ //ensure the search does not stop merely because of a CAM
+ throughput_overflow = false;
+ latency_overflow = false;
+ }
+
+ } else {
+ if ((local_result.cycle_time - throughput) <= 1e-10)
+ throughput_overflow = false;
+ if ((local_result.access_time - latency) <= 1e-10)
+ latency_overflow = false;
+
+ //if cycle_time_dev is no longer above the threshold, local_result
+ //is the last result and cannot be cleaned up
+ if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
+ //Only solutions not saved in the list need to be
+ //cleaned up
+ temp_res = &local_result;
+ temp_res->cleanup();
+ }
+ }
+ }
+
+
+ if (l_ip.assoc > 0) {
+ //For array structures except CAM and FA, give a warning but still
+ //provide a result with the best timing found
+ if (throughput_overflow == true)
+ cout << "Warning: " << name
+ << " array structure cannot satisfy throughput constraint."
+ << endl;
+ if (latency_overflow == true)
+ cout << "Warning: " << name
+ << " array structure cannot satisfy latency constraint."
+ << endl;
+ }
+
+ double min_dynamic_energy = BIGNUM;
+ if (candidate_solutions.empty() == false) { // Keep the candidate with the lowest read dynamic energy.
+ local_result.valid = true;
+ for (candidate_iter = candidate_solutions.begin();
+ candidate_iter != candidate_solutions.end();
+ ++candidate_iter) {
+ if (min_dynamic_energy >
+ (candidate_iter)->power.readOp.dynamic) {
+ min_dynamic_energy =
+ (candidate_iter)->power.readOp.dynamic;
+ min_dynamic_energy_iter = candidate_iter;
+ local_result = *(min_dynamic_energy_iter); // NOTE(review): a previously selected candidate that is superseded here is never cleanup()'d -- possible leak, confirm against uca_org_t::cleanup().
+
+ } else {
+ candidate_iter->cleanup() ;
+ }
+
+ }
+
+
+ }
+ candidate_solutions.clear();
+ }
+
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(device_ty, core_ty);
+
+ double macro_layout_overhead = g_tp.macro_layout_overhead;
+ double chip_PR_overhead = g_tp.chip_layout_overhead;
+ double total_overhead = macro_layout_overhead * chip_PR_overhead;
+ local_result.area *= total_overhead; // Area grows by the combined macro and chip layout overheads.
+
+ //maintain constant power density
+ double pppm_t[4] = {total_overhead, 1, 1, total_overhead};
+
+ double sckRation = g_tp.sckt_co_eff; // Scales every dynamic figure below.
+ local_result.power.readOp.dynamic *= sckRation;
+ local_result.power.writeOp.dynamic *= sckRation;
+ local_result.power.searchOp.dynamic *= sckRation;
+ local_result.power.readOp.leakage *= l_ip.nbanks; // Leakage is multiplied by the bank count.
+ local_result.power.readOp.longer_channel_leakage =
+ local_result.power.readOp.leakage * long_channel_device_reduction;
+ local_result.power = local_result.power * pppm_t;
+
+ local_result.data_array2->power.readOp.dynamic *= sckRation; // Same scaling, applied to the data array's own figures.
+ local_result.data_array2->power.writeOp.dynamic *= sckRation;
+ local_result.data_array2->power.searchOp.dynamic *= sckRation;
+ local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.data_array2->power.readOp.longer_channel_leakage =
+ local_result.data_array2->power.readOp.leakage *
+ long_channel_device_reduction;
+ local_result.data_array2->power = local_result.data_array2->power * pppm_t;
+
+
+ if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) { // tag_array2 is only touched for caches that are not pure CAM/RAM or fully associative.
+ local_result.tag_array2->power.readOp.dynamic *= sckRation;
+ local_result.tag_array2->power.writeOp.dynamic *= sckRation;
+ local_result.tag_array2->power.searchOp.dynamic *= sckRation;
+ local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.tag_array2->power.readOp.longer_channel_leakage =
+ local_result.tag_array2->power.readOp.leakage *
+ long_channel_device_reduction;
+ local_result.tag_array2->power =
+ local_result.tag_array2->power * pppm_t;
+ }
+}
+
+// Runs CACTI on the current input parameters (l_ip) and overwrites
+// local_result with what it returns. The previous local_result is not
+// cleaned up here; the constructor does that itself where required.
+void CacheArray::compute_base_power() {
+    InputParameter* const cacti_inputs = &l_ip;
+    local_result = cacti_interface(cacti_inputs);
+}
+
+// Publishes the array area held in local_result: the value itself goes to
+// the component's `area` object, the value divided by 1e6 to output_data.
+void CacheArray::computeArea() {
+    const double array_area = local_result.area;
+    area.set_area(array_area);
+    output_data.area = array_area / 1e6;
+}
+
+// Fills output_data with leakage power, peak dynamic power and runtime
+// dynamic energy, combining the per-access energies of the CACTI result
+// with the access counts in tdp_stats (peak) and rtp_stats (runtime).
+void CacheArray::computeEnergy() {
+    // Leakage comes straight from the read-op figures of the CACTI result.
+    output_data.gate_leakage_power = local_result.power.readOp.gate_leakage;
+    output_data.subthreshold_leakage_power = local_result.power.readOp.leakage;
+
+    const bool has_tag_and_data = l_ip.assoc && l_ip.is_cache;
+    if (!has_tag_and_data) {
+        // Single array: use the whole-structure read/write/search energies.
+        output_data.peak_dynamic_power =
+            local_result.power.readOp.dynamic * tdp_stats.readAc.access +
+            local_result.power.writeOp.dynamic * tdp_stats.writeAc.access +
+            local_result.power.searchOp.dynamic * tdp_stats.searchAc.access;
+        output_data.peak_dynamic_power *= clockRate;
+
+        output_data.runtime_dynamic_energy =
+            local_result.power.readOp.dynamic * rtp_stats.readAc.access +
+            local_result.power.writeOp.dynamic * rtp_stats.writeAc.access +
+            local_result.power.searchOp.dynamic * rtp_stats.searchAc.access;
+    } else {
+        // Standard cache array with separate tag and data arrays.
+        const double tag_read_energy =
+            local_result.tag_array2->power.readOp.dynamic;
+        const double tag_write_energy =
+            local_result.tag_array2->power.writeOp.dynamic;
+        const double data_read_energy =
+            local_result.data_array2->power.readOp.dynamic;
+        const double data_write_energy =
+            local_result.data_array2->power.writeOp.dynamic;
+
+        // Peak: hits touch tag and data, misses only read the tag.
+        output_data.peak_dynamic_power =
+            (tag_read_energy + data_read_energy) * tdp_stats.readAc.hit +
+            (tag_read_energy) * tdp_stats.readAc.miss +
+            (tag_read_energy + data_write_energy) * tdp_stats.writeAc.hit +
+            (tag_read_energy) * tdp_stats.writeAc.miss;
+        output_data.peak_dynamic_power *= clockRate;
+
+        // Runtime: tag accesses are multiplied by the associativity.
+        output_data.runtime_dynamic_energy =
+            data_read_energy * rtp_stats.dataReadAc.access +
+            data_write_energy * rtp_stats.dataWriteAc.access +
+            (tag_read_energy * rtp_stats.tagReadAc.access +
+             tag_write_energy * rtp_stats.tagWriteAc.access) * l_ip.assoc;
+    }
+
+    // An SBT directory adds dynamic power on top of the figures above.
+    if (sbt_dir_overhead > 0) {
+        const double sbt_peak_energy = computeSBTDynEnergy(&sbt_tdp_stats);
+        const double sbt_runtime_energy = computeSBTDynEnergy(&sbt_rtp_stats);
+
+        output_data.peak_dynamic_power += (sbt_peak_energy * clockRate);
+        output_data.runtime_dynamic_energy += sbt_runtime_energy;
+    }
+}
+
+// On destruction, hands the stored CACTI result to its own cleanup()
+// routine.
+CacheArray::~CacheArray() {
+    uca_org_t& cacti_result = local_result;
+    cacti_result.cleanup();
+}
diff --git a/ext/mcpat/cachearray.h b/ext/mcpat/cachearray.h
new file mode 100644
index 000000000..ba55ffcd1
--- /dev/null
+++ b/ext/mcpat/cachearray.h
@@ -0,0 +1,117 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Joel Hestness
+ * Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#ifndef CACHEARRAY_H_
+#define CACHEARRAY_H_
+
+#include <iostream>
+#include <string>
+
+#include "basic_components.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "const.h"
+#include "parameter.h"
+
+class CacheArray : public McPATComponent { // One CACTI-modelled array (e.g. a data/tag array or a buffer) wrapped as a McPAT component.
+public:
+ static double area_efficiency_threshold; // Compared with data_array2->area_efficiency in the constructor's search loop.
+
+ // These are copied into the CACTI input (l_ip) when the constructor re-optimizes for timing.
+ static int ed;
+ static int delay_wt;
+ static int cycle_time_wt;
+ static int area_wt;
+ static int dynamic_power_wt;
+ static int leakage_power_wt;
+ static int delay_dev;
+ static int cycle_time_dev;
+ static int area_dev;
+ static int dynamic_power_dev;
+ static int leakage_power_dev;
+ static int cycle_time_dev_threshold; // Step and stop value for l_ip.cycle_time_dev in that search.
+
+ InputParameter l_ip; // Private copy of the CACTI input handed to the constructor.
+ enum Device_ty device_ty;
+ bool opt_local; // The timing search only runs when this and the global opt_for_clk are both set.
+ enum Core_type core_ty;
+ bool is_default;
+ uca_org_t local_result; // CACTI result after the constructor's overhead scaling; cleaned up in the destructor.
+
+ // These are only used for the static bank tag (SBT) directory type.
+ double sbt_dir_overhead; // 0 (the constructor default) disables the SBT terms.
+ // Set this to contain the SBT peak power stats
+ statsDef sbt_tdp_stats;
+ // Set this to contain the SBT runtime power stats
+ statsDef sbt_rtp_stats;
+
+ CacheArray(XMLNode* _xml_data, const InputParameter *configure_interface,
+ string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
+ bool opt_local_ = true,
+ enum Core_type core_ty_ = Inorder, bool _is_default = true);
+ void computeArea(); // Copies local_result.area into area and output_data.
+ void computeEnergy(); // Fills the leakage, peak power and runtime energy fields of output_data.
+ void compute_base_power(); // Re-runs cacti_interface(&l_ip) into local_result.
+ void setSBTDirOverhead(double overhead) { sbt_dir_overhead = overhead; }
+ ~CacheArray();
+
+ private:
+ double computeSBTDynEnergy(statsDef *sbt_stats_ptr); // Defined inline below the class.
+};
+
+// Dynamic energy spent on static-bank-tag (SBT) directory accesses for the
+// access counts in *sbt_stats_p. Returns 0 when no SBT overhead is set.
+//
+// Per-access cost model:
+//   read hit   : tag read + sbt_dir_overhead share of a data-array read
+//   read miss  : tag read
+//   write hit  : tag read + sbt_dir_overhead share of a data-array write
+//   write miss : tag read + a replacement write of the whole cache block
+//
+// Declared plain `inline`: a storage-class specifier such as `extern` is
+// not permitted on a class member.
+inline
+double CacheArray::computeSBTDynEnergy(statsDef *sbt_stats_p) {
+    if (sbt_dir_overhead == 0) {
+        return 0;
+    }
+
+    const double tag_read = local_result.tag_array2->power.readOp.dynamic;
+    const double data_read = local_result.data_array2->power.readOp.dynamic;
+    const double data_write = local_result.data_array2->power.writeOp.dynamic;
+    const double block_write = local_result.power.writeOp.dynamic;
+
+    // A write miss on the dynamic home node generates a replacement write of
+    // the whole cache block. That cost scales with the number of write
+    // misses only, so it is a separate term and must not be multiplied by
+    // the write-hit count.
+    double dynamic =
+        sbt_stats_p->readAc.hit * (data_read * sbt_dir_overhead + tag_read) +
+        sbt_stats_p->readAc.miss * tag_read +
+        sbt_stats_p->writeAc.miss * tag_read +
+        sbt_stats_p->writeAc.hit * (data_write * sbt_dir_overhead + tag_read) +
+        sbt_stats_p->writeAc.miss * block_write;
+    return dynamic;
+}
+
+#endif /* CACHEARRAY_H_ */
diff --git a/ext/mcpat/cachecontroller.cc b/ext/mcpat/cachecontroller.cc
new file mode 100644
index 000000000..6b505aac3
--- /dev/null
+++ b/ext/mcpat/cachecontroller.cc
@@ -0,0 +1,42 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Joel Hestness
+ *
+ ***************************************************************************/
+
+#include "cachecontroller.h"
+
+// Builds the component through the McPATComponent constructor, clocks it
+// at target_core_clockrate, labels it "Cache Controller" and then calls
+// McPATComponent::recursiveInstantiate().
+CacheController::CacheController(XMLNode* _xml_data,
+                                 InputParameter* _interface_ip)
+        : McPATComponent(_xml_data, _interface_ip) {
+    // Both fields are set before the recursive instantiation runs.
+    clockRate = target_core_clockrate;
+    name = "Cache Controller";
+
+    McPATComponent::recursiveInstantiate();
+}
diff --git a/ext/mcpat/globalvar.h b/ext/mcpat/cachecontroller.h
index 953257653..26eccb6de 100644
--- a/ext/mcpat/globalvar.h
+++ b/ext/mcpat/cachecontroller.h
@@ -1,7 +1,7 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,24 +25,21 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Joel Hestness
*
***************************************************************************/
+#ifndef CACHECONTROLLER_H_
+#define CACHECONTROLLER_H_
-#ifndef GLOBALVAR_H_
-#define GLOBALVAR_H_
-
-#ifdef GLOBALVAR
-#define EXTERN
-#else
-#define EXTERN extern
-#endif
-
-EXTERN bool opt_for_clk;
-
-#endif /* GLOBALVAR_H_ */
-
-
+#include "basic_components.h"
+// McPAT component for a cache controller. It declares no data members of
+// its own; everything it holds comes from McPATComponent.
+class CacheController : public McPATComponent {
+public:
+    // Implemented in cachecontroller.cc.
+    CacheController(XMLNode* _xml_data, InputParameter* _interface_ip);
+
+    // Defined inline because cachecontroller.cc only defines the
+    // constructor; a declaration without a body would leave any code that
+    // destroys a CacheController with an unresolved symbol at link time.
+    ~CacheController() {}
+};
+#endif /* CACHECONTROLLER_H_ */
diff --git a/ext/mcpat/cacheunit.cc b/ext/mcpat/cacheunit.cc
new file mode 100644
index 000000000..3b9e84749
--- /dev/null
+++ b/ext/mcpat/cacheunit.cc
@@ -0,0 +1,647 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Joel Hestness
+ * Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <iostream>
+
+#include "arbiter.h"
+#include "array.h"
+#include "basic_circuit.h"
+#include "cachearray.h"
+#include "cacheunit.h"
+#include "common.h"
+#include "const.h"
+#include "io.h"
+#include "logic.h"
+#include "parameter.h"
+
+// Class-wide defaults that the CacheUnit constructor copies into the CACTI
+// input of the arrays it builds.
+
+// Copied into interface_ip.is_cache.
+bool CacheUnit::is_cache(true);
+// Copied into interface_ip.pure_cam.
+bool CacheUnit::pure_cam(false);
+// Passed to each CacheArray constructor as opt_local_.
+bool CacheUnit::opt_local(true);
+// Copied into interface_ip.force_cache_config.
+bool CacheUnit::force_cache_config(false);
+
+CacheUnit::CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip)
+ : dir_overhead(0), McPATComponent(_xml_data, _interface_ip) {
+
+ int tag;
+ int data;
+
+ name = "Cache Unit";
+ CacheArray* arrayPtr = NULL;
+
+ set_cache_param_from_xml_data();
+
+ //All lower level cache are physically indexed and tagged.
+ double size;
+ double line;
+ double assoc;
+ double banks;
+ size = cache_params.capacity;
+ line = cache_params.blockW;
+ assoc = cache_params.assoc;
+ banks = cache_params.nbanks;
+ if ((cache_params.dir_ty == ST &&
+ cache_params.cache_level == L1Directory) ||
+ (cache_params.dir_ty == ST &&
+ cache_params.cache_level == L2Directory)) {
+ tag = physical_address_width + EXTRA_TAG_BITS;
+ } else {
+ tag = physical_address_width - int(ceil(log2(size / line / assoc))) -
+ int(ceil(log2(line))) + EXTRA_TAG_BITS;
+
+ if (cache_params.dir_ty == SBT) {
+ dir_overhead = ceil(cache_params.num_cores / BITS_PER_BYTE) *
+ BITS_PER_BYTE / (line * BITS_PER_BYTE);
+ line *= (1 + dir_overhead);
+ size *= (1 + dir_overhead);
+ }
+ }
+
+ interface_ip.cache_sz = (int)size;
+ interface_ip.line_sz = (int)line;
+ interface_ip.assoc = (int)assoc;
+ interface_ip.nbanks = (int)banks;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+
+ if (cache_params.cache_level == L1) {
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ } else {
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
+ }
+
+ interface_ip.access_mode = cache_params.cache_access_mode;
+ interface_ip.throughput= cache_params.throughput;
+ interface_ip.latency = cache_params.latency;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.is_cache = is_cache;
+ interface_ip.pure_ram = cache_params.pure_ram;
+ interface_ip.pure_cam = pure_cam;
+ interface_ip.num_rw_ports = cache_params.cache_rw_ports;
+ interface_ip.num_rd_ports = cache_params.cache_rd_ports;
+ interface_ip.num_wr_ports = cache_params.cache_wr_ports;
+ interface_ip.num_se_rd_ports = cache_params.cache_se_rd_ports;
+ interface_ip.num_search_ports = cache_params.cache_search_ports;
+
+ arrayPtr = new CacheArray(xml_data, &interface_ip, "Data and Tag Arrays",
+ cache_params.device_ty, clockRate, opt_local,
+ cache_params.core_ty);
+ children.push_back(arrayPtr);
+
+ // This is for calculating TDP, which depends on the number of
+ // available ports
+ int num_tdp_ports = arrayPtr->l_ip.num_rw_ports +
+ arrayPtr->l_ip.num_rd_ports + arrayPtr->l_ip.num_wr_ports;
+
+ // Set new array stats for calculating TDP and runtime power
+ arrayPtr->tdp_stats.reset();
+ arrayPtr->tdp_stats.readAc.access = cache_stats.tdp_read_access_scalar *
+ num_tdp_ports * cache_stats.duty_cycle *
+ cache_stats.homenode_access_scalar;
+ arrayPtr->tdp_stats.readAc.miss = 0;
+ arrayPtr->tdp_stats.readAc.hit = arrayPtr->tdp_stats.readAc.access -
+ arrayPtr->tdp_stats.readAc.miss;
+ arrayPtr->tdp_stats.writeAc.access = cache_stats.tdp_write_access_scalar *
+ num_tdp_ports * cache_stats.duty_cycle *
+ cache_stats.homenode_access_scalar;
+ arrayPtr->tdp_stats.writeAc.miss = 0;
+ arrayPtr->tdp_stats.writeAc.hit = arrayPtr->tdp_stats.writeAc.access -
+ arrayPtr->tdp_stats.writeAc.miss;
+ arrayPtr->tdp_stats.searchAc.access = 0;
+ arrayPtr->tdp_stats.searchAc.miss = 0;
+ arrayPtr->tdp_stats.searchAc.hit = 0;
+
+ arrayPtr->rtp_stats.reset();
+ if (cache_stats.use_detailed_stats) {
+ arrayPtr->rtp_stats.dataReadAc.access =
+ cache_stats.num_data_array_reads;
+ arrayPtr->rtp_stats.dataWriteAc.access =
+ cache_stats.num_data_array_writes;
+ arrayPtr->rtp_stats.tagReadAc.access =
+ cache_stats.num_tag_array_reads;
+ arrayPtr->rtp_stats.tagWriteAc.access =
+ cache_stats.num_tag_array_writes;
+ } else {
+ // This code makes assumptions. For instance, it assumes that
+ // tag and data arrays are accessed in parallel on a read request and
+ // this is a write-allocate cache. It also ignores any coherence
+ // requests. Using detailed stats as above can avoid the ambiguity
+ // that is introduced here
+ arrayPtr->rtp_stats.dataReadAc.access =
+ cache_stats.read_accesses + cache_stats.write_misses;
+ arrayPtr->rtp_stats.dataWriteAc.access =
+ cache_stats.write_accesses + cache_stats.read_misses;
+ arrayPtr->rtp_stats.tagReadAc.access =
+ cache_stats.read_accesses + cache_stats.write_accesses;
+ arrayPtr->rtp_stats.tagWriteAc.access =
+ cache_stats.read_misses + cache_stats.write_misses;
+ }
+
+ // Set SBT stats if this is an SBT directory type
+ if (dir_overhead > 0) {
+ arrayPtr->setSBTDirOverhead(dir_overhead);
+
+ // TDP stats
+ arrayPtr->sbt_tdp_stats.readAc.access =
+ cache_stats.tdp_read_access_scalar *
+ num_tdp_ports * cache_stats.dir_duty_cycle *
+ (1 - cache_stats.homenode_access_scalar);
+ arrayPtr->sbt_tdp_stats.readAc.miss = 0;
+ arrayPtr->sbt_tdp_stats.readAc.hit =
+ arrayPtr->sbt_tdp_stats.readAc.access -
+ arrayPtr->sbt_tdp_stats.readAc.miss;
+ arrayPtr->sbt_tdp_stats.writeAc.access =
+ cache_stats.tdp_sbt_write_access_scalar *
+ num_tdp_ports * cache_stats.dir_duty_cycle *
+ (1 - cache_stats.homenode_access_scalar);
+ arrayPtr->sbt_tdp_stats.writeAc.miss = 0;
+ arrayPtr->sbt_tdp_stats.writeAc.hit =
+ arrayPtr->sbt_tdp_stats.writeAc.access -
+ arrayPtr->sbt_tdp_stats.writeAc.miss;
+
+ // Runtime power stats
+ arrayPtr->sbt_rtp_stats.readAc.access =
+ cache_stats.homenode_read_accesses;
+ arrayPtr->sbt_rtp_stats.readAc.miss =
+ cache_stats.homenode_read_misses;
+ arrayPtr->sbt_rtp_stats.readAc.access =
+ cache_stats.homenode_read_accesses -
+ cache_stats.homenode_read_misses;
+ arrayPtr->sbt_rtp_stats.writeAc.access =
+ cache_stats.homenode_write_accesses;
+ arrayPtr->sbt_rtp_stats.writeAc.miss =
+ cache_stats.homenode_write_misses;
+ arrayPtr->sbt_rtp_stats.writeAc.hit =
+ cache_stats.homenode_write_accesses -
+ cache_stats.homenode_write_misses;
+ }
+
+ interface_ip.force_cache_config = force_cache_config;
+ if (!((cache_params.dir_ty == ST &&
+ cache_params.cache_level == L1Directory) ||
+ (cache_params.dir_ty == ST &&
+ cache_params.cache_level== L2Directory))) {
+ // Miss Buffer
+ tag = physical_address_width + EXTRA_TAG_BITS;
+ data = (physical_address_width) +
+ int(ceil(log2(size / cache_params.blockW))) +
+ (cache_params.blockW * BITS_PER_BYTE);
+ line = int(ceil(data / BITS_PER_BYTE));
+ size = cache_params.missb_size * line;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = cache_params.missb_assoc;
+ interface_ip.nbanks = cache_params.missb_banks;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+
+ if (cache_params.cache_level == L1) {
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ } else {
+ interface_ip.out_w = line * BITS_PER_BYTE / 2;
+ }
+
+ interface_ip.access_mode = cache_params.miss_buff_access_mode;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.is_cache = is_cache;
+ interface_ip.pure_ram = cache_params.pure_ram;
+ interface_ip.pure_cam = pure_cam;
+ interface_ip.throughput = cache_params.throughput;
+ interface_ip.latency = cache_params.latency;
+ interface_ip.num_rw_ports = cache_params.miss_buff_rw_ports;
+ interface_ip.num_rd_ports = cache_params.miss_buff_rd_ports;
+ interface_ip.num_wr_ports = cache_params.miss_buff_wr_ports;
+ interface_ip.num_se_rd_ports = cache_params.miss_buff_se_rd_ports;
+ interface_ip.num_search_ports = cache_params.miss_buff_search_ports;
+
+ arrayPtr = new CacheArray(xml_data, &interface_ip, "Miss Buffer",
+ cache_params.device_ty, clockRate, opt_local,
+ cache_params.core_ty);
+ children.push_back(arrayPtr);
+
+ arrayPtr->tdp_stats.reset();
+ arrayPtr->tdp_stats.readAc.access = 0;
+ arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
+ arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;
+
+ arrayPtr->rtp_stats.reset();
+ arrayPtr->rtp_stats.readAc.access =
+ cache_stats.read_misses + cache_stats.write_misses;
+ arrayPtr->rtp_stats.writeAc.access =
+ cache_stats.read_misses + cache_stats.write_misses;
+ arrayPtr->rtp_stats.searchAc.access = 0;
+
+ if (cache_params.dir_ty == SBT) {
+ arrayPtr->rtp_stats.readAc.access +=
+ cache_stats.homenode_write_misses;
+ arrayPtr->rtp_stats.writeAc.access +=
+ cache_stats.homenode_write_misses;
+ }
+
+ // Fill Buffer
+ tag = physical_address_width + EXTRA_TAG_BITS;
+ data = cache_params.blockW;
+
+ interface_ip.cache_sz = data * cache_params.fu_size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = cache_params.fu_assoc;
+ interface_ip.nbanks = cache_params.fu_banks;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+
+ if (cache_params.cache_level == L1) {
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ } else {
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
+ }
+
+ interface_ip.access_mode = cache_params.fetch_buff_access_mode;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.is_cache = is_cache;
+ interface_ip.pure_cam = pure_cam;
+ interface_ip.throughput = cache_params.throughput;
+ interface_ip.latency = cache_params.latency;
+ interface_ip.num_rw_ports = cache_params.fetch_buff_rw_ports;
+ interface_ip.num_rd_ports = cache_params.fetch_buff_rd_ports;
+ interface_ip.num_wr_ports = cache_params.fetch_buff_wr_ports;
+ interface_ip.num_se_rd_ports = cache_params.fetch_buff_se_rd_ports;
+ interface_ip.num_search_ports = cache_params.fetch_buff_search_ports;
+ arrayPtr = new CacheArray(xml_data, &interface_ip, "Fill Buffer",
+ cache_params.device_ty, clockRate, opt_local,
+ cache_params.core_ty);
+ children.push_back(arrayPtr);
+
+ arrayPtr->tdp_stats.reset();
+ arrayPtr->tdp_stats.readAc.access = 0;
+ arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
+ arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;
+
+ arrayPtr->rtp_stats.reset();
+ arrayPtr->rtp_stats.readAc.access =
+ cache_stats.read_misses + cache_stats.write_misses;
+ arrayPtr->rtp_stats.writeAc.access =
+ cache_stats.read_misses + cache_stats.write_misses;
+ arrayPtr->rtp_stats.searchAc.access = 0;
+
+ if (cache_params.dir_ty == SBT) {
+ arrayPtr->rtp_stats.readAc.access +=
+ cache_stats.homenode_write_misses;
+ arrayPtr->rtp_stats.writeAc.access +=
+ cache_stats.homenode_write_misses;
+ }
+
+ // Prefetch Buffer
+ tag = physical_address_width + EXTRA_TAG_BITS;
+ line = cache_params.blockW;
+
+ interface_ip.cache_sz = cache_params.prefetchb_size * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = cache_params.prefetchb_assoc;
+ interface_ip.nbanks = cache_params.prefetchb_banks;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+
+ if (cache_params.cache_level == L1) {
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ } else {
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
+ }
+
+ interface_ip.access_mode = cache_params.prefetch_buff_access_mode;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.is_cache = is_cache;
+ interface_ip.pure_ram = cache_params.pure_ram;
+ interface_ip.pure_cam = pure_cam;
+ interface_ip.throughput = cache_params.throughput;
+ interface_ip.latency = cache_params.latency;
+ interface_ip.num_rw_ports = cache_params.pf_buff_rw_ports;
+ interface_ip.num_rd_ports = cache_params.pf_buff_rd_ports;
+ interface_ip.num_wr_ports = cache_params.pf_buff_wr_ports;
+ interface_ip.num_se_rd_ports = cache_params.pf_buff_se_rd_ports;
+ interface_ip.num_search_ports = cache_params.pf_buff_search_ports;
+ arrayPtr = new CacheArray(xml_data, &interface_ip, "Prefetch Buffer",
+ cache_params.device_ty, clockRate, opt_local,
+ cache_params.core_ty);
+ children.push_back(arrayPtr);
+
+ arrayPtr->tdp_stats.reset();
+ arrayPtr->tdp_stats.readAc.access = 0;
+ arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
+ arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;
+
+ arrayPtr->rtp_stats.reset();
+ arrayPtr->rtp_stats.readAc.access = cache_stats.read_misses;
+ arrayPtr->rtp_stats.writeAc.access = cache_stats.read_misses;
+ arrayPtr->rtp_stats.searchAc.access = 0;
+
+ if (cache_params.dir_ty == SBT) {
+ arrayPtr->rtp_stats.readAc.access +=
+ cache_stats.homenode_write_misses;
+ arrayPtr->rtp_stats.writeAc.access +=
+ cache_stats.homenode_write_misses;
+ }
+
+ // Writeback Buffer
+ if (cache_params.wbb_size > 0) {
+ tag = physical_address_width + EXTRA_TAG_BITS;
+ line = cache_params.blockW;
+
+ interface_ip.cache_sz = cache_params.wbb_size * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = cache_params.wbb_assoc;
+ interface_ip.nbanks = cache_params.wbb_banks;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+
+ if (cache_params.cache_level == L1) {
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ } else {
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
+ }
+
+ interface_ip.access_mode = cache_params.writeback_buff_access_mode;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.is_cache = is_cache;
+ interface_ip.pure_ram = cache_params.pure_ram;
+ interface_ip.pure_cam = pure_cam;
+ interface_ip.throughput = cache_params.throughput;
+ interface_ip.latency = cache_params.latency;
+ interface_ip.num_rw_ports = cache_params.wb_buff_rw_ports;
+ interface_ip.num_rd_ports = cache_params.wb_buff_rd_ports;
+ interface_ip.num_wr_ports = cache_params.wb_buff_wr_ports;
+ interface_ip.num_se_rd_ports = cache_params.wb_buff_se_rd_ports;
+ interface_ip.num_search_ports = cache_params.wb_buff_search_ports;
+ arrayPtr = new CacheArray(xml_data, &interface_ip,
+ "Writeback Buffer",
+ cache_params.device_ty, clockRate,
+ opt_local, cache_params.core_ty);
+ children.push_back(arrayPtr);
+
+ arrayPtr->tdp_stats.reset();
+ arrayPtr->tdp_stats.readAc.access = 0;
+ arrayPtr->tdp_stats.writeAc.access =
+ arrayPtr->l_ip.num_search_ports;
+ arrayPtr->tdp_stats.searchAc.access =
+ arrayPtr->l_ip.num_search_ports;
+
+ arrayPtr->rtp_stats.reset();
+ arrayPtr->rtp_stats.readAc.access = cache_stats.write_misses;
+ arrayPtr->rtp_stats.writeAc.access = cache_stats.write_misses;
+ arrayPtr->rtp_stats.searchAc.access = 0;
+
+ if (cache_params.dir_ty == SBT) {
+ arrayPtr->rtp_stats.readAc.access +=
+ cache_stats.homenode_write_misses;
+ arrayPtr->rtp_stats.writeAc.access +=
+ cache_stats.homenode_write_misses;
+ }
+ }
+ }
+}
+
+// A cache unit adds no energy bookkeeping of its own: the whole computation
+// is delegated to the McPATComponent base-class implementation.
+void CacheUnit::computeEnergy() {
+    this->McPATComponent::computeEnergy();
+}
+
+// Populates cache_params, cache_stats, clockRate, name and the technology /
+// wire fields of interface_ip from the "param" and "stat" children of
+// xml_data.
+//
+// - cache_params and cache_stats are zeroed first, so any value not present
+//   in the XML ends up as 0.
+// - "level" is mandatory: a missing or unknown level is reported on stderr
+//   and terminates the program with exit(1).
+// - "tech_type" and "wire_mat_type" are optional: when the XML does not
+//   supply them, the corresponding interface_ip fields are left untouched.
+// - "clockrate" is given in MHz; when positive it replaces the default
+//   (target_core_clockrate).
+void CacheUnit::set_cache_param_from_xml_data() {
+    // -1 marks "not supplied by the XML". These locals used to be read
+    // uninitialized whenever the matching param was omitted.
+    int level = -1;
+
+    // Start from all-zero parameters and statistics (TODO: move this?)
+    memset(&cache_params, 0, sizeof(CacheParameters));
+    memset(&cache_stats, 0, sizeof(CacheStatistics));
+
+    // By default, use the core clock frequency. This can be changed by
+    // setting the clockrate param in the XML definition of the CacheUnit
+    clockRate = target_core_clockrate;
+    XMLCSTR comp_name = xml_data->getAttribute("name");
+    if (comp_name) {
+        name = comp_name;
+    }
+
+    int num_children = xml_data->nChildNode("param");
+    int i;
+    int tech_type = -1;
+    int mat_type = -1;
+    for (i = 0; i < num_children; i++) {
+        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+        XMLCSTR node_name = paramNode->getAttribute("name");
+        XMLCSTR value = paramNode->getAttribute("value");
+
+        if (!node_name) {
+            warnMissingParamName(paramNode->getAttribute("id"));
+            // A nameless param cannot match anything below; skip it rather
+            // than hand a null name to the matching macros. (Unreachable,
+            // and harmless, if the warning helper terminates the program.)
+            continue;
+        }
+
+        ASSIGN_INT_IF("level", level);
+        ASSIGN_FP_IF("size", cache_params.capacity);
+        ASSIGN_FP_IF("block_size", cache_params.blockW);
+        ASSIGN_FP_IF("assoc", cache_params.assoc);
+        ASSIGN_FP_IF("num_banks", cache_params.nbanks);
+        ASSIGN_FP_IF("latency", cache_params.latency);
+        ASSIGN_FP_IF("throughput", cache_params.throughput);
+        ASSIGN_INT_IF("miss_buffer_size", cache_params.missb_size);
+        ASSIGN_INT_IF("fetch_buffer_size", cache_params.fu_size);
+        ASSIGN_INT_IF("prefetch_buffer_size", cache_params.prefetchb_size);
+        ASSIGN_INT_IF("writeback_buffer_size", cache_params.wbb_size);
+        ASSIGN_INT_IF("miss_buffer_assoc", cache_params.missb_assoc);
+        ASSIGN_INT_IF("fetch_buffer_assoc", cache_params.fu_assoc);
+        ASSIGN_INT_IF("prefetch_buffer_assoc", cache_params.prefetchb_assoc);
+        ASSIGN_INT_IF("writeback_buffer_assoc", cache_params.wbb_assoc);
+        ASSIGN_INT_IF("miss_buffer_banks", cache_params.missb_banks);
+        ASSIGN_INT_IF("fetch_buffer_banks", cache_params.fu_banks);
+        ASSIGN_INT_IF("prefetch_buffer_banks", cache_params.prefetchb_banks);
+        ASSIGN_INT_IF("writeback_buffer_banks", cache_params.wbb_banks);
+        ASSIGN_ENUM_IF("cache_access_mode",
+                       cache_params.cache_access_mode, Access_mode);
+        ASSIGN_ENUM_IF("miss_buff_access_mode",
+                       cache_params.miss_buff_access_mode, Access_mode);
+        ASSIGN_ENUM_IF("fetch_buff_access_mode",
+                       cache_params.fetch_buff_access_mode, Access_mode);
+        ASSIGN_ENUM_IF("prefetch_buff_access_mode",
+                       cache_params.prefetch_buff_access_mode, Access_mode);
+        ASSIGN_ENUM_IF("writeback_buff_access_mode",
+                       cache_params.writeback_buff_access_mode, Access_mode);
+        ASSIGN_INT_IF("cache_rw_ports", cache_params.cache_rw_ports);
+        ASSIGN_INT_IF("cache_rd_ports", cache_params.cache_rd_ports);
+        ASSIGN_INT_IF("cache_wr_ports", cache_params.cache_wr_ports);
+        ASSIGN_INT_IF("cache_se_rd_ports", cache_params.cache_se_rd_ports);
+        ASSIGN_INT_IF("cache_search_ports", cache_params.cache_search_ports);
+        ASSIGN_INT_IF("miss_buff_rw_ports", cache_params.miss_buff_rw_ports);
+        ASSIGN_INT_IF("miss_buff_rd_ports", cache_params.miss_buff_rd_ports);
+        ASSIGN_INT_IF("miss_buff_wr_ports", cache_params.miss_buff_wr_ports);
+        ASSIGN_INT_IF("miss_buff_se_rd_ports",
+                      cache_params.miss_buff_se_rd_ports);
+        ASSIGN_INT_IF("miss_buff_search_ports",
+                      cache_params.miss_buff_search_ports);
+        ASSIGN_INT_IF("fetch_buff_rw_ports", cache_params.fetch_buff_rw_ports);
+        ASSIGN_INT_IF("fetch_buff_rd_ports", cache_params.fetch_buff_rd_ports);
+        ASSIGN_INT_IF("fetch_buff_wr_ports", cache_params.fetch_buff_wr_ports);
+        ASSIGN_INT_IF("fetch_buff_se_rd_ports",
+                      cache_params.fetch_buff_se_rd_ports);
+        ASSIGN_INT_IF("fetch_buff_search_ports",
+                      cache_params.fetch_buff_search_ports);
+        ASSIGN_INT_IF("pf_buff_rw_ports", cache_params.pf_buff_rw_ports);
+        ASSIGN_INT_IF("pf_buff_rd_ports", cache_params.pf_buff_rd_ports);
+        ASSIGN_INT_IF("pf_buff_wr_ports", cache_params.pf_buff_wr_ports);
+        ASSIGN_INT_IF("pf_buff_se_rd_ports", cache_params.pf_buff_se_rd_ports);
+        ASSIGN_INT_IF("pf_buff_search_ports",
+                      cache_params.pf_buff_search_ports);
+        ASSIGN_INT_IF("wb_buff_rw_ports", cache_params.wb_buff_rw_ports);
+        ASSIGN_INT_IF("wb_buff_rd_ports", cache_params.wb_buff_rd_ports);
+        ASSIGN_INT_IF("wb_buff_wr_ports", cache_params.wb_buff_wr_ports);
+        ASSIGN_INT_IF("wb_buff_se_rd_ports", cache_params.wb_buff_se_rd_ports);
+        ASSIGN_INT_IF("wb_buff_search_ports",
+                      cache_params.wb_buff_search_ports);
+        ASSIGN_FP_IF("clockrate", cache_params.clockRate);
+        ASSIGN_INT_IF("pure_ram", cache_params.pure_ram);
+        ASSIGN_INT_IF("tech_type", tech_type);
+        ASSIGN_ENUM_IF("Directory_type", cache_params.dir_ty, Dir_type);
+        ASSIGN_ENUM_IF("device_type", cache_params.device_ty, Device_ty);
+        ASSIGN_ENUM_IF("core_type", cache_params.core_ty, Core_type);
+        ASSIGN_INT_IF("num_cores", cache_params.num_cores);
+        ASSIGN_INT_IF("wire_mat_type", mat_type);
+        ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
+
+        else {
+            warnUnrecognizedParam(node_name);
+        }
+    }
+
+    // Change from MHz to Hz
+    cache_params.clockRate *= 1e6;
+    if (cache_params.clockRate > 0) {
+        clockRate = cache_params.clockRate;
+    }
+
+    // Apply the technology type to the data and tag arrays only when the XML
+    // actually provided one; otherwise keep what interface_ip already holds.
+    if (tech_type >= 0) {
+        interface_ip.data_arr_ram_cell_tech_type = tech_type;
+        interface_ip.data_arr_peri_global_tech_type = tech_type;
+        interface_ip.tag_arr_ram_cell_tech_type = tech_type;
+        interface_ip.tag_arr_peri_global_tech_type = tech_type;
+    }
+
+    // Same rule for the wire material type.
+    if (mat_type >= 0) {
+        interface_ip.wire_is_mat_type = mat_type;
+        interface_ip.wire_os_mat_type = mat_type;
+    }
+
+    // Map the numeric XML level onto the cache-level enumerators. A level
+    // that was never supplied is still -1 here and takes the error path.
+    switch (level) {
+    case 1:
+        cache_params.cache_level = L1;
+        break;
+    case 2:
+        cache_params.cache_level = L2;
+        break;
+    case 3:
+        cache_params.cache_level = L3;
+        break;
+    case 4:
+        cache_params.cache_level = L1Directory;
+        break;
+    case 5:
+        cache_params.cache_level = L2Directory;
+        break;
+
+    default:
+        fprintf(stderr, "ERROR: Unrecognized cache level in %s: %d\n",
+                name.c_str(), level);
+        exit(1);
+    }
+
+    cache_stats.use_detailed_stats = false;
+
+    num_children = xml_data->nChildNode("stat");
+    for (i = 0; i < num_children; i++) {
+        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+        XMLCSTR node_name = statNode->getAttribute("name");
+        XMLCSTR value = statNode->getAttribute("value");
+
+        if (!node_name) {
+            warnMissingStatName(statNode->getAttribute("id"));
+            // See the param loop: never match against a null name.
+            continue;
+        }
+
+        ASSIGN_FP_IF("num_data_array_reads", cache_stats.num_data_array_reads);
+        ASSIGN_FP_IF("num_data_array_writes",
+                     cache_stats.num_data_array_writes);
+        ASSIGN_FP_IF("num_tag_array_reads", cache_stats.num_tag_array_reads);
+        ASSIGN_FP_IF("num_tag_array_writes", cache_stats.num_tag_array_writes);
+        ASSIGN_FP_IF("duty_cycle", cache_stats.duty_cycle);
+        ASSIGN_FP_IF("read_accesses", cache_stats.read_accesses);
+        ASSIGN_FP_IF("write_accesses", cache_stats.write_accesses);
+        ASSIGN_FP_IF("read_misses", cache_stats.read_misses);
+        ASSIGN_FP_IF("write_misses", cache_stats.write_misses);
+        ASSIGN_FP_IF("conflicts", cache_stats.conflicts);
+        ASSIGN_INT_IF("homenode_read_accesses",
+                      cache_stats.homenode_read_accesses);
+        ASSIGN_INT_IF("homenode_write_accesses",
+                      cache_stats.homenode_write_accesses);
+        ASSIGN_INT_IF("homenode_read_misses",
+                      cache_stats.homenode_read_misses);
+        ASSIGN_INT_IF("homenode_write_misses",
+                      cache_stats.homenode_write_misses);
+        ASSIGN_FP_IF("homenode_access_scalar",
+                     cache_stats.homenode_access_scalar);
+        ASSIGN_FP_IF("tdp_read_access_scalar",
+                     cache_stats.tdp_read_access_scalar);
+        ASSIGN_FP_IF("tdp_write_access_scalar",
+                     cache_stats.tdp_write_access_scalar);
+        ASSIGN_FP_IF("tdp_sbt_write_access_scalar",
+                     cache_stats.tdp_sbt_write_access_scalar);
+        ASSIGN_FP_IF("dir_duty_cycle",
+                     cache_stats.dir_duty_cycle);
+
+        else {
+            warnUnrecognizedStat(node_name);
+        }
+    }
+
+    // Any non-zero per-array counter switches the unit to the detailed
+    // (data/tag array) activity statistics.
+    if (cache_stats.num_data_array_reads > 0 ||
+        cache_stats.num_data_array_writes > 0 ||
+        cache_stats.num_tag_array_reads > 0 ||
+        cache_stats.num_tag_array_writes > 0) {
+        cache_stats.use_detailed_stats = true;
+        calculate_runtime_data_and_tag = true;
+    }
+}
diff --git a/ext/mcpat/cacheunit.h b/ext/mcpat/cacheunit.h
new file mode 100644
index 000000000..e4429e74b
--- /dev/null
+++ b/ext/mcpat/cacheunit.h
@@ -0,0 +1,167 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Joel Hestness
+ * Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#ifndef CACHEUNIT_H_
+#define CACHEUNIT_H_
+
+#include "area.h"
+#include "array.h"
+#include "basic_components.h"
+#include "logic.h"
+#include "parameter.h"
+
+class CacheParameters { // Configuration of one cache unit, filled from XML "param" nodes by CacheUnit::set_cache_param_from_xml_data()
+public: // the whole object is zeroed with memset() before the XML is parsed
+ enum Dir_type dir_ty; // XML "Directory_type"; when SBT, homenode write misses are added to the buffers' runtime access counts
+ double clockRate; // XML "clockrate": supplied in MHz, converted to Hz; replaces the core clock rate when > 0
+ double capacity; // XML "size"
+ double blockW; // XML "block_size"; used as the line size of the fill, prefetch and writeback buffers
+ double assoc; // XML "assoc"
+ double nbanks; // XML "num_banks"
+ double throughput; // XML "throughput"; forwarded to interface_ip for each buffer array
+ double latency; // XML "latency"; forwarded to interface_ip for each buffer array
+ int missb_size; // XML "miss_buffer_size"
+ int fu_size; // XML "fetch_buffer_size"; the "Fill Buffer" array is sized as blockW * fu_size
+ int prefetchb_size; // XML "prefetch_buffer_size"; the "Prefetch Buffer" array is sized as prefetchb_size * blockW
+ int wbb_size; // XML "writeback_buffer_size"; the "Writeback Buffer" array is only built when this is > 0
+ int missb_assoc; // XML "miss_buffer_assoc"
+ int fu_assoc; // XML "fetch_buffer_assoc"
+ int prefetchb_assoc; // XML "prefetch_buffer_assoc"
+ int wbb_assoc; // XML "writeback_buffer_assoc"
+ int missb_banks; // XML "miss_buffer_banks"
+ int fu_banks; // XML "fetch_buffer_banks"
+ int prefetchb_banks; // XML "prefetch_buffer_banks"
+ int wbb_banks; // XML "writeback_buffer_banks"
+ enum Access_mode cache_access_mode; // the five *_access_mode members are read from XML params of the same names
+ enum Access_mode miss_buff_access_mode;
+ enum Access_mode fetch_buff_access_mode; // used as the access mode of the "Fill Buffer" array
+ enum Access_mode prefetch_buff_access_mode;
+ enum Access_mode writeback_buff_access_mode;
+ int cache_rw_ports; // all *_ports members below are read from XML params of the same names
+ int cache_rd_ports;
+ int cache_wr_ports;
+ int cache_se_rd_ports;
+ int cache_search_ports;
+ int miss_buff_rw_ports;
+ int miss_buff_rd_ports;
+ int miss_buff_wr_ports;
+ int miss_buff_se_rd_ports;
+ int miss_buff_search_ports;
+ int fetch_buff_rw_ports; // fetch_buff_* ports configure the "Fill Buffer" array
+ int fetch_buff_rd_ports;
+ int fetch_buff_wr_ports;
+ int fetch_buff_se_rd_ports;
+ int fetch_buff_search_ports;
+ int pf_buff_rw_ports; // pf_buff_* ports configure the "Prefetch Buffer" array
+ int pf_buff_rd_ports;
+ int pf_buff_wr_ports;
+ int pf_buff_se_rd_ports;
+ int pf_buff_search_ports;
+ int wb_buff_rw_ports; // wb_buff_* ports configure the "Writeback Buffer" array
+ int wb_buff_rd_ports;
+ int wb_buff_wr_ports;
+ int wb_buff_se_rd_ports;
+ int wb_buff_search_ports;
+ bool pure_ram; // XML "pure_ram" (parsed with the integer macro)
+ enum CacheLevel cache_level; // derived from XML "level": 1=L1, 2=L2, 3=L3, 4=L1Directory, 5=L2Directory
+ enum Device_ty device_ty; // XML "device_type"; passed to the CacheArray constructor
+ enum Core_type core_ty; // XML "core_type"; passed to the CacheArray constructor
+ int num_cores; // XML "num_cores"
+};
+
+class CacheStatistics { // Activity statistics of one cache unit, filled from XML "stat" nodes by CacheUnit::set_cache_param_from_xml_data()
+public: // the whole object is zeroed with memset() before the XML is parsed
+ // Duty cycle is used for estimating TDP. It should reflect the highest
+ // sustainable rate of access to the cache unit in execution of a benchmark.
+ // Default should be 1.0: one access per cycle.
+ double duty_cycle; // NOTE(review): zeroed by memset before parsing, so 1.0 must come from the XML or later code - confirm
+ // This duty cycle is only used for SBT directory types
+ double dir_duty_cycle;
+ // The following two stats are also used for estimating TDP.
+ double tdp_read_access_scalar;
+ double tdp_write_access_scalar;
+ // There are 2 ways to calculate dynamic power from activity statistics;
+ // use_detailed_stats selects between them and defaults to false.
+ bool use_detailed_stats; // set to true by the parser when any num_*_array_* counter below is > 0
+ // 1) Count the number and type of accesses to each cache array,
+ // splitting data and tag arrays (use_detailed_stats = true).
+ // These are extremely detailed statistics.
+ // read_misses and write_misses are still required for this method for
+ // various buffers associated with this cache.
+ double num_data_array_reads;
+ double num_data_array_writes;
+ double num_tag_array_reads;
+ double num_tag_array_writes;
+ // 2) Count the number and type of accesses to the cache unit and
+ // use them to extrapolate the number of accesses to the other
+ // subcomponents (cache arrays and buffers)
+ double read_accesses;
+ double write_accesses;
+ double read_misses; // runtime accesses: fill buffer uses read_misses + write_misses, prefetch buffer uses read_misses
+ double write_misses; // runtime accesses of the writeback buffer
+ double conflicts;
+ // The following is only used for SBT directory types
+ int homenode_read_accesses; // the four homenode_* counters are parsed as integers, unlike the counters above
+ int homenode_write_accesses;
+ int homenode_read_misses;
+ int homenode_write_misses; // added to the buffers' runtime read/write accesses when dir_ty == SBT
+ double homenode_access_scalar;
+ double tdp_sbt_write_access_scalar;
+};
+
+class CacheUnit : public McPATComponent { // McPAT component for one cache; cacheunit.cc adds CacheArray children (fill, prefetch, writeback buffers) to it
+public:
+ static bool is_cache; // copied into interface_ip.is_cache when the buffer arrays are configured
+ static bool pure_cam; // copied into interface_ip.pure_cam when the buffer arrays are configured
+ // Passed to the CacheArray constructor when this unit creates its arrays
+ static bool opt_local;
+ static bool force_cache_config;
+
+ int ithCache;
+ CacheParameters cache_params; // filled by set_cache_param_from_xml_data()
+ CacheStatistics cache_stats; // filled by set_cache_param_from_xml_data()
+ Cache_type cacheType;
+ bool calculate_runtime_data_and_tag; // set to true together with cache_stats.use_detailed_stats
+ double dir_overhead;
+
+ double scktRatio;
+
+ // TODO: REMOVE _interface_ip... It promotes a mess. Find a better way...
+ CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip);
+ void set_cache_param_from_xml_data(); // parses the "param" and "stat" children of the XML node
+ void computeEnergy(); // forwards to McPATComponent::computeEnergy()
+ ~CacheUnit() {};
+};
+
+#endif /* CACHEUNIT_H_ */
diff --git a/ext/mcpat/cacti/Ucache.cc b/ext/mcpat/cacti/Ucache.cc
index f3e1227df..ada9c5aa1 100644
--- a/ext/mcpat/cacti/Ucache.cc
+++ b/ext/mcpat/cacti/Ucache.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -54,176 +55,163 @@ using namespace std;
const uint32_t nthreads = NTHREADS;
-void min_values_t::update_min_values(const min_values_t * val)
-{
- min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
- min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
- min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
- min_area = (min_area > val->min_area) ? val->min_area : min_area;
- min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
+// Merges another running-minimum record into this one: every field that is
+// larger than its counterpart in *val is lowered to that counterpart.
+void min_values_t::update_min_values(const min_values_t * val) {
+    if (min_delay > val->min_delay) {
+        min_delay = val->min_delay;
+    }
+    if (min_dyn > val->min_dyn) {
+        min_dyn = val->min_dyn;
+    }
+    if (min_leakage > val->min_leakage) {
+        min_leakage = val->min_leakage;
+    }
+    if (min_area > val->min_area) {
+        min_area = val->min_area;
+    }
+    if (min_cyc > val->min_cyc) {
+        min_cyc = val->min_cyc;
+    }
}
-void min_values_t::update_min_values(const uca_org_t & res)
-{
- min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
- min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
- min_area = (min_area > res.area) ? res.area : min_area;
- min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
+// Lowers the tracked minima to the figures of a uca_org_t result wherever
+// that result is smaller: access time, read dynamic power, read leakage,
+// area and cycle time.
+void min_values_t::update_min_values(const uca_org_t & res) {
+    if (min_delay > res.access_time) {
+        min_delay = res.access_time;
+    }
+    if (min_dyn > res.power.readOp.dynamic) {
+        min_dyn = res.power.readOp.dynamic;
+    }
+    if (min_leakage > res.power.readOp.leakage) {
+        min_leakage = res.power.readOp.leakage;
+    }
+    if (min_area > res.area) {
+        min_area = res.area;
+    }
+    if (min_cyc > res.cycle_time) {
+        min_cyc = res.cycle_time;
+    }
}
-void min_values_t::update_min_values(const nuca_org_t * res)
-{
- min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
- min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
- min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
- min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
+// Lowers the tracked minima to the figures stored in res->nuca_pda wherever
+// those are smaller: delay, read dynamic power, read leakage, area (via
+// get_area()) and cycle time.
+void min_values_t::update_min_values(const nuca_org_t * res) {
+    if (min_delay > res->nuca_pda.delay) {
+        min_delay = res->nuca_pda.delay;
+    }
+    if (min_dyn > res->nuca_pda.power.readOp.dynamic) {
+        min_dyn = res->nuca_pda.power.readOp.dynamic;
+    }
+    if (min_leakage > res->nuca_pda.power.readOp.leakage) {
+        min_leakage = res->nuca_pda.power.readOp.leakage;
+    }
+    if (min_area > res->nuca_pda.area.get_area()) {
+        min_area = res->nuca_pda.area.get_area();
+    }
+    if (min_cyc > res->nuca_pda.cycle_time) {
+        min_cyc = res->nuca_pda.cycle_time;
+    }
}
-void min_values_t::update_min_values(const mem_array * res)
-{
- min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
- min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
- min_area = (min_area > res->area) ? res->area : min_area;
- min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
+// Lowers the tracked minima to the figures of a mem_array result wherever
+// that result is smaller: access time, read dynamic power, read leakage,
+// area and cycle time.
+void min_values_t::update_min_values(const mem_array * res) {
+    if (min_delay > res->access_time) {
+        min_delay = res->access_time;
+    }
+    if (min_dyn > res->power.readOp.dynamic) {
+        min_dyn = res->power.readOp.dynamic;
+    }
+    if (min_leakage > res->power.readOp.leakage) {
+        min_leakage = res->power.readOp.leakage;
+    }
+    if (min_area > res->area) {
+        min_area = res->area;
+    }
+    if (min_cyc > res->cycle_time) {
+        min_cyc = res->cycle_time;
+    }
}
-void * calc_time_mt_wrapper(void * void_obj)
-{
- calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
- uint32_t tid = calc_obj->tid;
- list<mem_array *> & data_arr = calc_obj->data_arr;
- list<mem_array *> & tag_arr = calc_obj->tag_arr;
- bool is_tag = calc_obj->is_tag;
- bool pure_ram = calc_obj->pure_ram;
- bool pure_cam = calc_obj->pure_cam;
- bool is_main_mem = calc_obj->is_main_mem;
- double Nspd_min = calc_obj->Nspd_min;
- min_values_t * data_res = calc_obj->data_res;
- min_values_t * tag_res = calc_obj->tag_res;
-
- data_arr.clear();
- data_arr.push_back(new mem_array);
- tag_arr.clear();
- tag_arr.push_back(new mem_array);
-
- uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
- uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
- uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
- uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
-
-
- bool is_valid_partition;
- int wt_min, wt_max;
-
- if (g_ip->force_wiretype) {
- if (g_ip->wt == 0) {
- wt_min = Low_swing;
- wt_max = Low_swing;
- }
- else {
- wt_min = Global;
- wt_max = Low_swing-1;
- }
- }
- else {
- wt_min = Global;
- wt_max = Low_swing;
- }
+void * calc_time_mt_wrapper(void * void_obj) { // thread entry point: void_obj is a calc_time_mt_wrapper_struct describing this thread's share of the search
+ calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
+ uint32_t tid = calc_obj->tid;
+ list<mem_array *> & data_arr = calc_obj->data_arr;
+ list<mem_array *> & tag_arr = calc_obj->tag_arr;
+ bool is_tag = calc_obj->is_tag;
+ bool pure_ram = calc_obj->pure_ram;
+ bool pure_cam = calc_obj->pure_cam;
+ bool is_main_mem = calc_obj->is_main_mem;
+ double Nspd_min = calc_obj->Nspd_min;
+ min_values_t * data_res = calc_obj->data_res;
+ min_values_t * tag_res = calc_obj->tag_res;
- for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
- {
- for (int wr = wt_min; wr <= wt_max; wr++)
- {
- for (uint32_t iter = tid; iter < niter; iter += nthreads)
- {
- // reconstruct Ndwl, Ndbl, Ndcm
- unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
- unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter);
- unsigned int Ndcm = 1 << (iter % Ndcm_niter);
- for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
- {
- for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
- {
- //for debuging
- if (g_ip->force_cache_config && is_tag == false)
- {
- wr = g_ip->wt;
- Ndwl = g_ip->ndwl;
- Ndbl = g_ip->ndbl;
- Ndcm = g_ip->ndcm;
- if(g_ip->nspd != 0) {
- Nspd = g_ip->nspd;
- }
- if(g_ip->ndsam1 != 0) {
- Ndsam_lev_1 = g_ip->ndsam1;
- Ndsam_lev_2 = g_ip->ndsam2;
- }
- }
-
- if (is_tag == true)
- {
- is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
- Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
- tag_arr.back(), 0, NULL, NULL,
- is_main_mem);
- }
- // If it's a fully-associative cache, the data array partition parameters are identical to that of
- // the tag array, so compute data array partition properties also here.
- if (is_tag == false || g_ip->fully_assoc)
- {
- is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
- Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
- data_arr.back(), 0, NULL, NULL,
- is_main_mem);
- }
-
- if (is_valid_partition)
- {
- if (is_tag == true)
- {
- tag_arr.back()->wt = (enum Wire_type) wr;
- tag_res->update_min_values(tag_arr.back());
- tag_arr.push_back(new mem_array);
- }
- if (is_tag == false || g_ip->fully_assoc)
- {
- data_arr.back()->wt = (enum Wire_type) wr;
- data_res->update_min_values(data_arr.back());
- data_arr.push_back(new mem_array);
- }
- }
+ data_arr.clear();
+ data_arr.push_back(new mem_array); // scratch slot: calculate_time() fills back(); a fresh slot is appended after every valid partition
+ tag_arr.clear();
+ tag_arr.push_back(new mem_array);
+
+ uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
+ uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; // size of the flattened (Ndwl, Ndbl, Ndcm) search space
+
+
+ bool is_valid_partition;
+ int wt_min, wt_max;
+
+ if (g_ip->force_wiretype) { // narrow the range of wire types searched when one is forced
+ if (g_ip->wt == 0) {
+ wt_min = Low_swing;
+ wt_max = Low_swing;
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing - 1;
+ }
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing;
+ }
- if (g_ip->force_cache_config && is_tag == false)
- {
- wr = wt_max;
- iter = niter;
- if(g_ip->nspd != 0) {
- Nspd = MAXDATASPD;
- }
- if (g_ip->ndsam1 != 0) {
- Ndsam_lev_1 = MAX_COL_MUX+1;
- Ndsam_lev_2 = MAX_COL_MUX+1;
+ for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) {
+ for (int wr = wt_min; wr <= wt_max; wr++) {
+ for (uint32_t iter = tid; iter < niter; iter += nthreads) { // this thread takes every nthreads-th point, starting at its tid
+ // reconstruct Ndwl, Ndbl, Ndcm
+ unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
+ unsigned int Ndbl = 1 << ((iter / (Ndcm_niter)) % Ndbl_niter);
+ unsigned int Ndcm = 1 << (iter % Ndcm_niter);
+ for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX;
+ Ndsam_lev_1 *= 2) {
+ for (unsigned int Ndsam_lev_2 = 1;
+ Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) {
+ // For debugging: when a cache configuration is forced, overwrite the data-array search variables with it
+ if (g_ip->force_cache_config && is_tag == false) {
+ wr = g_ip->wt;
+ Ndwl = g_ip->ndwl;
+ Ndbl = g_ip->ndbl;
+ Ndcm = g_ip->ndcm;
+ if (g_ip->nspd != 0) {
+ Nspd = g_ip->nspd;
+ }
+ if (g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = g_ip->ndsam1;
+ Ndsam_lev_2 = g_ip->ndsam2;
+ }
+ }
+
+ if (is_tag == true) {
+ is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ tag_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+ // If it's a fully-associative cache, the data array partition parameters are identical to that of
+ // the tag array, so compute data array partition properties also here.
+ if (is_tag == false || g_ip->fully_assoc) {
+ is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ data_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+
+ if (is_valid_partition) { // keep the filled slot, fold it into the running minima, and append a new scratch slot
+ if (is_tag == true) {
+ tag_arr.back()->wt = (enum Wire_type) wr;
+ tag_res->update_min_values(tag_arr.back());
+ tag_arr.push_back(new mem_array);
+ }
+ if (is_tag == false || g_ip->fully_assoc) {
+ data_arr.back()->wt = (enum Wire_type) wr;
+ data_res->update_min_values(data_arr.back());
+ data_arr.push_back(new mem_array);
+ }
+ }
+
+ if (g_ip->force_cache_config && is_tag == false) { // forced config: push the loop variables to their limits so the enclosing loops stop
+ wr = wt_max;
+ iter = niter;
+ if (g_ip->nspd != 0) {
+ Nspd = MAXDATASPD;
+ }
+ if (g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = MAX_COL_MUX + 1;
+ Ndsam_lev_2 = MAX_COL_MUX + 1;
+ }
+ }
+ }
+ }
+ }
- }
+ }
- }
+ }
- }
- delete data_arr.back();
- delete tag_arr.back();
- data_arr.pop_back();
- tag_arr.pop_back();
+ delete data_arr.back(); // the last slot of each list is scratch that never holds an accepted result; free and drop it
+ delete tag_arr.back();
+ data_arr.pop_back();
+ tag_arr.pop_back();
- pthread_exit(NULL);
+#ifndef DEBUG
+ pthread_exit(NULL);
+#else
+ return NULL; // when DEBUG is defined the function returns normally instead of calling pthread_exit()
+#endif
}
@@ -242,423 +230,448 @@ bool calculate_time(
int flag_results_populate,
results_mem_array *ptr_results,
uca_org_t *ptr_fin_res,
- bool is_main_mem)
-{
- DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
+ bool is_main_mem) {
+ DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
- if (dyn_p.is_valid == false)
- {
- return false;
- }
+ if (dyn_p.is_valid == false) {
+ return false;
+ }
- UCA * uca = new UCA(dyn_p);
+ UCA * uca = new UCA(dyn_p);
- if (flag_results_populate)
- { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables
- }
- else
- {
- int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
- int num_mats = uca->bank.dp.num_mats;
- bool is_fa = uca->bank.dp.fully_assoc;
- bool pure_cam = uca->bank.dp.pure_cam;
+ //For the final solution, populate the ptr_results data structure
+ //-- TODO: copy only necessary variables
+ if (flag_results_populate) {
+ } else {
+ int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
+ int num_mats = uca->bank.dp.num_mats;
+ bool is_fa = uca->bank.dp.fully_assoc;
+ bool pure_cam = uca->bank.dp.pure_cam;
ptr_array->Ndwl = Ndwl;
- ptr_array->Ndbl = Ndbl;
- ptr_array->Nspd = Nspd;
- ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
- ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
- ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
- ptr_array->access_time = uca->access_time;
- ptr_array->cycle_time = uca->cycle_time;
- ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
- ptr_array->area_ram_cells = uca->area_all_dataramcells;
- ptr_array->area = uca->area.get_area();
- ptr_array->height = uca->area.h;
- ptr_array->width = uca->area.w;
- ptr_array->mat_height = uca->bank.mat.area.h;
- ptr_array->mat_length = uca->bank.mat.area.w;
- ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
- ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
- ptr_array->power = uca->power;
- ptr_array->delay_senseamp_mux_decoder =
- MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
- uca->delay_array_to_sa_mux_lev_2_decoder);
- ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver;
- ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;
-
- ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
- ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
- ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
- ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
- ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
- ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
- ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
- ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
- ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
- ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
-
- ptr_array->all_banks_height = uca->area.h;
- ptr_array->all_banks_width = uca->area.w;
- ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());
-
- ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
- ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
- ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
-// cout<<"power_data_input_htree"<<uca->bank.htree_in_data->power.readOp.leakage<<endl;
- ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
-// cout<<"power_data_output_htree"<<uca->bank.htree_out_data->power.readOp.leakage<<endl;
- ptr_array->power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
- ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
- ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
- ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
- ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power;
- ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
- ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
- ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
- ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bitlines = uca->bank.mat.power_bitline;
- ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_sense_amps = uca->bank.mat.power_sa;
- ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
- ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
- ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_comparators = uca->bank.mat.power_comparator;
- ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
-
-// cout << " num of mats: " << dyn_p.num_mats << endl;
- if (is_fa || pure_cam)
- {
- ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
-// cout<<"power_htree_in_search"<<uca->bank.htree_in_search->power.readOp.leakage<<endl;
- ptr_array->power_htree_out_search = uca->bank.htree_out_search->power;
-// cout<<"power_htree_out_search"<<uca->bank.htree_out_search->power.readOp.leakage<<endl;
- ptr_array->power_searchline = uca->bank.mat.power_searchline;
-// cout<<"power_searchlineh"<<uca->bank.mat.power_searchline.readOp.leakage<<endl;
- ptr_array->power_searchline.searchOp.dynamic *= num_mats;
- ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
- ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
- ptr_array->power_matchlines = uca->bank.mat.power_matchline;
- ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
- ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
- ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
- ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
-// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.power_matchline.searchOp.leakage<<endl;
- }
-
- ptr_array->activate_energy = uca->activate_energy;
- ptr_array->read_energy = uca->read_energy;
- ptr_array->write_energy = uca->write_energy;
- ptr_array->precharge_energy = uca->precharge_energy;
- ptr_array->refresh_power = uca->refresh_power;
- ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
- ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
- ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;
-
- ptr_array->precharge_delay = uca->precharge_delay;
-
-
-// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.<<endl;
-//
-// if (!(is_fa || pure_cam))
-// {
-// cout << " num of cols: " << dyn_p.num_c_subarray << endl;
-// }
-// else if (is_fa)
-// {
-// cout << " num of cols: " << dyn_p.tag_num_c_subarray+ dyn_p.data_num_c_subarray<< endl;
-// } else
-// cout << " num of cols: " << dyn_p.tag_num_c_subarray<< endl;
-// cout << uca->bank.mat.subarray.get_total_cell_area()<<endl;
- }
+ ptr_array->Ndbl = Ndbl;
+ ptr_array->Nspd = Nspd;
+ ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
+ ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
+ ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
+ ptr_array->access_time = uca->access_time;
+ ptr_array->cycle_time = uca->cycle_time;
+ ptr_array->multisubbank_interleave_cycle_time =
+ uca->multisubbank_interleave_cycle_time;
+ ptr_array->area_ram_cells = uca->area_all_dataramcells;
+ ptr_array->area = uca->area.get_area();
+ ptr_array->height = uca->area.h;
+ ptr_array->width = uca->area.w;
+ ptr_array->mat_height = uca->bank.mat.area.h;
+ ptr_array->mat_length = uca->bank.mat.area.w;
+ ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
+ ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
+ ptr_array->power = uca->power;
+ ptr_array->delay_senseamp_mux_decoder =
+ MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
+ uca->delay_array_to_sa_mux_lev_2_decoder);
+ ptr_array->delay_before_subarray_output_driver =
+ uca->delay_before_subarray_output_driver;
+ ptr_array->delay_from_subarray_output_driver_to_output =
+ uca->delay_from_subarray_out_drv_to_out;
+
+ ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
+ ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
+ ptr_array->delay_row_predecode_driver_and_block =
+ uca->bank.mat.r_predec->delay;
+ ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
+ ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
+ ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
+ ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
+ ptr_array->delay_subarray_output_driver =
+ uca->bank.mat.delay_subarray_out_drv_htree;
+ ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
+ ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
+
+ ptr_array->all_banks_height = uca->area.h;
+ ptr_array->all_banks_width = uca->area.w;
+ ptr_array->area_efficiency = uca->area_all_dataramcells * 100 /
+ (uca->area.get_area());
+
+ ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
+ ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
+ ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
+ ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
+
+ ptr_array->power_row_predecoder_drivers =
+ uca->bank.mat.r_predec->driver_power;
+ ptr_array->power_row_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_row_predecoder_blocks =
+ uca->bank.mat.r_predec->block_power;
+ ptr_array->power_row_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
+ ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_drivers =
+ uca->bank.mat.b_mux_predec->driver_power;
+ ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_blocks =
+ uca->bank.mat.b_mux_predec->block_power;
+ ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
+ ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers =
+ uca->bank.mat.sa_mux_lev_1_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks =
+ uca->bank.mat.sa_mux_lev_1_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_decoders =
+ uca->bank.mat.power_sa_mux_lev_1_decoders;
+ ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers =
+ uca->bank.mat.sa_mux_lev_2_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks =
+ uca->bank.mat.sa_mux_lev_2_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_decoders =
+ uca->bank.mat.power_sa_mux_lev_2_decoders;
+ ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_bitlines = uca->bank.mat.power_bitline;
+ ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_sense_amps = uca->bank.mat.power_sa;
+ ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_prechg_eq_drivers =
+ uca->bank.mat.power_bl_precharge_eq_drv;
+ ptr_array->power_prechg_eq_drivers.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_output_drivers_at_subarray =
+ uca->bank.mat.power_subarray_out_drv;
+ ptr_array->power_output_drivers_at_subarray.readOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *=
+ num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *=
+ num_act_mats_hor_dir;
+
+ ptr_array->power_comparators = uca->bank.mat.power_comparator;
+ ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ if (is_fa || pure_cam) {
+ ptr_array->power_htree_in_search =
+ uca->bank.htree_in_search->power;
+ ptr_array->power_htree_out_search =
+ uca->bank.htree_out_search->power;
+ ptr_array->power_searchline = uca->bank.mat.power_searchline;
+ ptr_array->power_searchline.searchOp.dynamic *= num_mats;
+ ptr_array->power_searchline_precharge =
+ uca->bank.mat.power_searchline_precharge;
+ ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchlines = uca->bank.mat.power_matchline;
+ ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_precharge =
+ uca->bank.mat.power_matchline_precharge;
+ ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_to_wordline_drv =
+ uca->bank.mat.power_ml_to_ram_wl_drv;
+ }
+
+ ptr_array->activate_energy = uca->activate_energy;
+ ptr_array->read_energy = uca->read_energy;
+ ptr_array->write_energy = uca->write_energy;
+ ptr_array->precharge_energy = uca->precharge_energy;
+ ptr_array->refresh_power = uca->refresh_power;
+ ptr_array->leak_power_subbank_closed_page =
+ uca->leak_power_subbank_closed_page;
+ ptr_array->leak_power_subbank_open_page =
+ uca->leak_power_subbank_open_page;
+ ptr_array->leak_power_request_and_reply_networks =
+ uca->leak_power_request_and_reply_networks;
+
+ ptr_array->precharge_delay = uca->precharge_delay;
+ }
- delete uca;
- return true;
+ delete uca;
+ return true;
}
-bool check_uca_org(uca_org_t & u, min_values_t *minval)
-{
- if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
- return false;
- }
- if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev) {
- return false;
- }
- if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev) {
- return false;
- }
- if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev) {
- return false;
- }
- if (((u.area - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev) {
- return false;
- }
- return true;
+bool check_uca_org(uca_org_t & u, min_values_t *minval) {
+ if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
+ g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage) /
+ minval->min_leakage) * 100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area) / minval->min_area)*100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
}
-bool check_mem_org(mem_array & u, const min_values_t *minval)
-{
- if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
- return false;
- }
- if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev) {
- return false;
- }
- if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev) {
- return false;
- }
- if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev) {
- return false;
- }
- if (((u.area - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev) {
- return false;
- }
- return true;
+bool check_mem_org(mem_array & u, const min_values_t *minval) {
+ if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
+ g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage) /
+ minval->min_leakage) * 100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area) / minval->min_area) * 100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
}
-void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & ulist)
-{
- double cost = 0;
- double min_cost = BIGNUM;
- float d, a, dp, lp, c;
-
- dp = g_ip->dynamic_power_wt;
- lp = g_ip->leakage_power_wt;
- a = g_ip->area_wt;
- d = g_ip->delay_wt;
- c = g_ip->cycle_time_wt;
+void find_optimal_uca(uca_org_t *res, min_values_t * minval,
+ list<uca_org_t> & ulist) {
+ double cost = 0;
+ double min_cost = BIGNUM;
+ float d, a, dp, lp, c;
- if (ulist.empty() == true)
- {
- cout << "ERROR: no valid cache organizations found" << endl;
- exit(0);
- }
+ dp = g_ip->dynamic_power_wt;
+ lp = g_ip->leakage_power_wt;
+ a = g_ip->area_wt;
+ d = g_ip->delay_wt;
+ c = g_ip->cycle_time_wt;
- for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
- {
- if (g_ip->ed == 1)
- {
- cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost)
- {
- min_cost = cost;
- *res = (*(niter));
- }
- }
- else if (g_ip->ed == 2)
- {
- cost = ((niter)->access_time/minval->min_delay)*
- ((niter)->access_time/minval->min_delay)*
- ((niter)->power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost)
- {
- min_cost = cost;
- *res = (*(niter));
- }
+ if (ulist.empty() == true) {
+ cout << "ERROR: no valid cache organizations found" << endl;
+ exit(0);
}
- else
- {
- /*
- * check whether the current organization
- * meets the input deviation constraints
- */
- bool v = check_uca_org(*niter, minval);
- //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
-
- if (v)
- {
- cost = (d * ((niter)->access_time/minval->min_delay) +
- c * ((niter)->cycle_time/minval->min_cyc) +
- dp * ((niter)->power.readOp.dynamic/minval->min_dyn) +
- lp * ((niter)->power.readOp.leakage/minval->min_leakage) +
- a * ((niter)->area/minval->min_area));
- //fprintf(stderr, "cost = %g\n", cost);
-
- if (min_cost > cost) {
- min_cost = cost;
- *res = (*(niter));
- niter = ulist.erase(niter);
- if (niter!=ulist.begin())
- niter--;
+
+ for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end();
+ niter++) {
+ if (g_ip->ed == 1) {
+ cost = ((niter)->access_time / minval->min_delay) *
+ ((niter)->power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ } else if (g_ip->ed == 2) {
+ cost = ((niter)->access_time / minval->min_delay) *
+ ((niter)->access_time / minval->min_delay) *
+ ((niter)->power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ } else {
+ /*
+ * check whether the current organization
+ * meets the input deviation constraints
+ */
+ bool v = check_uca_org(*niter, minval);
+
+ if (v) {
+ cost = (d * ((niter)->access_time / minval->min_delay) +
+ c * ((niter)->cycle_time / minval->min_cyc) +
+ dp * ((niter)->power.readOp.dynamic / minval->min_dyn) +
+ lp *
+ ((niter)->power.readOp.leakage / minval->min_leakage) +
+ a * ((niter)->area / minval->min_area));
+
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ niter = ulist.erase(niter);
+ if (niter != ulist.begin())
+ niter--;
+ }
+ } else {
+ niter = ulist.erase(niter);
+ if (niter != ulist.begin())
+ niter--;
+ }
}
- }
- else {
- niter = ulist.erase(niter);
- if (niter!=ulist.begin())
- niter--;
- }
}
- }
- if (min_cost == BIGNUM)
- {
- cout << "ERROR: no cache organizations met optimization criteria" << endl;
- exit(0);
- }
+ if (min_cost == BIGNUM) {
+ cout << "ERROR: no cache organizations met optimization criteria"
+ << endl;
+ exit(0);
+ }
}
-void filter_tag_arr(const min_values_t * min, list<mem_array *> & list)
-{
- double cost = BIGNUM;
- double cur_cost;
- double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt;
- mem_array * res = NULL;
+void filter_tag_arr(const min_values_t * min, list<mem_array *> & list) {
+ double cost = BIGNUM;
+ double cur_cost;
+ double wt_delay = g_ip->delay_wt;
+ double wt_dyn = g_ip->dynamic_power_wt;
+ double wt_leakage = g_ip->leakage_power_wt;
+ double wt_cyc = g_ip->cycle_time_wt;
+ double wt_area = g_ip->area_wt;
+ mem_array * res = NULL;
- if (list.empty() == true)
- {
- cout << "ERROR: no valid tag organizations found" << endl;
- exit(1);
- }
+ if (list.empty() == true) {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(1);
+ }
- while (list.empty() != true)
- {
- bool v = check_mem_org(*list.back(), min);
- if (v)
- {
- cur_cost = wt_delay * (list.back()->access_time/min->min_delay) +
- wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) +
- wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) +
- wt_area * (list.back()->area/min->min_area) +
- wt_cyc * (list.back()->cycle_time/min->min_cyc);
- }
- else
- {
- cur_cost = BIGNUM;
- }
- if (cur_cost < cost)
- {
- if (res != NULL)
- {
- delete res;
- }
- cost = cur_cost;
- res = list.back();
+ while (list.empty() != true) {
+ bool v = check_mem_org(*list.back(), min);
+ if (v) {
+ cur_cost = wt_delay * (list.back()->access_time / min->min_delay) +
+ wt_dyn * (list.back()->power.readOp.dynamic /
+ min->min_dyn) +
+ wt_leakage * (list.back()->power.readOp.leakage /
+ min->min_leakage) +
+ wt_area * (list.back()->area / min->min_area) +
+ wt_cyc * (list.back()->cycle_time / min->min_cyc);
+ } else {
+ cur_cost = BIGNUM;
+ }
+ if (cur_cost < cost) {
+ if (res != NULL) {
+ delete res;
+ }
+ cost = cur_cost;
+ res = list.back();
+ } else {
+ delete list.back();
+ }
+ list.pop_back();
}
- else
- {
- delete list.back();
+ if (!res) {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(0);
}
- list.pop_back();
- }
- if(!res)
- {
- cout << "ERROR: no valid tag organizations found" << endl;
- exit(0);
- }
- list.push_back(res);
+ list.push_back(res);
}
-void filter_data_arr(list<mem_array *> & curr_list)
-{
- if (curr_list.empty() == true)
- {
- cout << "ERROR: no valid data array organizations found" << endl;
- exit(1);
- }
+void filter_data_arr(list<mem_array *> & curr_list) {
+ if (curr_list.empty() == true) {
+ cout << "ERROR: no valid data array organizations found" << endl;
+ exit(1);
+ }
- list<mem_array *>::iterator iter;
+ list<mem_array *>::iterator iter;
- for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
- {
- mem_array * m = *iter;
+ for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) {
+ mem_array * m = *iter;
- if (m == NULL) exit(1);
+ if (m == NULL) exit(1);
- if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) &&
- ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5))
- {
- delete m;
- iter = curr_list.erase(iter);
- iter --;
+ if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay >
+ 0.5) &&
+ ((m->power.readOp.dynamic - m->arr_min->min_dyn) /
+ m->arr_min->min_dyn > 0.5)) {
+ delete m;
+ iter = curr_list.erase(iter);
+ iter --;
+ }
}
- }
}
@@ -675,210 +688,199 @@ void filter_data_arr(list<mem_array *> & curr_list)
* above results
* 4. Cache model with least cost is picked from sol_list
*/
-void solve(uca_org_t *fin_res)
-{
- bool is_dram = false;
- int pure_ram = g_ip->pure_ram;
- bool pure_cam = g_ip->pure_cam;
-
- init_tech_params(g_ip->F_sz_um, false);
-
-
- list<mem_array *> tag_arr (0);
- list<mem_array *> data_arr(0);
- list<mem_array *>::iterator miter;
- list<uca_org_t> sol_list(1, uca_org_t());
-
- fin_res->tag_array.access_time = 0;
- fin_res->tag_array.Ndwl = 0;
- fin_res->tag_array.Ndbl = 0;
- fin_res->tag_array.Nspd = 0;
- fin_res->tag_array.deg_bl_muxing = 0;
- fin_res->tag_array.Ndsam_lev_1 = 0;
- fin_res->tag_array.Ndsam_lev_2 = 0;
-
-
- // distribute calculate_time() execution to multiple threads
- calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
- pthread_t threads[nthreads];
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].tid = t;
- calc_array[t].pure_ram = pure_ram;
- calc_array[t].pure_cam = pure_cam;
- calc_array[t].data_res = new min_values_t();
- calc_array[t].tag_res = new min_values_t();
- }
-
- bool is_tag;
- uint32_t ram_cell_tech_type;
-
- // If it's a cache, first calculate the area, delay and power for all tag array partitions.
- if (!(pure_ram||pure_cam||g_ip->fully_assoc))
- { //cache
- is_tag = true;
- ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
- is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
- init_tech_params(g_ip->F_sz_um, is_tag);
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].is_tag = is_tag;
- calc_array[t].is_main_mem = false;
- calc_array[t].Nspd_min = 0.125;
- pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+void solve(uca_org_t *fin_res) {
+ bool is_dram = false;
+ int pure_ram = g_ip->pure_ram;
+ bool pure_cam = g_ip->pure_cam;
+
+ init_tech_params(g_ip->F_sz_um, false);
+
+
+ list<mem_array *> tag_arr (0);
+ list<mem_array *> data_arr(0);
+ list<mem_array *>::iterator miter;
+ list<uca_org_t> sol_list(1, uca_org_t());
+
+ fin_res->tag_array.access_time = 0;
+ fin_res->tag_array.Ndwl = 0;
+ fin_res->tag_array.Ndbl = 0;
+ fin_res->tag_array.Nspd = 0;
+ fin_res->tag_array.deg_bl_muxing = 0;
+ fin_res->tag_array.Ndsam_lev_1 = 0;
+ fin_res->tag_array.Ndsam_lev_2 = 0;
+
+
+ // distribute calculate_time() execution to multiple threads
+ calc_time_mt_wrapper_struct * calc_array =
+ new calc_time_mt_wrapper_struct[nthreads];
+ pthread_t threads[nthreads];
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].tid = t;
+ calc_array[t].pure_ram = pure_ram;
+ calc_array[t].pure_cam = pure_cam;
+ calc_array[t].data_res = new min_values_t();
+ calc_array[t].tag_res = new min_values_t();
}
- for (uint32_t t = 0; t < nthreads; t++)
- {
- pthread_join(threads[t], NULL);
- }
+ bool is_tag;
+ uint32_t ram_cell_tech_type;
+
+ // If it's a cache, first calculate the area, delay and power for all tag array partitions.
+ if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { //cache
+ is_tag = true;
+ ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) ||
+ (ram_cell_tech_type == comm_dram));
+ init_tech_params(g_ip->F_sz_um, is_tag);
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = false;
+ calc_array[t].Nspd_min = 0.125;
+#ifndef DEBUG
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
+ (void *)(&(calc_array[t])));
+#else
+ calc_time_mt_wrapper((void *)(&(calc_array[t])));
+#endif
+ }
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].data_arr.sort(mem_array::lt);
- data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- calc_array[t].tag_arr.sort(mem_array::lt);
- tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
+#ifndef DEBUG
+ for (uint32_t t = 0; t < nthreads; t++) {
+ pthread_join(threads[t], NULL);
+ }
+#endif
+
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
+ calc_array[t].tag_arr.sort(mem_array::lt);
+ tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
+ }
}
- }
- // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
-// if (!g_ip->fully_assoc)
-// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion
+ // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
+ // in the new cacti, cam, fully_associative cache are processed as single array in the data portion
is_tag = false;
ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
init_tech_params(g_ip->F_sz_um, is_tag);
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].is_tag = is_tag;
- calc_array[t].is_main_mem = g_ip->is_main_mem;
- if (!(pure_cam||g_ip->fully_assoc))
- {
- calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8);
- }
- else
- {
- calc_array[t].Nspd_min = 1;
- }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = g_ip->is_main_mem;
+ if (!(pure_cam || g_ip->fully_assoc)) {
+ calc_array[t].Nspd_min = (double)(g_ip->out_w) /
+ (double)(g_ip->block_sz * 8);
+ } else {
+ calc_array[t].Nspd_min = 1;
+ }
- pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+#ifndef DEBUG
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
+ (void *)(&(calc_array[t])));
+#else
+ calc_time_mt_wrapper((void *)(&(calc_array[t])));
+#endif
}
- for (uint32_t t = 0; t < nthreads; t++)
- {
- pthread_join(threads[t], NULL);
+#ifndef DEBUG
+ for (uint32_t t = 0; t < nthreads; t++) {
+ pthread_join(threads[t], NULL);
}
+#endif
data_arr.clear();
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].data_arr.sort(mem_array::lt);
- data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- }
-// }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- min_values_t * d_min = new min_values_t();
- min_values_t * t_min = new min_values_t();
- min_values_t * cache_min = new min_values_t();
+ }
- for (uint32_t t = 0; t < nthreads; t++)
- {
- d_min->update_min_values(calc_array[t].data_res);
- t_min->update_min_values(calc_array[t].tag_res);
- }
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- (*miter)->arr_min = d_min;
- }
+ min_values_t * d_min = new min_values_t();
+ min_values_t * t_min = new min_values_t();
+ min_values_t * cache_min = new min_values_t();
- //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n";
- filter_data_arr(data_arr);
- if(!(pure_ram||pure_cam||g_ip->fully_assoc))
- {
- filter_tag_arr(t_min, tag_arr);
- }
- //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n";
+ for (uint32_t t = 0; t < nthreads; t++) {
+ d_min->update_min_values(calc_array[t].data_res);
+ t_min->update_min_values(calc_array[t].tag_res);
+ }
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ (*miter)->arr_min = d_min;
+ }
- if (pure_ram||pure_cam||g_ip->fully_assoc)
- {
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- uca_org_t & curr_org = sol_list.back();
- curr_org.tag_array2 = NULL;
- curr_org.data_array2 = (*miter);
+ filter_data_arr(data_arr);
+ if (!(pure_ram || pure_cam || g_ip->fully_assoc)) {
+ filter_tag_arr(t_min, tag_arr);
+ }
- curr_org.find_delay();
- curr_org.find_energy();
- curr_org.find_area();
- curr_org.find_cyc();
+ if (pure_ram || pure_cam || g_ip->fully_assoc) {
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = NULL;
+ curr_org.data_array2 = (*miter);
- //update min values for the entire cache
- cache_min->update_min_values(curr_org);
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
- sol_list.push_back(uca_org_t());
- }
- }
- else
- {
- while (tag_arr.empty() != true)
- {
- mem_array * arr_temp = (tag_arr.back());
- //delete tag_arr.back();
- tag_arr.pop_back();
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- uca_org_t & curr_org = sol_list.back();
- curr_org.tag_array2 = arr_temp;
- curr_org.data_array2 = (*miter);
+ sol_list.push_back(uca_org_t());
+ }
+ } else {
+ while (tag_arr.empty() != true) {
+ mem_array * arr_temp = (tag_arr.back());
+ tag_arr.pop_back();
- curr_org.find_delay();
- curr_org.find_energy();
- curr_org.find_area();
- curr_org.find_cyc();
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = arr_temp;
+ curr_org.data_array2 = (*miter);
- //update min values for the entire cache
- cache_min->update_min_values(curr_org);
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
- sol_list.push_back(uca_org_t());
- }
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
+
+ sol_list.push_back(uca_org_t());
+ }
+ }
}
- }
- sol_list.pop_back();
+ sol_list.pop_back();
- find_optimal_uca(fin_res, cache_min, sol_list);
+ find_optimal_uca(fin_res, cache_min, sol_list);
- sol_list.clear();
+ sol_list.clear();
- for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
- {
- if (*miter != fin_res->data_array2)
- {
- delete *miter;
+ for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) {
+ if (*miter != fin_res->data_array2) {
+ delete *miter;
+ }
}
- }
- data_arr.clear();
+ data_arr.clear();
- for (uint32_t t = 0; t < nthreads; t++)
- {
- delete calc_array[t].data_res;
- delete calc_array[t].tag_res;
- }
+ for (uint32_t t = 0; t < nthreads; t++) {
+ delete calc_array[t].data_res;
+ delete calc_array[t].tag_res;
+ }
- delete [] calc_array;
- delete cache_min;
- delete d_min;
- delete t_min;
+ delete [] calc_array;
+ delete cache_min;
+ delete d_min;
+ delete t_min;
}
void update(uca_org_t *fin_res)
@@ -886,7 +888,14 @@ void update(uca_org_t *fin_res)
if(fin_res->tag_array2)
{
init_tech_params(g_ip->F_sz_um,true);
- DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam,
+ fin_res->tag_array2->Nspd,
+ fin_res->tag_array2->Ndwl,
+ fin_res->tag_array2->Ndbl,
+ fin_res->tag_array2->Ndcm,
+ fin_res->tag_array2->Ndsam_lev_1,
+ fin_res->tag_array2->Ndsam_lev_2,
+ g_ip->is_main_mem);
if(tag_arr_dyn_p.is_valid)
{
UCA * tag_arr = new UCA(tag_arr_dyn_p);
@@ -894,12 +903,20 @@ void update(uca_org_t *fin_res)
}
else
{
- cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback"
+ << endl;
exit(1);
}
}
init_tech_params(g_ip->F_sz_um,false);
- DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam,
+ fin_res->data_array2->Nspd,
+ fin_res->data_array2->Ndwl,
+ fin_res->data_array2->Ndbl,
+ fin_res->data_array2->Ndcm,
+ fin_res->data_array2->Ndsam_lev_1,
+ fin_res->data_array2->Ndsam_lev_2,
+ g_ip->is_main_mem);
if(data_arr_dyn_p.is_valid)
{
UCA * data_arr = new UCA(data_arr_dyn_p);
@@ -907,7 +924,8 @@ void update(uca_org_t *fin_res)
}
else
{
- cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback"
+ << endl;
exit(1);
}
diff --git a/ext/mcpat/cacti/Ucache.h b/ext/mcpat/cacti/Ucache.h
index 20985fff1..87836adcd 100644
--- a/ext/mcpat/cacti/Ucache.h
+++ b/ext/mcpat/cacti/Ucache.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,9 +40,8 @@
#include "nuca.h"
#include "router.h"
-class min_values_t
-{
- public:
+class min_values_t {
+public:
double min_delay;
double min_dyn;
double min_leakage;
@@ -58,17 +58,16 @@ class min_values_t
-struct solution
-{
- int tag_array_index;
- int data_array_index;
- list<mem_array *>::iterator tag_array_iter;
- list<mem_array *>::iterator data_array_iter;
- double access_time;
- double cycle_time;
- double area;
- double efficiency;
- powerDef total_power;
+struct solution {
+ int tag_array_index;
+ int data_array_index;
+ list<mem_array *>::iterator tag_array_iter;
+ list<mem_array *>::iterator data_array_iter;
+ double access_time;
+ double cycle_time;
+ double area;
+ double efficiency;
+ powerDef total_power;
};
@@ -94,20 +93,19 @@ void solve(uca_org_t *fin_res);
void init_tech_params(double tech, bool is_tag);
-struct calc_time_mt_wrapper_struct
-{
- uint32_t tid;
- bool is_tag;
- bool pure_ram;
- bool pure_cam;
- bool is_main_mem;
- double Nspd_min;
+struct calc_time_mt_wrapper_struct {
+ uint32_t tid;
+ bool is_tag;
+ bool pure_ram;
+ bool pure_cam;
+ bool is_main_mem;
+ double Nspd_min;
- min_values_t * data_res;
- min_values_t * tag_res;
+ min_values_t * data_res;
+ min_values_t * tag_res;
- list<mem_array *> data_arr;
- list<mem_array *> tag_arr;
+ list<mem_array *> data_arr;
+ list<mem_array *> tag_arr;
};
void *calc_time_mt_wrapper(void * void_obj);
diff --git a/ext/mcpat/cacti/arbiter.cc b/ext/mcpat/cacti/arbiter.cc
index 6664abf13..8106d2025 100644
--- a/ext/mcpat/cacti/arbiter.cc
+++ b/ext/mcpat/cacti/arbiter.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,95 +37,107 @@ Arbiter::Arbiter(
double flit_size_,
double output_len,
TechnologyParameter::DeviceType *dt
- ):R(n_req), flit_size(flit_size_),
- o_len (output_len), deviceType(dt)
-{
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- Vdd = dt->Vdd;
- double technology = g_ip->F_sz_um;
- NTn1 = 13.5*technology/2;
- PTn1 = 76*technology/2;
- NTn2 = 13.5*technology/2;
- PTn2 = 76*technology/2;
- NTi = 12.5*technology/2;
- PTi = 25*technology/2;
- NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/
- PTtr = 20*technology/2; /* pmos tr. length*/
+ ): R(n_req), flit_size(flit_size_),
+ o_len (output_len), deviceType(dt) {
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ Vdd = dt->Vdd;
+ double technology = g_ip->F_sz_um;
+ NTn1 = 13.5 * technology / 2;
+ PTn1 = 76 * technology / 2;
+ NTn2 = 13.5 * technology / 2;
+ PTn2 = 76 * technology / 2;
+ NTi = 12.5 * technology / 2;
+ PTi = 25 * technology / 2;
+ NTtr = 10 * technology / 2; /*Transmission gate's nmos tr. length*/
+ PTtr = 20 * technology / 2; /* pmos tr. length*/
}
-Arbiter::~Arbiter(){}
+Arbiter::~Arbiter() {}
double
Arbiter::arb_req() {
- double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
- gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
- drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
- return temp;
+ double temp = ((R - 1) * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)) + 2 *
+ gate_C(NTn2, 0) +
+ gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
+ drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) +
+ drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
+ return temp;
}
double
Arbiter::arb_pri() {
- double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
- of flip-flop is ignored */
- return temp;
+ /* switching capacitance of flip-flop is ignored */
+ double temp = 2 * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0));
+ return temp;
}
double
Arbiter::arb_grant() {
- double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
- return temp;
+ double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
+ drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
+ return temp;
}
double
Arbiter::arb_int() {
- double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
- 2*gate_C(NTn2, 0) + gate_C(PTn2, 0));
- return temp;
+ double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
+ drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
+ 2 * gate_C(NTn2, 0) + gate_C(PTn2, 0));
+ return temp;
}
void
Arbiter::compute_power() {
- power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
- arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
- double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
- double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
- double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
- double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
- double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
- double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
- power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
- power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
+ power.readOp.dynamic = (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() *
+ Vdd * Vdd / 2 +
+ arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd *
+ Vdd);
+ double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
+ min_w_pmos * PTn1 * 2, 2, nor);
+ double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R,
+ min_w_pmos * PTn2 * R, 2, nor);
+ double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi,
+ min_w_pmos * PTi, 1, inv);
+ double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
+ min_w_pmos * PTn1 * 2, 2, nor);
+ double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R,
+ min_w_pmos * PTn2 * R, 2, nor);
+ double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi,
+ min_w_pmos * PTi, 1, inv);
+ //FIXME include priority table leakage
+ power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * Vdd;
+ power.readOp.gate_leakage = nor1_leak_gate * Vdd + nor2_leak_gate * Vdd +
+ not_leak_gate * Vdd;
}
double //wire cap with triple spacing
Arbiter::Cw3(double length) {
- Wire wc(g_ip->wt, length, 1, 3, 3);
- double temp = (wc.wire_cap(length,true));
- return temp;
+ Wire wc(g_ip->wt, length, 1, 3, 3);
+ double temp = (wc.wire_cap(length, true));
+ return temp;
}
double
Arbiter::crossbar_ctrline() {
- double temp = (Cw3(o_len * 1e-6 /* m */) +
- drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
- gate_C(NTi, 0) + gate_C(PTi, 0));
- return temp;
+ double temp = (Cw3(o_len * 1e-6 /* m */) +
+ drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
+ gate_C(NTi, 0) + gate_C(PTi, 0));
+ return temp;
}
double
Arbiter::transmission_buf_ctrcap() {
- double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0);
- return temp;
+ double temp = gate_C(NTtr, 0) + gate_C(PTtr, 0);
+ return temp;
}
-void Arbiter::print_arbiter()
-{
- cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
- cout << "Flit size : " << flit_size << " bits" << endl;
- cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
- cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
+void Arbiter::print_arbiter() {
+ cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
+ cout << "Flit size : " << flit_size << " bits" << endl;
+ cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
+ cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
}
diff --git a/ext/mcpat/cacti/bank.cc b/ext/mcpat/cacti/bank.cc
index a18c7f1ed..b4fd95090 100755..100644
--- a/ext/mcpat/cacti/bank.cc
+++ b/ext/mcpat/cacti/bank.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,163 +37,174 @@
#include "bank.h"
Bank::Bank(const DynamicParameter & dyn_p):
- dp(dyn_p), mat(dp),
- num_addr_b_mat(dyn_p.number_addr_bits_mat),
- num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir)
-{
- int RWP;
- int ERP;
- int EWP;
- int SCHP;
-
- if (dp.use_inp_params)
- {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else
- {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
- }
-
- int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
- int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
- int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
- int searchinbits;
- int searchoutbits;
-
- if (dp.fully_assoc || dp.pure_cam)
- {
- datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
- dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
- searchinbits = dp.num_si_b_bank_per_port * SCHP;
- searchoutbits = dp.num_so_b_bank_per_port * SCHP;
- }
-
- if (!(dp.fully_assoc || dp.pure_cam))
- {
- if (g_ip->fast_access && dp.is_tag == false)
- {
- dataoutbits *= g_ip->data_assoc;
+ dp(dyn_p), mat(dp),
+ num_addr_b_mat(dyn_p.number_addr_bits_mat),
+ num_mats_hor_dir(dyn_p.num_mats_h_dir),
+ num_mats_ver_dir(dyn_p.num_mats_v_dir) {
+ int RWP;
+ int ERP;
+ int EWP;
+ int SCHP;
+
+ if (dp.use_inp_params) {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ } else {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
}
- htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
- htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
- htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+ int total_addrbits = (dp.number_addr_bits_mat +
+ dp.number_subbanks_decode) * (RWP + ERP + EWP);
+ int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
+ int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
+ int searchinbits;
+ int searchoutbits;
+
+ if (dp.fully_assoc || dp.pure_cam) {
+ datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
+ dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
+ searchinbits = dp.num_si_b_bank_per_port * SCHP;
+ searchoutbits = dp.num_so_b_bank_per_port * SCHP;
+ }
+
+ if (!(dp.fully_assoc || dp.pure_cam)) {
+ if (g_ip->fast_access && dp.is_tag == false) {
+ dataoutbits *= g_ip->data_assoc;
+ }
+
+ htree_in_add = new Htree2(g_ip->wt, (double) mat.area.w,
+ (double)mat.area.h,
+ total_addrbits, datainbits, 0, dataoutbits,
+ 0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
+ Add_htree);
+ htree_in_data = new Htree2(g_ip->wt, (double) mat.area.w,
+ (double)mat.area.h,
+ total_addrbits, datainbits, 0, dataoutbits,
+ 0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
+ Data_in_htree);
+ htree_out_data = new Htree2(g_ip->wt, (double) mat.area.w,
+ (double)mat.area.h,
+ total_addrbits, datainbits, 0, dataoutbits,
+ 0, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_out_htree);
// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
-// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
- }
- else
- {
- htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
- htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
- htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
- htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true);
- htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true);
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
- }
-
- num_addr_b_row_dec = _log2(mat.subarray.num_rows);
- num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
- num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
+// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+ } else {
+ htree_in_add =
+ new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Add_htree);
+ htree_in_data =
+ new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_in_htree);
+ htree_out_data =
+ new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_out_htree);
+ htree_in_search =
+ new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_in_htree, true, true);
+ htree_out_search =
+ new Htree2 (g_ip->wt, (double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits, dataoutbits,
+ searchoutbits, num_mats_ver_dir * 2,
+ num_mats_hor_dir * 2, Data_out_htree, true);
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+ }
+
+ num_addr_b_row_dec = _log2(mat.subarray.num_rows);
+ num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
+ num_addr_b_routed_to_mat_for_rd_or_wr =
+ num_addr_b_mat - num_addr_b_row_dec;
}
-Bank::~Bank()
-{
- delete htree_in_add;
- delete htree_out_data;
- delete htree_in_data;
- if (dp.fully_assoc || dp.pure_cam)
- {
- delete htree_in_search;
- delete htree_out_search;
- }
+Bank::~Bank() {
+ delete htree_in_add;
+ delete htree_out_data;
+ delete htree_in_data;
+ if (dp.fully_assoc || dp.pure_cam) {
+ delete htree_in_search;
+ delete htree_out_search;
+ }
}
-double Bank::compute_delays(double inrisetime)
-{
- return mat.compute_delays(inrisetime);
+double Bank::compute_delays(double inrisetime) {
+ return mat.compute_delays(inrisetime);
}
-void Bank::compute_power_energy()
-{
- mat.compute_power_energy();
+void Bank::compute_power_energy() {
+ mat.compute_power_energy();
- if (!(dp.fully_assoc || dp.pure_cam))
- {
- power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
- power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
- power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
+ if (!(dp.fully_assoc || dp.pure_cam)) {
+ power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
+ power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
+ power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
- power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
- power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
+ power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
+ power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
- power.readOp.leakage += htree_in_add->power.readOp.leakage;
- power.readOp.leakage += htree_in_data->power.readOp.leakage;
- power.readOp.leakage += htree_out_data->power.readOp.leakage;
- power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
- }
- else
- {
+ power.readOp.leakage += htree_in_add->power.readOp.leakage;
+ power.readOp.leakage += htree_in_data->power.readOp.leakage;
+ power.readOp.leakage += htree_out_data->power.readOp.leakage;
+ power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
+ } else {
- power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
- power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
- power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
+ power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
+ power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
+ power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
- power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
- power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
- mat.power_sa.searchOp.dynamic +
- mat.power_bitline.searchOp.dynamic +
- mat.power_subarray_out_drv.searchOp.dynamic+
- mat.ml_to_ram_wl_drv->power.readOp.dynamic;
+ power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
+ power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
+ mat.power_sa.searchOp.dynamic +
+ mat.power_bitline.searchOp.dynamic +
+ mat.power_subarray_out_drv.searchOp.dynamic +
+ mat.ml_to_ram_wl_drv->power.readOp.dynamic;
- power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
- power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
+ power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
+ power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
- power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
- power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
+ power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
+ power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
- power.readOp.leakage += htree_in_add->power.readOp.leakage;
- power.readOp.leakage += htree_in_data->power.readOp.leakage;
- power.readOp.leakage += htree_out_data->power.readOp.leakage;
- power.readOp.leakage += htree_in_search->power.readOp.leakage;
- power.readOp.leakage += htree_out_search->power.readOp.leakage;
+ power.readOp.leakage += htree_in_add->power.readOp.leakage;
+ power.readOp.leakage += htree_in_data->power.readOp.leakage;
+ power.readOp.leakage += htree_out_data->power.readOp.leakage;
+ power.readOp.leakage += htree_in_search->power.readOp.leakage;
+ power.readOp.leakage += htree_out_search->power.readOp.leakage;
- power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
- }
+ }
}
diff --git a/ext/mcpat/cacti/bank.h b/ext/mcpat/cacti/bank.h
index 153609ab0..49151f050 100755
--- a/ext/mcpat/cacti/bank.h
+++ b/ext/mcpat/cacti/bank.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -39,9 +40,8 @@
#include "htree2.h"
#include "mat.h"
-class Bank : public Component
-{
- public:
+class Bank : public Component {
+public:
Bank(const DynamicParameter & dyn_p);
~Bank();
double compute_delays(double inrisetime); // return outrisetime
diff --git a/ext/mcpat/cacti/basic_circuit.cc b/ext/mcpat/cacti/basic_circuit.cc
index 6efd5dd27..00ea3ce9d 100644
--- a/ext/mcpat/cacti/basic_circuit.cc
+++ b/ext/mcpat/cacti/basic_circuit.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,59 +40,48 @@
#include "basic_circuit.h"
#include "parameter.h"
-uint32_t _log2(uint64_t num)
-{
- uint32_t log2 = 0;
+uint32_t _log2(uint64_t num) {
+ uint32_t log2 = 0;
- if (num == 0)
- {
- std::cerr << "log0?" << std::endl;
- exit(1);
- }
+ if (num == 0) {
+ std::cerr << "log0?" << std::endl;
+ exit(1);
+ }
- while (num > 1)
- {
- num = (num >> 1);
- log2++;
- }
+ while (num > 1) {
+ num = (num >> 1);
+ log2++;
+ }
- return log2;
+ return log2;
}
-bool is_pow2(int64_t val)
-{
- if (val <= 0)
- {
- return false;
- }
- else if (val == 1)
- {
- return true;
- }
- else
- {
- return (_log2(val) != _log2(val-1));
- }
+bool is_pow2(int64_t val) {
+ if (val <= 0) {
+ return false;
+ } else if (val == 1) {
+ return true;
+ } else {
+ return (_log2(val) != _log2(val - 1));
+ }
}
-int powers (int base, int n)
-{
- int i, p;
+int powers (int base, int n) {
+ int i, p;
- p = 1;
- for (i = 1; i <= n; ++i)
- p *= base;
- return p;
+ p = 1;
+ for (i = 1; i <= n; ++i)
+ p *= base;
+ return p;
}
/*----------------------------------------------------------------------*/
-double logtwo (double x)
-{
- assert(x > 0);
- return ((double) (log (x) / log (2.0)));
+double logtwo (double x) {
+ assert(x > 0);
+ return ((double) (log (x) / log (2.0)));
}
/*----------------------------------------------------------------------*/
@@ -102,28 +92,20 @@ double gate_C(
double wirelength,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if (_is_dram && _is_cell)
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if (_is_dram && _is_wl_tr)
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if (!_is_dram && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+ bool _is_wl_tr) {
+ const TechnologyParameter::DeviceType * dt;
+
+ if (_is_dram && _is_cell) {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ } else if (_is_dram && _is_wl_tr) {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ } else if (!_is_dram && _is_cell) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
}
@@ -134,29 +116,21 @@ double gate_C_pass(
double wirelength, // poly wire length going to gate in lambda
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- // v5.0
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+ bool _is_wl_tr) {
+ // v5.0
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell)) {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ } else if ((_is_dram) && (_is_wl_tr)) {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ } else if ((!_is_dram) && _is_cell) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
}
@@ -169,83 +143,67 @@ double drain_C_(
double fold_dimension,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- double w_folded_tr;
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; // DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; // DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double c_junc_area = dt->C_junc;
- double c_junc_sidewall = dt->C_junc_sidewall;
- double c_fringe = 2*dt->C_fringe;
- double c_overlap = 2*dt->C_overlap;
- double drain_C_metal_connecting_folded_tr = 0;
-
- // determine the width of the transistor after folding (if it is getting folded)
- if (next_arg_thresh_folding_width_or_height_cell == 0)
- { // interpret fold_dimension as the the folding width threshold
- // i.e. the value of transistor width above which the transistor gets folded
- w_folded_tr = fold_dimension;
- }
- else
- { // interpret fold_dimension as the height of the cell that this transistor is part of.
- double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
- // TODO : w_folded_tr must come from Component::compute_gate_area()
- double ratio_p_to_n = 2.0 / (2.0 + 1.0);
- if (nchannel)
- {
- w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ bool _is_wl_tr) {
+ double w_folded_tr;
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell)) {
+ dt = &g_tp.dram_acc; // DRAM cell access transistor
+ } else if ((_is_dram) && (_is_wl_tr)) {
+ dt = &g_tp.dram_wl; // DRAM wordline transistor
+ } else if ((!_is_dram) && _is_cell) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ double c_junc_area = dt->C_junc;
+ double c_junc_sidewall = dt->C_junc_sidewall;
+ double c_fringe = 2 * dt->C_fringe;
+ double c_overlap = 2 * dt->C_overlap;
+ double drain_C_metal_connecting_folded_tr = 0;
+
+ // determine the width of the transistor after folding (if it is getting folded)
+ if (next_arg_thresh_folding_width_or_height_cell == 0) {
+ // interpret fold_dimension as the the folding width threshold
+ // i.e. the value of transistor width above which the transistor gets folded
+ w_folded_tr = fold_dimension;
+ } else { // interpret fold_dimension as the height of the cell that this transistor is part of.
+ double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
+ // TODO : w_folded_tr must come from Component::compute_gate_area()
+ double ratio_p_to_n = 2.0 / (2.0 + 1.0);
+ if (nchannel) {
+ w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ } else {
+ w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ }
}
- else
- {
- w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ int num_folded_tr = (int) (ceil(width / w_folded_tr));
+
+ if (num_folded_tr < 2) {
+ w_folded_tr = width;
}
- }
- int num_folded_tr = (int) (ceil(width / w_folded_tr));
-
- if (num_folded_tr < 2)
- {
- w_folded_tr = width;
- }
-
- double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
- (stack - 1) * g_tp.spacing_poly_to_poly;
- double drain_h_for_sidewall = w_folded_tr;
- double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
- if (num_folded_tr > 1)
- {
- total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
- (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
-
- if (num_folded_tr%2 == 0)
- {
- drain_h_for_sidewall = 0;
+
+ double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
+ (stack - 1) * g_tp.spacing_poly_to_poly;
+ double drain_h_for_sidewall = w_folded_tr;
+ double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
+ if (num_folded_tr > 1) {
+ total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
+ (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
+
+ if (num_folded_tr % 2 == 0) {
+ drain_h_for_sidewall = 0;
+ }
+ total_drain_height_for_cap_wrt_gate *= num_folded_tr;
+ drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
}
- total_drain_height_for_cap_wrt_gate *= num_folded_tr;
- drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
- }
- double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
- double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
- double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
+ double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
+ double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
+ double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
- return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
+ return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
}
@@ -255,29 +213,21 @@ double tr_R_on(
int stack,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
- return (stack * restrans / width);
+ bool _is_wl_tr) {
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell)) {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ } else if ((_is_dram) && (_is_wl_tr)) {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ } else if ((!_is_dram) && _is_cell) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+ return (stack * restrans / width);
}
@@ -291,46 +241,34 @@ double R_to_w(
int nchannel,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && (_is_cell))
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
- return (restrans / res);
+ bool _is_wl_tr) {
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell)) {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ } else if ((_is_dram) && (_is_wl_tr)) {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ } else if ((!_is_dram) && (_is_cell)) {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ } else {
+ dt = &g_tp.peri_global;
+ }
+
+ double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+ return (restrans / res);
}
double pmos_to_nmos_sz_ratio(
bool _is_dram,
- bool _is_wl_tr)
-{
- double p_to_n_sizing_ratio;
- if ((_is_dram) && (_is_wl_tr))
- { //DRAM wordline transistor
- p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
- }
- else
- { //DRAM or SRAM all other transistors
- p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
- }
- return p_to_n_sizing_ratio;
+ bool _is_wl_tr) {
+ double p_to_n_sizing_ratio;
+ if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
+ } else { //DRAM or SRAM all other transistors
+ p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
+ }
+ return p_to_n_sizing_ratio;
}
@@ -340,26 +278,23 @@ double horowitz(
double tf, // time constant of gate
double vs1, // threshold voltage
double vs2, // threshold voltage
- int rise) // whether input rises or fall
-{
- if (inputramptime == 0 && vs1 == vs2)
- {
- return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
- }
- double a, b, td;
-
- a = inputramptime / tf;
- if (rise == RISE)
- {
- b = 0.5;
- td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2));
- }
- else
- {
- b = 0.4;
- td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2));
- }
- return (td);
+ int rise) { // whether input rises or fall
+ if (inputramptime == 0 && vs1 == vs2) {
+ return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
+ }
+ double a, b, td;
+
+ a = inputramptime / tf;
+ if (rise == RISE) {
+ b = 0.5;
+ td = tf * sqrt(log(vs1) * log(vs1) + 2 * a * b * (1.0 - vs1)) +
+ tf * (log(vs1) - log(vs2));
+ } else {
+ b = 0.4;
+ td = tf * sqrt(log(1.0 - vs1) * log(1.0 - vs1) + 2 * a * b * (vs1)) +
+ tf * (log(1.0 - vs1) - log(1.0 - vs2));
+ }
+ return (td);
}
double cmos_Ileak(
@@ -367,23 +302,17 @@ double cmos_Ileak(
double pWidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
}
@@ -391,107 +320,81 @@ double simplified_nmos_leakage(
double nwidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nwidth * dt->I_off_n;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nwidth * dt->I_off_n;
}
-int factorial(int n, int m)
-{
- int fa = m, i;
- for (i=m+1; i<=n; i++)
- fa *=i;
- return fa;
+int factorial(int n, int m) {
+ int fa = m, i;
+ for (i = m + 1; i <= n; i++)
+ fa *= i;
+ return fa;
}
-int combination(int n, int m)
-{
- int ret;
- ret = factorial(n, m+1) / factorial(n - m);
- return ret;
+int combination(int n, int m) {
+ int ret;
+ ret = factorial(n, m + 1) / factorial(n - m);
+ return ret;
}
double simplified_pmos_leakage(
double pwidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return pwidth * dt->I_off_p;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return pwidth * dt->I_off_p;
}
double cmos_Ig_n(
double nWidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nWidth*dt->I_g_on_n;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nWidth*dt->I_g_on_n;
}
double cmos_Ig_p(
double pWidth,
bool _is_dram,
bool _is_cell,
- bool _is_wl_tr)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return pWidth*dt->I_g_on_p;
+ bool _is_wl_tr) {
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ } else { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return pWidth*dt->I_g_on_p;
}
double cmos_Isub_leakage(
@@ -502,98 +405,93 @@ double cmos_Isub_leakage(
bool _is_dram,
bool _is_cell,
bool _is_wl_tr,
- enum Half_net_topology topo)
-{
- assert (fanin>=1);
- double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
- double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
- double Isub=0;
+ enum Half_net_topology topo) {
+ assert (fanin >= 1);
+ double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
+ double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
+ double Isub = 0;
int num_states;
int num_off_tx;
num_states = int(pow(2.0, fanin));
- switch (g_type)
- {
+ switch (g_type) {
case nmos:
- if (fanin==1)
- {
- Isub = nmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
- }
- else
- {
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
- {
- //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
- }
- Isub /=num_states;
+ if (fanin == 1) {
+ Isub = nmos_leak / num_states;
+ } else {
+ if (topo == parallel) {
+ //only when all tx are off, leakage power is non-zero.
+ //The possibility of this state is 1/num_states
+ Isub = nmos_leak * fanin / num_states;
+ } else {
+ for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
+ //when num_off_tx ==0 there is no leakage power
+ Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
+ (num_off_tx - 1)) *
+ combination(fanin, num_off_tx);
}
+ Isub /= num_states;
+ }
}
break;
case pmos:
- if (fanin==1)
- {
- Isub = pmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
- }
- else
- {
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
- {
- //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
- }
- Isub /=num_states;
+ if (fanin == 1) {
+ Isub = pmos_leak / num_states;
+ } else {
+ if (topo == parallel) {
+ //only when all tx are off, leakage power is non-zero.
+ //The possibility of this state is 1/num_states
+ Isub = pmos_leak * fanin / num_states;
+ } else {
+ for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
+ //when num_off_tx ==0 there is no leakage power
+ Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
+ (num_off_tx - 1)) *
+ combination(fanin, num_off_tx);
}
+ Isub /= num_states;
+ }
}
break;
case inv:
- Isub = (nmos_leak + pmos_leak)/2;
+ Isub = (nmos_leak + pmos_leak) / 2;
break;
case nand:
- Isub += fanin*pmos_leak;//the pullup network
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network
- {
- //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ Isub += fanin * pmos_leak;//the pullup network
+ for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
+ // the pulldown network
+ Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
+ (num_off_tx - 1)) *
+ combination(fanin, num_off_tx);
}
- Isub /=num_states;
+ Isub /= num_states;
break;
case nor:
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network
- {
- //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
+ // the pullup network
+ Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
+ (num_off_tx - 1)) *
+ combination(fanin, num_off_tx);
}
- Isub += fanin*nmos_leak;//the pulldown network
- Isub /=num_states;
+ Isub += fanin * nmos_leak;//the pulldown network
+ Isub /= num_states;
break;
case tri:
- Isub += (nmos_leak + pmos_leak)/2;//enabled
- Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power
- Isub /=2;
+ Isub += (nmos_leak + pmos_leak) / 2;//enabled
+ //disabled upper bound of leakage power
+ Isub += nmos_leak * UNI_LEAK_STACK_FACTOR;
+ Isub /= 2;
break;
case tg:
- Isub = (nmos_leak + pmos_leak)/2;
+ Isub = (nmos_leak + pmos_leak) / 2;
break;
default:
assert(0);
break;
- }
+ }
return Isub;
}
@@ -607,120 +505,116 @@ double cmos_Ig_leakage(
bool _is_dram,
bool _is_cell,
bool _is_wl_tr,
- enum Half_net_topology topo)
-{
- assert (fanin>=1);
- double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
- double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
- double Ig_on=0;
- int num_states;
- int num_on_tx;
-
- num_states = int(pow(2.0, fanin));
-
- switch (g_type)
- {
- case nmos:
- if (fanin==1)
- {
- Ig_on = nmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
- }
- }
- else
- {
- Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
- //num_on_tx is the number of on tx
- for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
- }
- Ig_on /=num_states;
- }
- }
- break;
- case pmos:
- if (fanin==1)
- {
- Ig_on = pmos_leak/num_states;
+ enum Half_net_topology topo) {
+ assert (fanin >= 1);
+ double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
+ double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
+ double Ig_on = 0;
+ int num_states;
+ int num_on_tx;
+
+ num_states = int(pow(2.0, fanin));
+
+ switch (g_type) {
+ case nmos:
+ if (fanin == 1) {
+ Ig_on = nmos_leak / num_states;
+ } else {
+ if (topo == parallel) {
+ for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
+ Ig_on += nmos_leak * combination(fanin, num_on_tx) *
+ num_on_tx;
}
- else
- {
- if (topo==parallel)
- {
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
- {
- Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
- }
- }
- else
- {
- Ig_on += pmos_leak * fanin;//pull down network when all TXs are on.
- //num_on_tx is the number of on tx
- for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
- {
- Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
- }
- Ig_on /=num_states;
- }
+ } else {
+ //pull down network when all TXs are on.
+ Ig_on += nmos_leak * fanin;
+ //num_on_tx is the number of on tx
+ for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
+ //when num_on_tx=[1,n-1]
+ //TODO: this is a approximation now, a precise computation
+ //will be very complicated.
+ Ig_on += nmos_leak * combination(fanin, num_on_tx) *
+ num_on_tx / 2;
}
- break;
-
- case inv:
- Ig_on = (nmos_leak + pmos_leak)/2;
- break;
- case nand:
- //pull up network
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
- {
- Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+ Ig_on /= num_states;
+ }
+ }
+ break;
+ case pmos:
+ if (fanin == 1) {
+ Ig_on = pmos_leak / num_states;
+ } else {
+ if (topo == parallel) {
+ for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
+ Ig_on += pmos_leak * combination(fanin, num_on_tx) *
+ num_on_tx;
}
-
- //pull down network
- Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+ } else {
+ //pull down network when all TXs are on.
+ Ig_on += pmos_leak * fanin;
//num_on_tx is the number of on tx
- for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
+ for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
+ //when num_on_tx=[1,n-1]
+ //TODO: this is a approximation now, a precise computation
+ //will be very complicated.
+ Ig_on += pmos_leak * combination(fanin, num_on_tx) *
+ num_on_tx / 2;
}
- Ig_on /=num_states;
- break;
- case nor:
- // num_on_tx is the number of on tx in pull up network
- Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
- for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)
- {
- Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;
+ Ig_on /= num_states;
+ }
+ }
+ break;
- }
- //pull down network
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
- }
- Ig_on /=num_states;
- break;
- case tri:
- Ig_on += (2*nmos_leak + 2*pmos_leak)/2;//enabled
- Ig_on += (nmos_leak + pmos_leak)/2; //disabled upper bound of leakage power
- Ig_on /=2;
- break;
- case tg:
- Ig_on = (nmos_leak + pmos_leak)/2;
- break;
- default:
- assert(0);
- break;
- }
-
- return Ig_on;
+ case inv:
+ Ig_on = (nmos_leak + pmos_leak) / 2;
+ break;
+ case nand:
+ //pull up network
+ //when num_on_tx=[1,n]
+ for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
+ Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx;
+ }
+
+ //pull down network
+ Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+ //num_on_tx is the number of on tx
+ for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
+ //when num_on_tx=[1,n-1]
+ //TODO: this is a approximation now, a precise computation will be
+ //very complicated.
+ Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
+ }
+ Ig_on /= num_states;
+ break;
+ case nor:
+ // num_on_tx is the number of on tx in pull up network
+ Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
+ for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
+ Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
+
+ }
+ //pull down network
+ for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
+ //when num_on_tx=[1,n]
+ Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx;
+ }
+ Ig_on /= num_states;
+ break;
+ case tri:
+ Ig_on += (2 * nmos_leak + 2 * pmos_leak) / 2;//enabled
+ //disabled upper bound of leakage power
+ Ig_on += (nmos_leak + pmos_leak) / 2;
+ Ig_on /= 2;
+ break;
+ case tg:
+ Ig_on = (nmos_leak + pmos_leak) / 2;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ return Ig_on;
}
double shortcircuit_simple(
@@ -734,21 +628,28 @@ double shortcircuit_simple(
double i_on_p,
double i_on_n_in,
double i_on_p_in,
- double vdd)
-{
-
- double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
- double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
-
- fo_n = i_on_n/i_on_n_in;
- fo_p = i_on_p/i_on_p_in;
- fanout = c_out/c_in;
- beta_ratio = i_on_p/i_on_n;
- vt_to_vdd_ratio = vt/vdd;
-
- //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
- p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
- p_short_circuit_charge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
+ double vdd) {
+
+ double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
+ double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
+
+ fo_n = i_on_n / i_on_n_in;
+ fo_p = i_on_p / i_on_p_in;
+ fanout = c_out / c_in;
+ beta_ratio = i_on_p / i_on_n;
+ vt_to_vdd_ratio = vt / vdd;
+
+ //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+ p_short_circuit_discharge_low =
+ 10 / 3 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
+ pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio *
+ vt_to_vdd_ratio)) * c_in *
+ vdd * vdd * fo_p * fo_p / fanout / beta_ratio;
+ p_short_circuit_charge_low =
+ 10 / 3 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
+ pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio *
+ vt_to_vdd_ratio)) * c_in *
+ vdd * vdd * fo_n * fo_n / fanout * beta_ratio;
// double t1, t2, t3, t4, t5;
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
// t2=pow(velocity_index,2.0);
@@ -756,8 +657,12 @@ double shortcircuit_simple(
// t4=t1/t2/t3;
// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
- p_short_circuit_discharge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_p/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
- p_short_circuit_charge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_n/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
+ p_short_circuit_discharge_high =
+ pow(((vdd - vt) - vt_to_vdd_ratio), 1.5) * c_in * vdd * vdd *
+ fo_p / 10 / pow(2, 3 * vt_to_vdd_ratio + 2 * velocity_index);
+ p_short_circuit_charge_high = pow(((vdd - vt) - vt_to_vdd_ratio), 1.5) *
+ c_in * vdd * vdd * fo_n / 10 / pow(2, 3 * vt_to_vdd_ratio + 2 *
+ velocity_index);
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),1.5);
// t2=pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
@@ -766,11 +671,11 @@ double shortcircuit_simple(
// p_short_circuit_discharge = 1.0/(1.0/p_short_circuit_discharge_low + 1.0/p_short_circuit_discharge_high);
// p_short_circuit_charge = 1/(1/p_short_circuit_charge_low + 1/p_short_circuit_charge_high); //harmmoic mean cannot be applied simple formulas.
- p_short_circuit_discharge = p_short_circuit_discharge_low;
- p_short_circuit_charge = p_short_circuit_charge_low;
- p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge)/2;
+ p_short_circuit_discharge = p_short_circuit_discharge_low;
+ p_short_circuit_charge = p_short_circuit_charge_low;
+ p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge) / 2;
- return (p_short_circuit);
+ return (p_short_circuit);
}
double shortcircuit(
@@ -784,25 +689,33 @@ double shortcircuit(
double i_on_p,
double i_on_n_in,
double i_on_p_in,
- double vdd)
-{
-
- double p_short_circuit=0, p_short_circuit_discharge;//, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
- double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
- double f_alpha, k_v, e, g_v_alpha, h_v_alpha;
-
- fo_n = i_on_n/i_on_n_in;
- fo_p = i_on_p/i_on_p_in;
- fanout = 1;
- beta_ratio = i_on_p/i_on_n;
- vt_to_vdd_ratio = vt/vdd;
- e = 2.71828;
- f_alpha = 1/(velocity_index+2) -velocity_index/(2*(velocity_index+3)) +velocity_index/(velocity_index+4)*(velocity_index/2-1);
- k_v = 0.9/0.8+(vdd-vt)/0.8*log(10*(vdd-vt)/e);
- g_v_alpha = (velocity_index + 1)*pow((1-velocity_index),velocity_index)*pow((1-velocity_index),velocity_index/2)/f_alpha/pow((1-velocity_index-velocity_index),(velocity_index/2+velocity_index+2));
- h_v_alpha = pow(2, velocity_index)*(velocity_index+1)*pow((1-velocity_index),velocity_index)/pow((1-velocity_index-velocity_index),(velocity_index+1));
-
- //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
+ double vdd) {
+
+ //this is actually energy
+ double p_short_circuit = 0, p_short_circuit_discharge;
+ double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
+ double f_alpha, k_v, e, g_v_alpha, h_v_alpha;
+
+ fo_n = i_on_n / i_on_n_in;
+ fo_p = i_on_p / i_on_p_in;
+ fanout = 1;
+ beta_ratio = i_on_p / i_on_n;
+ vt_to_vdd_ratio = vt / vdd;
+ e = 2.71828;
+ f_alpha = 1 / (velocity_index + 2) - velocity_index /
+ (2 * (velocity_index + 3)) + velocity_index / (velocity_index + 4) *
+ (velocity_index / 2 - 1);
+ k_v = 0.9 / 0.8 + (vdd - vt) / 0.8 * log(10 * (vdd - vt) / e);
+ g_v_alpha = (velocity_index + 1) *
+ pow((1 - velocity_index), velocity_index) *
+ pow((1 - velocity_index), velocity_index / 2) / f_alpha /
+ pow((1 - velocity_index - velocity_index),
+ (velocity_index / 2 + velocity_index + 2));
+ h_v_alpha = pow(2, velocity_index) * (velocity_index + 1) *
+ pow((1 - velocity_index), velocity_index) /
+ pow((1 - velocity_index - velocity_index), (velocity_index + 1));
+
+ //p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
// p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
// p_short_circuit_charge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
// double t1, t2, t3, t4, t5;
@@ -824,6 +737,8 @@ double shortcircuit(
//
// p_short_circuit = p_short_circuit_discharge;
- p_short_circuit_discharge = k_v*vdd*vdd*c_in*fo_p*fo_p/((vdd-vt)*g_v_alpha*fanout*beta_ratio/2/k_v + h_v_alpha*fo_p);
- return (p_short_circuit);
+ p_short_circuit_discharge = k_v * vdd * vdd * c_in * fo_p * fo_p /
+ ((vdd - vt) * g_v_alpha * fanout * beta_ratio / 2 / k_v + h_v_alpha *
+ fo_p);
+ return (p_short_circuit);
}
diff --git a/ext/mcpat/cacti/basic_circuit.h b/ext/mcpat/cacti/basic_circuit.h
index aaab6c0ea..e4bb5760a 100644
--- a/ext/mcpat/cacti/basic_circuit.h
+++ b/ext/mcpat/cacti/basic_circuit.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -49,10 +50,10 @@ int combination(int n, int m);
//#define DBG
#ifdef DBG
- #define PRINTDW(a);\
+#define PRINTDW(a);\
a;
#else
- #define PRINTDW(a);\
+#define PRINTDW(a);\
#endif
@@ -76,7 +77,7 @@ enum Htree_type {
enum Gate_type {
nmos,
pmos,
- inv,
+ inv,
nand,
nor,
tri,
@@ -164,13 +165,13 @@ double cmos_Ig_n(
double nWidth,
bool _is_dram = false,
bool _is_cell = false,
- bool _is_wl_tr= false);
+ bool _is_wl_tr = false);
double cmos_Ig_p(
double pWidth,
bool _is_dram = false,
bool _is_cell = false,
- bool _is_wl_tr= false);
+ bool _is_wl_tr = false);
double cmos_Isub_leakage(
@@ -220,29 +221,29 @@ double shortcircuit_simple(
double vdd);
//set power point product mask; strictly speaking this is not real point product
inline void set_pppm(
- double * pppv,
- double a=1,
- double b=1,
- double c=1,
- double d=1
- ){
- pppv[0]= a;
- pppv[1]= b;
- pppv[2]= c;
- pppv[3]= d;
+ double * pppv,
+ double a = 1,
+ double b = 1,
+ double c = 1,
+ double d = 1
+) {
+ pppv[0] = a;
+ pppv[1] = b;
+ pppv[2] = c;
+ pppv[3] = d;
}
inline void set_sppm(
- double * sppv,
- double a=1,
- double b=1,
- double c=1,
- double d=1
- ){
- sppv[0]= a;
- sppv[1]= b;
- sppv[2]= c;
+ double * sppv,
+ double a = 1,
+ double b = 1,
+ double c = 1,
+ double d = 1
+) {
+ sppv[0] = a;
+ sppv[1] = b;
+ sppv[2] = c;
}
#endif
diff --git a/ext/mcpat/cacti/cacti_interface.cc b/ext/mcpat/cacti/cacti_interface.cc
index b6d0d13de..b397db897 100644
--- a/ext/mcpat/cacti/cacti_interface.cc
+++ b/ext/mcpat/cacti/cacti_interface.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -47,127 +48,107 @@
using namespace std;
-bool mem_array::lt(const mem_array * m1, const mem_array * m2)
-{
- if (m1->Nspd < m2->Nspd) return true;
- else if (m1->Nspd > m2->Nspd) return false;
- else if (m1->Ndwl < m2->Ndwl) return true;
- else if (m1->Ndwl > m2->Ndwl) return false;
- else if (m1->Ndbl < m2->Ndbl) return true;
- else if (m1->Ndbl > m2->Ndbl) return false;
- else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
- else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
- else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
- else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
- else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
- else return false;
+bool mem_array::lt(const mem_array * m1, const mem_array * m2) {
+ if (m1->Nspd < m2->Nspd) return true;
+ else if (m1->Nspd > m2->Nspd) return false;
+ else if (m1->Ndwl < m2->Ndwl) return true;
+ else if (m1->Ndwl > m2->Ndwl) return false;
+ else if (m1->Ndbl < m2->Ndbl) return true;
+ else if (m1->Ndbl > m2->Ndbl) return false;
+ else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
+ else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
+ else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
+ else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
+ else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
+ else return false;
}
-void uca_org_t::find_delay()
-{
- mem_array * data_arr = data_array2;
- mem_array * tag_arr = tag_array2;
-
- // check whether it is a regular cache or scratch ram
- if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
- {
- access_time = data_arr->access_time;
- }
- // Both tag and data lookup happen in parallel
- // and the entire set is sent over the data array h-tree without
- // waiting for the way-select signal --TODO add the corresponding
- // power overhead Nav
- else if (g_ip->fast_access == true)
- {
- access_time = MAX(tag_arr->access_time, data_arr->access_time);
- }
- // Tag is accessed first. On a hit, way-select signal along with the
- // address is sent to read/write the appropriate block in the data
- // array
- else if (g_ip->is_seq_acc == true)
- {
- access_time = tag_arr->access_time + data_arr->access_time;
- }
- // Normal access: tag array access and data array access happen in parallel.
- // But, the data array will wait for the way-select and transfer only the
- // appropriate block over the h-tree.
- else
- {
- access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
- data_arr->delay_before_subarray_output_driver) +
- data_arr->delay_from_subarray_output_driver_to_output;
- }
+void uca_org_t::find_delay() {
+ mem_array * data_arr = data_array2;
+ mem_array * tag_arr = tag_array2;
+
+ // check whether it is a regular cache or scratch ram
+ if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
+ access_time = data_arr->access_time;
+ }
+ // Both tag and data lookup happen in parallel
+ // and the entire set is sent over the data array h-tree without
+ // waiting for the way-select signal --TODO add the corresponding
+ // power overhead Nav
+ else if (g_ip->fast_access == true) {
+ access_time = MAX(tag_arr->access_time, data_arr->access_time);
+ }
+ // Tag is accessed first. On a hit, way-select signal along with the
+ // address is sent to read/write the appropriate block in the data
+ // array
+ else if (g_ip->is_seq_acc == true) {
+ access_time = tag_arr->access_time + data_arr->access_time;
+ }
+ // Normal access: tag array access and data array access happen in parallel.
+ // But, the data array will wait for the way-select and transfer only the
+ // appropriate block over the h-tree.
+ else {
+ access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
+ data_arr->delay_before_subarray_output_driver) +
+ data_arr->delay_from_subarray_output_driver_to_output;
+ }
}
-void uca_org_t::find_energy()
-{
- if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache)
- power = data_array2->power + tag_array2->power;
- else
- power = data_array2->power;
+void uca_org_t::find_energy() {
+ if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc))
+ power = data_array2->power + tag_array2->power;
+ else
+ power = data_array2->power;
}
-void uca_org_t::find_area()
-{
- if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false)
- {
- cache_ht = data_array2->height;
- cache_len = data_array2->width;
- }
- else
- {
- cache_ht = MAX(tag_array2->height, data_array2->height);
- cache_len = tag_array2->width + data_array2->width;
- }
- area = cache_ht * cache_len;
+void uca_org_t::find_area() {
+ if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
+ cache_ht = data_array2->height;
+ cache_len = data_array2->width;
+ } else {
+ cache_ht = MAX(tag_array2->height, data_array2->height);
+ cache_len = tag_array2->width + data_array2->width;
+ }
+ area = cache_ht * cache_len;
}
-void uca_org_t::adjust_area()
-{
- double area_adjust;
- if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
- {
- if (data_array2->area_efficiency/100.0<0.2)
- {
- //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
- area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0));
- cache_ht = cache_ht/area_adjust;
- cache_len = cache_len/area_adjust;
+void uca_org_t::adjust_area() {
+ double area_adjust;
+ if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
+ if (data_array2->area_efficiency / 100.0 < 0.2) {
+ //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
+ area_adjust = sqrt(0.2 / (data_array2->area_efficiency / 100.0));
+ cache_ht = cache_ht / area_adjust;
+ cache_len = cache_len / area_adjust;
+ }
}
- }
- area = cache_ht * cache_len;
+ area = cache_ht * cache_len;
}
-void uca_org_t::find_cyc()
-{
- if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false)
- {
- cycle_time = data_array2->cycle_time;
- }
- else
- {
- cycle_time = MAX(tag_array2->cycle_time,
- data_array2->cycle_time);
- }
+void uca_org_t::find_cyc() {
+ if ((g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
+ cycle_time = data_array2->cycle_time;
+ } else {
+ cycle_time = MAX(tag_array2->cycle_time,
+ data_array2->cycle_time);
+ }
}
uca_org_t :: uca_org_t()
-:tag_array2(0),
- data_array2(0)
-{
+ : tag_array2(0),
+ data_array2(0) {
}
-void uca_org_t :: cleanup()
-{
- if (data_array2!=0)
- delete data_array2;
- if (tag_array2!=0)
- delete tag_array2;
+void uca_org_t :: cleanup() {
+ if (data_array2 != 0)
+ delete data_array2;
+ if (tag_array2 != 0)
+ delete tag_array2;
}
diff --git a/ext/mcpat/cacti/cacti_interface.h b/ext/mcpat/cacti/cacti_interface.h
index f37596554..a2bddd819 100644
--- a/ext/mcpat/cacti/cacti_interface.h
+++ b/ext/mcpat/cacti/cacti_interface.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -50,9 +51,8 @@ class mem_array;
class uca_org_t;
-class powerComponents
-{
- public:
+class powerComponents {
+public:
double dynamic;
double leakage;
double gate_leakage;
@@ -60,17 +60,24 @@ class powerComponents
double longer_channel_leakage;
powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { }
- powerComponents(const powerComponents & obj) { *this = obj; }
- powerComponents & operator=(const powerComponents & rhs)
- {
- dynamic = rhs.dynamic;
- leakage = rhs.leakage;
- gate_leakage = rhs.gate_leakage;
- short_circuit = rhs.short_circuit;
- longer_channel_leakage = rhs.longer_channel_leakage;
- return *this;
+ powerComponents(const powerComponents & obj) {
+ *this = obj;
+ }
+ powerComponents & operator=(const powerComponents & rhs) {
+ dynamic = rhs.dynamic;
+ leakage = rhs.leakage;
+ gate_leakage = rhs.gate_leakage;
+ short_circuit = rhs.short_circuit;
+ longer_channel_leakage = rhs.longer_channel_leakage;
+ return *this;
+ }
+ void reset() {
+ dynamic = 0;
+ leakage = 0;
+ gate_leakage = 0;
+ short_circuit = 0;
+ longer_channel_leakage = 0;
}
- void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;}
friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
friend powerComponents operator*(const powerComponents & x, double const * const y);
@@ -78,22 +85,24 @@ class powerComponents
-class powerDef
-{
- public:
+class powerDef {
+public:
powerComponents readOp;
powerComponents writeOp;
powerComponents searchOp;//Sheng: for CAM and FA
powerDef() : readOp(), writeOp(), searchOp() { }
- void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
+ void reset() {
+ readOp.reset();
+ writeOp.reset();
+ searchOp.reset();
+ }
friend powerDef operator+(const powerDef & x, const powerDef & y);
friend powerDef operator*(const powerDef & x, double const * const y);
};
-enum Wire_type
-{
+enum Wire_type {
Global /* gloabl wires with repeaters */,
Global_5 /* 5% delay penalty */,
Global_10 /* 10% delay penalty */,
@@ -108,12 +117,12 @@ enum Wire_type
-class InputParameter
-{
- public:
+class InputParameter {
+public:
void parse_cfg(const string & infile);
- bool error_checking(); // return false if the input parameters are problematic
+ // return false if the input parameters are problematic
+ bool error_checking(string name = "CACTI");
void display_ip();
unsigned int cache_sz; // in bytes
@@ -172,14 +181,14 @@ class InputParameter
int force_nuca_bank;
int delay_wt, dynamic_power_wt, leakage_power_wt,
- cycle_time_wt, area_wt;
+ cycle_time_wt, area_wt;
int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
- cycle_time_wt_nuca, area_wt_nuca;
+ cycle_time_wt_nuca, area_wt_nuca;
int delay_dev, dynamic_power_dev, leakage_power_dev,
- cycle_time_dev, area_dev;
+ cycle_time_dev, area_dev;
int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
- cycle_time_dev_nuca, area_dev_nuca;
+ cycle_time_dev_nuca, area_dev_nuca;
int ed; //ED or ED2 optimization
int nuca;
@@ -194,167 +203,113 @@ class InputParameter
bool add_ecc_b_;
- //parameters for design constraint
- double throughput;
- double latency;
- bool pipelinable;
- int pipeline_stages;
- int per_stage_vector;
- bool with_clock_grid;
+ //parameters for design constraint
+ double throughput;
+ double latency;
+ bool pipelinable;
+ int pipeline_stages;
+ int per_stage_vector;
+ bool with_clock_grid;
};
-typedef struct{
- int Ndwl;
- int Ndbl;
- double Nspd;
- int deg_bl_muxing;
- int Ndsam_lev_1;
- int Ndsam_lev_2;
- int number_activated_mats_horizontal_direction;
- int number_subbanks;
- int page_size_in_bits;
- double delay_route_to_bank;
- double delay_crossbar;
- double delay_addr_din_horizontal_htree;
- double delay_addr_din_vertical_htree;
- double delay_row_predecode_driver_and_block;
- double delay_row_decoder;
- double delay_bitlines;
- double delay_sense_amp;
- double delay_subarray_output_driver;
- double delay_bit_mux_predecode_driver_and_block;
- double delay_bit_mux_decoder;
- double delay_senseamp_mux_lev_1_predecode_driver_and_block;
- double delay_senseamp_mux_lev_1_decoder;
- double delay_senseamp_mux_lev_2_predecode_driver_and_block;
- double delay_senseamp_mux_lev_2_decoder;
- double delay_input_htree;
- double delay_output_htree;
- double delay_dout_vertical_htree;
- double delay_dout_horizontal_htree;
- double delay_comparator;
- double access_time;
- double cycle_time;
- double multisubbank_interleave_cycle_time;
- double delay_request_network;
- double delay_inside_mat;
- double delay_reply_network;
- double trcd;
- double cas_latency;
- double precharge_delay;
- powerDef power_routing_to_bank;
- powerDef power_addr_input_htree;
- powerDef power_data_input_htree;
- powerDef power_data_output_htree;
- powerDef power_addr_horizontal_htree;
- powerDef power_datain_horizontal_htree;
- powerDef power_dataout_horizontal_htree;
- powerDef power_addr_vertical_htree;
- powerDef power_datain_vertical_htree;
- powerDef power_row_predecoder_drivers;
- powerDef power_row_predecoder_blocks;
- powerDef power_row_decoders;
- powerDef power_bit_mux_predecoder_drivers;
- powerDef power_bit_mux_predecoder_blocks;
- powerDef power_bit_mux_decoders;
- powerDef power_senseamp_mux_lev_1_predecoder_drivers;
- powerDef power_senseamp_mux_lev_1_predecoder_blocks;
- powerDef power_senseamp_mux_lev_1_decoders;
- powerDef power_senseamp_mux_lev_2_predecoder_drivers;
- powerDef power_senseamp_mux_lev_2_predecoder_blocks;
- powerDef power_senseamp_mux_lev_2_decoders;
- powerDef power_bitlines;
- powerDef power_sense_amps;
- powerDef power_prechg_eq_drivers;
- powerDef power_output_drivers_at_subarray;
- powerDef power_dataout_vertical_htree;
- powerDef power_comparators;
- powerDef power_crossbar;
- powerDef total_power;
- double area;
- double all_banks_height;
- double all_banks_width;
- double bank_height;
- double bank_width;
- double subarray_memory_cell_area_height;
- double subarray_memory_cell_area_width;
- double mat_height;
- double mat_width;
- double routing_area_height_within_bank;
- double routing_area_width_within_bank;
- double area_efficiency;
-// double perc_power_dyn_routing_to_bank;
-// double perc_power_dyn_addr_horizontal_htree;
-// double perc_power_dyn_datain_horizontal_htree;
-// double perc_power_dyn_dataout_horizontal_htree;
-// double perc_power_dyn_addr_vertical_htree;
-// double perc_power_dyn_datain_vertical_htree;
-// double perc_power_dyn_row_predecoder_drivers;
-// double perc_power_dyn_row_predecoder_blocks;
-// double perc_power_dyn_row_decoders;
-// double perc_power_dyn_bit_mux_predecoder_drivers;
-// double perc_power_dyn_bit_mux_predecoder_blocks;
-// double perc_power_dyn_bit_mux_decoders;
-// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
-// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
-// double perc_power_dyn_senseamp_mux_lev_1_decoders;
-// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
-// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
-// double perc_power_dyn_senseamp_mux_lev_2_decoders;
-// double perc_power_dyn_bitlines;
-// double perc_power_dyn_sense_amps;
-// double perc_power_dyn_prechg_eq_drivers;
-// double perc_power_dyn_subarray_output_drivers;
-// double perc_power_dyn_dataout_vertical_htree;
-// double perc_power_dyn_comparators;
-// double perc_power_dyn_crossbar;
-// double perc_power_dyn_spent_outside_mats;
-// double perc_power_leak_routing_to_bank;
-// double perc_power_leak_addr_horizontal_htree;
-// double perc_power_leak_datain_horizontal_htree;
-// double perc_power_leak_dataout_horizontal_htree;
-// double perc_power_leak_addr_vertical_htree;
-// double perc_power_leak_datain_vertical_htree;
-// double perc_power_leak_row_predecoder_drivers;
-// double perc_power_leak_row_predecoder_blocks;
-// double perc_power_leak_row_decoders;
-// double perc_power_leak_bit_mux_predecoder_drivers;
-// double perc_power_leak_bit_mux_predecoder_blocks;
-// double perc_power_leak_bit_mux_decoders;
-// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
-// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
-// double perc_power_leak_senseamp_mux_lev_1_decoders;
-// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
-// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
-// double perc_power_leak_senseamp_mux_lev_2_decoders;
-// double perc_power_leak_bitlines;
-// double perc_power_leak_sense_amps;
-// double perc_power_leak_prechg_eq_drivers;
-// double perc_power_leak_subarray_output_drivers;
-// double perc_power_leak_dataout_vertical_htree;
-// double perc_power_leak_comparators;
-// double perc_power_leak_crossbar;
-// double perc_leak_mats;
-// double perc_active_mats;
- double refresh_power;
- double dram_refresh_period;
- double dram_array_availability;
- double dyn_read_energy_from_closed_page;
- double dyn_read_energy_from_open_page;
- double leak_power_subbank_closed_page;
- double leak_power_subbank_open_page;
- double leak_power_request_and_reply_networks;
- double activate_energy;
- double read_energy;
- double write_energy;
- double precharge_energy;
+typedef struct {
+ int Ndwl;
+ int Ndbl;
+ double Nspd;
+ int deg_bl_muxing;
+ int Ndsam_lev_1;
+ int Ndsam_lev_2;
+ int number_activated_mats_horizontal_direction;
+ int number_subbanks;
+ int page_size_in_bits;
+ double delay_route_to_bank;
+ double delay_crossbar;
+ double delay_addr_din_horizontal_htree;
+ double delay_addr_din_vertical_htree;
+ double delay_row_predecode_driver_and_block;
+ double delay_row_decoder;
+ double delay_bitlines;
+ double delay_sense_amp;
+ double delay_subarray_output_driver;
+ double delay_bit_mux_predecode_driver_and_block;
+ double delay_bit_mux_decoder;
+ double delay_senseamp_mux_lev_1_predecode_driver_and_block;
+ double delay_senseamp_mux_lev_1_decoder;
+ double delay_senseamp_mux_lev_2_predecode_driver_and_block;
+ double delay_senseamp_mux_lev_2_decoder;
+ double delay_input_htree;
+ double delay_output_htree;
+ double delay_dout_vertical_htree;
+ double delay_dout_horizontal_htree;
+ double delay_comparator;
+ double access_time;
+ double cycle_time;
+ double multisubbank_interleave_cycle_time;
+ double delay_request_network;
+ double delay_inside_mat;
+ double delay_reply_network;
+ double trcd;
+ double cas_latency;
+ double precharge_delay;
+ powerDef power_routing_to_bank;
+ powerDef power_addr_input_htree;
+ powerDef power_data_input_htree;
+ powerDef power_data_output_htree;
+ powerDef power_addr_horizontal_htree;
+ powerDef power_datain_horizontal_htree;
+ powerDef power_dataout_horizontal_htree;
+ powerDef power_addr_vertical_htree;
+ powerDef power_datain_vertical_htree;
+ powerDef power_row_predecoder_drivers;
+ powerDef power_row_predecoder_blocks;
+ powerDef power_row_decoders;
+ powerDef power_bit_mux_predecoder_drivers;
+ powerDef power_bit_mux_predecoder_blocks;
+ powerDef power_bit_mux_decoders;
+ powerDef power_senseamp_mux_lev_1_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_1_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_1_decoders;
+ powerDef power_senseamp_mux_lev_2_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_2_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_2_decoders;
+ powerDef power_bitlines;
+ powerDef power_sense_amps;
+ powerDef power_prechg_eq_drivers;
+ powerDef power_output_drivers_at_subarray;
+ powerDef power_dataout_vertical_htree;
+ powerDef power_comparators;
+ powerDef power_crossbar;
+ powerDef total_power;
+ double area;
+ double all_banks_height;
+ double all_banks_width;
+ double bank_height;
+ double bank_width;
+ double subarray_memory_cell_area_height;
+ double subarray_memory_cell_area_width;
+ double mat_height;
+ double mat_width;
+ double routing_area_height_within_bank;
+ double routing_area_width_within_bank;
+ double area_efficiency;
+ double refresh_power;
+ double dram_refresh_period;
+ double dram_array_availability;
+ double dyn_read_energy_from_closed_page;
+ double dyn_read_energy_from_open_page;
+ double leak_power_subbank_closed_page;
+ double leak_power_subbank_open_page;
+ double leak_power_request_and_reply_networks;
+ double activate_energy;
+ double read_energy;
+ double write_energy;
+ double precharge_energy;
} results_mem_array;
-class uca_org_t
-{
- public:
+class uca_org_t {
+public:
mem_array * tag_array2;
mem_array * data_array2;
double access_time;
@@ -378,7 +333,7 @@ class uca_org_t
void find_cyc();
void adjust_area();//for McPAT only to adjust routing overhead
void cleanup();
- ~uca_org_t(){};
+ ~uca_org_t() {};
};
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
@@ -387,103 +342,62 @@ uca_org_t cacti_interface(const string & infile_name);
//McPAT's plain interface, please keep !!!
uca_org_t cacti_interface(InputParameter * const local_interface);
//McPAT's plain interface, please keep !!!
-uca_org_t init_interface(InputParameter * const local_interface);
+uca_org_t init_interface(InputParameter * const local_interface,
+ const string &name);
//McPAT's plain interface, please keep !!!
uca_org_t cacti_interface(
- int cache_size,
- int line_size,
- int associativity,
- int rw_ports,
- int excl_read_ports,
- int excl_write_ports,
- int single_ended_read_ports,
- int search_ports,
- int banks,
- double tech_node,
- int output_width,
- int specific_tag,
- int tag_width,
- int access_mode,
- int cache,
- int main_mem,
- int obj_func_delay,
- int obj_func_dynamic_power,
- int obj_func_leakage_power,
- int obj_func_cycle_time,
- int obj_func_area,
- int dev_func_delay,
- int dev_func_dynamic_power,
- int dev_func_leakage_power,
- int dev_func_area,
- int dev_func_cycle_time,
- int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
- int temp,
- int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
- int data_arr_ram_cell_tech_flavor_in,
- int data_arr_peri_global_tech_flavor_in,
- int tag_arr_ram_cell_tech_flavor_in,
- int tag_arr_peri_global_tech_flavor_in,
- int interconnect_projection_type_in,
- int wire_inside_mat_type_in,
- int wire_outside_mat_type_in,
- int REPEATERS_IN_HTREE_SEGMENTS_in,
- int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
- int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
- int PAGE_SIZE_BITS_in,
- int BURST_LENGTH_in,
- int INTERNAL_PREFETCH_WIDTH_in,
- int force_wiretype,
- int wiretype,
- int force_config,
- int ndwl,
- int ndbl,
- int nspd,
- int ndcm,
- int ndsam1,
- int ndsam2,
- int ecc);
-// int cache_size,
-// int line_size,
-// int associativity,
-// int rw_ports,
-// int excl_read_ports,
-// int excl_write_ports,
-// int single_ended_read_ports,
-// int banks,
-// double tech_node,
-// int output_width,
-// int specific_tag,
-// int tag_width,
-// int access_mode,
-// int cache,
-// int main_mem,
-// int obj_func_delay,
-// int obj_func_dynamic_power,
-// int obj_func_leakage_power,
-// int obj_func_area,
-// int obj_func_cycle_time,
-// int dev_func_delay,
-// int dev_func_dynamic_power,
-// int dev_func_leakage_power,
-// int dev_func_area,
-// int dev_func_cycle_time,
-// int temp,
-// int data_arr_ram_cell_tech_flavor_in,
-// int data_arr_peri_global_tech_flavor_in,
-// int tag_arr_ram_cell_tech_flavor_in,
-// int tag_arr_peri_global_tech_flavor_in,
-// int interconnect_projection_type_in,
-// int wire_inside_mat_type_in,
-// int wire_outside_mat_type_in,
-// int REPEATERS_IN_HTREE_SEGMENTS_in,
-// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
-// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
-//// double MAXAREACONSTRAINT_PERC_in,
-//// double MAXACCTIMECONSTRAINT_PERC_in,
-//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in,
-// int PAGE_SIZE_BITS_in,
-// int BURST_LENGTH_in,
-// int INTERNAL_PREFETCH_WIDTH_in);
+ int cache_size,
+ int line_size,
+ int associativity,
+ int rw_ports,
+ int excl_read_ports,
+ int excl_write_ports,
+ int single_ended_read_ports,
+ int search_ports,
+ int banks,
+ double tech_node,
+ int output_width,
+ int specific_tag,
+ int tag_width,
+ int access_mode,
+ int cache,
+ int main_mem,
+ int obj_func_delay,
+ int obj_func_dynamic_power,
+ int obj_func_leakage_power,
+ int obj_func_cycle_time,
+ int obj_func_area,
+ int dev_func_delay,
+ int dev_func_dynamic_power,
+ int dev_func_leakage_power,
+ int dev_func_area,
+ int dev_func_cycle_time,
+ int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
+ int temp,
+ int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
+ int data_arr_ram_cell_tech_flavor_in,
+ int data_arr_peri_global_tech_flavor_in,
+ int tag_arr_ram_cell_tech_flavor_in,
+ int tag_arr_peri_global_tech_flavor_in,
+ int interconnect_projection_type_in,
+ int wire_inside_mat_type_in,
+ int wire_outside_mat_type_in,
+ int REPEATERS_IN_HTREE_SEGMENTS_in,
+ int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
+ int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
+ int PAGE_SIZE_BITS_in,
+ int BURST_LENGTH_in,
+ int INTERNAL_PREFETCH_WIDTH_in,
+ int force_wiretype,
+ int wiretype,
+ int force_config,
+ int ndwl,
+ int ndbl,
+ int nspd,
+ int ndcm,
+ int ndsam1,
+ int ndsam2,
+ int ecc);
//Naveen's interface
uca_org_t cacti_interface(
@@ -542,91 +456,90 @@ uca_org_t cacti_interface(
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
int p_input);
-class mem_array
-{
- public:
- int Ndcm;
- int Ndwl;
- int Ndbl;
- double Nspd;
- int deg_bl_muxing;
- int Ndsam_lev_1;
- int Ndsam_lev_2;
- double access_time;
- double cycle_time;
- double multisubbank_interleave_cycle_time;
- double area_ram_cells;
- double area;
- powerDef power;
- double delay_senseamp_mux_decoder;
- double delay_before_subarray_output_driver;
- double delay_from_subarray_output_driver_to_output;
- double height;
- double width;
-
- double mat_height;
- double mat_length;
- double subarray_length;
- double subarray_height;
-
- double delay_route_to_bank,
- delay_input_htree,
- delay_row_predecode_driver_and_block,
- delay_row_decoder,
- delay_bitlines,
- delay_sense_amp,
- delay_subarray_output_driver,
- delay_dout_htree,
- delay_comparator,
- delay_matchlines;
-
- double all_banks_height,
- all_banks_width,
- area_efficiency;
-
- powerDef power_routing_to_bank;
- powerDef power_addr_input_htree;
- powerDef power_data_input_htree;
- powerDef power_data_output_htree;
- powerDef power_htree_in_search;
- powerDef power_htree_out_search;
- powerDef power_row_predecoder_drivers;
- powerDef power_row_predecoder_blocks;
- powerDef power_row_decoders;
- powerDef power_bit_mux_predecoder_drivers;
- powerDef power_bit_mux_predecoder_blocks;
- powerDef power_bit_mux_decoders;
- powerDef power_senseamp_mux_lev_1_predecoder_drivers;
- powerDef power_senseamp_mux_lev_1_predecoder_blocks;
- powerDef power_senseamp_mux_lev_1_decoders;
- powerDef power_senseamp_mux_lev_2_predecoder_drivers;
- powerDef power_senseamp_mux_lev_2_predecoder_blocks;
- powerDef power_senseamp_mux_lev_2_decoders;
- powerDef power_bitlines;
- powerDef power_sense_amps;
- powerDef power_prechg_eq_drivers;
- powerDef power_output_drivers_at_subarray;
- powerDef power_dataout_vertical_htree;
- powerDef power_comparators;
-
- powerDef power_cam_bitline_precharge_eq_drv;
- powerDef power_searchline;
- powerDef power_searchline_precharge;
- powerDef power_matchlines;
- powerDef power_matchline_precharge;
- powerDef power_matchline_to_wordline_drv;
-
- min_values_t *arr_min;
- enum Wire_type wt;
-
- // dram stats
- double activate_energy, read_energy, write_energy, precharge_energy,
- refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
- leak_power_request_and_reply_networks;
-
- double precharge_delay;
-
- static bool lt(const mem_array * m1, const mem_array * m2);
+class mem_array {
+public:
+ int Ndcm;
+ int Ndwl;
+ int Ndbl;
+ double Nspd;
+ int deg_bl_muxing;
+ int Ndsam_lev_1;
+ int Ndsam_lev_2;
+ double access_time;
+ double cycle_time;
+ double multisubbank_interleave_cycle_time;
+ double area_ram_cells;
+ double area;
+ powerDef power;
+ double delay_senseamp_mux_decoder;
+ double delay_before_subarray_output_driver;
+ double delay_from_subarray_output_driver_to_output;
+ double height;
+ double width;
+
+ double mat_height;
+ double mat_length;
+ double subarray_length;
+ double subarray_height;
+
+ double delay_route_to_bank,
+ delay_input_htree,
+ delay_row_predecode_driver_and_block,
+ delay_row_decoder,
+ delay_bitlines,
+ delay_sense_amp,
+ delay_subarray_output_driver,
+ delay_dout_htree,
+ delay_comparator,
+ delay_matchlines;
+
+ double all_banks_height,
+ all_banks_width,
+ area_efficiency;
+
+ powerDef power_routing_to_bank;
+ powerDef power_addr_input_htree;
+ powerDef power_data_input_htree;
+ powerDef power_data_output_htree;
+ powerDef power_htree_in_search;
+ powerDef power_htree_out_search;
+ powerDef power_row_predecoder_drivers;
+ powerDef power_row_predecoder_blocks;
+ powerDef power_row_decoders;
+ powerDef power_bit_mux_predecoder_drivers;
+ powerDef power_bit_mux_predecoder_blocks;
+ powerDef power_bit_mux_decoders;
+ powerDef power_senseamp_mux_lev_1_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_1_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_1_decoders;
+ powerDef power_senseamp_mux_lev_2_predecoder_drivers;
+ powerDef power_senseamp_mux_lev_2_predecoder_blocks;
+ powerDef power_senseamp_mux_lev_2_decoders;
+ powerDef power_bitlines;
+ powerDef power_sense_amps;
+ powerDef power_prechg_eq_drivers;
+ powerDef power_output_drivers_at_subarray;
+ powerDef power_dataout_vertical_htree;
+ powerDef power_comparators;
+
+ powerDef power_cam_bitline_precharge_eq_drv;
+ powerDef power_searchline;
+ powerDef power_searchline_precharge;
+ powerDef power_matchlines;
+ powerDef power_matchline_precharge;
+ powerDef power_matchline_to_wordline_drv;
+
+ min_values_t *arr_min;
+ enum Wire_type wt;
+
+ // dram stats
+ double activate_energy, read_energy, write_energy, precharge_energy,
+ refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
+ leak_power_request_and_reply_networks;
+
+ double precharge_delay;
+
+ static bool lt(const mem_array * m1, const mem_array * m2);
};
diff --git a/ext/mcpat/cacti/component.cc b/ext/mcpat/cacti/component.cc
index 733108407..90e9baedf 100644
--- a/ext/mcpat/cacti/component.cc
+++ b/ext/mcpat/cacti/component.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -45,34 +46,30 @@ using namespace std;
Component::Component()
- :area(), power(), rt_power(),delay(0)
-{
+ : area(), power(), rt_power(), delay(0) {
}
-Component::~Component()
-{
+Component::~Component() {
}
-double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr)
-{
- double w_poly = g_ip->F_sz_um;
- double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
- double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
- num_stacked_in * w_poly +
- (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
+double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) {
+ double w_poly = g_ip->F_sz_um;
+ double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
+ double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
+ num_stacked_in * w_poly +
+ (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
- if (num_folded_tr > 1)
- {
- total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
- (num_folded_tr - 1) * num_stacked_in * w_poly +
- (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
- }
+ if (num_folded_tr > 1) {
+ total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
+ (num_folded_tr - 1) * num_stacked_in * w_poly +
+ (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
+ }
- return total_diff_w;
+ return total_diff_w;
}
@@ -82,105 +79,96 @@ double Component::compute_gate_area(
int num_inputs,
double w_pmos,
double w_nmos,
- double h_gate)
-{
- if (w_pmos <= 0.0 || w_nmos <= 0.0)
- {
- return 0.0;
- }
-
- double w_folded_pmos, w_folded_nmos;
- int num_folded_pmos, num_folded_nmos;
- double total_ndiff_w, total_pdiff_w;
- Area gate;
-
- double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
- double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
-
- if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0)
- {
- return 0.0;
- }
-
- w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
- w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
- assert(w_folded_pmos > 0);
-
- num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
- num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
-
- switch (gate_type)
- {
+ double h_gate) {
+ if (w_pmos <= 0.0 || w_nmos <= 0.0) {
+ return 0.0;
+ }
+
+ double w_folded_pmos, w_folded_nmos;
+ int num_folded_pmos, num_folded_nmos;
+ double total_ndiff_w, total_pdiff_w;
+ Area gate;
+
+ double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
+ double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
+
+ if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) {
+ return 0.0;
+ }
+
+ w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
+ w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
+ assert(w_folded_pmos > 0);
+
+ num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
+ num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
+
+ switch (gate_type) {
case INV:
- total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
- total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
- break;
+ total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
+ break;
case NOR:
- total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
- total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
- break;
+ total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
+ break;
case NAND:
- total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
- total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
- break;
+ total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
+ total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
+ break;
default:
- cout << "Unknown gate type: " << gate_type << endl;
- exit(1);
- }
-
- gate.w = MAX(total_ndiff_w, total_pdiff_w);
-
- if (w_folded_nmos > w_nmos)
- {
- //means that the height of the gate can
- //be made smaller than the input height specified, so calculate the height of the gate.
- gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
- }
- else
- {
- gate.h = h_gate;
- }
- return gate.get_area();
+ cout << "Unknown gate type: " << gate_type << endl;
+ exit(1);
+ }
+
+ gate.w = MAX(total_ndiff_w, total_pdiff_w);
+
+ if (w_folded_nmos > w_nmos) {
+ //means that the height of the gate can
+ //be made smaller than the input height specified, so calculate the height of the gate.
+ gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
+ } else {
+ gate.h = h_gate;
+ }
+ return gate.get_area();
}
double Component::compute_tr_width_after_folding(
double input_width,
- double threshold_folding_width)
-{//This is actually the width of the cell not the width of a device.
-//The width of a cell and the width of a device is orthogonal.
- if (input_width <= 0)
- {
- return 0;
- }
-
- int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
- double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
- double width_poly = g_ip->F_sz_um;
- double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
-
- return total_diff_width;
+ double threshold_folding_width) {
+ //This is actually the width of the cell not the width of a device.
+ //The width of a cell and the width of a device is orthogonal.
+ if (input_width <= 0) {
+ return 0;
+ }
+
+ int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
+ double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
+ double width_poly = g_ip->F_sz_um;
+ double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
+
+ return total_diff_width;
}
-double Component::height_sense_amplifier(double pitch_sense_amp)
-{
- // compute the height occupied by all PMOS transistors
- double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
- compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
- 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
+double Component::height_sense_amplifier(double pitch_sense_amp) {
+ // compute the height occupied by all PMOS transistors
+ double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
+ compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
+ 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
- // compute the height occupied by all NMOS transistors
- double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
- compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
- 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
+ // compute the height occupied by all NMOS transistors
+ double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
+ compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
+ 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
- // compute total height by considering gap between the p and n diffusion areas
- return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
+ // compute total height by considering gap between the p and n diffusion areas
+ return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
}
@@ -195,42 +183,39 @@ int Component::logical_effort(
double p_to_n_sz_ratio,
bool is_dram_,
bool is_wl_tr_,
- double max_w_nmos)
-{
- int num_gates = (int) (log(F) / log(fopt));
-
- // check if num_gates is odd. if so, add 1 to make it even
- num_gates+= (num_gates % 2) ? 1 : 0;
- num_gates = MAX(num_gates, num_gates_min);
-
- // recalculate the effective fanout of each stage
- double f = pow(F, 1.0 / num_gates);
- int i = num_gates - 1;
- double C_in = C_load / f;
- w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
- w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
- w_p[i] = p_to_n_sz_ratio * w_n[i];
-
- if (w_n[i] > max_w_nmos)
- {
- double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
- F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
- num_gates = (int) (log(F) / log(fopt)) + 1;
- num_gates+= (num_gates % 2) ? 1 : 0;
+ double max_w_nmos) {
+ int num_gates = (int) (log(F) / log(fopt));
+
+ // check if num_gates is odd. if so, add 1 to make it even
+ num_gates += (num_gates % 2) ? 1 : 0;
num_gates = MAX(num_gates, num_gates_min);
- f = pow(F, 1.0 / (num_gates - 1));
- i = num_gates - 1;
- w_n[i] = max_w_nmos;
- w_p[i] = p_to_n_sz_ratio * w_n[i];
- }
- for (i = num_gates - 2; i >= 1; i--)
- {
- w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
- w_p[i] = p_to_n_sz_ratio * w_n[i];
- }
+ // recalculate the effective fanout of each stage
+ double f = pow(F, 1.0 / num_gates);
+ int i = num_gates - 1;
+ double C_in = C_load / f;
+ w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
+ w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
- assert(num_gates <= MAX_NUMBER_GATES_STAGE);
- return num_gates;
+ if (w_n[i] > max_w_nmos) {
+ double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
+ F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
+ num_gates = (int) (log(F) / log(fopt)) + 1;
+ num_gates += (num_gates % 2) ? 1 : 0;
+ num_gates = MAX(num_gates, num_gates_min);
+ f = pow(F, 1.0 / (num_gates - 1));
+ i = num_gates - 1;
+ w_n[i] = max_w_nmos;
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
+ }
+
+ for (i = num_gates - 2; i >= 1; i--) {
+ w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
+ w_p[i] = p_to_n_sz_ratio * w_n[i];
+ }
+
+ assert(num_gates <= MAX_NUMBER_GATES_STAGE);
+ return num_gates;
}
diff --git a/ext/mcpat/cacti/component.h b/ext/mcpat/cacti/component.h
index 75e2cb075..416e4e8e5 100644
--- a/ext/mcpat/cacti/component.h
+++ b/ext/mcpat/cacti/component.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -42,41 +43,32 @@ using namespace std;
class Crossbar;
class Bank;
-class Component
-{
- public:
+class Component {
+public:
Component();
~Component();
Area area;
- powerDef power,rt_power;
+ // TODO: THERE IS LITTLE AGREEMENT THROUGHOUT THIS CODE ABOUT HOW THESE
+ // VARIABLES SHOULD BE USED. PART OF THE PROBLEM IS NAMING. SO THAT THIS
+ // MAKES MORE SENSE, ENERGY CALCULATIONS SHOULD BE SPLIT FROM POWER
+ // CALCULATIONS. THIS IS THE WORST DESIGN PROBLEM THAT STILL EXISTS
+ powerDef power, rt_power;
double delay;
double cycle_time;
- double compute_gate_area(
- int gate_type,
- int num_inputs,
- double w_pmos,
- double w_nmos,
- double h_gate);
-
- double compute_tr_width_after_folding(double input_width, double threshold_folding_width);
+ double compute_gate_area(int gate_type, int num_inputs, double w_pmos,
+ double w_nmos, double h_gate);
+ double compute_tr_width_after_folding(double input_width,
+ double threshold_folding_width);
double height_sense_amplifier(double pitch_sense_amp);
- protected:
- int logical_effort(
- int num_gates_min,
- double g,
- double F,
- double * w_n,
- double * w_p,
- double C_load,
- double p_to_n_sz_ratio,
- bool is_dram_,
- bool is_wl_tr_,
- double max_w_nmos);
+protected:
+ int logical_effort(int num_gates_min, double g, double F, double * w_n,
+ double * w_p, double C_load, double p_to_n_sz_ratio,
+ bool is_dram_, bool is_wl_tr_, double max_w_nmos);
- private:
+private:
double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
};
diff --git a/ext/mcpat/cacti/const.h b/ext/mcpat/cacti/const.h
index aef7d019b..c9b3905bf 100644
--- a/ext/mcpat/cacti/const.h
+++ b/ext/mcpat/cacti/const.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -249,21 +250,20 @@ const double bit_to_byte = 8.0;
// v : vertical or velocity
-enum ram_cell_tech_type_num
-{
- itrs_hp = 0,
- itrs_lstp = 1,
- itrs_lop = 2,
- lp_dram = 3,
- comm_dram = 4
+enum ram_cell_tech_type_num {
+ itrs_hp = 0,
+ itrs_lstp = 1,
+ itrs_lop = 2,
+ lp_dram = 3,
+ comm_dram = 4
};
-const double pppm[4] = {1,1,1,1};
-const double pppm_lkg[4] = {0,1,1,0};
-const double pppm_dyn[4] = {1,0,0,0};
-const double pppm_Isub[4] = {0,1,0,0};
-const double pppm_Ig[4] = {0,0,1,0};
-const double pppm_sc[4] = {0,0,0,1};
+const double pppm[4] = {1, 1, 1, 1};
+const double pppm_lkg[4] = {0, 1, 1, 0};
+const double pppm_dyn[4] = {1, 0, 0, 0};
+const double pppm_Isub[4] = {0, 1, 0, 0};
+const double pppm_Ig[4] = {0, 0, 1, 0};
+const double pppm_sc[4] = {0, 0, 0, 1};
diff --git a/ext/mcpat/cacti/crossbar.cc b/ext/mcpat/cacti/crossbar.cc
index a3d8532d5..ef2a373d6 100644
--- a/ext/mcpat/cacti/crossbar.cc
+++ b/ext/mcpat/cacti/crossbar.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,123 +40,140 @@ Crossbar::Crossbar(
double n_out_,
double flit_size_,
TechnologyParameter::DeviceType *dt
- ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
-{
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- Vdd = dt->Vdd;
- CB_ADJ = 1;
+): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) {
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ Vdd = dt->Vdd;
+ CB_ADJ = 1;
}
-Crossbar::~Crossbar(){}
+Crossbar::~Crossbar() {}
-double Crossbar::output_buffer()
-{
+double Crossbar::output_buffer() {
- //Wire winit(4, 4);
- double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
- Wire w1(g_ip->wt, l_eff);
- //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
- double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
- double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
- // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
- TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
- TriS2 = s1; //driver transistor
+ //Wire winit(4, 4);
+ double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
+ Wire w1(g_ip->wt, l_eff);
+ //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
+ double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ?
+ l_eff * ADJ / w1.repeater_spacing : ADJ);
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
+ TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
+ TriS2 = s1; //driver transistor
- if (TriS1 < 1)
- TriS1 = 1;
+ if (TriS1 < 1)
+ TriS1 = 1;
- double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
- gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
+ double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) +
+ gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0);
// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
// gate_C(TriS2*min_w_pmos, 0);
- tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
- gate_C(TriS2*g_tp.min_w_nmos_, 0)+
- drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
- drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(TriS2*min_w_pmos, 0);
- double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
- double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
-
- tri_inp_cap = input_cap;
- tri_out_cap = output_cap;
- tri_ctr_cap = ctr_cap;
- return input_cap + output_cap + ctr_cap;
+ tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
+ gate_C(TriS2 * g_tp.min_w_nmos_, 0) +
+ drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
+ drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(TriS2 * min_w_pmos, 0);
+ double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1,
+ g_tp.cell_h_def) +
+ drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
+ double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0);
+
+ tri_inp_cap = input_cap;
+ tri_out_cap = output_cap;
+ tri_ctr_cap = ctr_cap;
+ return input_cap + output_cap + ctr_cap;
}
-void Crossbar::compute_power()
-{
-
- Wire winit(4, 4);
- double tri_cap = output_buffer();
- assert(tri_cap > 0);
- //area of a tristate logic
- double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
- g_area *= 2; // to model area of output transistors
- g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
- g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
- double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
- // effective no. of tristate buffers that need to be laid side by side
- int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
- double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
- Wire w1(g_ip->wt, wire_len);
-
- area.w = wire_len;
- area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
- Wire w2(g_ip->wt, area.h);
-
- double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
- if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
-
- if (aspect_ratio_cb < ASPECT_THRESHOLD) {
- if (n_out > 2 && n_inp > 2) {
- CB_ADJ+=0.2;
- //cout << "CB ADJ " << CB_ADJ << endl;
- if (CB_ADJ < 4) {
- this->compute_power();
- }
+void Crossbar::compute_power() {
+
+ Wire winit(4, 4);
+ double tri_cap = output_buffer();
+ assert(tri_cap > 0);
+ //area of a tristate logic
+ double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_,
+ TriS2 * min_w_pmos, g_tp.cell_h_def);
+ g_area *= 2; // to model area of output transistors
+ g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_,
+ TriS1 * min_w_pmos, g_tp.cell_h_def);
+ g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_,
+ TriS1 * 2 * min_w_pmos, g_tp.cell_h_def);
+ double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def);
+ // effective no. of tristate buffers that need to be laid side by side
+ int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch));
+ double wire_len = MAX(width * ntri * n_out,
+ flit_size * g_tp.wire_outside_mat.pitch * n_out);
+ Wire w1(g_ip->wt, wire_len);
+
+ area.w = wire_len;
+ area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ;
+ Wire w2(g_ip->wt, area.h);
+
+ double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp);
+ if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb;
+
+ if (aspect_ratio_cb < ASPECT_THRESHOLD) {
+ if (n_out > 2 && n_inp > 2) {
+ CB_ADJ += 0.2;
+ //cout << "CB ADJ " << CB_ADJ << endl;
+ if (CB_ADJ < 4) {
+ this->compute_power();
+ }
+ }
}
- }
-
-
-
- power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
- power.readOp.leakage = n_inp * n_out * flit_size * (
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
- w1.power.readOp.leakage + w2.power.readOp.leakage);
- power.readOp.gate_leakage = n_inp * n_out * flit_size * (
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
- w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
-
- // delay calculation
- double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
- Wire wdriver(g_ip->wt, l_eff);
- double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
- double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
- delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
-
- Wire wreset();
+
+
+
+ power.readOp.dynamic =
+ (w1.power.readOp.dynamic + w2.power.readOp.dynamic +
+ (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap +
+ tri_int_cap) * Vdd * Vdd) * flit_size;
+ power.readOp.leakage = n_inp * n_out * flit_size * (
+ cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
+ 1, inv) * Vdd +
+ cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
+ 2, nand) * Vdd +
+ cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
+ 2, nor) * Vdd +
+ w1.power.readOp.leakage + w2.power.readOp.leakage);
+ power.readOp.gate_leakage = n_inp * n_out * flit_size * (
+ cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
+ 1, inv) * Vdd +
+ cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
+ 2, nand) * Vdd +
+ cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
+ 2, nor) * Vdd +
+ w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
+
+ // delay calculation
+ double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
+ Wire wdriver(g_ip->wt, l_eff);
+ double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) +
+ tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1);
+ double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out *
+ tri_inp_cap + n_inp * tri_out_cap;
+ delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth /
+ deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE);
+
+ Wire wreset();
}
-void Crossbar::print_crossbar()
-{
- cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
- cout << "Flit size : " << flit_size << " bits" << endl;
- cout << "Width : " << area.w << " u" << endl;
- cout << "Height : " << area.h << " u" << endl;
- cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
- cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
- cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
- cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
+void Crossbar::print_crossbar() {
+ cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
+ cout << "Flit size : " << flit_size << " bits" << endl;
+ cout << "Width : " << area.w << " u" << endl;
+ cout << "Height : " << area.h << " u" << endl;
+ cout << "Dynamic Power : " << power.readOp.dynamic*1e9 *
+ MIN(n_inp, n_out) << " (nJ)" << endl;
+ cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)"
+ << endl;
+ cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3
+ << " (mW)" << endl;
+ cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
}
diff --git a/ext/mcpat/cacti/crossbar.h b/ext/mcpat/cacti/crossbar.h
index 3b926517c..b8de7547b 100644
--- a/ext/mcpat/cacti/crossbar.h
+++ b/ext/mcpat/cacti/crossbar.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -44,14 +45,13 @@
#include "parameter.h"
#include "wire.h"
-class Crossbar : public Component
-{
- public:
+class Crossbar : public Component {
+public:
Crossbar(
- double in,
- double out,
- double flit_sz,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ double in,
+ double out,
+ double flit_sz,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
~Crossbar();
void print_crossbar();
@@ -62,18 +62,18 @@ class Crossbar : public Component
double flit_size;
double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap;
- private:
- double CB_ADJ;
- /*
- * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar
- * buffer is adjusted to get an aspect ratio of whole cross bar close to one;
- * when adjust the ratio, the number of wires route over the tri-state buffers does not change,
- * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase
- * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch
- * will increase. As a result, the height of the crossbar (area.h) will increase.
- */
-
- TechnologyParameter::DeviceType *deviceType;
+private:
+ double CB_ADJ;
+ /*
+ * Adjustment factor for the height of the cross-point (tri-state buffer) cell (layout) in the
+ * crossbar. The buffer is adjusted so that the aspect ratio of the whole crossbar is close to one.
+ * While the ratio is adjusted, the number of wires routed over the tri-state buffers does not change;
+ * however, the effective wiring pitch changes. Specifically, since CB_ADJ increases
+ * during the adjustment, the tri-state buffer becomes taller and thinner, and the effective wiring
+ * pitch increases. As a result, the height of the crossbar (area.h) increases.
+ */
+
+ TechnologyParameter::DeviceType *deviceType;
double TriS1, TriS2;
double min_w_pmos, Vdd;
diff --git a/ext/mcpat/cacti/decoder.cc b/ext/mcpat/cacti/decoder.cc
index 0de6f6157..7fa66b4ff 100644
--- a/ext/mcpat/cacti/decoder.cc
+++ b/ext/mcpat/cacti/decoder.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -51,207 +52,184 @@ Decoder::Decoder(
bool is_dram_,
bool is_wl_tr_,
const Area & cell_)
-:exist(false),
- C_ld_dec_out(_C_ld_dec_out),
- R_wire_dec_out(_R_wire_dec_out),
- num_gates(0), num_gates_min(2),
- delay(0),
- //power(),
- fully_assoc(fully_assoc_), is_dram(is_dram_),
- is_wl_tr(is_wl_tr_), cell(cell_)
-{
-
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- w_dec_n[i] = 0;
- w_dec_p[i] = 0;
- }
-
- /*
- * _num_dec_signals is the number of decoded signal as output
- * num_addr_bits_dec is the number of signal to be decoded
- * as the decoders input.
- */
- int num_addr_bits_dec = _log2(_num_dec_signals);
-
- if (num_addr_bits_dec < 4)
- {
- if (flag_way_select)
- {
- exist = true;
- num_in_signals = 2;
+ : exist(false),
+ C_ld_dec_out(_C_ld_dec_out),
+ R_wire_dec_out(_R_wire_dec_out),
+ num_gates(0), num_gates_min(2),
+ delay(0),
+ //power(),
+ fully_assoc(fully_assoc_), is_dram(is_dram_),
+ is_wl_tr(is_wl_tr_), cell(cell_) {
+
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
+ w_dec_n[i] = 0;
+ w_dec_p[i] = 0;
}
- else
- {
- num_in_signals = 0;
- }
- }
- else
- {
- exist = true;
- if (flag_way_select)
- {
- num_in_signals = 3;
- }
- else
- {
- num_in_signals = 2;
+ /*
+ * _num_dec_signals is the number of decoded signal as output
+ * num_addr_bits_dec is the number of signal to be decoded
+ * as the decoders input.
+ */
+ int num_addr_bits_dec = _log2(_num_dec_signals);
+
+ if (num_addr_bits_dec < 4) {
+ if (flag_way_select) {
+ exist = true;
+ num_in_signals = 2;
+ } else {
+ num_in_signals = 0;
+ }
+ } else {
+ exist = true;
+
+ if (flag_way_select) {
+ num_in_signals = 3;
+ } else {
+ num_in_signals = 2;
+ }
}
- }
- assert(cell.h>0);
- assert(cell.w>0);
- // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
- //area.h = 4 * cell.h;
- area.h = g_tp.h_dec * cell.h;
+ assert(cell.h > 0);
+ assert(cell.w > 0);
+ // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
+ //area.h = 4 * cell.h;
+ area.h = g_tp.h_dec * cell.h;
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void Decoder::compute_widths()
-{
- double F;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
- double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
-
- if (exist)
- {
- if (num_in_signals == 2 || fully_assoc)
- {
- w_dec_n[0] = 2 * g_tp.min_w_nmos_;
- w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2;
+// Sizes the decoder's gate chain: picks the first-stage NAND widths, then
+// lets logical_effort() fill w_dec_n / w_dec_p and report how many gates are
+// needed to drive C_ld_dec_out. Leaves everything untouched when the decoder
+// does not exist.
+void Decoder::compute_widths() {
+    double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
+    const double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+    const double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+
+    if (!exist) {
+        return;
+    }
+
+    // The first gate is sized as a NAND2 for two inputs or for a fully
+    // associative array, and as a NAND3 otherwise.
+    const bool first_gate_is_nand2 = (num_in_signals == 2 || fully_assoc);
+    w_dec_n[0] = (first_gate_is_nand2 ? 2 : 3) * g_tp.min_w_nmos_;
+    w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+
+    const double c_in_first_gate =
+        gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
+        gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr);
+    double F = first_gate_is_nand2 ? gnand2 : gnand3;
+    F *= C_ld_dec_out / c_in_first_gate;
+
+    // Note: the effort passed here is chosen from num_in_signals alone,
+    // exactly as before (fully_assoc is not consulted at this point).
+    num_gates = logical_effort(num_gates_min,
+                               num_in_signals == 2 ? gnand2 : gnand3,
+                               F,
+                               w_dec_n,
+                               w_dec_p,
+                               C_ld_dec_out,
+                               p_to_n_sz_ratio,
+                               is_dram,
+                               is_wl_tr,
+                               g_tp.max_w_nmos_dec);
- else
- {
- w_dec_n[0] = 3 * g_tp.min_w_nmos_;
- w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3;
- }
-
- F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
- gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
- num_gates = logical_effort(
- num_gates_min,
- num_in_signals == 2 ? gnand2 : gnand3,
- F,
- w_dec_n,
- w_dec_p,
- C_ld_dec_out,
- p_to_n_sz_ratio,
- is_dram,
- is_wl_tr,
- g_tp.max_w_nmos_dec);
- }
}
-void Decoder::compute_area()
-{
- double cumulative_area = 0;
- double cumulative_curr = 0; // cumulative leakage current
- double cumulative_curr_Ig = 0; // cumulative leakage current
-
- if (exist)
- { // First check if this decoder exists
- if (num_in_signals == 2)
- {
- cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
- cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
- }
- else if (num_in_signals == 3)
- {
- cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
- cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
- }
-
- for (int i = 1; i < num_gates; i++)
- {
- cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
- cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+// Totals the layout area and the leakage currents of every gate in the
+// decoder chain -- the first-stage NAND (2 or 3 inputs, per num_in_signals)
+// and the inverters behind it -- then sets area.w and the read-op leakage
+// figures from those totals. Does nothing when the decoder does not exist.
+void Decoder::compute_area() {
+    double cumulative_area = 0;
+    double cumulative_curr = 0;     // running sum of cmos_Isub_leakage()
+    double cumulative_curr_Ig = 0;  // running sum of cmos_Ig_leakage()
+
+    if (exist) { // First check if this decoder exists
+        // Stage 0: the NAND gate at the head of the chain.
+        if (num_in_signals == 2) {
+            cumulative_area =
+                compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
+            cumulative_curr =
+                cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
+            cumulative_curr_Ig =
+                cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
+        } else if (num_in_signals == 3) {
+            cumulative_area =
+                compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
+            cumulative_curr =
+                cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
+            cumulative_curr_Ig =
+                cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
+        }
+
+        // Stages 1 .. num_gates-1: the inverters that follow the NAND.
+        for (int i = 1; i < num_gates; i++) {
+            cumulative_area +=
+                compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
+            cumulative_curr +=
+                cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+            // Accumulate, like the two totals above. Plain assignment here
+            // would discard the NAND and all earlier inverters, leaving only
+            // the last inverter's gate leakage in the total.
+            cumulative_curr_Ig +=
+                cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+        }
+        power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
+        power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
+
+        // The cell height is fixed, so the width follows from the total area.
+        area.w = (cumulative_area / area.h);
+    }
- power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
- power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
-
- area.w = (cumulative_area / area.h);
- }
}
-double Decoder::compute_delays(double inrisetime)
-{
- if (exist)
- {
- double ret_val = 0; // outrisetime
- int i;
- double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
- double Vdd = g_tp.peri_global.Vdd;
+// Walks the decoder's gate chain stage by stage. Each stage's horowitz()
+// delay is added to `delay` and its C*V*V term to power.readOp.dynamic.
+// Returns the ramp time at the decoder output, or 0.0 when the decoder does
+// not exist.
+double Decoder::compute_delays(double inrisetime) {
+    if (!exist) {
+        return 0.0;
+    }
+
+    const double Vdd = g_tp.peri_global.Vdd;
- if ((is_wl_tr) && (is_dram))
- {
- Vpp = g_tp.vpp;
- }
- else if (is_wl_tr)
- {
- Vpp = g_tp.sram_cell.Vdd;
- }
- else
- {
- Vpp = g_tp.peri_global.Vdd;
- }
+
+    // Voltage applied to the output load: the peripheral supply unless this
+    // is a wordline driver, in which case it depends on the array type.
+    double Vpp = g_tp.peri_global.Vdd;
+    if (is_wl_tr) {
+        Vpp = is_dram ? g_tp.vpp : g_tp.sram_cell.Vdd;
+    }
- // first check whether a decoder is required at all
- rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
- c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
- drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
-
- for (i = 1; i < num_gates - 1; ++i)
- {
- rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
- c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
- drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+
+    // Stage 0: the NAND gate at the head of the chain, loaded by stage 1.
+    double r_drive =
+        tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
+    double c_next =
+        gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
+    double c_self =
+        drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
+        drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
+    double stage_delay =
+        horowitz(inrisetime, r_drive * (c_self + c_next), 0.5, 0.5, RISE);
+    delay += stage_delay;
+    inrisetime = stage_delay / (1.0 - 0.5);
+    power.readOp.dynamic += (c_next + c_self) * Vdd * Vdd;
+
+    // Intermediate inverters, each loaded by the gate that follows it.
+    for (int stage = 1; stage < num_gates - 1; ++stage) {
+        r_drive = tr_R_on(w_dec_n[stage], NCH, 1, is_dram, false, is_wl_tr);
+        c_next = gate_C(w_dec_p[stage + 1] + w_dec_n[stage + 1], 0.0, is_dram, false, is_wl_tr);
+        c_self =
+            drain_C_(w_dec_p[stage], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
+            drain_C_(w_dec_n[stage], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
+        stage_delay =
+            horowitz(inrisetime, r_drive * (c_self + c_next), 0.5, 0.5, RISE);
+        delay += stage_delay;
+        inrisetime = stage_delay / (1.0 - 0.5);
+        power.readOp.dynamic += (c_next + c_self) * Vdd * Vdd;
+    }
+
+    // Final inverter that drives the wordline: its load is the decoder
+    // output capacitance, and the output wire resistance adds to the
+    // time constant.
+    const int last = num_gates - 1;
+    c_next = C_ld_dec_out;
+    r_drive = tr_R_on(w_dec_n[last], NCH, 1, is_dram, false, is_wl_tr);
+    c_self =
+        drain_C_(w_dec_p[last], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
+        drain_C_(w_dec_n[last], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
+    stage_delay = horowitz(
+        inrisetime,
+        r_drive * (c_self + c_next) + R_wire_dec_out * c_next / 2,
+        0.5, 0.5, RISE);
+    delay += stage_delay;
+    // The output load switches at Vpp; the gate's own drains switch at Vdd.
+    power.readOp.dynamic += c_next * Vpp * Vpp + c_self * Vdd * Vdd;
+
+    return stage_delay / (1.0 - 0.5);
-
- // add delay of final inverter that drives the wordline
- i = num_gates - 1;
- c_load = C_ld_dec_out;
- rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
- drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- ret_val = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
-
- return ret_val;
- }
- else
- {
- return 0.0;
- }
}
void Decoder::leakage_feedback(double temperature)
@@ -291,610 +269,568 @@ PredecBlk::PredecBlk(
int num_dec_per_predec,
bool is_dram,
bool is_blk1)
- :dec(dec_),
- exist(false),
- number_input_addr_bits(0),
- C_ld_predec_blk_out(0),
- R_wire_predec_blk_out(0),
- branch_effort_nand2_gate_output(1),
- branch_effort_nand3_gate_output(1),
- flag_two_unique_paths(false),
- flag_L2_gate(0),
- number_inputs_L1_gate(0),
- number_gates_L1_nand2_path(0),
- number_gates_L1_nand3_path(0),
- number_gates_L2(0),
- min_number_gates_L1(2),
- min_number_gates_L2(2),
- num_L1_active_nand2_path(0),
- num_L1_active_nand3_path(0),
- delay_nand2_path(0),
- delay_nand3_path(0),
- power_nand2_path(),
- power_nand3_path(),
- power_L2(),
- is_dram_(is_dram)
-{
- int branch_effort_predec_out;
- double C_ld_dec_gate;
- int num_addr_bits_dec = _log2(num_dec_signals);
- int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
- int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
-
- w_L1_nand2_n[0] = 0;
- w_L1_nand2_p[0] = 0;
- w_L1_nand3_n[0] = 0;
- w_L1_nand3_p[0] = 0;
-
- if (is_blk1 == true)
- {
- if (num_addr_bits_dec <= 0)
- {
- return;
+ : dec(dec_),
+ exist(false),
+ number_input_addr_bits(0),
+ C_ld_predec_blk_out(0),
+ R_wire_predec_blk_out(0),
+ branch_effort_nand2_gate_output(1),
+ branch_effort_nand3_gate_output(1),
+ flag_two_unique_paths(false),
+ flag_L2_gate(0),
+ number_inputs_L1_gate(0),
+ number_gates_L1_nand2_path(0),
+ number_gates_L1_nand3_path(0),
+ number_gates_L2(0),
+ min_number_gates_L1(2),
+ min_number_gates_L2(2),
+ num_L1_active_nand2_path(0),
+ num_L1_active_nand3_path(0),
+ delay_nand2_path(0),
+ delay_nand3_path(0),
+ power_nand2_path(),
+ power_nand3_path(),
+ power_L2(),
+ is_dram_(is_dram) {
+ int branch_effort_predec_out;
+ double C_ld_dec_gate;
+ int num_addr_bits_dec = _log2(num_dec_signals);
+ int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
+ int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
+
+ w_L1_nand2_n[0] = 0;
+ w_L1_nand2_p[0] = 0;
+ w_L1_nand3_n[0] = 0;
+ w_L1_nand3_p[0] = 0;
+
+ if (is_blk1 == true) {
+ if (num_addr_bits_dec <= 0) {
+ return;
+ } else if (num_addr_bits_dec < 4) {
+ // Just one predecoder block is required with NAND2 gates. No decoder required.
+ // The first level of predecoding directly drives the decoder output load
+ exist = true;
+ number_input_addr_bits = num_addr_bits_dec;
+ R_wire_predec_blk_out = dec->R_wire_dec_out;
+ C_ld_predec_blk_out = dec->C_ld_dec_out;
+ } else {
+ exist = true;
+ number_input_addr_bits = blk1_num_input_addr_bits;
+ branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
+ C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+ R_wire_predec_blk_out = R_wire_predec_blk_out_;
+ C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+ }
+ } else {
+ if (num_addr_bits_dec >= 4) {
+ exist = true;
+ number_input_addr_bits = blk2_num_input_addr_bits;
+ branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
+ C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+ R_wire_predec_blk_out = R_wire_predec_blk_out_;
+ C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+ }
}
- else if (num_addr_bits_dec < 4)
- {
- // Just one predecoder block is required with NAND2 gates. No decoder required.
- // The first level of predecoding directly drives the decoder output load
- exist = true;
- number_input_addr_bits = num_addr_bits_dec;
- R_wire_predec_blk_out = dec->R_wire_dec_out;
- C_ld_predec_blk_out = dec->C_ld_dec_out;
- }
- else
- {
- exist = true;
- number_input_addr_bits = blk1_num_input_addr_bits;
- branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
- C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
- R_wire_predec_blk_out = R_wire_predec_blk_out_;
- C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
- }
- }
- else
- {
- if (num_addr_bits_dec >= 4)
- {
- exist = true;
- number_input_addr_bits = blk2_num_input_addr_bits;
- branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
- C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
- R_wire_predec_blk_out = R_wire_predec_blk_out_;
- C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
- }
- }
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void PredecBlk::compute_widths()
-{
- double F, c_load_nand3_path, c_load_nand2_path;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
- double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+void PredecBlk::compute_widths() {
+ double F, c_load_nand3_path, c_load_nand2_path;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+ double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+ double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- if (exist == false) return;
+ if (exist == false) return;
- switch (number_input_addr_bits)
- {
+ switch (number_input_addr_bits) {
case 1:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 0;
- break;
- case 2:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 0;
- break;
- case 3:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 0;
- break;
- case 4:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 2;
- branch_effort_nand2_gate_output = 4;
- break;
- case 5:
- flag_two_unique_paths = true;
- flag_L2_gate = 2;
- branch_effort_nand2_gate_output = 8;
- branch_effort_nand3_gate_output = 4;
- break;
- case 6:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 2;
- branch_effort_nand3_gate_output = 8;
- break;
- case 7:
- flag_two_unique_paths = true;
- flag_L2_gate = 3;
- branch_effort_nand2_gate_output = 32;
- branch_effort_nand3_gate_output = 16;
- break;
- case 8:
- flag_two_unique_paths = true;
- flag_L2_gate = 3;
- branch_effort_nand2_gate_output = 64;
- branch_effort_nand3_gate_output = 32;
- break;
- case 9:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 3;
- branch_effort_nand3_gate_output = 64;
- break;
- default:
- assert(0);
- break;
- }
-
- // find the number of gates and sizing in second level of predecoder (if there is a second level)
- if (flag_L2_gate)
- {
- if (flag_L2_gate == 2)
- { // 2nd level is a NAND2 gate
- w_L2_n[0] = 2 * g_tp.min_w_nmos_;
- F = gnand2;
- }
- else
- { // 2nd level is a NAND3 gate
- w_L2_n[0] = 3 * g_tp.min_w_nmos_;
- F = gnand3;
- }
- w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- number_gates_L2 = logical_effort(
- min_number_gates_L2,
- flag_L2_gate == 2 ? gnand2 : gnand3,
- F,
- w_L2_n,
- w_L2_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
-
- // Now find the number of gates and widths in first level of predecoder
- if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2))
- { // Whenever flag_two_unique_paths is true, it means first level of decoder employs
- // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means
- // a NAND2 gate is used in the first level of the predecoder
- c_load_nand2_path = branch_effort_nand2_gate_output *
- (gate_C(w_L2_n[0], 0, is_dram_) +
- gate_C(w_L2_p[0], 0, is_dram_));
- w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
- w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2 * c_load_nand2_path /
- (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
- gate_C(w_L1_nand2_p[0], 0, is_dram_));
- number_gates_L1_nand2_path = logical_effort(
- min_number_gates_L1,
- gnand2,
- F,
- w_L1_nand2_n,
- w_L1_nand2_p,
- c_load_nand2_path,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
-
- //Now find widths of gates along path in which first gate is a NAND3
- if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3))
- { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
- // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
- // a NAND3 gate is used in the first level of the predecoder
- c_load_nand3_path = branch_effort_nand3_gate_output *
- (gate_C(w_L2_n[0], 0, is_dram_) +
- gate_C(w_L2_p[0], 0, is_dram_));
- w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
- w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3 * c_load_nand3_path /
- (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
- gate_C(w_L1_nand3_p[0], 0, is_dram_));
- number_gates_L1_nand3_path = logical_effort(
- min_number_gates_L1,
- gnand3,
- F,
- w_L1_nand3_n,
- w_L1_nand3_p,
- c_load_nand3_path,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- }
- else
- { // find number of gates and widths in first level of predecoder block when there is no second level
- if (number_inputs_L1_gate == 2)
- {
- w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
- w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2*C_ld_predec_blk_out /
- (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
- gate_C(w_L1_nand2_p[0], 0, is_dram_));
- number_gates_L1_nand2_path = logical_effort(
- min_number_gates_L1,
- gnand2,
- F,
- w_L1_nand2_n,
- w_L1_nand2_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- else if (number_inputs_L1_gate == 3)
- {
- w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
- w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3*C_ld_predec_blk_out /
- (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
- gate_C(w_L1_nand3_p[0], 0, is_dram_));
- number_gates_L1_nand3_path = logical_effort(
- min_number_gates_L1,
- gnand3,
- F,
- w_L1_nand3_n,
- w_L1_nand3_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- }
-}
-
-
-
-void PredecBlk::compute_area()
-{
- if (exist)
- { // First check whether a predecoder block is needed
- int num_L1_nand2 = 0;
- int num_L1_nand3 = 0;
- int num_L2 = 0;
- double tot_area_L1_nand3 =0;
- double leak_L1_nand3 =0;
- double gate_leak_L1_nand3 =0;
-
- double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
- double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
- double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
- if (number_inputs_L1_gate != 3) {
- tot_area_L1_nand3 = 0;
- leak_L1_nand3 = 0;
- gate_leak_L1_nand3 =0;
- }
- else {
- tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
- leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
- gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
- }
-
- switch (number_input_addr_bits)
- {
- case 1: //2 NAND2 gates
- num_L1_nand2 = 2;
- num_L2 = 0;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =0;
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 0;
break;
- case 2: //4 NAND2 gates
- num_L1_nand2 = 4;
- num_L2 = 0;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =0;
+ case 2:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 0;
break;
- case 3: //8 NAND3 gates
- num_L1_nand3 = 8;
- num_L2 = 0;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =1;
+ case 3:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 0;
break;
- case 4: //4 + 4 NAND2 gates
- num_L1_nand2 = 8;
- num_L2 = 16;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =0;
+ case 4:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 2;
+ branch_effort_nand2_gate_output = 4;
break;
- case 5: //4 NAND2 gates, 8 NAND3 gates
- num_L1_nand2 = 4;
- num_L1_nand3 = 8;
- num_L2 = 32;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =1;
+ case 5:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 2;
+ branch_effort_nand2_gate_output = 8;
+ branch_effort_nand3_gate_output = 4;
break;
- case 6: //8 + 8 NAND3 gates
- num_L1_nand3 = 16;
- num_L2 = 64;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =2;
+ case 6:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 2;
+ branch_effort_nand3_gate_output = 8;
break;
- case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
- num_L1_nand2 = 8;
- num_L1_nand3 = 8;
- num_L2 = 128;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =1;
+ case 7:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 3;
+ branch_effort_nand2_gate_output = 32;
+ branch_effort_nand3_gate_output = 16;
break;
- case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
- num_L1_nand2 = 4;
- num_L1_nand3 = 16;
- num_L2 = 256;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =2;
+ case 8:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 3;
+ branch_effort_nand2_gate_output = 64;
+ branch_effort_nand3_gate_output = 32;
break;
- case 9: //8 + 8 + 8 NAND3 gates
- num_L1_nand3 = 24;
- num_L2 = 512;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =3;
+ case 9:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 3;
+ branch_effort_nand3_gate_output = 64;
break;
- default:
+ default:
+ assert(0);
break;
}
- for (int i = 1; i < number_gates_L1_nand2_path; ++i)
- {
- tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
- leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
- gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ // find the number of gates and sizing in second level of predecoder (if there is a second level)
+ if (flag_L2_gate) {
+ if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate
+ w_L2_n[0] = 2 * g_tp.min_w_nmos_;
+ F = gnand2;
+ } else { // 2nd level is a NAND3 gate
+ w_L2_n[0] = 3 * g_tp.min_w_nmos_;
+ F = gnand3;
+ }
+ w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
+ number_gates_L2 = logical_effort(
+ min_number_gates_L2,
+ flag_L2_gate == 2 ? gnand2 : gnand3,
+ F,
+ w_L2_n,
+ w_L2_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+
+ // Now find the number of gates and widths in first level of predecoder
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
+ // Whenever flag_two_unique_paths is true, it means first level of
+ // decoder employs
+ // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2,
+ // it means
+ // a NAND2 gate is used in the first level of the predecoder
+ c_load_nand2_path = branch_effort_nand2_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+ w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2 * c_load_nand2_path /
+ (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand2_p[0], 0, is_dram_));
+ number_gates_L1_nand2_path = logical_effort(
+ min_number_gates_L1,
+ gnand2,
+ F,
+ w_L1_nand2_n,
+ w_L1_nand2_p,
+ c_load_nand2_path,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+
+ //Now find widths of gates along path in which first gate is a NAND3
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
+ // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
+ // a NAND3 gate is used in the first level of the predecoder
+ c_load_nand3_path = branch_effort_nand3_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+ w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3 * c_load_nand3_path /
+ (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand3_p[0], 0, is_dram_));
+ number_gates_L1_nand3_path = logical_effort(
+ min_number_gates_L1,
+ gnand3,
+ F,
+ w_L1_nand3_n,
+ w_L1_nand3_p,
+ c_load_nand3_path,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+ } else { // find number of gates and widths in first level of predecoder block when there is no second level
+ if (number_inputs_L1_gate == 2) {
+ w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+ w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2 * C_ld_predec_blk_out /
+ (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand2_p[0], 0, is_dram_));
+ number_gates_L1_nand2_path = logical_effort(
+ min_number_gates_L1,
+ gnand2,
+ F,
+ w_L1_nand2_n,
+ w_L1_nand2_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ } else if (number_inputs_L1_gate == 3) {
+ w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+ w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3 * C_ld_predec_blk_out /
+ (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand3_p[0], 0, is_dram_));
+ number_gates_L1_nand3_path = logical_effort(
+ min_number_gates_L1,
+ gnand3,
+ F,
+ w_L1_nand3_n,
+ w_L1_nand3_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
}
- tot_area_L1_nand2 *= num_L1_nand2;
- leak_L1_nand2 *= num_L1_nand2;
- gate_leak_L1_nand2 *= num_L1_nand2;
-
- for (int i = 1; i < number_gates_L1_nand3_path; ++i)
- {
- tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
- leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
- gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
- }
- tot_area_L1_nand3 *= num_L1_nand3;
- leak_L1_nand3 *= num_L1_nand3;
- gate_leak_L1_nand3 *= num_L1_nand3;
+}
- double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
- double cumulative_area_L2 = 0.0;
- double leakage_L2 = 0.0;
- double gate_leakage_L2 = 0.0;
- if (flag_L2_gate == 2)
- {
- cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
- leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
- gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
- }
- else if (flag_L2_gate == 3)
- {
- cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
- leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
- gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
- }
- for (int i = 1; i < number_gates_L2; ++i)
- {
- cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
- leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
- gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+void PredecBlk::compute_area() {
+ if (exist) { // First check whether a predecoder block is needed
+ int num_L1_nand2 = 0;
+ int num_L1_nand3 = 0;
+ int num_L2 = 0;
+ double tot_area_L1_nand3 = 0;
+ double leak_L1_nand3 = 0;
+ double gate_leak_L1_nand3 = 0;
+
+ double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
+ double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ if (number_inputs_L1_gate != 3) {
+ tot_area_L1_nand3 = 0;
+ leak_L1_nand3 = 0;
+ gate_leak_L1_nand3 = 0;
+ } else {
+ tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
+ leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
+ gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
+ }
+
+ switch (number_input_addr_bits) {
+ case 1: //2 NAND2 gates
+ num_L1_nand2 = 2;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 2: //4 NAND2 gates
+ num_L1_nand2 = 4;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 3: //8 NAND3 gates
+ num_L1_nand3 = 8;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 4: //4 + 4 NAND2 gates
+ num_L1_nand2 = 8;
+ num_L2 = 16;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 5: //4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 8;
+ num_L2 = 32;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 6: //8 + 8 NAND3 gates
+ num_L1_nand3 = 16;
+ num_L2 = 64;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 2;
+ break;
+ case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 8;
+ num_L1_nand3 = 8;
+ num_L2 = 128;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 16;
+ num_L2 = 256;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 2;
+ break;
+ case 9: //8 + 8 + 8 NAND3 gates
+ num_L1_nand3 = 24;
+ num_L2 = 512;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 3;
+ break;
+ default:
+ break;
+ }
+
+ for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
+ tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
+ leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ }
+ tot_area_L1_nand2 *= num_L1_nand2;
+ leak_L1_nand2 *= num_L1_nand2;
+ gate_leak_L1_nand2 *= num_L1_nand2;
+
+ for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
+ tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
+ leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ }
+ tot_area_L1_nand3 *= num_L1_nand3;
+ leak_L1_nand3 *= num_L1_nand3;
+ gate_leak_L1_nand3 *= num_L1_nand3;
+
+ double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
+ double cumulative_area_L2 = 0.0;
+ double leakage_L2 = 0.0;
+ double gate_leakage_L2 = 0.0;
+
+ if (flag_L2_gate == 2) {
+ cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ } else if (flag_L2_gate == 3) {
+ cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ }
+
+ for (int i = 1; i < number_gates_L2; ++i) {
+ cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
+ leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ }
+ cumulative_area_L2 *= num_L2;
+ leakage_L2 *= num_L2;
+ gate_leakage_L2 *= num_L2;
+
+ power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
+ area.set_area(cumulative_area_L1 + cumulative_area_L2);
+ power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
}
- cumulative_area_L2 *= num_L2;
- leakage_L2 *= num_L2;
- gate_leakage_L2 *= num_L2;
-
- power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
- power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
- area.set_area(cumulative_area_L1 + cumulative_area_L2);
- power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
- power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
- }
}
pair<double, double> PredecBlk::compute_delays(
- pair<double, double> inrisetime) // <nand2, nand3>
-{
- pair<double, double> ret_val;
- ret_val.first = 0; // outrisetime_nand2_path
- ret_val.second = 0; // outrisetime_nand3_path
-
- double inrisetime_nand2_path = inrisetime.first;
- double inrisetime_nand3_path = inrisetime.second;
- int i;
- double rd, c_load, c_intrinsic, tf, this_delay;
- double Vdd = g_tp.peri_global.Vdd;
-
- // TODO: following delay calculation part can be greatly simplified.
- // first check whether a predecoder block is required
- if (exist)
- {
- //Find delay in first level of predecoder block
- //First find delay in path
- if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2))
- {
- //First gate is a NAND2 gate
- rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
- c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
- c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
-
- //Add delays of all but the last inverter in the chain
- for (i = 1; i < number_gates_L1_nand2_path - 1; ++i)
- {
- rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of the last inverter
- i = number_gates_L1_nand2_path - 1;
- rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
- if (flag_L2_gate)
- {
- c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { //First level directly drives decoder output load
- c_load = C_ld_predec_blk_out;
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- }
+ pair<double, double> inrisetime) { // <nand2, nand3>
+ pair<double, double> ret_val;
+ ret_val.first = 0; // outrisetime_nand2_path
+ ret_val.second = 0; // outrisetime_nand3_path
- if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3))
- { //Check if the number of gates in the first level is more than 1.
- //First gate is a NAND3 gate
- rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
- c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
- c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
-
- //Add delays of all but the last inverter in the chain
- for (i = 1; i < number_gates_L1_nand3_path - 1; ++i)
- {
- rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of the last inverter
- i = number_gates_L1_nand3_path - 1;
- rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
- if (flag_L2_gate)
- {
- c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { //First level directly drives decoder output load
- c_load = C_ld_predec_blk_out;
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- }
+ double inrisetime_nand2_path = inrisetime.first;
+ double inrisetime_nand3_path = inrisetime.second;
+ int i;
+ double rd, c_load, c_intrinsic, tf, this_delay;
+ double Vdd = g_tp.peri_global.Vdd;
- // Find delay through second level
- if (flag_L2_gate)
- {
- if (flag_L2_gate == 2)
- {
- rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
- c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
- c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { // flag_L2_gate = 3
- rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
- c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
- c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- for (i = 1; i < number_gates_L2 - 1; ++i)
- {
- rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of final inverter that drives the wordline decoders
- i = number_gates_L2 - 1;
- c_load = C_ld_predec_blk_out;
- rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ // TODO: following delay calculation part can be greatly simplified.
+ // first check whether a predecoder block is required
+ if (exist) {
+ //Find delay in first level of predecoder block
+ //First find delay in path
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
+ //First gate is a NAND2 gate
+ rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
+ c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
+ c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+
+ //Add delays of all but the last inverter in the chain
+ for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) {
+ rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of the last inverter
+ i = number_gates_L1_nand2_path - 1;
+ rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
+ if (flag_L2_gate) {
+ c_load = branch_effort_nand2_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { //First level directly drives decoder output load
+ c_load = C_ld_predec_blk_out;
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
+ //Check if the number of gates in the first level is more than 1.
+ //First gate is a NAND3 gate
+ rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
+ c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
+ c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+
+ //Add delays of all but the last inverter in the chain
+ for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) {
+ rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of the last inverter
+ i = number_gates_L1_nand3_path - 1;
+ rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
+ if (flag_L2_gate) {
+ c_load = branch_effort_nand3_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0,
+ is_dram_));
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { //First level directly drives decoder output load
+ c_load = C_ld_predec_blk_out;
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ // Find delay through second level
+ if (flag_L2_gate) {
+ if (flag_L2_gate == 2) {
+ rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
+ c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
+ c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { // flag_L2_gate = 3
+ rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
+ c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
+ c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ for (i = 1; i < number_gates_L2 - 1; ++i) {
+ rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of final inverter that drives the wordline decoders
+ i = number_gates_L2 - 1;
+ c_load = C_ld_predec_blk_out;
+ rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
}
- }
- delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
- return ret_val;
+ delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
+ return ret_val;
}
void PredecBlk::leakage_feedback(double temperature)
@@ -1033,302 +969,287 @@ PredecBlkDrv::PredecBlkDrv(
int way_select_,
PredecBlk * blk_,
bool is_dram)
- :flag_driver_exists(0),
- number_gates_nand2_path(0),
- number_gates_nand3_path(0),
- min_number_gates(2),
- num_buffers_driving_1_nand2_load(0),
- num_buffers_driving_2_nand2_load(0),
- num_buffers_driving_4_nand2_load(0),
- num_buffers_driving_2_nand3_load(0),
- num_buffers_driving_8_nand3_load(0),
- num_buffers_nand3_path(0),
- c_load_nand2_path_out(0),
- c_load_nand3_path_out(0),
- r_load_nand2_path_out(0),
- r_load_nand3_path_out(0),
- delay_nand2_path(0),
- delay_nand3_path(0),
- power_nand2_path(),
- power_nand3_path(),
- blk(blk_), dec(blk->dec),
- is_dram_(is_dram),
- way_select(way_select_)
-{
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- width_nand2_path_n[i] = 0;
- width_nand2_path_p[i] = 0;
- width_nand3_path_n[i] = 0;
- width_nand3_path_p[i] = 0;
- }
-
- number_input_addr_bits = blk->number_input_addr_bits;
-
- if (way_select > 1)
- {
- flag_driver_exists = 1;
- number_input_addr_bits = way_select;
- if (dec->num_in_signals == 2)
- {
- c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
- num_buffers_driving_2_nand2_load = number_input_addr_bits;
- }
- else if (dec->num_in_signals == 3)
- {
- c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
- num_buffers_driving_2_nand3_load = number_input_addr_bits;
+ : flag_driver_exists(0),
+ number_gates_nand2_path(0),
+ number_gates_nand3_path(0),
+ min_number_gates(2),
+ num_buffers_driving_1_nand2_load(0),
+ num_buffers_driving_2_nand2_load(0),
+ num_buffers_driving_4_nand2_load(0),
+ num_buffers_driving_2_nand3_load(0),
+ num_buffers_driving_8_nand3_load(0),
+ num_buffers_nand3_path(0),
+ c_load_nand2_path_out(0),
+ c_load_nand3_path_out(0),
+ r_load_nand2_path_out(0),
+ r_load_nand3_path_out(0),
+ delay_nand2_path(0),
+ delay_nand3_path(0),
+ power_nand2_path(),
+ power_nand3_path(),
+ blk(blk_), dec(blk->dec),
+ is_dram_(is_dram),
+ way_select(way_select_) {
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
+ width_nand2_path_n[i] = 0;
+ width_nand2_path_p[i] = 0;
+ width_nand3_path_n[i] = 0;
+ width_nand3_path_p[i] = 0;
}
- }
- else if (way_select == 0)
- {
- if (blk->exist)
- {
- flag_driver_exists = 1;
+
+ number_input_addr_bits = blk->number_input_addr_bits;
+
+ if (way_select > 1) {
+ flag_driver_exists = 1;
+ number_input_addr_bits = way_select;
+ if (dec->num_in_signals == 2) {
+ c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
+ num_buffers_driving_2_nand2_load = number_input_addr_bits;
+ } else if (dec->num_in_signals == 3) {
+ c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
+ num_buffers_driving_2_nand3_load = number_input_addr_bits;
+ }
+ } else if (way_select == 0) {
+ if (blk->exist) {
+ flag_driver_exists = 1;
+ }
}
- }
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void PredecBlkDrv::compute_widths()
-{
- // The predecode block driver accepts as input the address bits from the h-tree network. For
- // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
- // inversion to generate addrbar and simply treat addrbar as addr.
-
- double F;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
-
- if (flag_driver_exists)
- {
- double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
- double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
-
- if (way_select == 0)
- {
- if (blk->number_input_addr_bits == 1)
- { //2 NAND2 gates
- num_buffers_driving_2_nand2_load = 1;
- c_load_nand2_path_out = 2 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 2)
- { //4 NAND2 gates one 2-4 decoder
- num_buffers_driving_4_nand2_load = 2;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 3)
- { //8 NAND3 gates one 3-8 decoder
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 4)
- { //4 + 4 NAND2 gates two 2-4 decoder
- num_buffers_driving_4_nand2_load = 4;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 5)
- { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
- num_buffers_driving_4_nand2_load = 2;
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 6)
- { //8 + 8 NAND3 gates two 3-8 decoder
- num_buffers_driving_8_nand3_load = 6;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 7)
- { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
- num_buffers_driving_4_nand2_load = 4;
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 8)
- { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
- num_buffers_driving_4_nand2_load = 2;
- num_buffers_driving_8_nand3_load = 6;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 9)
- { //8 + 8 + 8 NAND3 gates three 3-8 decoder
- num_buffers_driving_8_nand3_load = 9;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- }
-
- if ((blk->flag_two_unique_paths) ||
- (blk->number_inputs_L1_gate == 2) ||
- (number_input_addr_bits == 0) ||
- ((way_select)&&(dec->num_in_signals == 2)))
- { //this means that way_select is driving NAND2 in decoder.
- width_nand2_path_n[0] = g_tp.min_w_nmos_;
- width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
- F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
- number_gates_nand2_path = logical_effort(
- min_number_gates,
- 1,
- F,
- width_nand2_path_n,
- width_nand2_path_p,
- c_load_nand2_path_out,
- p_to_n_sz_ratio,
- is_dram_, false, g_tp.max_w_nmos_);
- }
-
- if ((blk->flag_two_unique_paths) ||
- (blk->number_inputs_L1_gate == 3) ||
- ((way_select)&&(dec->num_in_signals == 3)))
- { //this means that way_select is driving NAND3 in decoder.
- width_nand3_path_n[0] = g_tp.min_w_nmos_;
- width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
- F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
- number_gates_nand3_path = logical_effort(
- min_number_gates,
- 1,
- F,
- width_nand3_path_n,
- width_nand3_path_p,
- c_load_nand3_path_out,
- p_to_n_sz_ratio,
- is_dram_, false, g_tp.max_w_nmos_);
+void PredecBlkDrv::compute_widths() {
+ // The predecode block driver accepts as input the address bits from the h-tree network. For
+ // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
+ // inversion to generate addrbar and simply treat addrbar as addr.
+
+ double F;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+
+ if (flag_driver_exists) {
+ double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
+ double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
+
+ if (way_select == 0) {
+ if (blk->number_input_addr_bits == 1) {
+ //2 NAND2 gates
+ num_buffers_driving_2_nand2_load = 1;
+ c_load_nand2_path_out = 2 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 2) {
+ //4 NAND2 gates one 2-4 decoder
+ num_buffers_driving_4_nand2_load = 2;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 3) {
+ //8 NAND3 gates one 3-8 decoder
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 4) {
+ //4 + 4 NAND2 gates two 2-4 decoder
+ num_buffers_driving_4_nand2_load = 4;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 5) {
+ //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 2;
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 6) {
+ //8 + 8 NAND3 gates two 3-8 decoder
+ num_buffers_driving_8_nand3_load = 6;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 7) {
+ //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 4;
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 8) {
+ //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 2;
+ num_buffers_driving_8_nand3_load = 6;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 9) {
+ //8 + 8 + 8 NAND3 gates three 3-8 decoder
+ num_buffers_driving_8_nand3_load = 9;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ }
+
+ if ((blk->flag_two_unique_paths) ||
+ (blk->number_inputs_L1_gate == 2) ||
+ (number_input_addr_bits == 0) ||
+ ((way_select) && (dec->num_in_signals == 2))) {
+ //this means that way_select is driving NAND2 in decoder.
+ width_nand2_path_n[0] = g_tp.min_w_nmos_;
+ width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
+ F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
+ number_gates_nand2_path = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_nand2_path_n,
+ width_nand2_path_p,
+ c_load_nand2_path_out,
+ p_to_n_sz_ratio,
+ is_dram_, false, g_tp.max_w_nmos_);
+ }
+
+ if ((blk->flag_two_unique_paths) ||
+ (blk->number_inputs_L1_gate == 3) ||
+ ((way_select) && (dec->num_in_signals == 3))) {
+ //this means that way_select is driving NAND3 in decoder.
+ width_nand3_path_n[0] = g_tp.min_w_nmos_;
+ width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
+ F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
+ number_gates_nand3_path = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_nand3_path_n,
+ width_nand3_path_p,
+ c_load_nand3_path_out,
+ p_to_n_sz_ratio,
+ is_dram_, false, g_tp.max_w_nmos_);
+ }
}
- }
}
-void PredecBlkDrv::compute_area()
-{
- double area_nand2_path = 0;
- double area_nand3_path = 0;
- double leak_nand2_path = 0;
- double leak_nand3_path = 0;
- double gate_leak_nand2_path = 0;
- double gate_leak_nand3_path = 0;
-
- if (flag_driver_exists)
- { // first check whether a predecoder block driver is needed
- for (int i = 0; i < number_gates_nand2_path; ++i)
- {
- area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
- leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
- gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
- }
- area_nand2_path *= (num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load);
- leak_nand2_path *= (num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load);
- gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+void PredecBlkDrv::compute_area() {
+ double area_nand2_path = 0;
+ double area_nand3_path = 0;
+ double leak_nand2_path = 0;
+ double leak_nand3_path = 0;
+ double gate_leak_nand2_path = 0;
+ double gate_leak_nand3_path = 0;
+
+ if (flag_driver_exists) {
+ // first check whether a predecoder block driver is needed
+ for (int i = 0; i < number_gates_nand2_path; ++i) {
+ area_nand2_path +=
+ compute_gate_area(INV, 1, width_nand2_path_p[i],
+ width_nand2_path_n[i], g_tp.cell_h_def);
+ leak_nand2_path +=
+ cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
+ 1, inv, is_dram_);
+ gate_leak_nand2_path +=
+ cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
+ 1, inv, is_dram_);
+ }
+ area_nand2_path *= (num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load);
-
- for (int i = 0; i < number_gates_nand3_path; ++i)
- {
- area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
- leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
- gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
+ leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+ gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+
+ for (int i = 0; i < number_gates_nand3_path; ++i) {
+ area_nand3_path +=
+ compute_gate_area(INV, 1, width_nand3_path_p[i],
+ width_nand3_path_n[i], g_tp.cell_h_def);
+ leak_nand3_path +=
+ cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
+ 1, inv, is_dram_);
+ gate_leak_nand3_path +=
+ cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
+ 1, inv, is_dram_);
+ }
+ area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+
+ power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
+ power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
+ area.set_area(area_nand2_path + area_nand3_path);
}
- area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
- leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
- gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
-
- power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
- power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
- area.set_area(area_nand2_path + area_nand3_path);
- }
}
pair<double, double> PredecBlkDrv::compute_delays(
double inrisetime_nand2_path,
- double inrisetime_nand3_path)
-{
- pair<double, double> ret_val;
- ret_val.first = 0; // outrisetime_nand2_path
- ret_val.second = 0; // outrisetime_nand3_path
- int i;
- double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
- double Vdd = g_tp.peri_global.Vdd;
-
- if (flag_driver_exists)
- {
- for (i = 0; i < number_gates_nand2_path - 1; ++i)
- {
- rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
- c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
- }
+ double inrisetime_nand3_path) {
+ pair<double, double> ret_val;
+ ret_val.first = 0; // outrisetime_nand2_path
+ ret_val.second = 0; // outrisetime_nand3_path
+ int i;
+ double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
+ double Vdd = g_tp.peri_global.Vdd;
- // Final inverter drives the predecoder block or the decoder output load
- if (number_gates_nand2_path != 0)
- {
- i = number_gates_nand2_path - 1;
- rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- c_load = c_load_nand2_path_out;
- tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
+ if (flag_driver_exists) {
+ for (i = 0; i < number_gates_nand2_path - 1; ++i) {
+ rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
+ c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
+ }
+
+ // Final inverter drives the predecoder block or the decoder output load
+ if (number_gates_nand2_path != 0) {
+ i = number_gates_nand2_path - 1;
+ rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ c_load = c_load_nand2_path_out;
+ tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
+ }
+
+ for (i = 0; i < number_gates_nand3_path - 1; ++i) {
+ rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
+ c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
+ }
+
+ // Final inverter drives the predecoder block or the decoder output load
+ if (number_gates_nand3_path != 0) {
+ i = number_gates_nand3_path - 1;
+ rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ c_load = c_load_nand3_path_out;
+ tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
+ }
}
-
- for (i = 0; i < number_gates_nand3_path - 1; ++i)
- {
- rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
- c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
- }
-
- // Final inverter drives the predecoder block or the decoder output load
- if (number_gates_nand3_path != 0)
- {
- i = number_gates_nand3_path - 1;
- rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- c_load = c_load_nand3_path_out;
- tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2;
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
- }
- }
- return ret_val;
+ return ret_val;
}
-double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
-{
- return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
- num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
+double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) {
+ return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
+ num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
}
@@ -1336,31 +1257,30 @@ double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
Predec::Predec(
PredecBlkDrv * drv1_,
PredecBlkDrv * drv2_)
-:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
-{
- driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
- drv1->power_nand3_path.readOp.leakage +
- drv2->power_nand2_path.readOp.leakage +
- drv2->power_nand3_path.readOp.leakage;
- block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
- blk1->power_nand3_path.readOp.leakage +
- blk1->power_L2.readOp.leakage +
- blk2->power_nand2_path.readOp.leakage +
- blk2->power_nand3_path.readOp.leakage +
- blk2->power_L2.readOp.leakage;
- power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
-
- driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
- drv1->power_nand3_path.readOp.gate_leakage +
- drv2->power_nand2_path.readOp.gate_leakage +
- drv2->power_nand3_path.readOp.gate_leakage;
- block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
- blk1->power_nand3_path.readOp.gate_leakage +
- blk1->power_L2.readOp.gate_leakage +
- blk2->power_nand2_path.readOp.gate_leakage +
- blk2->power_nand3_path.readOp.gate_leakage +
- blk2->power_L2.readOp.gate_leakage;
- power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
+ : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) {
+ driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
+ drv1->power_nand3_path.readOp.leakage +
+ drv2->power_nand2_path.readOp.leakage +
+ drv2->power_nand3_path.readOp.leakage;
+ block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
+ blk1->power_nand3_path.readOp.leakage +
+ blk1->power_L2.readOp.leakage +
+ blk2->power_nand2_path.readOp.leakage +
+ blk2->power_nand3_path.readOp.leakage +
+ blk2->power_L2.readOp.leakage;
+ power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
+
+ driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
+ drv1->power_nand3_path.readOp.gate_leakage +
+ drv2->power_nand2_path.readOp.gate_leakage +
+ drv2->power_nand3_path.readOp.gate_leakage;
+ block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
+ blk1->power_nand3_path.readOp.gate_leakage +
+ blk1->power_L2.readOp.gate_leakage +
+ blk2->power_nand2_path.readOp.gate_leakage +
+ blk2->power_nand3_path.readOp.gate_leakage +
+ blk2->power_L2.readOp.gate_leakage;
+ power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
}
void PredecBlkDrv::leakage_feedback(double temperature)
@@ -1399,37 +1319,35 @@ void PredecBlkDrv::leakage_feedback(double temperature)
}
}
-double Predec::compute_delays(double inrisetime)
-{
- // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
- pair<double, double> tmp_pair1, tmp_pair2;
- tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
- tmp_pair1 = blk1->compute_delays(tmp_pair1);
- tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
- tmp_pair2 = blk2->compute_delays(tmp_pair2);
- tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
-
- driver_power.readOp.dynamic =
- drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
- drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
- drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
- drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
-
- block_power.readOp.dynamic =
- blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
- blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
- blk1->power_L2.readOp.dynamic +
- blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
- blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
- blk2->power_L2.readOp.dynamic;
-
- power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
-
- delay = tmp_pair1.first;
- return tmp_pair1.second;
+double Predec::compute_delays(double inrisetime) {
+ // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
+ pair<double, double> tmp_pair1, tmp_pair2;
+ tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
+ tmp_pair1 = blk1->compute_delays(tmp_pair1);
+ tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
+ tmp_pair2 = blk2->compute_delays(tmp_pair2);
+ tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
+
+ driver_power.readOp.dynamic =
+ drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
+ drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
+ drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
+ drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
+
+ block_power.readOp.dynamic =
+ blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
+ blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
+ blk1->power_L2.readOp.dynamic +
+ blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
+ blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
+ blk2->power_L2.readOp.dynamic;
+
+ power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
+
+ delay = tmp_pair1.first;
+ return tmp_pair1.second;
}
-
void Predec::leakage_feedback(double temperature)
{
drv1->leakage_feedback(temperature);
@@ -1465,113 +1383,116 @@ void Predec::leakage_feedback(double temperature)
// returns <delay, risetime>
pair<double, double> Predec::get_max_delay_before_decoder(
pair<double, double> input_pair1,
- pair<double, double> input_pair2)
-{
- pair<double, double> ret_val;
- double delay;
-
- delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
- ret_val.first = delay;
- ret_val.second = input_pair1.first;
- delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair1.second;
- }
- delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair2.first;
- }
- delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
- if (ret_val.first < delay)
- {
+ pair<double, double> input_pair2) {
+ pair<double, double> ret_val;
+ double delay;
+
+ delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
ret_val.first = delay;
- ret_val.second = input_pair2.second;
- }
+ ret_val.second = input_pair1.first;
+ delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
+ if (ret_val.first < delay) {
+ ret_val.first = delay;
+ ret_val.second = input_pair1.second;
+ }
+ delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
+ if (ret_val.first < delay) {
+ ret_val.first = delay;
+ ret_val.second = input_pair2.first;
+ }
+ delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
+ if (ret_val.first < delay) {
+ ret_val.first = delay;
+ ret_val.second = input_pair2.second;
+ }
- return ret_val;
+ return ret_val;
}
-Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram)
-:number_gates(0),
- min_number_gates(2),
- c_gate_load(c_gate_load_),
- c_wire_load(c_wire_load_),
- r_wire_load(r_wire_load_),
- delay(0),
- power(),
- is_dram_(is_dram)
-{
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- width_n[i] = 0;
- width_p[i] = 0;
- }
+Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_,
+ bool is_dram)
+ : number_gates(0),
+ min_number_gates(2),
+ c_gate_load(c_gate_load_),
+ c_wire_load(c_wire_load_),
+ r_wire_load(r_wire_load_),
+ delay(0),
+ power(),
+ is_dram_(is_dram) {
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
+ width_n[i] = 0;
+ width_p[i] = 0;
+ }
- compute_widths();
+ compute_widths();
}
-void Driver::compute_widths()
-{
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
- double c_load = c_gate_load + c_wire_load;
- width_n[0] = g_tp.min_w_nmos_;
- width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
-
- double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
- number_gates = logical_effort(
- min_number_gates,
- 1,
- F,
- width_n,
- width_p,
- c_load,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
+void Driver::compute_widths() {
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+ double c_load = c_gate_load + c_wire_load;
+ width_n[0] = g_tp.min_w_nmos_;
+ width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+
+ double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
+ number_gates = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_n,
+ width_p,
+ c_load,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
}
-double Driver::compute_delay(double inrisetime)
-{
- int i;
- double rd, c_load, c_intrinsic, tf;
- double this_delay = 0;
+double Driver::compute_delay(double inrisetime) {
+ int i;
+ double rd, c_load, c_intrinsic, tf;
+ double this_delay = 0;
+
+ for (i = 0; i < number_gates - 1; ++i) {
+ rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
+ g_tp.peri_global.Vdd;
+ power.readOp.leakage +=
+ cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ }
- for (i = 0; i < number_gates - 1; ++i)
- {
+ i = number_gates - 1;
+ c_load = c_gate_load + c_wire_load;
rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
- c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
+ drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + r_wire_load *
+ (c_wire_load / 2 + c_gate_load);
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
- power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
- }
-
- i = number_gates - 1;
- c_load = c_gate_load + c_wire_load;
- rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
- power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
-
- return this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
+ g_tp.peri_global.Vdd;
+ power.readOp.leakage +=
+ cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+
+ return this_delay / (1.0 - 0.5);
}
diff --git a/ext/mcpat/cacti/decoder.h b/ext/mcpat/cacti/decoder.h
index 35631e84b..a2ddf722c 100644
--- a/ext/mcpat/cacti/decoder.h
+++ b/ext/mcpat/cacti/decoder.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -42,9 +43,8 @@
using namespace std;
-class Decoder : public Component
-{
- public:
+class Decoder : public Component {
+public:
Decoder(
int _num_dec_signals,
bool flag_way_select,
@@ -80,125 +80,120 @@ class Decoder : public Component
-class PredecBlk : public Component
-{
- public:
- PredecBlk(
- int num_dec_signals,
- Decoder * dec,
- double C_wire_predec_blk_out,
- double R_wire_predec_blk_out,
- int num_dec_per_predec,
- bool is_dram_,
- bool is_blk1);
-
- Decoder * dec;
- bool exist;
- int number_input_addr_bits;
- double C_ld_predec_blk_out;
- double R_wire_predec_blk_out;
- int branch_effort_nand2_gate_output;
- int branch_effort_nand3_gate_output;
- bool flag_two_unique_paths;
- int flag_L2_gate;
- int number_inputs_L1_gate;
- int number_gates_L1_nand2_path;
- int number_gates_L1_nand3_path;
- int number_gates_L2;
- int min_number_gates_L1;
- int min_number_gates_L2;
- int num_L1_active_nand2_path;
- int num_L1_active_nand3_path;
- double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
- double w_L2_n[MAX_NUMBER_GATES_STAGE];
- double w_L2_p[MAX_NUMBER_GATES_STAGE];
- double delay_nand2_path;
- double delay_nand3_path;
- powerDef power_nand2_path;
- powerDef power_nand3_path;
- powerDef power_L2;
-
- bool is_dram_;
-
- void compute_widths();
- void compute_area();
-
- void leakage_feedback(double temperature);
-
- pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
- // return <outrise_nand2, outrise_nand3>
+class PredecBlk : public Component {
+public:
+ PredecBlk(
+ int num_dec_signals,
+ Decoder * dec,
+ double C_wire_predec_blk_out,
+ double R_wire_predec_blk_out,
+ int num_dec_per_predec,
+ bool is_dram_,
+ bool is_blk1);
+
+ Decoder * dec;
+ bool exist;
+ int number_input_addr_bits;
+ double C_ld_predec_blk_out;
+ double R_wire_predec_blk_out;
+ int branch_effort_nand2_gate_output;
+ int branch_effort_nand3_gate_output;
+ bool flag_two_unique_paths;
+ int flag_L2_gate;
+ int number_inputs_L1_gate;
+ int number_gates_L1_nand2_path;
+ int number_gates_L1_nand3_path;
+ int number_gates_L2;
+ int min_number_gates_L1;
+ int min_number_gates_L2;
+ int num_L1_active_nand2_path;
+ int num_L1_active_nand3_path;
+ double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
+ double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
+ double w_L2_n[MAX_NUMBER_GATES_STAGE];
+ double w_L2_p[MAX_NUMBER_GATES_STAGE];
+ double delay_nand2_path;
+ double delay_nand3_path;
+ powerDef power_nand2_path;
+ powerDef power_nand3_path;
+ powerDef power_L2;
+
+ bool is_dram_;
+
+ void compute_widths();
+ void compute_area();
+
+ void leakage_feedback(double temperature);
+
+ pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
+ // return <outrise_nand2, outrise_nand3>
};
-class PredecBlkDrv : public Component
-{
- public:
- PredecBlkDrv(
- int way_select,
- PredecBlk * blk_,
- bool is_dram);
-
- int flag_driver_exists;
- int number_input_addr_bits;
- int number_gates_nand2_path;
- int number_gates_nand3_path;
- int min_number_gates;
- int num_buffers_driving_1_nand2_load;
- int num_buffers_driving_2_nand2_load;
- int num_buffers_driving_4_nand2_load;
- int num_buffers_driving_2_nand3_load;
- int num_buffers_driving_8_nand3_load;
- int num_buffers_nand3_path;
- double c_load_nand2_path_out;
- double c_load_nand3_path_out;
- double r_load_nand2_path_out;
- double r_load_nand3_path_out;
- double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
- double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
- double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
- double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
- double delay_nand2_path;
- double delay_nand3_path;
- powerDef power_nand2_path;
- powerDef power_nand3_path;
-
- PredecBlk * blk;
- Decoder * dec;
- bool is_dram_;
- int way_select;
-
- void compute_widths();
- void compute_area();
-
- void leakage_feedback(double temperature);
-
-
- pair<double, double> compute_delays(
- double inrisetime_nand2_path,
- double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
-
- inline int num_addr_bits_nand2_path()
- {
- return num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load;
- }
- inline int num_addr_bits_nand3_path()
- {
- return num_buffers_driving_2_nand3_load +
- num_buffers_driving_8_nand3_load;
- }
- double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
+class PredecBlkDrv : public Component {
+public:
+ PredecBlkDrv(
+ int way_select,
+ PredecBlk * blk_,
+ bool is_dram);
+
+ int flag_driver_exists;
+ int number_input_addr_bits;
+ int number_gates_nand2_path;
+ int number_gates_nand3_path;
+ int min_number_gates;
+ int num_buffers_driving_1_nand2_load;
+ int num_buffers_driving_2_nand2_load;
+ int num_buffers_driving_4_nand2_load;
+ int num_buffers_driving_2_nand3_load;
+ int num_buffers_driving_8_nand3_load;
+ int num_buffers_nand3_path;
+ double c_load_nand2_path_out;
+ double c_load_nand3_path_out;
+ double r_load_nand2_path_out;
+ double r_load_nand3_path_out;
+ double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
+ double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
+ double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
+ double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
+ double delay_nand2_path;
+ double delay_nand3_path;
+ powerDef power_nand2_path;
+ powerDef power_nand3_path;
+
+ PredecBlk * blk;
+ Decoder * dec;
+ bool is_dram_;
+ int way_select;
+
+ void compute_widths();
+ void compute_area();
+
+ void leakage_feedback(double temperature);
+
+
+ pair<double, double> compute_delays(
+ double inrisetime_nand2_path,
+ double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
+
+ inline int num_addr_bits_nand2_path() {
+ return num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load;
+ }
+ inline int num_addr_bits_nand3_path() {
+ return num_buffers_driving_2_nand3_load +
+ num_buffers_driving_8_nand3_load;
+ }
+ double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
};
-class Predec : public Component
-{
- public:
+class Predec : public Component {
+public:
Predec(
PredecBlkDrv * drv1,
PredecBlkDrv * drv2);
@@ -214,7 +209,7 @@ class Predec : public Component
powerDef block_power;
powerDef driver_power;
- private:
+private:
// returns <delay, risetime>
pair<double, double> get_max_delay_before_decoder(
pair<double, double> input_pair1,
@@ -223,24 +218,23 @@ class Predec : public Component
-class Driver : public Component
-{
- public:
- Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
+class Driver : public Component {
+public:
+ Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
- int number_gates;
- int min_number_gates;
- double width_n[MAX_NUMBER_GATES_STAGE];
- double width_p[MAX_NUMBER_GATES_STAGE];
- double c_gate_load;
- double c_wire_load;
- double r_wire_load;
- double delay;
- powerDef power;
- bool is_dram_;
+ int number_gates;
+ int min_number_gates;
+ double width_n[MAX_NUMBER_GATES_STAGE];
+ double width_p[MAX_NUMBER_GATES_STAGE];
+ double c_gate_load;
+ double c_wire_load;
+ double r_wire_load;
+ double delay;
+ powerDef power;
+ bool is_dram_;
- void compute_widths();
- double compute_delay(double inrisetime);
+ void compute_widths();
+ double compute_delay(double inrisetime);
};
diff --git a/ext/mcpat/cacti/htree2.cc b/ext/mcpat/cacti/htree2.cc
index 817ea6a7c..55724c397 100644
--- a/ext/mcpat/cacti/htree2.cc
+++ b/ext/mcpat/cacti/htree2.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,15 +40,17 @@
Htree2::Htree2(
enum Wire_type wire_model, double mat_w, double mat_h,
- int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type,
+ int a_bits, int d_inbits, int search_data_in, int d_outbits,
+ int search_data_out, int bl, int wl, enum Htree_type htree_type,
bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
- :in_rise_time(0), out_rise_time(0),
- tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
- add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits),
- search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
- uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt)
-{
- assert(ndbl >= 2 && ndwl >= 2);
+ : in_rise_time(0), out_rise_time(0),
+ tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
+ add_bits(a_bits), data_in_bits(d_inbits),
+ search_data_in_bits(search_data_in), data_out_bits(d_outbits),
+ search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
+ uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model),
+ deviceType(dt) {
+ assert(ndbl >= 2 && ndwl >= 2);
// if (ndbl == 1 && ndwl == 1)
// {
@@ -61,177 +64,211 @@ Htree2::Htree2(
// if (ndwl == 1) ndwl++;
// if (ndbl == 1) ndbl++;
- max_unpipelined_link_delay = 0; //TODO
- min_w_nmos = g_tp.min_w_nmos_;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
+ max_unpipelined_link_delay = 0; //TODO
+ min_w_nmos = g_tp.min_w_nmos_;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
- switch (htree_type)
- {
+ switch (htree_type) {
case Add_htree:
- wire_bw = init_wire_bw = add_bits;
- in_htree();
- break;
+ wire_bw = init_wire_bw = add_bits;
+ in_htree();
+ break;
case Data_in_htree:
- wire_bw = init_wire_bw = data_in_bits;
- in_htree();
- break;
+ wire_bw = init_wire_bw = data_in_bits;
+ in_htree();
+ break;
case Data_out_htree:
- wire_bw = init_wire_bw = data_out_bits;
- out_htree();
- break;
+ wire_bw = init_wire_bw = data_out_bits;
+ out_htree();
+ break;
case Search_in_htree:
- wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
- in_htree();
- break;
+ wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broadcast, out_htree is not.
+ in_htree();
+ break;
case Search_out_htree:
- wire_bw = init_wire_bw = search_data_out_bits;
- out_htree();
- break;
+ wire_bw = init_wire_bw = search_data_out_bits;
+ out_htree();
+ break;
default:
- assert(0);
- break;
- }
+ assert(0);
+ break;
+ }
- power_bit = power;
- power.readOp.dynamic *= init_wire_bw;
+ power_bit = power;
+ power.readOp.dynamic *= init_wire_bw;
- assert(power.readOp.dynamic >= 0);
- assert(power.readOp.leakage >= 0);
+ assert(power.readOp.dynamic >= 0);
+ assert(power.readOp.leakage >= 0);
}
// nand gate sizing calculation
-void Htree2::input_nand(double s1, double s2, double l_eff)
-{
- Wire w1(wt, l_eff);
- double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
- // input capacitance of a repeater = input capacitance of nand.
- double nsize = s1*(1 + pton_size)/(2 + pton_size);
- nsize = (nsize < 1) ? 1 : nsize;
-
- double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) *
- (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
- 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0));
- delay+= horowitz (w1.out_rise_time, tc,
- deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
- power.readOp.dynamic += 0.5 *
- (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+void Htree2::input_nand(double s1, double s2, double l_eff) {
+ Wire w1(wt, l_eff);
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // input capacitance of a repeater = input capacitance of nand.
+ double nsize = s1 * (1 + pton_size) / (2 + pton_size);
+ nsize = (nsize < 1) ? 1 : nsize;
+
+ double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) *
+ (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
+ 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0));
+ delay += horowitz(w1.out_rise_time, tc,
+ deviceType->Vth / deviceType->Vdd, deviceType->Vth /
+ deviceType->Vdd, RISE);
+ power.readOp.dynamic += 0.5 *
+ (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd * wire_bw ;
- power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
- power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
+ (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * wire_bw ;
+ power.readOp.leakage += (wire_bw *
+ cmos_Isub_leakage(min_w_nmos * (nsize * 2),
+ min_w_pmos * nsize * 2, 2,
+ nand)) * deviceType->Vdd;
+ power.readOp.gate_leakage += (wire_bw *
+ cmos_Ig_leakage(min_w_nmos * (nsize * 2),
+ min_w_pmos * nsize * 2, 2,
+ nand)) * deviceType->Vdd;
}
// tristate buffer model consisting of not, nand, nor, and driver transistors
-void Htree2::output_buffer(double s1, double s2, double l_eff)
-{
- Wire w1(wt, l_eff);
- double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
- // input capacitance of repeater = input capacitance of nand + nor.
- double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
- double s_eff = //stage eff of a repeater in a wire
- (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/
- gate_C(s2*(min_w_nmos + min_w_pmos), 0);
- double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0));
- size = (size < 1) ? 1 : size;
-
- double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1);
- double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1);
- double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
- gate_C(tr_size*min_w_pmos, 0);
- double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
- gate_C(s1*(min_w_nmos + min_w_pmos), 0);
-
- double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
-
-
- delay += horowitz (w1.out_rise_time, tc,
- deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
-
- //nand
- power.readOp.dynamic += 0.5 *
- (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(tr_size*(min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+void Htree2::output_buffer(double s1, double s2, double l_eff) {
+ Wire w1(wt, l_eff);
+ double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
+ // input capacitance of repeater = input capacitance of nand + nor.
+ double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
+ double s_eff = //stage eff of a repeater in a wire
+ (gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6,
+ true)) /
+ gate_C(s2 * (min_w_nmos + min_w_pmos), 0);
+ double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 /
+ (s_eff * gate_C(min_w_pmos, 0));
+ size = (size < 1) ? 1 : size;
+
+ double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1);
+ double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1);
+ double cap_nand_out =
+ drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
+ gate_C(tr_size * min_w_pmos, 0);
+ double cap_ptrans_out = 2 *
+ (drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
+ gate_C(s1 * (min_w_nmos + min_w_pmos), 0);
+
+ double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
+
+
+ delay += horowitz(w1.out_rise_time, tc,
+ deviceType->Vth / deviceType->Vdd, deviceType->Vth /
+ deviceType->Vdd, RISE);
+
+ //nand
+ power.readOp.dynamic += 0.5 *
+ (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(tr_size * (min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(tr_size*(min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //not
- power.readOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+ (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(tr_size * (min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * init_wire_bw;
+
+ //not
+ power.readOp.dynamic += 0.5 *
+ (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + gate_C(size * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //nor
- power.readOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+ (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + gate_C(size * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * init_wire_bw;
+
+ //nor
+ power.readOp.dynamic += 0.5 *
+ (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //output transistor
- power.readOp.dynamic += 0.5 *
- ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
- + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
+ (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
+ + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * init_wire_bw;
+
+ //output transistor
+ power.readOp.dynamic += 0.5 *
+ ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2
+ + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd;
power.searchOp.dynamic += 0.5 *
- ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
- + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- if(uca_tree) {
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
-
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
- //power.readOp.gate_leakage *=;
- }
- else {
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
-
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
- //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw;
- }
+ ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
+ + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2
+ + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) *
+ deviceType->Vdd * deviceType->Vdd * init_wire_bw;
+
+ if (uca_tree) {
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
+ 2, 1, inv) *
+ deviceType->Vdd * wire_bw;/*inverter + output tr*/
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nand) * deviceType->Vdd * wire_bw;//nand
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nor) * deviceType->Vdd * wire_bw;//nor
+
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
+ 1, inv) *
+ deviceType->Vdd * wire_bw;/*inverter + output tr*/
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nand) * deviceType->Vdd * wire_bw;//nand
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nor) * deviceType->Vdd * wire_bw;//nor
+ } else {
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
+ 2, 1, inv) *
+ deviceType->Vdd * wire_bw;/*inverter + output tr*/
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nand) * deviceType->Vdd * wire_bw;//nand
+ power.readOp.leakage +=
+ cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nor) * deviceType->Vdd * wire_bw;//nor
+
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
+ 1, inv) *
+ deviceType->Vdd * wire_bw;/*inverter + output tr*/
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nand) * deviceType->Vdd * wire_bw;//nand
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
+ nor) * deviceType->Vdd * wire_bw;//nor
+ }
}
@@ -250,192 +287,200 @@ void Htree2::output_buffer(double s1, double s2, double l_eff)
* hor. links left. After this it goes through the remaining vertical
* links.
*/
- void
-Htree2::in_htree()
-{
- //temp var
- double s1 = 0, s2 = 0, s3 = 0;
- double l_eff = 0;
- Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
- double len = 0, ht = 0;
- int option = 0;
-
- int h = (int) _log2(ndwl/2); // horizontal nodes
- int v = (int) _log2(ndbl/2); // vertical nodes
- double len_temp;
- double ht_temp;
- if (uca_tree)
- {//Sheng: this computation do not consider the wires that route from edge to middle.
- ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,h))))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,v))))/2;
- }
- else
- {
- if (ndwl == ndbl) {
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
- }
- else if (ndwl > ndbl) {
- double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
- (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
+void
+Htree2::in_htree() {
+ //temp var
+ double s1 = 0, s2 = 0, s3 = 0;
+ double l_eff = 0;
+ Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
+ double len = 0, ht = 0;
+ int option = 0;
+
+ int h = (int) _log2(ndwl / 2); // horizontal nodes
+ int v = (int) _log2(ndbl / 2); // vertical nodes
+ double len_temp;
+ double ht_temp;
+ if (uca_tree) {
+ //Sheng: this computation does not consider the wires that route from
+ //edge to middle.
+ ht_temp = (mat_height * ndbl / 2 +
+ /* since uca_tree models interbank tree,
+ mat_height => bank height */
+ ((add_bits + data_in_bits + data_out_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ g_tp.wire_outside_mat.pitch *
+ 2 * (1 - pow(0.5, h)))) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + data_in_bits + data_out_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ g_tp.wire_outside_mat.pitch *
+ 2 * (1 - pow(0.5, v)))) / 2;
+ } else {
+ if (ndwl == ndbl) {
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) * (ndbl / 2 - 1) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * h)
+ ) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) * (ndwl / 2 - 1) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * v)) / 2;
+ } else if (ndwl > ndbl) {
+ double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits + + (search_data_in_bits +
+ search_data_out_bits)) *
+ ((ndbl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch *
+ (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * v)) / 2;
+ } else {
+ double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ ((ndwl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * h)
+ ) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ ((ndwl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch *
+ (h + 2 * (1 - pow(0.5, v - h)))) / 2;
+ }
}
- else {
- double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
+
+ area.h = ht_temp * 2;
+ area.w = len_temp * 2;
+ delay = 0;
+ power.readOp.dynamic = 0;
+ power.readOp.leakage = 0;
+ power.searchOp.dynamic = 0;
+ len = len_temp;
+ ht = ht_temp / 2;
+
+ while (v > 0 || h > 0) {
+ if (wtemp1) delete wtemp1;
+ if (wtemp2) delete wtemp2;
+ if (wtemp3) delete wtemp3;
+
+ if (h > v) {
+ //the iteration considers only one horizontal link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, len / 2); // ver
+ len_temp = len;
+ len /= 2;
+ wtemp3 = 0;
+ h--;
+ option = 0;
+ } else if (v > 0 && h > 0) {
+ //considers one horizontal link and one vertical link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, ht); // ver
+ wtemp3 = new Wire(wt, len / 2); // next hor
+ len_temp = len;
+ ht_temp = ht;
+ len /= 2;
+ ht /= 2;
+ v--;
+ h--;
+ option = 1;
+ } else {
+ // considers only one vertical link
+ assert(h == 0);
+ wtemp1 = new Wire(wt, ht); // ver
+ wtemp2 = new Wire(wt, ht / 2); // hor
+ ht_temp = ht;
+ ht /= 2;
+ wtemp3 = 0;
+ v--;
+ option = 2;
+ }
+
+ delay += wtemp1->delay;
+ power.readOp.dynamic += wtemp1->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw;
+ power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
+ power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
+ if ((uca_tree == false && option == 2) || search_tree == true) {
+ wire_bw *= 2; // wire bandwidth doubles on vertical-only links of a non-UCA tree, and on every link of a search tree
+ }
+
+ if (uca_tree == false) {
+ if (len_temp > wtemp1->repeater_spacing) {
+ s1 = wtemp1->repeater_size;
+ l_eff = wtemp1->repeater_spacing;
+ } else {
+ s1 = (len_temp / wtemp1->repeater_spacing) *
+ wtemp1->repeater_size;
+ l_eff = len_temp;
+ }
+
+ if (ht_temp > wtemp2->repeater_spacing) {
+ s2 = wtemp2->repeater_size;
+ } else {
+ s2 = (len_temp / wtemp2->repeater_spacing) *
+ wtemp2->repeater_size;
+ }
+ // first level
+ input_nand(s1, s2, l_eff);
+ }
+
+
+ if (option != 1) {
+ continue;
+ }
+
+ // second level
+ delay += wtemp2->delay;
+ power.readOp.dynamic += wtemp2->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp2->power.readOp.dynamic * wire_bw;
+ power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
+
+ if (uca_tree) {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
+ power.readOp.gate_leakage +=
+ wtemp2->power.readOp.gate_leakage * wire_bw;
+ } else {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
+ power.readOp.gate_leakage +=
+ wtemp2->power.readOp.gate_leakage * wire_bw;
+ wire_bw *= 2;
+
+ if (ht_temp > wtemp3->repeater_spacing) {
+ s3 = wtemp3->repeater_size;
+ l_eff = wtemp3->repeater_spacing;
+ } else {
+ s3 = (len_temp / wtemp3->repeater_spacing) *
+ wtemp3->repeater_size;
+ l_eff = ht_temp;
+ }
+
+ input_nand(s2, s3, l_eff);
+ }
}
- }
-
- area.h = ht_temp * 2;
- area.w = len_temp * 2;
- delay = 0;
- power.readOp.dynamic = 0;
- power.readOp.leakage = 0;
- power.searchOp.dynamic =0;
- len = len_temp;
- ht = ht_temp/2;
-
- while (v > 0 || h > 0)
- {
+
if (wtemp1) delete wtemp1;
if (wtemp2) delete wtemp2;
if (wtemp3) delete wtemp3;
-
- if (h > v)
- {
- //the iteration considers only one horizontal link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, len/2); // ver
- len_temp = len;
- len /= 2;
- wtemp3 = 0;
- h--;
- option = 0;
- }
- else if (v>0 && h>0)
- {
- //considers one horizontal link and one vertical link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, ht); // ver
- wtemp3 = new Wire(wt, len/2); // next hor
- len_temp = len;
- ht_temp = ht;
- len /= 2;
- ht /= 2;
- v--;
- h--;
- option = 1;
- }
- else
- {
- // considers only one vertical link
- assert(h == 0);
- wtemp1 = new Wire(wt, ht); // ver
- wtemp2 = new Wire(wt, ht/2); // hor
- ht_temp = ht;
- ht /= 2;
- wtemp3 = 0;
- v--;
- option = 2;
- }
-
- delay += wtemp1->delay;
- power.readOp.dynamic += wtemp1->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw;
- power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
- power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
- if ((uca_tree == false && option == 2) || search_tree==true)
- {
- wire_bw*=2; // wire bandwidth doubles only for vertical branches
- }
-
- if (uca_tree == false)
- {
- if (len_temp > wtemp1->repeater_spacing)
- {
- s1 = wtemp1->repeater_size;
- l_eff = wtemp1->repeater_spacing;
- }
- else
- {
- s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
- l_eff = len_temp;
- }
-
- if (ht_temp > wtemp2->repeater_spacing)
- {
- s2 = wtemp2->repeater_size;
- }
- else
- {
- s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
- }
- // first level
- input_nand(s1, s2, l_eff);
- }
-
-
- if (option != 1)
- {
- continue;
- }
-
- // second level
- delay += wtemp2->delay;
- power.readOp.dynamic += wtemp2->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw;
- power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
-
- if (uca_tree)
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- }
- else
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- wire_bw*=2;
-
- if (ht_temp > wtemp3->repeater_spacing)
- {
- s3 = wtemp3->repeater_size;
- l_eff = wtemp3->repeater_spacing;
- }
- else
- {
- s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
- l_eff = ht_temp;
- }
-
- input_nand(s2, s3, l_eff);
- }
- }
-
- if (wtemp1) delete wtemp1;
- if (wtemp2) delete wtemp2;
- if (wtemp3) delete wtemp3;
}
@@ -452,190 +497,198 @@ Htree2::in_htree()
* hor. links left. After this it goes through the remaining vertical
* links.
*/
-void Htree2::out_htree()
-{
- //temp var
- double s1 = 0, s2 = 0, s3 = 0;
- double l_eff = 0;
- Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
- double len = 0, ht = 0;
- int option = 0;
-
- int h = (int) _log2(ndwl/2);
- int v = (int) _log2(ndbl/2);
- double len_temp;
- double ht_temp;
- if (uca_tree)
- {
- ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,h))))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,v))))/2;
- }
- else
- {
- if (ndwl == ndbl) {
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
-
+void Htree2::out_htree() {
+ //temp var
+ double s1 = 0, s2 = 0, s3 = 0;
+ double l_eff = 0;
+ Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
+ double len = 0, ht = 0;
+ int option = 0;
+
+ int h = (int) _log2(ndwl / 2);
+ int v = (int) _log2(ndbl / 2);
+ double len_temp;
+ double ht_temp;
+ if (uca_tree) {
+ ht_temp = (mat_height * ndbl / 2 +
+ /* since uca_tree models interbank tree,
+ mat_height => bank height */
+ ((add_bits + data_in_bits + data_out_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ g_tp.wire_outside_mat.pitch *
+ 2 * (1 - pow(0.5, h)))) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + data_in_bits + data_out_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ g_tp.wire_outside_mat.pitch *
+ 2 * (1 - pow(0.5, v)))) / 2;
+ } else {
+ if (ndwl == ndbl) {
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) *
+ (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * h)
+ ) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) * (ndwl / 2 - 1) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * v)) / 2;
+
+ } else if (ndwl > ndbl) {
+ double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ ((ndbl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch *
+ (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * v)) / 2;
+ } else {
+ double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
+ ht_temp = ((mat_height * ndbl / 2) +
+ ((add_bits +
+ (search_data_in_bits + search_data_out_bits)) *
+ ((ndwl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ ((data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch * h)
+ ) / 2;
+ len_temp = (mat_width * ndwl / 2 +
+ ((add_bits + (search_data_in_bits +
+ search_data_out_bits)) *
+ ((ndwl / 2 - 1) + excess_part) *
+ g_tp.wire_outside_mat.pitch) +
+ (data_in_bits + data_out_bits) *
+ g_tp.wire_outside_mat.pitch *
+ (h + 2 * (1 - pow(0.5, v - h)))) / 2;
+ }
}
- else if (ndwl > ndbl) {
- double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
- (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
- }
- else {
- double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
+ area.h = ht_temp * 2;
+ area.w = len_temp * 2;
+ delay = 0;
+ power.readOp.dynamic = 0;
+ power.readOp.leakage = 0;
+ power.readOp.gate_leakage = 0;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ len = len_temp;
+ ht = ht_temp / 2;
+
+ while (v > 0 || h > 0) { //finds delay/power of each link in the tree
+ if (wtemp1) delete wtemp1;
+ if (wtemp2) delete wtemp2;
+ if (wtemp3) delete wtemp3;
+
+ if (h > v) {
+ //the iteration considers only one horizontal link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, len / 2); // ver
+ len_temp = len;
+ len /= 2;
+ wtemp3 = 0;
+ h--;
+ option = 0;
+ } else if (v > 0 && h > 0) {
+ //considers one horizontal link and one vertical link
+ wtemp1 = new Wire(wt, len); // hor
+ wtemp2 = new Wire(wt, ht); // ver
+ wtemp3 = new Wire(wt, len / 2); // next hor
+ len_temp = len;
+ ht_temp = ht;
+ len /= 2;
+ ht /= 2;
+ v--;
+ h--;
+ option = 1;
+ } else {
+ // considers only one vertical link
+ assert(h == 0);
+ wtemp1 = new Wire(wt, ht); // hor
+ wtemp2 = new Wire(wt, ht / 2); // ver
+ ht_temp = ht;
+ ht /= 2;
+ wtemp3 = 0;
+ v--;
+ option = 2;
+ }
+ delay += wtemp1->delay;
+ power.readOp.dynamic += wtemp1->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp1->power.readOp.dynamic * init_wire_bw;
+ power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
+ power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
+ if ((uca_tree == false && option == 2) || search_tree == true) {
+ wire_bw *= 2;
+ }
+
+ if (uca_tree == false) {
+ if (len_temp > wtemp1->repeater_spacing) {
+ s1 = wtemp1->repeater_size;
+ l_eff = wtemp1->repeater_spacing;
+ } else {
+ s1 = (len_temp / wtemp1->repeater_spacing) *
+ wtemp1->repeater_size;
+ l_eff = len_temp;
+ }
+ if (ht_temp > wtemp2->repeater_spacing) {
+ s2 = wtemp2->repeater_size;
+ } else {
+ s2 = (len_temp / wtemp2->repeater_spacing) *
+ wtemp2->repeater_size;
+ }
+ // first level
+ output_buffer(s1, s2, l_eff);
+ }
+
+
+ if (option != 1) {
+ continue;
+ }
+
+ // second level
+ delay += wtemp2->delay;
+ power.readOp.dynamic += wtemp2->power.readOp.dynamic;
+ power.searchOp.dynamic += wtemp2->power.readOp.dynamic * init_wire_bw;
+ power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
+ power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ if (uca_tree) {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
+ power.readOp.gate_leakage +=
+ wtemp2->power.readOp.gate_leakage * wire_bw;
+ } else {
+ power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
+ power.readOp.gate_leakage +=
+ wtemp2->power.readOp.gate_leakage * wire_bw;
+ wire_bw *= 2;
+
+ if (ht_temp > wtemp3->repeater_spacing) {
+ s3 = wtemp3->repeater_size;
+ l_eff = wtemp3->repeater_spacing;
+ } else {
+ s3 = (len_temp / wtemp3->repeater_spacing) *
+ wtemp3->repeater_size;
+ l_eff = ht_temp;
+ }
+
+ output_buffer(s2, s3, l_eff);
+ }
+ //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
+ //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
+ //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
}
- }
- area.h = ht_temp * 2;
- area.w = len_temp * 2;
- delay = 0;
- power.readOp.dynamic = 0;
- power.readOp.leakage = 0;
- power.readOp.gate_leakage = 0;
- //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
- len = len_temp;
- ht = ht_temp/2;
-
- while (v > 0 || h > 0)
- { //finds delay/power of each link in the tree
+
if (wtemp1) delete wtemp1;
if (wtemp2) delete wtemp2;
if (wtemp3) delete wtemp3;
-
- if(h > v) {
- //the iteration considers only one horizontal link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, len/2); // ver
- len_temp = len;
- len /= 2;
- wtemp3 = 0;
- h--;
- option = 0;
- }
- else if (v>0 && h>0) {
- //considers one horizontal link and one vertical link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, ht); // ver
- wtemp3 = new Wire(wt, len/2); // next hor
- len_temp = len;
- ht_temp = ht;
- len /= 2;
- ht /= 2;
- v--;
- h--;
- option = 1;
- }
- else {
- // considers only one vertical link
- assert(h == 0);
- wtemp1 = new Wire(wt, ht); // hor
- wtemp2 = new Wire(wt, ht/2); // ver
- ht_temp = ht;
- ht /= 2;
- wtemp3 = 0;
- v--;
- option = 2;
- }
- delay += wtemp1->delay;
- power.readOp.dynamic += wtemp1->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw;
- power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
- power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
- //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
- if ((uca_tree == false && option == 2) || search_tree==true)
- {
- wire_bw*=2;
- }
-
- if (uca_tree == false)
- {
- if (len_temp > wtemp1->repeater_spacing)
- {
- s1 = wtemp1->repeater_size;
- l_eff = wtemp1->repeater_spacing;
- }
- else
- {
- s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
- l_eff = len_temp;
- }
- if (ht_temp > wtemp2->repeater_spacing)
- {
- s2 = wtemp2->repeater_size;
- }
- else
- {
- s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
- }
- // first level
- output_buffer(s1, s2, l_eff);
- }
-
-
- if (option != 1)
- {
- continue;
- }
-
- // second level
- delay += wtemp2->delay;
- power.readOp.dynamic += wtemp2->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw;
- power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
- if (uca_tree)
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- }
- else
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- wire_bw*=2;
-
- if (ht_temp > wtemp3->repeater_spacing)
- {
- s3 = wtemp3->repeater_size;
- l_eff = wtemp3->repeater_spacing;
- }
- else
- {
- s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
- l_eff = ht_temp;
- }
-
- output_buffer(s2, s3, l_eff);
- }
- //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
- //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
- //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
- }
-
- if (wtemp1) delete wtemp1;
- if (wtemp2) delete wtemp2;
- if (wtemp3) delete wtemp3;
}
diff --git a/ext/mcpat/cacti/htree2.h b/ext/mcpat/cacti/htree2.h
index 053e43a27..cae71c62e 100644
--- a/ext/mcpat/cacti/htree2.h
+++ b/ext/mcpat/cacti/htree2.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -44,13 +45,12 @@
// leakge power includes entire htree in a bank (when uca_tree == false)
// leakge power includes only part to one bank when uca_tree == true
-class Htree2 : public Component
-{
- public:
+class Htree2 : public Component {
+public:
Htree2(enum Wire_type wire_model,
- double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
- enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
+ enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
~Htree2() {};
void in_htree();
@@ -64,16 +64,15 @@ class Htree2 : public Component
double in_rise_time, out_rise_time;
- void set_in_rise_time(double rt)
- {
- in_rise_time = rt;
+ void set_in_rise_time(double rt) {
+ in_rise_time = rt;
}
double max_unpipelined_link_delay;
powerDef power_bit;
- private:
+private:
double wire_bw;
double init_wire_bw; // bus width at root
enum Htree_type tree_type;
@@ -81,7 +80,11 @@ class Htree2 : public Component
double htree_vnodes;
double mat_width;
double mat_height;
- int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
+ int add_bits;
+ int data_in_bits;
+ int search_data_in_bits;
+ int data_out_bits;
+ int search_data_out_bits;
int ndbl, ndwl;
bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
bool search_tree;
diff --git a/ext/mcpat/cacti/io.cc b/ext/mcpat/cacti/io.cc
index 56725ab7c..c3035d70f 100644
--- a/ext/mcpat/cacti/io.cc
+++ b/ext/mcpat/cacti/io.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -49,629 +50,578 @@ using namespace std;
/* Parses "cache.cfg" file */
- void
-InputParameter::parse_cfg(const string & in_file)
-{
- FILE *fp = fopen(in_file.c_str(), "r");
- char line[5000];
- char jk[5000];
- char temp_var[5000];
-
- if(!fp) {
- cout << in_file << " is missing!\n";
- exit(-1);
- }
+void
+InputParameter::parse_cfg(const string & in_file) {
+ FILE *fp = fopen(in_file.c_str(), "r");
+ char line[5000];
+ char jk[5000];
+ char temp_var[5000];
+
+ if (!fp) {
+ cout << in_file << " is missing!\n";
+ exit(-1);
+ }
- while(fscanf(fp, "%[^\n]\n", line) != EOF) {
+ while (fscanf(fp, "%[^\n]\n", line) != EOF) {
- if (!strncmp("-size", line, strlen("-size"))) {
- sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz));
- continue;
- }
+ if (!strncmp("-size", line, strlen("-size"))) {
+ sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz));
+ continue;
+ }
- if (!strncmp("-page size", line, strlen("-page size"))) {
- sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits));
- continue;
- }
+ if (!strncmp("-page size", line, strlen("-page size"))) {
+ sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits));
+ continue;
+ }
- if (!strncmp("-burst length", line, strlen("-burst length"))) {
- sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len));
- continue;
- }
+ if (!strncmp("-burst length", line, strlen("-burst length"))) {
+ sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len));
+ continue;
+ }
- if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) {
- sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w));
- continue;
- }
+ if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) {
+ sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w));
+ continue;
+ }
- if (!strncmp("-block", line, strlen("-block"))) {
- sscanf(line, "-block size (bytes) %d", &(line_sz));
- continue;
- }
+ if (!strncmp("-block", line, strlen("-block"))) {
+ sscanf(line, "-block size (bytes) %d", &(line_sz));
+ continue;
+ }
- if (!strncmp("-associativity", line, strlen("-associativity"))) {
- sscanf(line, "-associativity %d", &(assoc));
- continue;
- }
+ if (!strncmp("-associativity", line, strlen("-associativity"))) {
+ sscanf(line, "-associativity %d", &(assoc));
+ continue;
+ }
- if (!strncmp("-read-write", line, strlen("-read-write"))) {
- sscanf(line, "-read-write port %d", &(num_rw_ports));
- continue;
- }
+ if (!strncmp("-read-write", line, strlen("-read-write"))) {
+ sscanf(line, "-read-write port %d", &(num_rw_ports));
+ continue;
+ }
- if (!strncmp("-exclusive read", line, strlen("exclusive read"))) {
- sscanf(line, "-exclusive read port %d", &(num_rd_ports));
- continue;
- }
+ if (!strncmp("-exclusive read", line, strlen("exclusive read"))) {
+ sscanf(line, "-exclusive read port %d", &(num_rd_ports));
+ continue;
+ }
- if(!strncmp("-exclusive write", line, strlen("-exclusive write"))) {
- sscanf(line, "-exclusive write port %d", &(num_wr_ports));
- continue;
- }
+ if (!strncmp("-exclusive write", line, strlen("-exclusive write"))) {
+ sscanf(line, "-exclusive write port %d", &(num_wr_ports));
+ continue;
+ }
- if (!strncmp("-single ended", line, strlen("-single ended"))) {
- sscanf(line, "-single %[(:-~)*]%d", jk,
- &(num_se_rd_ports));
- continue;
- }
+ if (!strncmp("-single ended", line, strlen("-single ended"))) {
+ sscanf(line, "-single %[(:-~)*]%d", jk,
+ &(num_se_rd_ports));
+ continue;
+ }
- if (!strncmp("-search", line, strlen("-search"))) {
- sscanf(line, "-search port %d", &(num_search_ports));
- continue;
- }
+ if (!strncmp("-search", line, strlen("-search"))) {
+ sscanf(line, "-search port %d", &(num_search_ports));
+ continue;
+ }
- if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) {
- sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks));
- continue;
- }
+ if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) {
+ sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks));
+ continue;
+ }
- if (!strncmp("-technology", line, strlen("-technology"))) {
- sscanf(line, "-technology (u) %lf", &(F_sz_um));
- F_sz_nm = F_sz_um*1000;
- continue;
- }
+ if (!strncmp("-technology", line, strlen("-technology"))) {
+ sscanf(line, "-technology (u) %lf", &(F_sz_um));
+ F_sz_nm = F_sz_um * 1000;
+ continue;
+ }
- if (!strncmp("-output/input", line, strlen("-output/input"))) {
- sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w));
- continue;
- }
+ if (!strncmp("-output/input", line, strlen("-output/input"))) {
+ sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w));
+ continue;
+ }
- if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) {
- sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp));
- continue;
- }
+ if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) {
+ sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp));
+ continue;
+ }
- if (!strncmp("-cache type", line, strlen("-cache type"))) {
- sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("cache", temp_var, sizeof("cache"))) {
- is_cache = true;
- }
- else
- {
- is_cache = false;
- }
-
- if (!strncmp("main memory", temp_var, sizeof("main memory"))) {
- is_main_mem = true;
- }
- else {
- is_main_mem = false;
- }
-
- if (!strncmp("cam", temp_var, sizeof("cam"))) {
- pure_cam = true;
- }
- else {
- pure_cam = false;
- }
-
- if (!strncmp("ram", temp_var, sizeof("ram"))) {
- pure_ram = true;
- }
- else {
- if (!is_main_mem)
- pure_ram = false;
- else
- pure_ram = true;
- }
-
- continue;
- }
+ if (!strncmp("-cache type", line, strlen("-cache type"))) {
+ sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("cache", temp_var, sizeof("cache"))) {
+ is_cache = true;
+ } else {
+ is_cache = false;
+ }
+
+ if (!strncmp("main memory", temp_var, sizeof("main memory"))) {
+ is_main_mem = true;
+ } else {
+ is_main_mem = false;
+ }
+
+ if (!strncmp("cam", temp_var, sizeof("cam"))) {
+ pure_cam = true;
+ } else {
+ pure_cam = false;
+ }
+
+ if (!strncmp("ram", temp_var, sizeof("ram"))) {
+ pure_ram = true;
+ } else {
+ if (!is_main_mem)
+ pure_ram = false;
+ else
+ pure_ram = true;
+ }
+
+ continue;
+ }
- if (!strncmp("-tag size", line, strlen("-tag size"))) {
- sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("default", temp_var, sizeof("default"))) {
- specific_tag = false;
- tag_w = 42; /* the acutal value is calculated
+ if (!strncmp("-tag size", line, strlen("-tag size"))) {
+ sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("default", temp_var, sizeof("default"))) {
+ specific_tag = false;
+ tag_w = 42; /* the acutal value is calculated
* later based on the cache size, bank count, and associativity
*/
- }
- else {
- specific_tag = true;
- sscanf(line, "-tag size (b) %d", &(tag_w));
- }
- continue;
- }
+ } else {
+ specific_tag = true;
+ sscanf(line, "-tag size (b) %d", &(tag_w));
+ }
+ continue;
+ }
- if (!strncmp("-access mode", line, strlen("-access mode"))) {
- sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("fast", temp_var, strlen("fast"))) {
- access_mode = 2;
- }
- else if (!strncmp("sequential", temp_var, strlen("sequential"))) {
- access_mode = 1;
- }
- else if(!strncmp("normal", temp_var, strlen("normal"))) {
- access_mode = 0;
- }
- else {
- cout << "ERROR: Invalid access mode!\n";
- exit(0);
- }
- continue;
- }
+ if (!strncmp("-access mode", line, strlen("-access mode"))) {
+ sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("fast", temp_var, strlen("fast"))) {
+ access_mode = 2;
+ } else if (!strncmp("sequential", temp_var, strlen("sequential"))) {
+ access_mode = 1;
+ } else if (!strncmp("normal", temp_var, strlen("normal"))) {
+ access_mode = 0;
+ } else {
+ cout << "ERROR: Invalid access mode!\n";
+ exit(0);
+ }
+ continue;
+ }
- if (!strncmp("-Data array cell type", line, strlen("-Data array cell type"))) {
- sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- data_arr_ram_cell_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- data_arr_ram_cell_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- data_arr_ram_cell_tech_type = 2;
- }
- else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
- data_arr_ram_cell_tech_type = 3;
- }
- else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
- data_arr_ram_cell_tech_type = 4;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
+ if (!strncmp("-Data array cell type", line,
+ strlen("-Data array cell type"))) {
+ sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ data_arr_ram_cell_tech_type = 0;
+ } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ data_arr_ram_cell_tech_type = 1;
+ } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ data_arr_ram_cell_tech_type = 2;
+ } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
+ data_arr_ram_cell_tech_type = 3;
+ } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
+ data_arr_ram_cell_tech_type = 4;
+ } else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
- if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) {
- sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- data_arr_peri_global_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- data_arr_peri_global_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- data_arr_peri_global_tech_type = 2;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
+ if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) {
+ sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ data_arr_peri_global_tech_type = 0;
+ } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ data_arr_peri_global_tech_type = 1;
+ } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ data_arr_peri_global_tech_type = 2;
+ } else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
- if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) {
- sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- tag_arr_ram_cell_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- tag_arr_ram_cell_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- tag_arr_ram_cell_tech_type = 2;
- }
- else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
- tag_arr_ram_cell_tech_type = 3;
- }
- else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
- tag_arr_ram_cell_tech_type = 4;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
+ if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) {
+ sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ tag_arr_ram_cell_tech_type = 0;
+ } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ tag_arr_ram_cell_tech_type = 1;
+ } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ tag_arr_ram_cell_tech_type = 2;
+ } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
+ tag_arr_ram_cell_tech_type = 3;
+ } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
+ tag_arr_ram_cell_tech_type = 4;
+ } else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
- if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) {
- sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- tag_arr_peri_global_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- tag_arr_peri_global_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- tag_arr_peri_global_tech_type = 2;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
- if(!strncmp("-design", line, strlen("-design"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_wt), &(dynamic_power_wt),
- &(leakage_power_wt),
- &(cycle_time_wt), &(area_wt));
- continue;
- }
+ if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) {
+ sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
+ tag_arr_peri_global_tech_type = 0;
+ } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
+ tag_arr_peri_global_tech_type = 1;
+ } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
+ tag_arr_peri_global_tech_type = 2;
+ } else {
+ cout << "ERROR: Invalid type!\n";
+ exit(0);
+ }
+ continue;
+ }
+ if (!strncmp("-design", line, strlen("-design"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_wt), &(dynamic_power_wt),
+ &(leakage_power_wt),
+ &(cycle_time_wt), &(area_wt));
+ continue;
+ }
- if(!strncmp("-deviate", line, strlen("-deviate"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_dev), &(dynamic_power_dev),
- &(leakage_power_dev),
- &(cycle_time_dev), &(area_dev));
- continue;
- }
+ if (!strncmp("-deviate", line, strlen("-deviate"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_dev), &(dynamic_power_dev),
+ &(leakage_power_dev),
+ &(cycle_time_dev), &(area_dev));
+ continue;
+ }
- if(!strncmp("-Optimize", line, strlen("-Optimize"))) {
- sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("ED^2", temp_var, strlen("ED^2"))) {
- ed = 2;
- }
- else if(!strncmp("ED", temp_var, strlen("ED"))) {
- ed = 1;
- }
- else {
- ed = 0;
- }
- }
+ if (!strncmp("-Optimize", line, strlen("-Optimize"))) {
+ sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var);
- if(!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_wt_nuca), &(dynamic_power_wt_nuca),
- &(leakage_power_wt_nuca),
- &(cycle_time_wt_nuca), &(area_wt_nuca));
- continue;
- }
+ if (!strncmp("ED^2", temp_var, strlen("ED^2"))) {
+ ed = 2;
+ } else if (!strncmp("ED", temp_var, strlen("ED"))) {
+ ed = 1;
+ } else {
+ ed = 0;
+ }
+ }
- if(!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_dev_nuca), &(dynamic_power_dev_nuca),
- &(leakage_power_dev_nuca),
- &(cycle_time_dev_nuca), &(area_dev_nuca));
- continue;
- }
+ if (!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_wt_nuca), &(dynamic_power_wt_nuca),
+ &(leakage_power_wt_nuca),
+ &(cycle_time_wt_nuca), &(area_wt_nuca));
+ continue;
+ }
- if(!strncmp("-Cache model", line, strlen("-cache model"))) {
- sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) {
+ sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
+ &(delay_dev_nuca), &(dynamic_power_dev_nuca),
+ &(leakage_power_dev_nuca),
+ &(cycle_time_dev_nuca), &(area_dev_nuca));
+ continue;
+ }
- if (!strncmp("UCA", temp_var, strlen("UCA"))) {
- nuca = 0;
- }
- else {
- nuca = 1;
- }
- continue;
- }
+ if (!strncmp("-Cache model", line, strlen("-cache model"))) {
+ sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var);
- if(!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) {
- sscanf(line, "-NUCA bank count %d", &(nuca_bank_count));
+ if (!strncmp("UCA", temp_var, strlen("UCA"))) {
+ nuca = 0;
+ } else {
+ nuca = 1;
+ }
+ continue;
+ }
- if (nuca_bank_count != 0) {
- force_nuca_bank = 1;
- }
- continue;
- }
+ if (!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) {
+ sscanf(line, "-NUCA bank count %d", &(nuca_bank_count));
- if(!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("global", temp_var, strlen("global"))) {
- wire_is_mat_type = 2;
- continue;
- }
- else if (!strncmp("local", temp_var, strlen("local"))) {
- wire_is_mat_type = 0;
- continue;
- }
- else {
- wire_is_mat_type = 1;
- continue;
- }
- }
+ if (nuca_bank_count != 0) {
+ force_nuca_bank = 1;
+ }
+ continue;
+ }
- if(!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("global", temp_var, strlen("global"))) {
+ wire_is_mat_type = 2;
+ continue;
+ } else if (!strncmp("local", temp_var, strlen("local"))) {
+ wire_is_mat_type = 0;
+ continue;
+ } else {
+ wire_is_mat_type = 1;
+ continue;
+ }
+ }
- if (!strncmp("global", temp_var, strlen("global"))) {
- wire_os_mat_type = 2;
- }
- else {
- wire_os_mat_type = 1;
- }
- continue;
- }
+ if (!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
- if(!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) {
- sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("global", temp_var, strlen("global"))) {
+ wire_os_mat_type = 2;
+ } else {
+ wire_os_mat_type = 1;
+ }
+ continue;
+ }
- if (!strncmp("aggressive", temp_var, strlen("aggressive"))) {
- ic_proj_type = 0;
- }
- else {
- ic_proj_type = 1;
- }
- continue;
- }
+ if (!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) {
+ sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var);
- if(!strncmp("-Wire signalling", line, strlen("-wire signalling"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("default", temp_var, strlen("default"))) {
- force_wiretype = 0;
- wt = Global;
- }
- else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) {
- force_wiretype = 1;
- wt = Global_10;
- }
- else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) {
- force_wiretype = 1;
- wt = Global_20;
- }
- else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) {
- force_wiretype = 1;
- wt = Global_30;
- }
- else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) {
- force_wiretype = 1;
- wt = Global_5;
- }
- else if (!(strncmp("Global", temp_var, strlen("Global")))) {
- force_wiretype = 1;
- wt = Global;
- }
- else {
- wt = Low_swing;
- force_wiretype = 1;
- }
- continue;
- }
+ if (!strncmp("aggressive", temp_var, strlen("aggressive"))) {
+ ic_proj_type = 0;
+ } else {
+ ic_proj_type = 1;
+ }
+ continue;
+ }
+ if (!strncmp("-Wire signalling", line, strlen("-wire signalling"))) {
+ sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
+
+ if (!strncmp("default", temp_var, strlen("default"))) {
+ force_wiretype = 0;
+ wt = Global;
+ } else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) {
+ force_wiretype = 1;
+ wt = Global_10;
+ } else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) {
+ force_wiretype = 1;
+ wt = Global_20;
+ } else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) {
+ force_wiretype = 1;
+ wt = Global_30;
+ } else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) {
+ force_wiretype = 1;
+ wt = Global_5;
+ } else if (!(strncmp("Global", temp_var, strlen("Global")))) {
+ force_wiretype = 1;
+ wt = Global;
+ } else {
+ wt = Low_swing;
+ force_wiretype = 1;
+ }
+ continue;
+ }
- if(!strncmp("-Core", line, strlen("-Core"))) {
- sscanf(line, "-Core count %d\n", &(cores));
- if (cores > 16) {
- printf("No. of cores should be less than 16!\n");
- }
- continue;
- }
- if(!strncmp("-Cache level", line, strlen("-Cache level"))) {
- sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("L2", temp_var, strlen("L2"))) {
- cache_level = 0;
- }
- else {
- cache_level = 1;
- }
- }
+ if (!strncmp("-Core", line, strlen("-Core"))) {
+ sscanf(line, "-Core count %d\n", &(cores));
+ if (cores > 16) {
+ printf("No. of cores should be less than 16!\n");
+ }
+ continue;
+ }
- if(!strncmp("-Print level", line, strlen("-Print level"))) {
- sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) {
- print_detail = 1;
- }
- else {
- print_detail = 0;
- }
+ if (!strncmp("-Cache level", line, strlen("-Cache level"))) {
+ sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("L2", temp_var, strlen("L2"))) {
+ cache_level = 0;
+ } else {
+ cache_level = 1;
+ }
+ }
- }
- if(!strncmp("-Add ECC", line, strlen("-Add ECC"))) {
- sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- add_ecc_b_ = true;
- }
- else {
- add_ecc_b_ = false;
- }
- }
+ if (!strncmp("-Print level", line, strlen("-Print level"))) {
+ sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) {
+ print_detail = 1;
+ } else {
+ print_detail = 0;
+ }
- if(!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) {
- sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- print_input_args = true;
- }
- else {
- print_input_args = false;
- }
- }
+ }
+ if (!strncmp("-Add ECC", line, strlen("-Add ECC"))) {
+ sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ add_ecc_b_ = true;
+ } else {
+ add_ecc_b_ = false;
+ }
+ }
- if(!strncmp("-Force cache config", line, strlen("-Force cache config"))) {
- sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- force_cache_config = true;
- }
- else {
- force_cache_config = false;
- }
- }
+ if (!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) {
+ sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ print_input_args = true;
+ } else {
+ print_input_args = false;
+ }
+ }
- if(!strncmp("-Ndbl", line, strlen("-Ndbl"))) {
- sscanf(line, "-Ndbl %d\n", &(ndbl));
- continue;
- }
- if(!strncmp("-Ndwl", line, strlen("-Ndwl"))) {
- sscanf(line, "-Ndwl %d\n", &(ndwl));
- continue;
- }
- if(!strncmp("-Nspd", line, strlen("-Nspd"))) {
- sscanf(line, "-Nspd %d\n", &(nspd));
- continue;
- }
- if(!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) {
- sscanf(line, "-Ndsam1 %d\n", &(ndsam1));
- continue;
- }
- if(!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) {
- sscanf(line, "-Ndsam2 %d\n", &(ndsam2));
- continue;
- }
- if(!strncmp("-Ndcm", line, strlen("-Ndcm"))) {
- sscanf(line, "-Ndcm %d\n", &(ndcm));
- continue;
- }
+ if (!strncmp("-Force cache config", line, strlen("-Force cache config"))) {
+ sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var);
+ if (!strncmp("true", temp_var, strlen("true"))) {
+ force_cache_config = true;
+ } else {
+ force_cache_config = false;
+ }
+ }
- }
- rpters_in_htree = true;
- fclose(fp);
+ if (!strncmp("-Ndbl", line, strlen("-Ndbl"))) {
+ sscanf(line, "-Ndbl %d\n", &(ndbl));
+ continue;
+ }
+ if (!strncmp("-Ndwl", line, strlen("-Ndwl"))) {
+ sscanf(line, "-Ndwl %d\n", &(ndwl));
+ continue;
+ }
+ if (!strncmp("-Nspd", line, strlen("-Nspd"))) {
+ sscanf(line, "-Nspd %d\n", &(nspd));
+ continue;
+ }
+ if (!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) {
+ sscanf(line, "-Ndsam1 %d\n", &(ndsam1));
+ continue;
+ }
+ if (!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) {
+ sscanf(line, "-Ndsam2 %d\n", &(ndsam2));
+ continue;
+ }
+ if (!strncmp("-Ndcm", line, strlen("-Ndcm"))) {
+ sscanf(line, "-Ndcm %d\n", &(ndcm));
+ continue;
+ }
+
+ }
+ rpters_in_htree = true;
+ fclose(fp);
}
- void
-InputParameter::display_ip()
-{
- cout << "Cache size : " << cache_sz << endl;
- cout << "Block size : " << line_sz << endl;
- cout << "Associativity : " << assoc << endl;
- cout << "Read only ports : " << num_rd_ports << endl;
- cout << "Write only ports : " << num_wr_ports << endl;
- cout << "Read write ports : " << num_rw_ports << endl;
- cout << "Single ended read ports : " << num_se_rd_ports << endl;
- if (fully_assoc||pure_cam)
- {
- cout << "Search ports : " << num_search_ports << endl;
- }
- cout << "Cache banks (UCA) : " << nbanks << endl;
- cout << "Technology : " << F_sz_um << endl;
- cout << "Temperature : " << temp << endl;
- cout << "Tag size : " << tag_w << endl;
- if (is_cache) {
- cout << "array type : " << "Cache" << endl;
- }
- if (pure_ram) {
- cout << "array type : " << "Scratch RAM" << endl;
- }
- if (pure_cam)
- {
- cout << "array type : " << "CAM" << endl;
- }
- cout << "Model as memory : " << is_main_mem << endl;
- cout << "Access mode : " << access_mode << endl;
- cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl;
- cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl;
- cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl;
- cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl;
- cout << "Optimization target : " << ed << endl;
- cout << "Design objective (UCA wt) : " << delay_wt << " "
- << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt
- << " " << area_wt << endl;
- cout << "Design objective (UCA dev) : " << delay_dev << " "
- << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev
- << " " << area_dev << endl;
- if (nuca)
- {
- cout << "Cores : " << cores << endl;
-
-
- cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " "
- << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca
- << " " << area_wt_nuca << endl;
- cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " "
- << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca
- << " " << area_dev_nuca << endl;
+void
+InputParameter::display_ip() { // Prints the current parameter values to cout, one labelled line per field.
+ cout << "Cache size : " << cache_sz << endl;
+ cout << "Block size : " << line_sz << endl;
+ cout << "Associativity : " << assoc << endl;
+ cout << "Read only ports : " << num_rd_ports << endl;
+ cout << "Write only ports : " << num_wr_ports << endl;
+ cout << "Read write ports : " << num_rw_ports << endl;
+ cout << "Single ended read ports : " << num_se_rd_ports << endl;
+ if (fully_assoc || pure_cam) { // search ports are only reported when fully_assoc or pure_cam is set
+ cout << "Search ports : " << num_search_ports << endl;
+ }
+ cout << "Cache banks (UCA) : " << nbanks << endl;
+ cout << "Technology : " << F_sz_um << endl;
+ cout << "Temperature : " << temp << endl;
+ cout << "Tag size : " << tag_w << endl;
+ if (is_cache) {
+ cout << "array type : " << "Cache" << endl;
+ }
+ if (pure_ram) {
+ cout << "array type : " << "Scratch RAM" << endl;
+ }
+ if (pure_cam) {
+ cout << "array type : " << "CAM" << endl;
+ }
+ cout << "Model as memory : " << is_main_mem << endl;
+ cout << "Access mode : " << access_mode << endl;
+ cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl;
+ cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl;
+ cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl;
+ cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl;
+ cout << "Optimization target : " << ed << endl;
+ cout << "Design objective (UCA wt) : " << delay_wt << " "
+ << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt
+ << " " << area_wt << endl;
+ cout << "Design objective (UCA dev) : " << delay_dev << " "
+ << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev
+ << " " << area_dev << endl;
+ if (nuca) { // core count and NUCA objectives are only reported when nuca is non-zero
+ cout << "Cores : " << cores << endl;
+
+
+ cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " "
+ << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca
+ << " " << area_wt_nuca << endl;
+ cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " "
+ << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca
+ << " " << area_dev_nuca << endl;
+ }
+ cout << "Cache model : " << nuca << endl; // prints the numeric nuca flag, not a model name
+ cout << "Nuca bank : " << nuca_bank_count << endl;
+ cout << "Wire inside mat : " << wire_is_mat_type << endl;
+ cout << "Wire outside mat : " << wire_os_mat_type << endl;
+ cout << "Interconnect projection : " << ic_proj_type << endl;
+ cout << "Wire signalling : " << force_wiretype << endl; // prints the force flag, not the selected wire type (wt)
+ cout << "Print level : " << print_detail << endl;
+ cout << "ECC overhead : " << add_ecc_b_ << endl;
+ cout << "Page size : " << page_sz_bits << endl;
+ cout << "Burst length : " << burst_len << endl;
+ cout << "Internal prefetch width : " << int_prefetch_w << endl;
+ cout << "Force cache config : " << g_ip->force_cache_config << endl; // NOTE(review): read through the global g_ip, not this object; output differs if called on another instance
+ if (g_ip->force_cache_config) { // the Nd*/Nspd values below are only printed when force_cache_config is set
+ cout << "Ndwl : " << g_ip->ndwl << endl;
+ cout << "Ndbl : " << g_ip->ndbl << endl;
+ cout << "Nspd : " << g_ip->nspd << endl;
+ cout << "Ndcm : " << g_ip->ndcm << endl;
+ cout << "Ndsam1 : " << g_ip->ndsam1 << endl;
+ cout << "Ndsam2 : " << g_ip->ndsam2 << endl;
}
- cout << "Cache model : " << nuca << endl;
- cout << "Nuca bank : " << nuca_bank_count << endl;
- cout << "Wire inside mat : " << wire_is_mat_type << endl;
- cout << "Wire outside mat : " << wire_os_mat_type << endl;
- cout << "Interconnect projection : " << ic_proj_type << endl;
- cout << "Wire signalling : " << force_wiretype << endl;
- cout << "Print level : " << print_detail << endl;
- cout << "ECC overhead : " << add_ecc_b_ << endl;
- cout << "Page size : " << page_sz_bits << endl;
- cout << "Burst length : " << burst_len << endl;
- cout << "Internal prefetch width : " << int_prefetch_w << endl;
- cout << "Force cache config : " << g_ip->force_cache_config << endl;
- if (g_ip->force_cache_config) {
- cout << "Ndwl : " << g_ip->ndwl << endl;
- cout << "Ndbl : " << g_ip->ndbl << endl;
- cout << "Nspd : " << g_ip->nspd << endl;
- cout << "Ndcm : " << g_ip->ndcm << endl;
- cout << "Ndsam1 : " << g_ip->ndsam1 << endl;
- cout << "Ndsam2 : " << g_ip->ndsam2 << endl;
- }
}
-powerComponents operator+(const powerComponents & x, const powerComponents & y)
-{
- powerComponents z;
+powerComponents operator+(const powerComponents & x, const powerComponents & y) { // Returns a new powerComponents whose listed fields are the pairwise sums of x and y.
+ powerComponents z;
- z.dynamic = x.dynamic + y.dynamic;
- z.leakage = x.leakage + y.leakage;
- z.gate_leakage = x.gate_leakage + y.gate_leakage;
- z.short_circuit = x.short_circuit + y.short_circuit;
- z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage;
+ z.dynamic = x.dynamic + y.dynamic;
+ z.leakage = x.leakage + y.leakage;
+ z.gate_leakage = x.gate_leakage + y.gate_leakage;
+ z.short_circuit = x.short_circuit + y.short_circuit;
+ z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage;
- return z;
+ return z;
}
-powerComponents operator*(const powerComponents & x, double const * const y)
-{
- powerComponents z;
+powerComponents operator*(const powerComponents & x, double const * const y) { // Scales each field of x by a factor read from array y; y is indexed up to y[3], so it must hold at least four values.
+ powerComponents z;
- z.dynamic = x.dynamic*y[0];
- z.leakage = x.leakage*y[1];
- z.gate_leakage = x.gate_leakage*y[2];
- z.short_circuit = x.short_circuit*y[3];
- z.longer_channel_leakage = x.longer_channel_leakage*y[1];//longer channel leakage has the same behavior as normal leakage
+ z.dynamic = x.dynamic * y[0]; // y[0]: dynamic factor
+ z.leakage = x.leakage * y[1]; // y[1]: leakage factor
+ z.gate_leakage = x.gate_leakage * y[2]; // y[2]: gate-leakage factor
+ z.short_circuit = x.short_circuit * y[3]; // y[3]: short-circuit factor
+ //longer channel leakage has the same behavior as normal leakage
+ z.longer_channel_leakage = x.longer_channel_leakage * y[1]; // deliberately reuses the leakage factor y[1]
- return z;
+ return z;
}
-powerDef operator+(const powerDef & x, const powerDef & y)
-{
- powerDef z;
+powerDef operator+(const powerDef & x, const powerDef & y) { // Returns a powerDef whose readOp, writeOp and searchOp are the sums of the matching members of x and y.
+ powerDef z;
- z.readOp = x.readOp + y.readOp;
- z.writeOp = x.writeOp + y.writeOp;
- z.searchOp = x.searchOp + y.searchOp;
- return z;
+ z.readOp = x.readOp + y.readOp; // presumably resolves to the powerComponents operator+ - confirm member type in the header
+ z.writeOp = x.writeOp + y.writeOp;
+ z.searchOp = x.searchOp + y.searchOp;
+ return z;
}
-powerDef operator*(const powerDef & x, double const * const y)
-{
- powerDef z;
+powerDef operator*(const powerDef & x, double const * const y) { // Applies the same factor array y to readOp, writeOp and searchOp of x and returns the result.
+ powerDef z;
- z.readOp = x.readOp*y;
- z.writeOp = x.writeOp*y;
- z.searchOp = x.searchOp*y;
- return z;
+ z.readOp = x.readOp * y; // y is passed on as a pointer; presumably handled by the powerComponents operator* - confirm member type
+ z.writeOp = x.writeOp * y;
+ z.searchOp = x.searchOp * y;
+ return z;
}
-uca_org_t cacti_interface(const string & infile_name)
-{
+uca_org_t cacti_interface(const string & infile_name) { // Config-file entry point: parses infile_name into the global g_ip, validates it, runs solve() and the output routines, and returns the result.
- uca_org_t fin_res;
- //uca_org_t result;
- fin_res.valid = false;
+ uca_org_t fin_res;
+ //uca_org_t result;
+ fin_res.valid = false;
- g_ip = new InputParameter();
- g_ip->parse_cfg(infile_name);
- if(!g_ip->error_checking())
- exit(0);
- if (g_ip->print_input_args)
- g_ip->display_ip();
+ g_ip = new InputParameter(); // global parameter object; deleted again just before this function returns
+ g_ip->parse_cfg(infile_name);
+ if (!g_ip->error_checking(infile_name)) // the file name is passed along so error_checking can include it in its messages
+ exit(0); // NOTE(review): terminates with status 0 even though validation failed
+ if (g_ip->print_input_args)
+ g_ip->display_ip();
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
// For HighRadix Only
@@ -703,19 +653,18 @@ uca_org_t cacti_interface(const string & infile_name)
// exit(0);
// For HighRadix Only End
- if (g_ip->nuca == 1)
- {
- Nuca n(&g_tp.peri_global);
- n.sim_nuca();
- }
- g_ip->display_ip();
- solve(&fin_res);
+ if (g_ip->nuca == 1) { // the NUCA simulation only runs when nuca is exactly 1
+ Nuca n(&g_tp.peri_global);
+ n.sim_nuca();
+ }
+ g_ip->display_ip(); // NOTE(review): unconditional; the parameters were already printed above when print_input_args is set
+ solve(&fin_res);
- output_UCA(&fin_res);
- output_data_csv(fin_res);
+ output_UCA(&fin_res);
+ output_data_csv(fin_res);
- delete (g_ip);
- return fin_res;
+ delete (g_ip); // NOTE(review): g_ip is not reset after the delete, so the global keeps a stale pointer
+ return fin_res;
}
//cacti6.5's plain interface, please keep !!!
@@ -773,142 +722,139 @@ uca_org_t cacti_interface(
int nuca_dev_func_area,
int nuca_dev_func_cycle_time,
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
- int p_input)
-{
- g_ip = new InputParameter();
- g_ip->add_ecc_b_ = true;
-
- g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
- g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
- g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
- g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-
- g_ip->ic_proj_type = interconnect_projection_type_in;
- g_ip->wire_is_mat_type = wire_inside_mat_type_in;
- g_ip->wire_os_mat_type = wire_outside_mat_type_in;
- g_ip->burst_len = burst_length;
- g_ip->int_prefetch_w = pre_width;
- g_ip->page_sz_bits = page_sz;
-
- g_ip->cache_sz = cache_size;
- g_ip->line_sz = line_size;
- g_ip->assoc = associativity;
- g_ip->nbanks = banks;
- g_ip->out_w = output_width;
- g_ip->specific_tag = specific_tag;
- if (tag_width == 0) {
- g_ip->tag_w = 42;
- }
- else {
- g_ip->tag_w = tag_width;
- }
-
- g_ip->access_mode = access_mode;
- g_ip->delay_wt = obj_func_delay;
- g_ip->dynamic_power_wt = obj_func_dynamic_power;
- g_ip->leakage_power_wt = obj_func_leakage_power;
- g_ip->area_wt = obj_func_area;
- g_ip->cycle_time_wt = obj_func_cycle_time;
- g_ip->delay_dev = dev_func_delay;
- g_ip->dynamic_power_dev = dev_func_dynamic_power;
- g_ip->leakage_power_dev = dev_func_leakage_power;
- g_ip->area_dev = dev_func_area;
- g_ip->cycle_time_dev = dev_func_cycle_time;
- g_ip->ed = ed_ed2_none;
-
- switch(wt) {
+ int p_input) {
+ g_ip = new InputParameter();
+ g_ip->add_ecc_b_ = true;
+
+ g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
+ g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
+ g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
+ g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
+
+ g_ip->ic_proj_type = interconnect_projection_type_in;
+ g_ip->wire_is_mat_type = wire_inside_mat_type_in;
+ g_ip->wire_os_mat_type = wire_outside_mat_type_in;
+ g_ip->burst_len = burst_length;
+ g_ip->int_prefetch_w = pre_width;
+ g_ip->page_sz_bits = page_sz;
+
+ g_ip->cache_sz = cache_size;
+ g_ip->line_sz = line_size;
+ g_ip->assoc = associativity;
+ g_ip->nbanks = banks;
+ g_ip->out_w = output_width;
+ g_ip->specific_tag = specific_tag;
+ if (tag_width == 0) {
+ g_ip->tag_w = 42;
+ } else {
+ g_ip->tag_w = tag_width;
+ }
+
+ g_ip->access_mode = access_mode;
+ g_ip->delay_wt = obj_func_delay;
+ g_ip->dynamic_power_wt = obj_func_dynamic_power;
+ g_ip->leakage_power_wt = obj_func_leakage_power;
+ g_ip->area_wt = obj_func_area;
+ g_ip->cycle_time_wt = obj_func_cycle_time;
+ g_ip->delay_dev = dev_func_delay;
+ g_ip->dynamic_power_dev = dev_func_dynamic_power;
+ g_ip->leakage_power_dev = dev_func_leakage_power;
+ g_ip->area_dev = dev_func_area;
+ g_ip->cycle_time_dev = dev_func_cycle_time;
+ g_ip->ed = ed_ed2_none;
+
+ switch (wt) {
case (0):
- g_ip->force_wiretype = 0;
- g_ip->wt = Global;
- break;
+ g_ip->force_wiretype = 0;
+ g_ip->wt = Global;
+ break;
case (1):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global;
+ break;
case (2):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_5;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_5;
+ break;
case (3):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_10;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_10;
+ break;
case (4):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_20;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_20;
+ break;
case (5):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_30;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Global_30;
+ break;
case (6):
- g_ip->force_wiretype = 1;
- g_ip->wt = Low_swing;
- break;
+ g_ip->force_wiretype = 1;
+ g_ip->wt = Low_swing;
+ break;
default:
- cout << "Unknown wire type!\n";
- exit(0);
- }
-
- g_ip->delay_wt_nuca = nuca_obj_func_delay;
- g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power;
- g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power;
- g_ip->area_wt_nuca = nuca_obj_func_area;
- g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time;
- g_ip->delay_dev_nuca = dev_func_delay;
- g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power;
- g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power;
- g_ip->area_dev_nuca = nuca_dev_func_area;
- g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time;
- g_ip->nuca = is_nuca;
- g_ip->nuca_bank_count = nuca_bank_count;
- if(nuca_bank_count > 0) {
- g_ip->force_nuca_bank = 1;
- }
- g_ip->cores = core_count;
- g_ip->cache_level = cache_level;
-
- g_ip->temp = temp;
-
- g_ip->F_sz_nm = tech_node;
- g_ip->F_sz_um = tech_node / 1000;
- g_ip->is_main_mem = (main_mem != 0) ? true : false;
- g_ip->is_cache = (cache != 0) ? true : false;
- g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-
- g_ip->num_rw_ports = rw_ports;
- g_ip->num_rd_ports = excl_read_ports;
- g_ip->num_wr_ports = excl_write_ports;
- g_ip->num_se_rd_ports = single_ended_read_ports;
- g_ip->print_detail = 1;
- g_ip->nuca = 0;
-
- g_ip->wt = Global_5;
- g_ip->force_cache_config = false;
- g_ip->force_wiretype = false;
- g_ip->print_input_args = p_input;
-
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- if (g_ip->error_checking() == false) exit(0);
- if (g_ip->print_input_args)
- g_ip->display_ip();
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
-
- if (g_ip->nuca == 1)
- {
- Nuca n(&g_tp.peri_global);
- n.sim_nuca();
- }
- solve(&fin_res);
-
- output_UCA(&fin_res);
+ cout << "Unknown wire type!\n";
+ exit(0);
+ }
- delete (g_ip);
- return fin_res;
+ g_ip->delay_wt_nuca = nuca_obj_func_delay;
+ g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power;
+ g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power;
+ g_ip->area_wt_nuca = nuca_obj_func_area;
+ g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time;
+ g_ip->delay_dev_nuca = dev_func_delay;
+ g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power;
+ g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power;
+ g_ip->area_dev_nuca = nuca_dev_func_area;
+ g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time;
+ g_ip->nuca = is_nuca;
+ g_ip->nuca_bank_count = nuca_bank_count;
+ if (nuca_bank_count > 0) {
+ g_ip->force_nuca_bank = 1;
+ }
+ g_ip->cores = core_count;
+ g_ip->cache_level = cache_level;
+
+ g_ip->temp = temp;
+
+ g_ip->F_sz_nm = tech_node;
+ g_ip->F_sz_um = tech_node / 1000;
+ g_ip->is_main_mem = (main_mem != 0) ? true : false;
+ g_ip->is_cache = (cache != 0) ? true : false;
+ g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
+
+ g_ip->num_rw_ports = rw_ports;
+ g_ip->num_rd_ports = excl_read_ports;
+ g_ip->num_wr_ports = excl_write_ports;
+ g_ip->num_se_rd_ports = single_ended_read_ports;
+ g_ip->print_detail = 1;
+ g_ip->nuca = 0;
+
+ g_ip->wt = Global_5;
+ g_ip->force_cache_config = false;
+ g_ip->force_wiretype = false;
+ g_ip->print_input_args = p_input;
+
+
+ uca_org_t fin_res;
+ fin_res.valid = false;
+
+ if (g_ip->error_checking() == false) exit(0);
+ if (g_ip->print_input_args)
+ g_ip->display_ip();
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+
+ if (g_ip->nuca == 1) {
+ Nuca n(&g_tp.peri_global);
+ n.sim_nuca();
+ }
+ solve(&fin_res);
+
+ output_UCA(&fin_res);
+
+ delete (g_ip);
+ return fin_res;
}
//McPAT's plain interface, please keep !!!
@@ -964,200 +910,187 @@ uca_org_t cacti_interface(
int ndcm,
int ndsam1,//para50
int ndsam2,
- int ecc)
-{
- g_ip = new InputParameter();
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
- g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
- g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
- g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-
- g_ip->ic_proj_type = interconnect_projection_type_in;
- g_ip->wire_is_mat_type = wire_inside_mat_type_in;
- g_ip->wire_os_mat_type = wire_outside_mat_type_in;
- g_ip->burst_len = BURST_LENGTH_in;
- g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
- g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-
- g_ip->cache_sz = cache_size;
- g_ip->line_sz = line_size;
- g_ip->assoc = associativity;
- g_ip->nbanks = banks;
- g_ip->out_w = output_width;
- g_ip->specific_tag = specific_tag;
- if (specific_tag == 0) {
- g_ip->tag_w = 42;
- }
- else {
- g_ip->tag_w = tag_width;
- }
-
- g_ip->access_mode = access_mode;
- g_ip->delay_wt = obj_func_delay;
- g_ip->dynamic_power_wt = obj_func_dynamic_power;
- g_ip->leakage_power_wt = obj_func_leakage_power;
- g_ip->area_wt = obj_func_area;
- g_ip->cycle_time_wt = obj_func_cycle_time;
- g_ip->delay_dev = dev_func_delay;
- g_ip->dynamic_power_dev = dev_func_dynamic_power;
- g_ip->leakage_power_dev = dev_func_leakage_power;
- g_ip->area_dev = dev_func_area;
- g_ip->cycle_time_dev = dev_func_cycle_time;
- g_ip->temp = temp;
- g_ip->ed = ed_ed2_none;
-
- g_ip->F_sz_nm = tech_node;
- g_ip->F_sz_um = tech_node / 1000;
- g_ip->is_main_mem = (main_mem != 0) ? true : false;
- g_ip->is_cache = (cache ==1) ? true : false;
- g_ip->pure_ram = (cache ==0) ? true : false;
- g_ip->pure_cam = (cache ==2) ? true : false;
- g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
- g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
- g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-
- g_ip->num_rw_ports = rw_ports;
- g_ip->num_rd_ports = excl_read_ports;
- g_ip->num_wr_ports = excl_write_ports;
- g_ip->num_se_rd_ports = single_ended_read_ports;
- g_ip->num_search_ports = search_ports;
-
- g_ip->print_detail = 1;
- g_ip->nuca = 0;
-
- if (force_wiretype == 0)
- {
- g_ip->wt = Global;
- g_ip->force_wiretype = false;
- }
- else
- { g_ip->force_wiretype = true;
- if (wiretype==10) {
- g_ip->wt = Global_10;
- }
- if (wiretype==20) {
- g_ip->wt = Global_20;
- }
- if (wiretype==30) {
- g_ip->wt = Global_30;
- }
- if (wiretype==5) {
- g_ip->wt = Global_5;
- }
- if (wiretype==0) {
- g_ip->wt = Low_swing;
- }
- }
- //g_ip->wt = Global_5;
- if (force_config == 0)
- {
- g_ip->force_cache_config = false;
+ int ecc) {
+ g_ip = new InputParameter();
+
+ uca_org_t fin_res;
+ fin_res.valid = false;
+
+ g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
+ g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
+ g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
+ g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
+
+ g_ip->ic_proj_type = interconnect_projection_type_in;
+ g_ip->wire_is_mat_type = wire_inside_mat_type_in;
+ g_ip->wire_os_mat_type = wire_outside_mat_type_in;
+ g_ip->burst_len = BURST_LENGTH_in;
+ g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
+ g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
+
+ g_ip->cache_sz = cache_size;
+ g_ip->line_sz = line_size;
+ g_ip->assoc = associativity;
+ g_ip->nbanks = banks;
+ g_ip->out_w = output_width;
+ g_ip->specific_tag = specific_tag;
+ if (specific_tag == 0) {
+ g_ip->tag_w = 42;
+ } else {
+ g_ip->tag_w = tag_width;
+ }
+
+ g_ip->access_mode = access_mode;
+ g_ip->delay_wt = obj_func_delay;
+ g_ip->dynamic_power_wt = obj_func_dynamic_power;
+ g_ip->leakage_power_wt = obj_func_leakage_power;
+ g_ip->area_wt = obj_func_area;
+ g_ip->cycle_time_wt = obj_func_cycle_time;
+ g_ip->delay_dev = dev_func_delay;
+ g_ip->dynamic_power_dev = dev_func_dynamic_power;
+ g_ip->leakage_power_dev = dev_func_leakage_power;
+ g_ip->area_dev = dev_func_area;
+ g_ip->cycle_time_dev = dev_func_cycle_time;
+ g_ip->temp = temp;
+ g_ip->ed = ed_ed2_none;
+
+ g_ip->F_sz_nm = tech_node;
+ g_ip->F_sz_um = tech_node / 1000;
+ g_ip->is_main_mem = (main_mem != 0) ? true : false;
+ g_ip->is_cache = (cache == 1) ? true : false;
+ g_ip->pure_ram = (cache == 0) ? true : false;
+ g_ip->pure_cam = (cache == 2) ? true : false;
+ g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
+ g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
+ g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
+
+ g_ip->num_rw_ports = rw_ports;
+ g_ip->num_rd_ports = excl_read_ports;
+ g_ip->num_wr_ports = excl_write_ports;
+ g_ip->num_se_rd_ports = single_ended_read_ports;
+ g_ip->num_search_ports = search_ports;
+
+ g_ip->print_detail = 1;
+ g_ip->nuca = 0;
+
+ if (force_wiretype == 0) {
+ g_ip->wt = Global;
+ g_ip->force_wiretype = false;
+ } else {
+ g_ip->force_wiretype = true;
+ if (wiretype == 10) {
+ g_ip->wt = Global_10;
+ }
+ if (wiretype == 20) {
+ g_ip->wt = Global_20;
+ }
+ if (wiretype == 30) {
+ g_ip->wt = Global_30;
+ }
+ if (wiretype == 5) {
+ g_ip->wt = Global_5;
+ }
+ if (wiretype == 0) {
+ g_ip->wt = Low_swing;
+ }
}
- else
- {
+ //g_ip->wt = Global_5;
+ if (force_config == 0) {
+ g_ip->force_cache_config = false;
+ } else {
g_ip->force_cache_config = true;
- g_ip->ndbl=ndbl;
- g_ip->ndwl=ndwl;
- g_ip->nspd=nspd;
- g_ip->ndcm=ndcm;
- g_ip->ndsam1=ndsam1;
- g_ip->ndsam2=ndsam2;
+ g_ip->ndbl = ndbl;
+ g_ip->ndwl = ndwl;
+ g_ip->nspd = nspd;
+ g_ip->ndcm = ndcm;
+ g_ip->ndsam1 = ndsam1;
+ g_ip->ndsam2 = ndsam2;
}
- if (ecc==0){
- g_ip->add_ecc_b_=false;
- }
- else
- {
- g_ip->add_ecc_b_=true;
- }
+ if (ecc == 0) {
+ g_ip->add_ecc_b_ = false;
+ } else {
+ g_ip->add_ecc_b_ = true;
+ }
- if(!g_ip->error_checking())
- exit(0);
+ if (!g_ip->error_checking())
+ exit(0);
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
- g_ip->display_ip();
- solve(&fin_res);
- output_UCA(&fin_res);
- output_data_csv(fin_res);
- delete (g_ip);
+ g_ip->display_ip();
+ solve(&fin_res);
+ output_UCA(&fin_res);
+ output_data_csv(fin_res);
+ delete (g_ip);
- return fin_res;
+ return fin_res;
}
-bool InputParameter::error_checking()
-{
- int A;
- bool seq_access = false;
- fast_access = true;
+bool InputParameter::error_checking(string name) {
+ int A;
+ bool seq_access = false;
+ fast_access = true;
- switch (access_mode)
- {
+ switch (access_mode) {
case 0:
- seq_access = false;
- fast_access = false;
- break;
+ seq_access = false;
+ fast_access = false;
+ break;
case 1:
- seq_access = true;
- fast_access = false;
- break;
+ seq_access = true;
+ fast_access = false;
+ break;
case 2:
- seq_access = false;
- fast_access = true;
- break;
- }
-
- if(is_main_mem)
- {
- if(ic_proj_type == 0)
- {
- cerr << "DRAM model supports only conservative interconnect projection!\n\n";
- return false;
+ seq_access = false;
+ fast_access = true;
+ break;
}
- }
-
-
- uint32_t B = line_sz;
-
- if (B < 1)
- {
- cerr << "Block size must >= 1" << endl;
- return false;
- }
- else if (B*8 < out_w)
- {
- cerr << "Block size must be at least " << out_w/8 << endl;
- return false;
- }
-
- if (F_sz_um <= 0)
- {
- cerr << "Feature size must be > 0" << endl;
- return false;
- }
- else if (F_sz_um > 0.091)
- {
- cerr << "Feature size must be <= 90 nm" << endl;
- return false;
- }
-
-
- uint32_t RWP = num_rw_ports;
- uint32_t ERP = num_rd_ports;
- uint32_t EWP = num_wr_ports;
- uint32_t NSER = num_se_rd_ports;
- uint32_t SCHP = num_search_ports;
+
+ if (is_main_mem) {
+ if (ic_proj_type == 0) {
+ cerr << name
+ << ": DRAM model supports only conservative interconnect "
+ << "projection but is set to aggressive!\n\n";
+ return false;
+ }
+ }
+
+
+ uint32_t B = line_sz;
+
+ if (B < 1) {
+ cerr << name << ": Block size must be >= 1, but is set to " << B
+ << endl;
+ return false;
+ } else if (B*8 < out_w) {
+ cerr << name << ": Block size must be at least " << out_w / 8
+ << ", but is set to " << B << endl;
+ return false;
+ }
+
+ if (F_sz_um <= 0) {
+ cerr << name << ": Feature size must be > 0, but is set to "
+ << F_sz_um << endl;
+ return false;
+ } else if (F_sz_um > 0.091) {
+ cerr << name << ": Feature size must be <= 90 nm, but is set to "
+ << F_sz_um << endl;
+ return false;
+ }
+
+
+ uint32_t RWP = num_rw_ports;
+ uint32_t ERP = num_rd_ports;
+ uint32_t EWP = num_wr_ports;
+ uint32_t NSER = num_se_rd_ports;
+ uint32_t SCHP = num_search_ports;
//TODO: revisit this. This is an important feature. Sheng thought this should be used
// // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to
@@ -1181,26 +1114,26 @@ bool InputParameter::error_checking()
// return false;
// }
// else if ((RWP+ERP+EWP) < 1)
- // Changed to new implementation:
- // The number of ports specified at input is per bank
- if ((RWP+ERP+EWP) < 1)
- {
- cerr << "Must have at least one port" << endl;
- return false;
- }
-
- if (is_pow2(nbanks) == false)
- {
- cerr << "Number of subbanks should be greater than or equal to 1 and should be a power of 2" << endl;
- return false;
- }
-
- int C = cache_sz/nbanks;
- if (C < 64)
- {
- cerr << "Cache size must >=64" << endl;
- return false;
- }
+ // Changed to new implementation:
+ // The number of ports specified at input is per bank
+ if ((RWP + ERP + EWP) < 1) {
+ cerr << name << ": Must have at least one port" << endl;
+ return false;
+ }
+
+ if (is_pow2(nbanks) == false) {
+ cerr << name << ": Number of subbanks should be greater than or "
+ << "equal to 1 and should be a power of 2, but is set to "
+ << nbanks << endl;
+ return false;
+ }
+
+ int C = cache_sz / nbanks;
+ if (C < 64) {
+ cerr << name << ": Cache size must be >=64, but is set to " << C
+ << endl;
+ return false;
+ }
//TODO: revisit this
// if (pure_ram==true && assoc!=1)
@@ -1210,54 +1143,64 @@ bool InputParameter::error_checking()
// }
//fully assoc and cam check
- if (is_cache && assoc==0)
- fully_assoc =true;
+ if (is_cache && assoc == 0)
+ fully_assoc = true;
else
fully_assoc = false;
- if (pure_cam==true && assoc!=0)
- {
- cerr << "Pure CAM must have associativity as 0" << endl;
- return false;
+ if (pure_cam == true && assoc != 0) {
+ cerr << name
+ << ": Pure CAM must have associativity as 0, but is set to"
+ << assoc << endl;
+ return false;
}
- if (assoc==0 && (pure_cam==false && is_cache ==false))
- {
- cerr << "Only CAM or Fully associative cache can have associativity as 0" << endl;
- return false;
+ if (assoc == 0 && (pure_cam == false && is_cache == false)) {
+ cerr << name
+ << ": Only CAM or Fully associative cache can have associativity "
+ << "as 0" << endl;
+ return false;
}
- if ((fully_assoc==true || pure_cam==true)
- && (data_arr_ram_cell_tech_type!= tag_arr_ram_cell_tech_type
- || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type ))
- {
- cerr << "CAM and fully associative cache must have same device type for both data and tag array" << endl;
- return false;
+ if ((fully_assoc == true || pure_cam == true)
+ && (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type
+ || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type)) {
+ cerr << name
+ << ": CAM and fully associative cache must have same device type "
+ << "for both data and tag array" << endl;
+ cerr << "\tData array RAM cell = " << data_arr_ram_cell_tech_type
+ << ", Tag array RAM cell = " << tag_arr_ram_cell_tech_type << endl
+ << "\tData array peripheral = " << data_arr_peri_global_tech_type
+ << ", Tag array peripheral = " << tag_arr_peri_global_tech_type
+ << endl;
+ return false;
}
- if ((fully_assoc==true || pure_cam==true)
- && (data_arr_ram_cell_tech_type== lp_dram || data_arr_ram_cell_tech_type== comm_dram))
- {
- cerr << "DRAM based CAM and fully associative cache are not supported" << endl;
- return false;
+ if ((fully_assoc == true || pure_cam == true)
+ && (data_arr_ram_cell_tech_type == lp_dram ||
+ data_arr_ram_cell_tech_type == comm_dram)) {
+ cerr << name << ": DRAM based CAM and fully associative cache are not "
+ << "supported" << endl;
+ return false;
}
- if ((fully_assoc==true || pure_cam==true)
- && (is_main_mem==true))
- {
- cerr << "CAM and fully associative cache cannot be as main memory" << endl;
- return false;
+ if ((fully_assoc == true || pure_cam == true)
+ && (is_main_mem == true)) {
+ cerr << name
+ << ": CAM and fully associative cache cannot be as main memory"
+ << endl;
+ return false;
}
- if ((fully_assoc || pure_cam) && SCHP<1)
- {
- cerr << "CAM and fully associative must have at least 1 search port" << endl;
- return false;
+ if ((fully_assoc || pure_cam) && SCHP < 1) {
+ cerr << name
+ << ": CAM and fully associative must have at least 1 search port,"
+ << " but are set to " << SCHP << endl;
+ return false;
}
- if (RWP==0 && ERP==0 && SCHP>0 && ((fully_assoc || pure_cam)))
- {
- ERP=SCHP;
+ if (RWP == 0 && ERP == 0 && SCHP > 0 && ((fully_assoc || pure_cam))) {
+ ERP = SCHP;
}
// if ((!(fully_assoc || pure_cam)) && SCHP>=1)
@@ -1266,140 +1209,112 @@ bool InputParameter::error_checking()
// return false;
// }
- if (assoc == 0)
- {
- A = C/B;
- //fully_assoc = true;
- }
- else
- {
- if (assoc == 1)
- {
- A = 1;
- //fully_assoc = false;
+ if (assoc == 0) {
+ A = C / B;
+ //fully_assoc = true;
+ } else {
+ if (assoc == 1) {
+ A = 1;
+ //fully_assoc = false;
+ } else {
+ //fully_assoc = false;
+ A = assoc;
+ if (is_pow2(A) == false) {
+ cerr << name
+ << ": Associativity must be a power of 2, but is set to "
+ << A << endl;
+ return false;
+ }
+ }
}
- else
- {
- //fully_assoc = false;
- A = assoc;
- if (is_pow2(A) == false)
- {
- cerr << "Associativity must be a power of 2" << endl;
+
+ if (C / (B*A) <= 1 && assoc != 0) {
+ cerr << name << ": Number of sets (" << (C / (B * A))
+ << ") is too small: " << endl;
+ cerr << " Need to either increase cache size, or decrease "
+ << "associativity or block size" << endl;
+ cerr << " (or use fully associative cache)" << endl;
return false;
- }
}
- }
-
- if (C/(B*A) <= 1 && assoc!=0)
- {
- cerr << "Number of sets is too small: " << endl;
- cerr << " Need to either increase cache size, or decrease associativity or block size" << endl;
- cerr << " (or use fully associative cache)" << endl;
- return false;
- }
-
- block_sz = B;
-
- /*dt: testing sequential access mode*/
- if(seq_access)
- {
- tag_assoc = A;
- data_assoc = 1;
- is_seq_acc = true;
- }
- else
- {
- tag_assoc = A;
- data_assoc = A;
- is_seq_acc = false;
- }
-
- if (assoc==0)
- {
- data_assoc = 1;
- }
- num_rw_ports = RWP;
- num_rd_ports = ERP;
- num_wr_ports = EWP;
- num_se_rd_ports = NSER;
- if (!(fully_assoc || pure_cam))
- num_search_ports = 0;
- nsets = C/(B*A);
-
- if (temp < 300 || temp > 400 || temp%10 != 0)
- {
- cerr << temp << " Temperature must be between 300 and 400 Kelvin and multiple of 10." << endl;
- return false;
- }
-
- if (nsets < 1)
- {
- cerr << "Less than one set..." << endl;
- return false;
- }
-
- return true;
+
+ block_sz = B;
+
+ /*dt: testing sequential access mode*/
+ if (seq_access) {
+ tag_assoc = A;
+ data_assoc = 1;
+ is_seq_acc = true;
+ } else {
+ tag_assoc = A;
+ data_assoc = A;
+ is_seq_acc = false;
+ }
+
+ if (assoc == 0) {
+ data_assoc = 1;
+ }
+ num_rw_ports = RWP;
+ num_rd_ports = ERP;
+ num_wr_ports = EWP;
+ num_se_rd_ports = NSER;
+ if (!(fully_assoc || pure_cam))
+ num_search_ports = 0;
+ nsets = C / (B * A);
+
+ if (temp < 300 || temp > 400 || temp % 10 != 0) {
+ cerr << name << ": " << temp
+ << " Temperature must be between 300 and 400 Kelvin and multiple "
+ << "of 10." << endl;
+ return false;
+ }
+
+ if (nsets < 1) {
+ cerr << name << ": Less than one set..." << endl;
+ return false;
+ }
+
+ return true;
}
-void output_data_csv(const uca_org_t & fin_res)
-{
- //TODO: the csv output should remain
- fstream file("out.csv", ios::in);
- bool print_index = file.fail();
- file.close();
-
- file.open("out.csv", ios::out|ios::app);
- if (file.fail() == true)
- {
- cerr << "File out.csv could not be opened successfully" << endl;
- }
- else
- {
- if (print_index == true)
- {
- file << "Tech node (nm), ";
- file << "Capacity (bytes), ";
- file << "Number of banks, ";
- file << "Associativity, ";
- file << "Output width (bits), ";
- file << "Access time (ns), ";
- file << "Random cycle time (ns), ";
-// file << "Multisubbank interleave cycle time (ns), ";
-
-// file << "Delay request network (ns), ";
-// file << "Delay inside mat (ns), ";
-// file << "Delay reply network (ns), ";
-// file << "Tag array access time (ns), ";
-// file << "Data array access time (ns), ";
-// file << "Refresh period (microsec), ";
-// file << "DRAM array availability (%), ";
- file << "Dynamic search energy (nJ), ";
- file << "Dynamic read energy (nJ), ";
- file << "Dynamic write energy (nJ), ";
-// file << "Tag Dynamic read energy (nJ), ";
-// file << "Data Dynamic read energy (nJ), ";
-// file << "Dynamic read power (mW), ";
- file << "Standby leakage per bank(mW), ";
-// file << "Leakage per bank with leak power management (mW), ";
-// file << "Leakage per bank with leak power management (mW), ";
-// file << "Refresh power as percentage of standby leakage, ";
- file << "Area (mm2), ";
- file << "Ndwl, ";
- file << "Ndbl, ";
- file << "Nspd, ";
- file << "Ndcm, ";
- file << "Ndsam_level_1, ";
- file << "Ndsam_level_2, ";
- file << "Data arrary area efficiency %, ";
- file << "Ntwl, ";
- file << "Ntbl, ";
- file << "Ntspd, ";
- file << "Ntcm, ";
- file << "Ntsam_level_1, ";
- file << "Ntsam_level_2, ";
- file << "Tag arrary area efficiency %, ";
+void output_data_csv(const uca_org_t & fin_res) {
+ //TODO: the csv output should remain
+ fstream file("out.csv", ios::in);
+ bool print_index = file.fail();
+ file.close();
+
+ file.open("out.csv", ios::out | ios::app);
+ if (file.fail() == true) {
+ cerr << "File out.csv could not be opened successfully" << endl;
+ } else {
+ if (print_index == true) {
+ file << "Tech node (nm), ";
+ file << "Capacity (bytes), ";
+ file << "Number of banks, ";
+ file << "Associativity, ";
+ file << "Output width (bits), ";
+ file << "Access time (ns), ";
+ file << "Random cycle time (ns), ";
+ file << "Dynamic search energy (nJ), ";
+ file << "Dynamic read energy (nJ), ";
+ file << "Dynamic write energy (nJ), ";
+ file << "Standby leakage per bank(mW), ";
+ file << "Area (mm2), ";
+ file << "Ndwl, ";
+ file << "Ndbl, ";
+ file << "Nspd, ";
+ file << "Ndcm, ";
+ file << "Ndsam_level_1, ";
+ file << "Ndsam_level_2, ";
+ file << "Data arrary area efficiency %, ";
+ file << "Ntwl, ";
+ file << "Ntbl, ";
+ file << "Ntspd, ";
+ file << "Ntcm, ";
+ file << "Ntsam_level_1, ";
+ file << "Ntsam_level_2, ";
+ file << "Tag arrary area efficiency %, ";
// file << "Resistance per unit micron (ohm-micron), ";
// file << "Capacitance per unit micron (fF per micron), ";
@@ -1428,15 +1343,15 @@ void output_data_csv(const uca_org_t & fin_res)
// file << "Delay opt (perc), ";
// file << "Repeater opt (perc), ";
// file << "Aspect ratio";
- file << endl;
- }
- file << g_ip->F_sz_nm << ", ";
- file << g_ip->cache_sz << ", ";
- file << g_ip->nbanks << ", ";
- file << g_ip->tag_assoc << ", ";
- file << g_ip->out_w << ", ";
- file << fin_res.access_time*1e+9 << ", ";
- file << fin_res.cycle_time*1e+9 << ", ";
+ file << endl;
+ }
+ file << g_ip->F_sz_nm << ", ";
+ file << g_ip->cache_sz << ", ";
+ file << g_ip->nbanks << ", ";
+ file << g_ip->tag_assoc << ", ";
+ file << g_ip->out_w << ", ";
+ file << fin_res.access_time*1e+9 << ", ";
+ file << fin_res.cycle_time*1e+9 << ", ";
// file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", ";
// file << fin_res.data_array2->delay_request_network*1e+9 << ", ";
// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", ";
@@ -1453,16 +1368,13 @@ void output_data_csv(const uca_org_t & fin_res)
// file << fin_res.data_array2->access_time*1e+9 << ", ";
// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", ";
// file << fin_res.data_array2->dram_array_availability << ", ";
- if (g_ip->fully_assoc || g_ip->pure_cam)
- {
- file << fin_res.power.searchOp.dynamic*1e+9 << ", ";
- }
- else
- {
- file << "N/A" << ", ";
- }
- file << fin_res.power.readOp.dynamic*1e+9 << ", ";
- file << fin_res.power.writeOp.dynamic*1e+9 << ", ";
+ if (g_ip->fully_assoc || g_ip->pure_cam) {
+ file << fin_res.power.searchOp.dynamic*1e+9 << ", ";
+ } else {
+ file << "N/A" << ", ";
+ }
+ file << fin_res.power.readOp.dynamic*1e+9 << ", ";
+ file << fin_res.power.writeOp.dynamic*1e+9 << ", ";
// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
// {
// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", ";
@@ -1484,27 +1396,24 @@ void output_data_csv(const uca_org_t & fin_res)
file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", ";
// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", ";
// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", ";
- file << fin_res.area*1e-6 << ", ";
-
- file << fin_res.data_array2->Ndwl << ", ";
- file << fin_res.data_array2->Ndbl << ", ";
- file << fin_res.data_array2->Nspd << ", ";
- file << fin_res.data_array2->deg_bl_muxing << ", ";
- file << fin_res.data_array2->Ndsam_lev_1 << ", ";
- file << fin_res.data_array2->Ndsam_lev_2 << ", ";
- file << fin_res.data_array2->area_efficiency << ", ";
- if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
- {
- file << fin_res.tag_array2->Ndwl << ", ";
- file << fin_res.tag_array2->Ndbl << ", ";
- file << fin_res.tag_array2->Nspd << ", ";
- file << fin_res.tag_array2->deg_bl_muxing << ", ";
- file << fin_res.tag_array2->Ndsam_lev_1 << ", ";
- file << fin_res.tag_array2->Ndsam_lev_2 << ", ";
- file << fin_res.tag_array2->area_efficiency << ", ";
- }
- else
- {
+ file << fin_res.area*1e-6 << ", ";
+
+ file << fin_res.data_array2->Ndwl << ", ";
+ file << fin_res.data_array2->Ndbl << ", ";
+ file << fin_res.data_array2->Nspd << ", ";
+ file << fin_res.data_array2->deg_bl_muxing << ", ";
+ file << fin_res.data_array2->Ndsam_lev_1 << ", ";
+ file << fin_res.data_array2->Ndsam_lev_2 << ", ";
+ file << fin_res.data_array2->area_efficiency << ", ";
+ if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) {
+ file << fin_res.tag_array2->Ndwl << ", ";
+ file << fin_res.tag_array2->Ndbl << ", ";
+ file << fin_res.tag_array2->Nspd << ", ";
+ file << fin_res.tag_array2->deg_bl_muxing << ", ";
+ file << fin_res.tag_array2->Ndsam_lev_1 << ", ";
+ file << fin_res.tag_array2->Ndsam_lev_2 << ", ";
+ file << fin_res.tag_array2->area_efficiency << ", ";
+ } else {
file << "N/A" << ", ";
file << "N/A"<< ", ";
file << "N/A" << ", ";
@@ -1535,803 +1444,552 @@ void output_data_csv(const uca_org_t & fin_res)
// file << fin_res.data_array.cas_latency * 1e9 << ", " ;
// file << fin_res.data_array.precharge_delay * 1e9 << ", " ;
// file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width;
- file<<endl;
- }
- file.close();
+ file<<endl;
+ }
+ file.close();
}
-void output_UCA(uca_org_t *fr)
-{
- // if (NUCA)
- if (0) {
- cout << "\n\n Detailed Bank Stats:\n";
- cout << " Bank Size (bytes): %d\n" <<
- (int) (g_ip->cache_sz);
- }
- else {
- if (g_ip->data_arr_ram_cell_tech_type == 3) {
- cout << "\n---------- CACTI version 6.5, Uniform Cache Access " <<
- "Logic Process Based DRAM Model ----------\n";
- }
- else if (g_ip->data_arr_ram_cell_tech_type == 4) {
- cout << "\n---------- CACTI version 6.5, Uniform" <<
- "Cache Access Commodity DRAM Model ----------\n";
+void output_UCA(uca_org_t *fr) {
+ // if (NUCA)
+ if (0) {
+ cout << "\n\n Detailed Bank Stats:\n";
+ cout << " Bank Size (bytes): %d\n" <<
+ (int) (g_ip->cache_sz);
+ } else {
+ if (g_ip->data_arr_ram_cell_tech_type == 3) {
+ cout << "\n---------- CACTI version 6.5, Uniform Cache Access " <<
+ "Logic Process Based DRAM Model ----------\n";
+ } else if (g_ip->data_arr_ram_cell_tech_type == 4) {
+ cout << "\n---------- CACTI version 6.5, Uniform" <<
+ "Cache Access Commodity DRAM Model ----------\n";
+ } else {
+ cout << "\n---------- CACTI version 6.5, Uniform Cache Access "
+ "SRAM Model ----------\n";
+ }
+ cout << "\nCache Parameters:\n";
+ cout << " Total cache size (bytes): " <<
+ (int) (g_ip->cache_sz) << endl;
}
+
+ cout << " Number of banks: " << (int) g_ip->nbanks << endl;
+ if (g_ip->fully_assoc || g_ip->pure_cam)
+ cout << " Associativity: fully associative\n";
else {
- cout << "\n---------- CACTI version 6.5, Uniform Cache Access "
- "SRAM Model ----------\n";
+ if (g_ip->tag_assoc == 1)
+ cout << " Associativity: direct mapped\n";
+ else
+ cout << " Associativity: " <<
+ g_ip->tag_assoc << endl;
}
- cout << "\nCache Parameters:\n";
- cout << " Total cache size (bytes): " <<
- (int) (g_ip->cache_sz) << endl;
- }
-
- cout << " Number of banks: " << (int) g_ip->nbanks << endl;
- if (g_ip->fully_assoc|| g_ip->pure_cam)
- cout << " Associativity: fully associative\n";
- else {
- if (g_ip->tag_assoc == 1)
- cout << " Associativity: direct mapped\n";
- else
- cout << " Associativity: " <<
- g_ip->tag_assoc << endl;
- }
-
-
- cout << " Block size (bytes): " << g_ip->line_sz << endl;
- cout << " Read/write Ports: " <<
- g_ip->num_rw_ports << endl;
- cout << " Read ports: " <<
- g_ip->num_rd_ports << endl;
- cout << " Write ports: " <<
- g_ip->num_wr_ports << endl;
- if (g_ip->fully_assoc|| g_ip->pure_cam)
- cout << " search ports: " <<
- g_ip->num_search_ports << endl;
- cout << " Technology size (nm): " <<
- g_ip->F_sz_nm << endl << endl;
-
- cout << " Access time (ns): " << fr->access_time*1e9 << endl;
- cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl;
- if (g_ip->data_arr_ram_cell_tech_type >= 4) {
- cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl;
- cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl;
- cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl;
- cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl;
- cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl;
- cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl;
- cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl;
- cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl;
- cout << " Refresh power (mW): " <<
- fr->data_array2->refresh_power*1e3 << endl;
- }
- else {
- if ((g_ip->fully_assoc|| g_ip->pure_cam))
- {
- cout << " Total dynamic associative search energy per access (nJ): " <<
- fr->power.searchOp.dynamic*1e9 << endl;
+
+
+ cout << " Block size (bytes): " << g_ip->line_sz << endl;
+ cout << " Read/write Ports: " <<
+ g_ip->num_rw_ports << endl;
+ cout << " Read ports: " <<
+ g_ip->num_rd_ports << endl;
+ cout << " Write ports: " <<
+ g_ip->num_wr_ports << endl;
+ if (g_ip->fully_assoc || g_ip->pure_cam)
+ cout << " search ports: " <<
+ g_ip->num_search_ports << endl;
+ cout << " Technology size (nm): " <<
+ g_ip->F_sz_nm << endl << endl;
+
+ cout << " Access time (ns): " << fr->access_time*1e9 << endl;
+ cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl;
+ if (g_ip->data_arr_ram_cell_tech_type >= 4) {
+ cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl;
+ cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl;
+ cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl;
+ cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl;
+ cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl;
+ cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl;
+ cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl;
+ cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl;
+ cout << " Refresh power (mW): " <<
+ fr->data_array2->refresh_power*1e3 << endl;
+ } else {
+ if ((g_ip->fully_assoc || g_ip->pure_cam)) {
+ cout << " Total dynamic associative search energy per access (nJ): " <<
+ fr->power.searchOp.dynamic*1e9 << endl;
// cout << " Total dynamic read energy per access (nJ): " <<
// fr->power.readOp.dynamic*1e9 << endl;
// cout << " Total dynamic write energy per access (nJ): " <<
// fr->power.writeOp.dynamic*1e9 << endl;
- }
+ }
// else
// {
- cout << " Total dynamic read energy per access (nJ): " <<
- fr->power.readOp.dynamic*1e9 << endl;
- cout << " Total dynamic write energy per access (nJ): " <<
- fr->power.writeOp.dynamic*1e9 << endl;
+ cout << " Total dynamic read energy per access (nJ): " <<
+ fr->power.readOp.dynamic*1e9 << endl;
+ cout << " Total dynamic write energy per access (nJ): " <<
+ fr->power.writeOp.dynamic*1e9 << endl;
// }
- cout << " Total leakage power of a bank"
- " (mW): " << fr->power.readOp.leakage*1e3 << endl;
- cout << " Total gate leakage power of a bank"
- " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl;
- }
-
- if (g_ip->data_arr_ram_cell_tech_type ==3 || g_ip->data_arr_ram_cell_tech_type ==4)
- {
- }
- cout << " Cache height x width (mm): " <<
- fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl;
-
-
- cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl;
- cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl;
- cout << " Best Nspd : " << fr->data_array2->Nspd << endl;
- cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl;
- cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl;
- cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl;
-
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl;
- cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl;
- cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl;
- cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl;
- cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl;
- cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl;
- }
-
- switch (fr->data_array2->wt) {
+ cout << " Total leakage power of a bank"
+ " (mW): " << fr->power.readOp.leakage*1e3 << endl;
+ cout << " Total gate leakage power of a bank"
+ " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl;
+ }
+
+ if (g_ip->data_arr_ram_cell_tech_type == 3 || g_ip->data_arr_ram_cell_tech_type == 4) {
+ }
+ cout << " Cache height x width (mm): " <<
+ fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl;
+
+
+ cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl;
+ cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl;
+ cout << " Best Nspd : " << fr->data_array2->Nspd << endl;
+ cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl;
+ cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl;
+ cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl;
+
+ if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
+ !g_ip->is_main_mem) {
+ cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl;
+ cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl;
+ cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl;
+ cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl;
+ cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl;
+ cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl;
+ }
+
+ switch (fr->data_array2->wt) {
case (0):
- cout << " Data array, H-tree wire type: Delay optimized global wires\n";
- break;
- case (1):
- cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n";
- break;
- case (2):
- cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n";
- break;
- case (3):
- cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n";
- break;
- case (4):
- cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n";
- break;
- case (5):
- cout << " Data array, wire type: Low swing wires\n";
- break;
- default:
- cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt <<endl;
- exit(0);
- }
-
- if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) {
- switch (fr->tag_array2->wt) {
- case (0):
- cout << " Tag array, H-tree wire type: Delay optimized global wires\n";
+ cout << " Data array, H-tree wire type: Delay optimized global wires\n";
break;
- case (1):
- cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n";
+ case (1):
+ cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n";
break;
- case (2):
- cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n";
+ case (2):
+ cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n";
break;
- case (3):
- cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n";
+ case (3):
+ cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n";
break;
- case (4):
- cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n";
+ case (4):
+ cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n";
break;
- case (5):
- cout << " Tag array, wire type: Low swing wires\n";
+ case (5):
+ cout << " Data array, wire type: Low swing wires\n";
break;
- default:
- cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt <<endl;
- exit(-1);
+ default:
+ cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt << endl;
+ exit(0);
}
- }
-
- if (g_ip->print_detail)
- {
- //if(g_ip->fully_assoc) return;
-
- /* Delay stats */
- /* data array stats */
- cout << endl << "Time Components:" << endl << endl;
-
- cout << " Data side (with Output driver) (ns): " <<
- fr->data_array2->access_time/1e-9 << endl;
- cout << "\tH-tree input delay (ns): " <<
- fr->data_array2->delay_route_to_bank * 1e9 +
- fr->data_array2->delay_input_htree * 1e9 << endl;
-
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- {
- cout << "\tDecoder + wordline delay (ns): " <<
- fr->data_array2->delay_row_predecode_driver_and_block * 1e9 +
- fr->data_array2->delay_row_decoder * 1e9 << endl;
- }
- else
- {
- cout << "\tCAM search delay (ns): " <<
- fr->data_array2->delay_matchlines * 1e9 << endl;
+ if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
+ switch (fr->tag_array2->wt) {
+ case (0):
+ cout << " Tag array, H-tree wire type: Delay optimized global wires\n";
+ break;
+ case (1):
+ cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n";
+ break;
+ case (2):
+ cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n";
+ break;
+ case (3):
+ cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n";
+ break;
+ case (4):
+ cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n";
+ break;
+ case (5):
+ cout << " Tag array, wire type: Low swing wires\n";
+ break;
+ default:
+ cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt << endl;
+ exit(-1);
+ }
}
- cout << "\tBitline delay (ns): " <<
- fr->data_array2->delay_bitlines/1e-9 << endl;
+ if (g_ip->print_detail) {
+ /* Delay stats */
+ /* data array stats */
+ cout << endl << "Time Components:" << endl << endl;
+
+ cout << " Data side (with Output driver) (ns): " <<
+ fr->data_array2->access_time / 1e-9 << endl;
+
+ cout << "\tH-tree input delay (ns): " <<
+ fr->data_array2->delay_route_to_bank * 1e9 +
+ fr->data_array2->delay_input_htree * 1e9 << endl;
+
+ if (!(g_ip->pure_cam || g_ip->fully_assoc)) {
+ cout << "\tDecoder + wordline delay (ns): " <<
+ fr->data_array2->delay_row_predecode_driver_and_block * 1e9 +
+ fr->data_array2->delay_row_decoder * 1e9 << endl;
+ } else {
+ cout << "\tCAM search delay (ns): " <<
+ fr->data_array2->delay_matchlines * 1e9 << endl;
+ }
+
+ cout << "\tBitline delay (ns): " <<
+ fr->data_array2->delay_bitlines / 1e-9 << endl;
- cout << "\tSense Amplifier delay (ns): " <<
- fr->data_array2->delay_sense_amp * 1e9 << endl;
+ cout << "\tSense Amplifier delay (ns): " <<
+ fr->data_array2->delay_sense_amp * 1e9 << endl;
- cout << "\tH-tree output delay (ns): " <<
- fr->data_array2->delay_subarray_output_driver * 1e9 +
- fr->data_array2->delay_dout_htree * 1e9 << endl;
+ cout << "\tH-tree output delay (ns): " <<
+ fr->data_array2->delay_subarray_output_driver * 1e9 +
+ fr->data_array2->delay_dout_htree * 1e9 << endl;
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- /* tag array stats */
- cout << endl << " Tag side (with Output driver) (ns): " <<
- fr->tag_array2->access_time/1e-9 << endl;
+ if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
+ !g_ip->is_main_mem) {
+ /* tag array stats */
+ cout << endl << " Tag side (with Output driver) (ns): " <<
+ fr->tag_array2->access_time / 1e-9 << endl;
- cout << "\tH-tree input delay (ns): " <<
- fr->tag_array2->delay_route_to_bank * 1e9 +
- fr->tag_array2->delay_input_htree * 1e9 << endl;
+ cout << "\tH-tree input delay (ns): " <<
+ fr->tag_array2->delay_route_to_bank * 1e9 +
+ fr->tag_array2->delay_input_htree * 1e9 << endl;
- cout << "\tDecoder + wordline delay (ns): " <<
- fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
- fr->tag_array2->delay_row_decoder * 1e9 << endl;
+ cout << "\tDecoder + wordline delay (ns): " <<
+ fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
+ fr->tag_array2->delay_row_decoder * 1e9 << endl;
- cout << "\tBitline delay (ns): " <<
- fr->tag_array2->delay_bitlines/1e-9 << endl;
+ cout << "\tBitline delay (ns): " <<
+ fr->tag_array2->delay_bitlines / 1e-9 << endl;
- cout << "\tSense Amplifier delay (ns): " <<
- fr->tag_array2->delay_sense_amp * 1e9 << endl;
+ cout << "\tSense Amplifier delay (ns): " <<
+ fr->tag_array2->delay_sense_amp * 1e9 << endl;
- cout << "\tComparator delay (ns): " <<
- fr->tag_array2->delay_comparator * 1e9 << endl;
+ cout << "\tComparator delay (ns): " <<
+ fr->tag_array2->delay_comparator * 1e9 << endl;
- cout << "\tH-tree output delay (ns): " <<
- fr->tag_array2->delay_subarray_output_driver * 1e9 +
- fr->tag_array2->delay_dout_htree * 1e9 << endl;
- }
+ cout << "\tH-tree output delay (ns): " <<
+ fr->tag_array2->delay_subarray_output_driver * 1e9 +
+ fr->tag_array2->delay_dout_htree * 1e9 << endl;
+ }
- /* Energy/Power stats */
- cout << endl << endl << "Power Components:" << endl << endl;
+ /* Energy/Power stats */
+ cout << endl << endl << "Power Components:" << endl << endl;
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- {
- cout << " Data array: Total dynamic read energy/access (nJ): " <<
- fr->data_array2->power.readOp.dynamic * 1e9 << endl;
- cout << "\tTotal leakage read/write power of a bank (mW): " <<
- fr->data_array2->power.readOp.leakage * 1e3 << endl;
+ if (!(g_ip->pure_cam || g_ip->fully_assoc)) {
+ cout << " Data array: Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal leakage read/write power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->data_array2->power_addr_input_htree.readOp.dynamic +
- fr->data_array2->power_data_output_htree.readOp.dynamic +
- fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
- cout << "\tTotal leakage power in H-tree (that includes both "
- "address and data network) ((mW)): " <<
+ cout << "\tTotal leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
(fr->data_array2->power_addr_input_htree.readOp.leakage +
fr->data_array2->power_data_output_htree.readOp.leakage +
- fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl;
+ fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3
+ << endl;
- cout << "\tTotal gate leakage power in H-tree (that includes both "
- "address and data network) ((mW)): " <<
+ cout << "\tTotal gate leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
(fr->data_array2->power_addr_input_htree.readOp.gate_leakage +
fr->data_array2->power_data_output_htree.readOp.gate_leakage +
- fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl;
-
- cout << "\tOutput Htree inside bank Energy (nJ): " <<
- fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- cout << "\tDecoder (nJ): " <<
- fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- cout << "\tWordline (nJ): " <<
- fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
-
- cout << "\tBitlines precharge and equalization circuit (nJ): " <<
- fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines (nJ): " <<
- fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
- }
+ fr->data_array2->power_routing_to_bank.readOp.gate_leakage) *
+ 1e3 << endl;
+
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+
+ cout << "\tBitlines precharge and equalization circuit (nJ): " <<
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+ }
- else if (g_ip->pure_cam)
- {
-
- cout << " CAM array:"<<endl;
- cout << " Total dynamic associative search energy/access (nJ): " <<
- fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "match key and data transfer) (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- fr->data_array2->power_htree_out_search.searchOp.dynamic +
- fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
- cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
- cout << "\tSearchlines (nJ): " <<
- fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
- fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
- cout << "\tMatchlines (nJ): " <<
- fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
- fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
-
-
- cout <<endl<< " Total dynamic read energy/access (nJ): " <<
- fr->data_array2->power.readOp.dynamic * 1e9 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->data_array2->power_addr_input_htree.readOp.dynamic +
- fr->data_array2->power_data_output_htree.readOp.dynamic +
- fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
- cout << "\tOutput Htree inside bank Energy (nJ): " <<
- fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- cout << "\tDecoder (nJ): " <<
- fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- cout << "\tWordline (nJ): " <<
- fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines (nJ): " <<
- fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
- fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
-
- cout << endl <<" Total leakage power of a bank (mW): " <<
- fr->data_array2->power.readOp.leakage * 1e3 << endl;
+ else if (g_ip->pure_cam) {
+
+ cout << " CAM array:" << endl;
+ cout << " Total dynamic associative search energy/access (nJ): " <<
+ fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "match key and data transfer) (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic +
+ fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tSearchlines (nJ): " <<
+ fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tMatchlines (nJ): " <<
+ fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
+
+
+ cout << endl << " Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+
+ cout << endl << " Total leakage power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
+ } else {
+ cout << " Fully associative array:" << endl;
+ cout << " Total dynamic associative search energy/access (nJ): " <<
+ fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "match key and data transfer) (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic +
+ fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
+ (fr->data_array2->power_htree_in_search.searchOp.dynamic +
+ fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
+ cout << "\tSearchlines (nJ): " <<
+ fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tMatchlines (nJ): " <<
+ fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
+ cout << "\tData portion wordline (nJ): " <<
+ fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl;
+ cout << "\tData Bitlines (nJ): " <<
+ fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
+
+
+ cout << endl << " Total dynamic read energy/access (nJ): " <<
+ fr->data_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->data_array2->power_addr_input_htree.readOp.dynamic +
+ fr->data_array2->power_data_output_htree.readOp.dynamic +
+ fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+ cout << "\tOutput Htree inside bank Energy (nJ): " <<
+ fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
+ fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+
+ cout << endl << " Total leakage power of a bank (mW): " <<
+ fr->data_array2->power.readOp.leakage * 1e3 << endl;
}
- else
- {
- cout << " Fully associative array:"<<endl;
- cout << " Total dynamic associative search energy/access (nJ): " <<
- fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "match key and data transfer) (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- fr->data_array2->power_htree_out_search.searchOp.dynamic +
- fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
- cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
- cout << "\tSearchlines (nJ): " <<
- fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
- fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
- cout << "\tMatchlines (nJ): " <<
- fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
- fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
- cout << "\tData portion wordline (nJ): " <<
- fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl;
- cout << "\tData Bitlines (nJ): " <<
- fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
- fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
-
-
- cout <<endl<< " Total dynamic read energy/access (nJ): " <<
- fr->data_array2->power.readOp.dynamic * 1e9 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->data_array2->power_addr_input_htree.readOp.dynamic +
- fr->data_array2->power_data_output_htree.readOp.dynamic +
- fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
- cout << "\tOutput Htree inside bank Energy (nJ): " <<
- fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- cout << "\tDecoder (nJ): " <<
- fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- cout << "\tWordline (nJ): " <<
- fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines (nJ): " <<
- fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
- fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
-
- cout << endl <<" Total leakage power of a bank (mW): " <<
- fr->data_array2->power.readOp.leakage * 1e3 << endl;
- }
-
-
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << endl << " Tag array: Total dynamic read energy/access (nJ): " <<
- fr->tag_array2->power.readOp.dynamic * 1e9 << endl;
- cout << "\tTotal leakage read/write power of a bank (mW): " <<
- fr->tag_array2->power.readOp.leakage * 1e3 << endl;
- cout << "\tTotal energy in H-tree (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->tag_array2->power_addr_input_htree.readOp.dynamic +
- fr->tag_array2->power_data_output_htree.readOp.dynamic +
- fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
-
- cout << "\tTotal leakage power in H-tree (that includes both "
- "address and data network) ((mW)): " <<
+
+
+ if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
+ !g_ip->is_main_mem) {
+ cout << endl << " Tag array: Total dynamic read energy/access (nJ): " <<
+ fr->tag_array2->power.readOp.dynamic * 1e9 << endl;
+ cout << "\tTotal leakage read/write power of a bank (mW): " <<
+ fr->tag_array2->power.readOp.leakage * 1e3 << endl;
+ cout << "\tTotal energy in H-tree (that includes both "
+ "address and data transfer) (nJ): " <<
+ (fr->tag_array2->power_addr_input_htree.readOp.dynamic +
+ fr->tag_array2->power_data_output_htree.readOp.dynamic +
+ fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
+
+ cout << "\tTotal leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
(fr->tag_array2->power_addr_input_htree.readOp.leakage +
fr->tag_array2->power_data_output_htree.readOp.leakage +
- fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl;
+ fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3
+ << endl;
- cout << "\tTotal gate leakage power in H-tree (that includes both "
- "address and data network) ((mW)): " <<
+ cout << "\tTotal gate leakage power in H-tree (that includes both "
+ "address and data network) ((mW)): " <<
(fr->tag_array2->power_addr_input_htree.readOp.gate_leakage +
fr->tag_array2->power_data_output_htree.readOp.gate_leakage +
- fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl;
-
- cout << "\tOutput Htree inside a bank Energy (nJ): " <<
- fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- cout << "\tDecoder (nJ): " <<
- fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- cout << "\tWordline (nJ): " <<
- fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines precharge and equalization circuit (nJ): " <<
- fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
- cout << "\tBitlines (nJ): " <<
- fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
- cout << "\tSense amplifier energy (nJ): " <<
- fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- cout << "\tSub-array output driver (nJ): " <<
- fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
- }
+ fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) *
+ 1e3 << endl;
+
+ cout << "\tOutput Htree inside a bank Energy (nJ): " <<
+ fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
+ cout << "\tDecoder (nJ): " <<
+ fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
+ cout << "\tWordline (nJ): " <<
+ fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitline mux & associated drivers (nJ): " <<
+ fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amp mux & associated drivers (nJ): " <<
+ fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
+ fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines precharge and equalization circuit (nJ): " <<
+ fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
+ cout << "\tBitlines (nJ): " <<
+ fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
+ cout << "\tSense amplifier energy (nJ): " <<
+ fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
+ cout << "\tSub-array output driver (nJ): " <<
+ fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
+ }
- cout << endl << endl << "Area Components:" << endl << endl;
- /* Data array area stats */
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
- else if (g_ip->pure_cam)
- cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
- else
- cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
- cout << "\tHeight (mm): " <<
- fr->data_array2->all_banks_height*1e-3 << endl;
- cout << "\tWidth (mm): " <<
- fr->data_array2->all_banks_width*1e-3 << endl;
- if (g_ip->print_detail) {
- cout << "\tArea efficiency (Memory cell area/Total area) - " <<
- fr->data_array2->area_efficiency << " %" << endl;
- cout << "\t\tMAT Height (mm): " <<
- fr->data_array2->mat_height*1e-3 << endl;
- cout << "\t\tMAT Length (mm): " <<
- fr->data_array2->mat_length*1e-3 << endl;
- cout << "\t\tSubarray Height (mm): " <<
- fr->data_array2->subarray_height*1e-3 << endl;
- cout << "\t\tSubarray Length (mm): " <<
- fr->data_array2->subarray_length*1e-3 << endl;
- }
+ cout << endl << endl << "Area Components:" << endl << endl;
+ /* Data array area stats */
+ if (!(g_ip->pure_cam || g_ip->fully_assoc))
+ cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ else if (g_ip->pure_cam)
+ cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ else
+ cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
+ cout << "\tHeight (mm): " <<
+ fr->data_array2->all_banks_height*1e-3 << endl;
+ cout << "\tWidth (mm): " <<
+ fr->data_array2->all_banks_width*1e-3 << endl;
+ if (g_ip->print_detail) {
+ cout << "\tArea efficiency (Memory cell area/Total area) - " <<
+ fr->data_array2->area_efficiency << " %" << endl;
+ cout << "\t\tMAT Height (mm): " <<
+ fr->data_array2->mat_height*1e-3 << endl;
+ cout << "\t\tMAT Length (mm): " <<
+ fr->data_array2->mat_length*1e-3 << endl;
+ cout << "\t\tSubarray Height (mm): " <<
+ fr->data_array2->subarray_height*1e-3 << endl;
+ cout << "\t\tSubarray Length (mm): " <<
+ fr->data_array2->subarray_length*1e-3 << endl;
+ }
- /* Tag array area stats */
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl;
- cout << "\tHeight (mm): " <<
- fr->tag_array2->all_banks_height*1e-3 << endl;
- cout << "\tWidth (mm): " <<
- fr->tag_array2->all_banks_width*1e-3 << endl;
- if (g_ip->print_detail)
- {
- cout << "\tArea efficiency (Memory cell area/Total area) - " <<
- fr->tag_array2->area_efficiency << " %" << endl;
- cout << "\t\tMAT Height (mm): " <<
- fr->tag_array2->mat_height*1e-3 << endl;
- cout << "\t\tMAT Length (mm): " <<
- fr->tag_array2->mat_length*1e-3 << endl;
- cout << "\t\tSubarray Height (mm): " <<
- fr->tag_array2->subarray_height*1e-3 << endl;
- cout << "\t\tSubarray Length (mm): " <<
- fr->tag_array2->subarray_length*1e-3 << endl;
- }
+ /* Tag array area stats */
+ if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
+ !g_ip->is_main_mem) {
+ cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl;
+ cout << "\tHeight (mm): " <<
+ fr->tag_array2->all_banks_height*1e-3 << endl;
+ cout << "\tWidth (mm): " <<
+ fr->tag_array2->all_banks_width*1e-3 << endl;
+ if (g_ip->print_detail) {
+ cout << "\tArea efficiency (Memory cell area/Total area) - " <<
+ fr->tag_array2->area_efficiency << " %" << endl;
+ cout << "\t\tMAT Height (mm): " <<
+ fr->tag_array2->mat_height*1e-3 << endl;
+ cout << "\t\tMAT Length (mm): " <<
+ fr->tag_array2->mat_length*1e-3 << endl;
+ cout << "\t\tSubarray Height (mm): " <<
+ fr->tag_array2->subarray_height*1e-3 << endl;
+ cout << "\t\tSubarray Length (mm): " <<
+ fr->tag_array2->subarray_length*1e-3 << endl;
+ }
+ }
+ Wire wpr;
+ wpr.print_wire();
}
- Wire wpr;
- wpr.print_wire();
-
- //cout << "FO4 = " << g_tp.FO4 << endl;
- }
}
//McPAT's plain interface, please keep !!!
-uca_org_t cacti_interface(InputParameter * const local_interface)
-{
-// g_ip = new InputParameter();
- //g_ip->add_ecc_b_ = true;
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip = local_interface;
-
-
-// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
-// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
-// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
-// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-//
-// g_ip->ic_proj_type = interconnect_projection_type_in;
-// g_ip->wire_is_mat_type = wire_inside_mat_type_in;
-// g_ip->wire_os_mat_type = wire_outside_mat_type_in;
-// g_ip->burst_len = BURST_LENGTH_in;
-// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
-// g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-//
-// g_ip->cache_sz = cache_size;
-// g_ip->line_sz = line_size;
-// g_ip->assoc = associativity;
-// g_ip->nbanks = banks;
-// g_ip->out_w = output_width;
-// g_ip->specific_tag = specific_tag;
-// if (tag_width == 0) {
-// g_ip->tag_w = 42;
-// }
-// else {
-// g_ip->tag_w = tag_width;
-// }
-//
-// g_ip->access_mode = access_mode;
-// g_ip->delay_wt = obj_func_delay;
-// g_ip->dynamic_power_wt = obj_func_dynamic_power;
-// g_ip->leakage_power_wt = obj_func_leakage_power;
-// g_ip->area_wt = obj_func_area;
-// g_ip->cycle_time_wt = obj_func_cycle_time;
-// g_ip->delay_dev = dev_func_delay;
-// g_ip->dynamic_power_dev = dev_func_dynamic_power;
-// g_ip->leakage_power_dev = dev_func_leakage_power;
-// g_ip->area_dev = dev_func_area;
-// g_ip->cycle_time_dev = dev_func_cycle_time;
-// g_ip->temp = temp;
-//
-// g_ip->F_sz_nm = tech_node;
-// g_ip->F_sz_um = tech_node / 1000;
-// g_ip->is_main_mem = (main_mem != 0) ? true : false;
-// g_ip->is_cache = (cache ==1) ? true : false;
-// g_ip->pure_ram = (cache ==0) ? true : false;
-// g_ip->pure_cam = (cache ==2) ? true : false;
-// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
-// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-//
-// g_ip->num_rw_ports = rw_ports;
-// g_ip->num_rd_ports = excl_read_ports;
-// g_ip->num_wr_ports = excl_write_ports;
-// g_ip->num_se_rd_ports = single_ended_read_ports;
-// g_ip->num_search_ports = search_ports;
-//
-// g_ip->print_detail = 1;
-// g_ip->nuca = 0;
-// g_ip->is_cache=true;
-//
-// if (force_wiretype == 0)
-// {
-// g_ip->wt = Global;
-// g_ip->force_wiretype = false;
-// }
-// else
-// { g_ip->force_wiretype = true;
-// if (wiretype==10) {
-// g_ip->wt = Global_10;
-// }
-// if (wiretype==20) {
-// g_ip->wt = Global_20;
-// }
-// if (wiretype==30) {
-// g_ip->wt = Global_30;
-// }
-// if (wiretype==5) {
-// g_ip->wt = Global_5;
-// }
-// if (wiretype==0) {
-// g_ip->wt = Low_swing;
-// }
-// }
-// //g_ip->wt = Global_5;
-// if (force_config == 0)
-// {
-// g_ip->force_cache_config = false;
-// }
-// else
-// {
-// g_ip->force_cache_config = true;
-// g_ip->ndbl=ndbl;
-// g_ip->ndwl=ndwl;
-// g_ip->nspd=nspd;
-// g_ip->ndcm=ndcm;
-// g_ip->ndsam1=ndsam1;
-// g_ip->ndsam2=ndsam2;
-//
-//
-// }
-//
-// if (ecc==0){
-// g_ip->add_ecc_b_=false;
-// }
-// else
-// {
-// g_ip->add_ecc_b_=true;
-// }
-
+/**
+ * Plain entry point McPAT uses to run CACTI on one set of parameters.
+ *
+ * Publishes local_interface through the global g_ip, validates it with
+ * error_checking(), initializes the technology parameters for the
+ * feature size in g_ip->F_sz_um, constructs a Wire object (required
+ * for wire initialization) and finally calls solve().
+ *
+ * @param local_interface Input parameters. The pointer is stored in
+ * g_ip (not copied) and dereferenced unconditionally, so it
+ * must be non-null.
+ * @return The result object that was passed to solve(); its valid flag
+ * is cleared before solve() runs.
+ *
+ * If error_checking() rejects the parameters, the process terminates
+ * with a failure exit status instead of returning.
+ */
+uca_org_t cacti_interface(InputParameter * const local_interface) {
+ uca_org_t fin_res;
+ fin_res.valid = false;
- g_ip->error_checking();
-
-
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
+ g_ip = local_interface;
- solve(&fin_res);
+ if (!g_ip->error_checking()) {
+ // Invalid input is an error: report it through a non-zero exit
+ // status so callers and scripts do not mistake it for success.
+ exit(EXIT_FAILURE);
+ }
-// g_ip->display_ip();
-// output_UCA(&fin_res);
-// output_data_csv(fin_res);
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
- // delete (g_ip);
+ solve(&fin_res);
- return fin_res;
+ return fin_res;
}
//McPAT's plain interface, please keep !!!
-uca_org_t init_interface(InputParameter* const local_interface)
-{
- // g_ip = new InputParameter();
- //g_ip->add_ecc_b_ = true;
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip = local_interface;
+uca_org_t init_interface(InputParameter* const local_interface,
+ const string &name) {
+ uca_org_t fin_res;
+ fin_res.valid = false;
+ g_ip = local_interface;
-// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
-// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
-// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
-// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-//
-// g_ip->ic_proj_type = interconnect_projection_type_in;
-// g_ip->wire_is_mat_type = wire_inside_mat_type_in;
-// g_ip->wire_os_mat_type = wire_outside_mat_type_in;
-// g_ip->burst_len = BURST_LENGTH_in;
-// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
-// g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-//
-// g_ip->cache_sz = cache_size;
-// g_ip->line_sz = line_size;
-// g_ip->assoc = associativity;
-// g_ip->nbanks = banks;
-// g_ip->out_w = output_width;
-// g_ip->specific_tag = specific_tag;
-// if (tag_width == 0) {
-// g_ip->tag_w = 42;
-// }
-// else {
-// g_ip->tag_w = tag_width;
-// }
-//
-// g_ip->access_mode = access_mode;
-// g_ip->delay_wt = obj_func_delay;
-// g_ip->dynamic_power_wt = obj_func_dynamic_power;
-// g_ip->leakage_power_wt = obj_func_leakage_power;
-// g_ip->area_wt = obj_func_area;
-// g_ip->cycle_time_wt = obj_func_cycle_time;
-// g_ip->delay_dev = dev_func_delay;
-// g_ip->dynamic_power_dev = dev_func_dynamic_power;
-// g_ip->leakage_power_dev = dev_func_leakage_power;
-// g_ip->area_dev = dev_func_area;
-// g_ip->cycle_time_dev = dev_func_cycle_time;
-// g_ip->temp = temp;
-//
-// g_ip->F_sz_nm = tech_node;
-// g_ip->F_sz_um = tech_node / 1000;
-// g_ip->is_main_mem = (main_mem != 0) ? true : false;
-// g_ip->is_cache = (cache ==1) ? true : false;
-// g_ip->pure_ram = (cache ==0) ? true : false;
-// g_ip->pure_cam = (cache ==2) ? true : false;
-// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
-// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-//
-// g_ip->num_rw_ports = rw_ports;
-// g_ip->num_rd_ports = excl_read_ports;
-// g_ip->num_wr_ports = excl_write_ports;
-// g_ip->num_se_rd_ports = single_ended_read_ports;
-// g_ip->num_search_ports = search_ports;
-//
-// g_ip->print_detail = 1;
-// g_ip->nuca = 0;
-//
-// if (force_wiretype == 0)
-// {
-// g_ip->wt = Global;
-// g_ip->force_wiretype = false;
-// }
-// else
-// { g_ip->force_wiretype = true;
-// if (wiretype==10) {
-// g_ip->wt = Global_10;
-// }
-// if (wiretype==20) {
-// g_ip->wt = Global_20;
-// }
-// if (wiretype==30) {
-// g_ip->wt = Global_30;
-// }
-// if (wiretype==5) {
-// g_ip->wt = Global_5;
-// }
-// if (wiretype==0) {
-// g_ip->wt = Low_swing;
-// }
-// }
-// //g_ip->wt = Global_5;
-// if (force_config == 0)
-// {
-// g_ip->force_cache_config = false;
-// }
-// else
-// {
-// g_ip->force_cache_config = true;
-// g_ip->ndbl=ndbl;
-// g_ip->ndwl=ndwl;
-// g_ip->nspd=nspd;
-// g_ip->ndcm=ndcm;
-// g_ip->ndsam1=ndsam1;
-// g_ip->ndsam2=ndsam2;
-//
-//
-// }
-//
-// if (ecc==0){
-// g_ip->add_ecc_b_=false;
-// }
-// else
-// {
-// g_ip->add_ecc_b_=true;
-// }
-
-
- g_ip->error_checking();
-
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
- //solve(&fin_res);
- //g_ip->display_ip();
-
- //solve(&fin_res);
- //output_UCA(&fin_res);
- //output_data_csv(fin_res);
- // delete (g_ip);
+ if (!g_ip->error_checking(name)) {
+ exit(0);
+ }
- return fin_res;
+ init_tech_params(g_ip->F_sz_um, false);
+ Wire winit; // Do not delete this line. It initializes wires.
+ return fin_res;
}
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res)
diff --git a/ext/mcpat/cacti/mat.cc b/ext/mcpat/cacti/mat.cc
index ef98107c7..447996053 100755..100644
--- a/ext/mcpat/cacti/mat.cc
+++ b/ext/mcpat/cacti/mat.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,371 +37,369 @@
#include "mat.h"
Mat::Mat(const DynamicParameter & dyn_p)
- :dp(dyn_p),
- power_subarray_out_drv(),
- delay_fa_tag(0), delay_cam(0),
- delay_before_decoder(0), delay_bitline(0),
- delay_wl_reset(0), delay_bl_restore(0),
- delay_searchline(0), delay_matchchline(0),
- delay_cam_sl_restore(0), delay_cam_ml_reset(0),
- delay_fa_ram_wl(0),delay_hit_miss_reset(0),
- delay_hit_miss(0),
- subarray(dp, dp.fully_assoc),
- power_bitline(), per_bitline_read_energy(0),
- deg_bl_muxing(dp.deg_bl_muxing),
- num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
- delay_writeback(0),
- cell(subarray.cell), cam_cell(subarray.cam_cell),
- is_dram(dyn_p.is_dram),
- pure_cam(dyn_p.pure_cam),
- num_mats(dp.num_mats),
- power_sa(), delay_sa(0),
- leak_power_sense_amps_closed_page_state(0),
- leak_power_sense_amps_open_page_state(0),
- delay_subarray_out_drv(0),
- delay_comparator(0), power_comparator(),
- num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
- num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
- num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
-{
- assert(num_subarrays_per_mat <= 4);
- assert(num_subarrays_per_row <= 2);
- is_fa = (dp.fully_assoc) ? true : false;
- camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
-
- if (is_fa || pure_cam)
- num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
-
- if (dp.use_inp_params == 1) {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
-
- }
-
- double number_sa_subarray;
-
- if (!is_fa && !pure_cam)
- {
- number_sa_subarray = subarray.num_cols / deg_bl_muxing;
- }
- else if (is_fa && !pure_cam)
- {
- number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
- }
-
- else
- {
- number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
- }
-
- int num_dec_signals = subarray.num_rows;
- double C_ld_bit_mux_dec_out = 0;
- double C_ld_sa_mux_lev_1_dec_out = 0;
- double C_ld_sa_mux_lev_2_dec_out = 0;
- double R_wire_wl_drv_out;
-
- if (!is_fa && !pure_cam)
- {
- R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+ : dp(dyn_p),
+ power_subarray_out_drv(),
+ delay_fa_tag(0), delay_cam(0),
+ delay_before_decoder(0), delay_bitline(0),
+ delay_wl_reset(0), delay_bl_restore(0),
+ delay_searchline(0), delay_matchchline(0),
+ delay_cam_sl_restore(0), delay_cam_ml_reset(0),
+ delay_fa_ram_wl(0), delay_hit_miss_reset(0),
+ delay_hit_miss(0),
+ subarray(dp, dp.fully_assoc),
+ power_bitline(), per_bitline_read_energy(0),
+ deg_bl_muxing(dp.deg_bl_muxing),
+ num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
+ delay_writeback(0),
+ cell(subarray.cell), cam_cell(subarray.cam_cell),
+ is_dram(dyn_p.is_dram),
+ pure_cam(dyn_p.pure_cam),
+ num_mats(dp.num_mats),
+ power_sa(), delay_sa(0),
+ leak_power_sense_amps_closed_page_state(0),
+ leak_power_sense_amps_open_page_state(0),
+ delay_subarray_out_drv(0),
+ delay_comparator(0), power_comparator(),
+ num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
+ num_subarrays_per_mat(dp.num_subarrays / dp.num_mats),
+ num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) {
+ assert(num_subarrays_per_mat <= 4);
+ assert(num_subarrays_per_row <= 2);
+ is_fa = (dp.fully_assoc) ? true : false;
+ camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
+
+ if (is_fa || pure_cam) {
+ num_subarrays_per_row = num_subarrays_per_mat > 2 ?
+ num_subarrays_per_mat / 2 : num_subarrays_per_mat;
}
- else if (is_fa && !pure_cam)
- {
- R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+
+ if (dp.use_inp_params == 1) {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ } else {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+
+ }
+
+ double number_sa_subarray;
+
+ if (!is_fa && !pure_cam) {
+ number_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ } else if (is_fa && !pure_cam) {
+ number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
+ }
+
+ else {
+ number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
}
- else
- {
+
+ int num_dec_signals = subarray.num_rows;
+ double C_ld_bit_mux_dec_out = 0;
+ double C_ld_sa_mux_lev_1_dec_out = 0;
+ double C_ld_sa_mux_lev_2_dec_out = 0;
+ double R_wire_wl_drv_out;
+
+ if (!is_fa && !pure_cam) {
+ R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+ } else if (is_fa && !pure_cam) {
+ R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+ } else {
R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
}
- double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
- double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
-
- if (deg_bl_muxing > 1)
- {
- C_ld_bit_mux_dec_out =
- (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
-
- if (dp.Ndsam_lev_1 > 1)
- {
- C_ld_sa_mux_lev_1_dec_out =
- (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
- if (dp.Ndsam_lev_2 > 1)
- {
- C_ld_sa_mux_lev_2_dec_out =
- (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
-
- if (num_subarrays_per_row >= 2)
- {
- // wire heads for both right and left side of a mat, so half the resistance
- R_wire_bit_mux_dec_out /= 2.0;
- R_wire_sa_mux_dec_out /= 2.0;
- }
-
-
- row_dec = new Decoder(
- num_dec_signals,
- false,
- subarray.C_wl,
- R_wire_wl_drv_out,
- false/*is_fa*/,
- is_dram,
- true,
- camFlag? cam_cell:cell);
+ double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
+ double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
+
+ if (deg_bl_muxing > 1) {
+ C_ld_bit_mux_dec_out =
+ (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) *
+ gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistors per cell
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+
+ if (dp.Ndsam_lev_1 > 1) {
+ C_ld_sa_mux_lev_1_dec_out =
+ (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) *
+ gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+ if (dp.Ndsam_lev_2 > 1) {
+ C_ld_sa_mux_lev_2_dec_out =
+ (num_subarrays_per_mat * number_sa_subarray /
+ (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) *
+ gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+
+ if (num_subarrays_per_row >= 2) {
+ // wire heads for both right and left side of a mat, so half the resistance
+ R_wire_bit_mux_dec_out /= 2.0;
+ R_wire_sa_mux_dec_out /= 2.0;
+ }
+
+
+ row_dec = new Decoder(
+ num_dec_signals,
+ false,
+ subarray.C_wl,
+ R_wire_wl_drv_out,
+ false/*is_fa*/,
+ is_dram,
+ true,
+ camFlag ? cam_cell : cell);
// if (is_fa && (!dp.is_tag))
// {
// row_dec->exist = true;
// }
- bit_mux_dec = new Decoder(
- deg_bl_muxing,// This number is 1 for FA or CAM
- false,
- C_ld_bit_mux_dec_out,
- R_wire_bit_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
- sa_mux_lev_1_dec = new Decoder(
- dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
- dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
- C_ld_sa_mux_lev_1_dec_out,
- R_wire_sa_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
- sa_mux_lev_2_dec = new Decoder(
- dp.Ndsam_lev_2, // This number is 1 for FA or CAM
- false,
- C_ld_sa_mux_lev_2_dec_out,
- R_wire_sa_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
-
- double C_wire_predec_blk_out;
- double R_wire_predec_blk_out;
-
- if (!is_fa && !pure_cam)
- {
+ bit_mux_dec = new Decoder(
+ deg_bl_muxing,// This number is 1 for FA or CAM
+ false,
+ C_ld_bit_mux_dec_out,
+ R_wire_bit_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+ sa_mux_lev_1_dec = new Decoder(
+ dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
+ dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
+ C_ld_sa_mux_lev_1_dec_out,
+ R_wire_sa_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+ sa_mux_lev_2_dec = new Decoder(
+ dp.Ndsam_lev_2, // This number is 1 for FA or CAM
+ false,
+ C_ld_sa_mux_lev_2_dec_out,
+ R_wire_sa_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+
+ double C_wire_predec_blk_out;
+ double R_wire_predec_blk_out;
+
+ if (!is_fa && !pure_cam) {
+
+ C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
+ R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
+
+ } else { //the pre-decode block's load is the same for both FA and CAM
+ C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
+ R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
+ }
- C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
- R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
- }
- else //for pre-decode block's load is same for both FA and CAM
- {
- C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
- R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
- }
-
-
- if (is_fa||pure_cam)
- num_dec_signals += _log2(num_subarrays_per_mat);
-
- PredecBlk * r_predec_blk1 = new PredecBlk(
- num_dec_signals,
- row_dec,
- C_wire_predec_blk_out,
- R_wire_predec_blk_out,
- num_subarrays_per_mat,
- is_dram,
- true);
- PredecBlk * r_predec_blk2 = new PredecBlk(
- num_dec_signals,
- row_dec,
- C_wire_predec_blk_out,
- R_wire_predec_blk_out,
- num_subarrays_per_mat,
- is_dram,
- false);
- PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
- PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
- PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
- PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
- PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
- PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
- dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
- dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
-
- PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
- PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
- PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
- PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
- PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
- PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
- PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
- PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
- way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
- dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
-
- r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
- b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
- sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
- sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
-
- subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
-
- double driver_c_gate_load;
- double driver_c_wire_load;
- double driver_r_wire_load;
-
- if (is_fa || pure_cam)
-
- { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
- driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
- cam_bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- if (!pure_cam)
- {
- //This is only used for fully asso not pure CAM
- driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
- bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
- }
- }
-
- else
- {
- driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
- bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
- }
- double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
- double w_row_decoder = area_row_decoder / subarray.area.get_h();
-
- double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
- compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
-
- double h_subarray_out_drv = subarray_out_wire->area.get_area() *
- (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
-
-
- h_subarray_out_drv *= (RWP + ERP + SCHP);
-
- double h_comparators = 0.0;
- double w_row_predecode_output_wires = 0.0;
- double h_bit_mux_dec_out_wires = 0.0;
- double h_senseamp_mux_dec_out_wires = 0.0;
-
- if ((!is_fa)&&(dp.is_tag))
- {
- //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
- h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
- h_comparators *= (RWP + ERP);
- }
+ if (is_fa || pure_cam)
+ num_dec_signals += _log2(num_subarrays_per_mat);
+
+ PredecBlk * r_predec_blk1 = new PredecBlk(
+ num_dec_signals,
+ row_dec,
+ C_wire_predec_blk_out,
+ R_wire_predec_blk_out,
+ num_subarrays_per_mat,
+ is_dram,
+ true);
+ PredecBlk * r_predec_blk2 = new PredecBlk(
+ num_dec_signals,
+ row_dec,
+ C_wire_predec_blk_out,
+ R_wire_predec_blk_out,
+ num_subarrays_per_mat,
+ is_dram,
+ false);
+ PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
+ PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
+ PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
+ dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
+ dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
+
+ PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
+ PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
+ PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
+ PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
+ PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
+ PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
+ PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
+ PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
+ way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
+ dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
+
+ r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
+ b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
+ sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
+ sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
+
+ subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
+
+ double driver_c_gate_load;
+ double driver_c_wire_load;
+ double driver_r_wire_load;
+
+ if (is_fa || pure_cam)
+
+ { //Although CAM and RAM use different bl pre-charge drivers, we assume the precharge PMOS width is the same
+ driver_c_gate_load = (subarray.num_cols_fa_cam ) *
+ gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
+ is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
+ g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
+ g_tp.wire_outside_mat.R_per_um;
+ cam_bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ if (!pure_cam) {
+ //This is only used for fully associative, not pure CAM
+ driver_c_gate_load = (subarray.num_cols_fa_ram ) *
+ gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
+ is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_ram * cell.w *
+ g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_ram * cell.w *
+ g_tp.wire_outside_mat.R_per_um;
+ bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+ }
+ }
+
+ else {
+ driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
+ bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+ }
+ double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
+ double w_row_decoder = area_row_decoder / subarray.area.get_h();
+
+ double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
+ compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
+
+ double h_subarray_out_drv = subarray_out_wire->area.get_area() *
+ (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
+
+
+ h_subarray_out_drv *= (RWP + ERP + SCHP);
+
+ double h_comparators = 0.0;
+ double w_row_predecode_output_wires = 0.0;
+ double h_bit_mux_dec_out_wires = 0.0;
+ double h_senseamp_mux_dec_out_wires = 0.0;
+
+ if ((!is_fa) && (dp.is_tag)) {
+ //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
+ h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
+ h_comparators *= (RWP + ERP);
+ }
int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
-
-
- double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
- (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
- h_subarray_out_drv + h_comparators);
-
- double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
-
- if (deg_bl_muxing > 1)
- {
- h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
- if (dp.Ndsam_lev_1 > 1)
- {
- h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
- if (dp.Ndsam_lev_2 > 1)
- {
- h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
-
- double h_addr_datain_wires;
- if (!g_ip->ver_htree_wires_over_array)
- {
- h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
- (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+
+ double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
+ (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
+ h_subarray_out_drv + h_comparators);
+
+ double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
+
+ if (deg_bl_muxing > 1) {
+ h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+ if (dp.Ndsam_lev_1 > 1) {
+ h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+ if (dp.Ndsam_lev_2 > 1) {
+ h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+
+ double h_addr_datain_wires;
+ if (!g_ip->ver_htree_wires_over_array) {
+ h_addr_datain_wires = (dp.number_addr_bits_mat +
+ dp.number_way_select_signals_mat +
+ (dp.num_di_b_mat + dp.num_do_b_mat) /
+ num_subarrays_per_row) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+ if (is_fa || pure_cam) {
+ h_addr_datain_wires =
+ (dp.number_addr_bits_mat +
+ dp.number_way_select_signals_mat + //TODO: revisit
+ (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
+ (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row *
+ g_tp.wire_inside_mat.pitch * SCHP;
+ }
+ //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
+ //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
+ h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
+ h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
+ h_addr_datain_wires +
+ h_bit_mux_dec_out_wires +
+ h_senseamp_mux_dec_out_wires;
- if (is_fa || pure_cam)
- {
- h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit
- (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
- (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
}
- //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
- //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
- h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
- h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
- h_addr_datain_wires +
- h_bit_mux_dec_out_wires +
- h_senseamp_mux_dec_out_wires;
-
- }
-
- // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
- double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
- b_mux_predec_blk_drv1->area.get_area() +
- sa_mux_lev_1_predec_blk_drv1->area.get_area() +
- sa_mux_lev_2_predec_blk_drv1->area.get_area() +
- way_sel_drv1->area.get_area() +
- r_predec_blk_drv2->area.get_area() +
- b_mux_predec_blk_drv2->area.get_area() +
- sa_mux_lev_1_predec_blk_drv2->area.get_area() +
- sa_mux_lev_2_predec_blk_drv2->area.get_area() +
- r_predec_blk1->area.get_area() +
- b_mux_predec_blk1->area.get_area() +
- sa_mux_lev_1_predec_blk1->area.get_area() +
- sa_mux_lev_2_predec_blk1->area.get_area() +
- r_predec_blk2->area.get_area() +
- b_mux_predec_blk2->area.get_area() +
- sa_mux_lev_1_predec_blk2->area.get_area() +
- sa_mux_lev_2_predec_blk2->area.get_area() +
- bit_mux_dec->area.get_area() +
- sa_mux_lev_1_dec->area.get_area() +
- sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
-
- double area_efficiency_mat;
+
+ // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
+ double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
+ b_mux_predec_blk_drv1->area.get_area() +
+ sa_mux_lev_1_predec_blk_drv1->area.get_area() +
+ sa_mux_lev_2_predec_blk_drv1->area.get_area() +
+ way_sel_drv1->area.get_area() +
+ r_predec_blk_drv2->area.get_area() +
+ b_mux_predec_blk_drv2->area.get_area() +
+ sa_mux_lev_1_predec_blk_drv2->area.get_area() +
+ sa_mux_lev_2_predec_blk_drv2->area.get_area() +
+ r_predec_blk1->area.get_area() +
+ b_mux_predec_blk1->area.get_area() +
+ sa_mux_lev_1_predec_blk1->area.get_area() +
+ sa_mux_lev_2_predec_blk1->area.get_area() +
+ r_predec_blk2->area.get_area() +
+ b_mux_predec_blk2->area.get_area() +
+ sa_mux_lev_1_predec_blk2->area.get_area() +
+ sa_mux_lev_2_predec_blk2->area.get_area() +
+ bit_mux_dec->area.get_area() +
+ sa_mux_lev_1_dec->area.get_area() +
+ sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
+
+ double area_efficiency_mat;
// if (!is_fa)
// {
- assert(num_subarrays_per_mat/num_subarrays_per_row>0);
- area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
+ assert(num_subarrays_per_mat / num_subarrays_per_row > 0);
+ area.h = (num_subarrays_per_mat / num_subarrays_per_row) *
+ subarray.area.h + h_non_cell_area;
area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
- area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
- area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
+ area.w = (area.h * area.w + area_mat_center_circuitry) / area.h;
+ area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat *
+ 100.0 / area.get_area();
// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
// cout<<"h_comparators"<<h_comparators<<endl;
@@ -413,8 +412,8 @@ Mat::Mat(const DynamicParameter & dyn_p)
// cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
// cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
- assert(area.h>0);
- assert(area.w>0);
+ assert(area.h > 0);
+ assert(area.w > 0);
// }
// else
// {
@@ -423,583 +422,609 @@ Mat::Mat(const DynamicParameter & dyn_p)
// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
// }
- }
-
-
-
-Mat::~Mat()
-{
- delete row_dec;
- delete bit_mux_dec;
- delete sa_mux_lev_1_dec;
- delete sa_mux_lev_2_dec;
-
- delete r_predec->blk1;
- delete r_predec->blk2;
- delete b_mux_predec->blk1;
- delete b_mux_predec->blk2;
- delete sa_mux_lev_1_predec->blk1;
- delete sa_mux_lev_1_predec->blk2;
- delete sa_mux_lev_2_predec->blk1;
- delete sa_mux_lev_2_predec->blk2;
- delete dummy_way_sel_predec_blk1;
- delete dummy_way_sel_predec_blk2;
-
- delete r_predec->drv1;
- delete r_predec->drv2;
- delete b_mux_predec->drv1;
- delete b_mux_predec->drv2;
- delete sa_mux_lev_1_predec->drv1;
- delete sa_mux_lev_1_predec->drv2;
- delete sa_mux_lev_2_predec->drv1;
- delete sa_mux_lev_2_predec->drv2;
- delete way_sel_drv1;
- delete dummy_way_sel_predec_blk_drv2;
-
- delete r_predec;
- delete b_mux_predec;
- delete sa_mux_lev_1_predec;
- delete sa_mux_lev_2_predec;
-
- delete subarray_out_wire;
- if (!pure_cam)
- delete bl_precharge_eq_drv;
-
- if (is_fa || pure_cam)
- {
- delete sl_precharge_eq_drv ;
- delete sl_data_drv ;
- delete cam_bl_precharge_eq_drv;
- delete ml_precharge_drv;
- delete ml_to_ram_wl_drv;
- }
}
-double Mat::compute_delays(double inrisetime)
-{
- int k;
- double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
- double outrisetime_search, outrisetime, row_dec_outrisetime;
- // delay calculation for tags of fully associative cache
- if (is_fa || pure_cam)
- {
- //Compute search access time
- outrisetime_search = compute_cam_delay(inrisetime);
- if (is_fa)
- {
- bl_precharge_eq_drv->compute_delay(0);
- k = ml_to_ram_wl_drv->number_gates - 1;
- rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
- C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
- drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
- C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
- tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
- delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
-
- R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
- r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
- R_bl = subarray.num_rows * r_b_metal;
- C_bl = subarray.C_bl;
- delay_bl_restore = bl_precharge_eq_drv->delay +
- log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
- (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
-
- outrisetime_search = compute_bitline_delay(outrisetime_search);
- outrisetime_search = compute_sa_delay(outrisetime_search);
- }
- outrisetime_search = compute_subarray_out_drv(outrisetime_search);
- subarray_out_wire->set_in_rise_time(outrisetime_search);
- outrisetime_search = subarray_out_wire->signal_rise_time();
- delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
-
- //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
- outrisetime = r_predec->compute_delays(inrisetime);
- row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
- outrisetime = b_mux_predec->compute_delays(inrisetime);
- bit_mux_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
- sa_mux_lev_1_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
- sa_mux_lev_2_dec->compute_delays(outrisetime);
-
- if (pure_cam)
- {
- outrisetime = compute_bitline_delay(row_dec_outrisetime);
- outrisetime = compute_sa_delay(outrisetime);
- }
- return outrisetime_search;
+Mat::~Mat() {
+ delete row_dec;
+ delete bit_mux_dec;
+ delete sa_mux_lev_1_dec;
+ delete sa_mux_lev_2_dec;
+
+ delete r_predec->blk1;
+ delete r_predec->blk2;
+ delete b_mux_predec->blk1;
+ delete b_mux_predec->blk2;
+ delete sa_mux_lev_1_predec->blk1;
+ delete sa_mux_lev_1_predec->blk2;
+ delete sa_mux_lev_2_predec->blk1;
+ delete sa_mux_lev_2_predec->blk2;
+ delete dummy_way_sel_predec_blk1;
+ delete dummy_way_sel_predec_blk2;
+
+ delete r_predec->drv1;
+ delete r_predec->drv2;
+ delete b_mux_predec->drv1;
+ delete b_mux_predec->drv2;
+ delete sa_mux_lev_1_predec->drv1;
+ delete sa_mux_lev_1_predec->drv2;
+ delete sa_mux_lev_2_predec->drv1;
+ delete sa_mux_lev_2_predec->drv2;
+ delete way_sel_drv1;
+ delete dummy_way_sel_predec_blk_drv2;
+
+ delete r_predec;
+ delete b_mux_predec;
+ delete sa_mux_lev_1_predec;
+ delete sa_mux_lev_2_predec;
+
+ delete subarray_out_wire;
+ if (!pure_cam)
+ delete bl_precharge_eq_drv;
+
+ if (is_fa || pure_cam) {
+ delete sl_precharge_eq_drv ;
+ delete sl_data_drv ;
+ delete cam_bl_precharge_eq_drv;
+ delete ml_precharge_drv;
+ delete ml_to_ram_wl_drv;
+ }
+}
+
+
+
+double Mat::compute_delays(double inrisetime) {
+ int k;
+ double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl;
+ double outrisetime_search, outrisetime, row_dec_outrisetime;
+ // delay calculation for tags of fully associative cache
+ if (is_fa || pure_cam) {
+ //Compute search access time
+ outrisetime_search = compute_cam_delay(inrisetime);
+ if (is_fa) {
+ bl_precharge_eq_drv->compute_delay(0);
+ k = ml_to_ram_wl_drv->number_gates - 1;
+ rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
+ C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 *
+ cell.h, is_dram, false, true) +
+ drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h,
+ is_dram, false, true);
+ C_ld = ml_to_ram_wl_drv->c_gate_load +
+ ml_to_ram_wl_drv->c_wire_load;
+ tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
+ delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+
+ R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+ r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
+ R_bl = subarray.num_rows * r_b_metal;
+ C_bl = subarray.C_bl;
+ delay_bl_restore = bl_precharge_eq_drv->delay +
+ log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
+ (g_tp.sram.Vbitpre - dp.V_b_sense)) *
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+
+ outrisetime_search = compute_bitline_delay(outrisetime_search);
+ outrisetime_search = compute_sa_delay(outrisetime_search);
+ }
+ outrisetime_search = compute_subarray_out_drv(outrisetime_search);
+ subarray_out_wire->set_in_rise_time(outrisetime_search);
+ outrisetime_search = subarray_out_wire->signal_rise_time();
+ delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+
+ //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
+ outrisetime = r_predec->compute_delays(inrisetime);
+ row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+ outrisetime = b_mux_predec->compute_delays(inrisetime);
+ bit_mux_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+ sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+ sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+ if (pure_cam) {
+ outrisetime = compute_bitline_delay(row_dec_outrisetime);
+ outrisetime = compute_sa_delay(outrisetime);
+ }
+ return outrisetime_search;
+ } else {
+ bl_precharge_eq_drv->compute_delay(0);
+ if (row_dec->exist == true) {
+ int k = row_dec->num_gates - 1;
+ double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
+ // TODO: this 4*cell.h number must be revisited
+ double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 *
+ cell.h, is_dram, false, true) +
+ drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram,
+ false, true);
+ double C_ld = row_dec->C_ld_dec_out;
+ double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
+ delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+ }
+ double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+ double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = subarray.num_rows * r_b_metal;
+ double C_bl = subarray.C_bl;
+
+ if (is_dram) {
+ delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+ } else {
+ delay_bl_restore = bl_precharge_eq_drv->delay +
+ log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
+ (g_tp.sram.Vbitpre - dp.V_b_sense)) *
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+ }
+ }
+
+
+
+ outrisetime = r_predec->compute_delays(inrisetime);
+ row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+ outrisetime = b_mux_predec->compute_delays(inrisetime);
+ bit_mux_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+ sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+ sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+ outrisetime = compute_bitline_delay(row_dec_outrisetime);
+ outrisetime = compute_sa_delay(outrisetime);
+ outrisetime = compute_subarray_out_drv(outrisetime);
+ subarray_out_wire->set_in_rise_time(outrisetime);
+ outrisetime = subarray_out_wire->signal_rise_time();
+
+ delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+ if (dp.is_tag == true && dp.fully_assoc == false) {
+ compute_comparator_delay(0);
}
- else
- {
- bl_precharge_eq_drv->compute_delay(0);
- if (row_dec->exist == true)
- {
- int k = row_dec->num_gates - 1;
- double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
- // TODO: this 4*cell.h number must be revisited
- double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
- drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
- double C_ld = row_dec->C_ld_dec_out;
- double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
- delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
- }
- double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
- double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
- double R_bl = subarray.num_rows * r_b_metal;
- double C_bl = subarray.C_bl;
-
- if (is_dram)
- {
- delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
- }
- else
- {
- delay_bl_restore = bl_precharge_eq_drv->delay +
- log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
- (R_bl_precharge * C_bl + R_bl * C_bl / 2);
- }
- }
-
-
-
- outrisetime = r_predec->compute_delays(inrisetime);
- row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
- outrisetime = b_mux_predec->compute_delays(inrisetime);
- bit_mux_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
- sa_mux_lev_1_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
- sa_mux_lev_2_dec->compute_delays(outrisetime);
-
- outrisetime = compute_bitline_delay(row_dec_outrisetime);
- outrisetime = compute_sa_delay(outrisetime);
- outrisetime = compute_subarray_out_drv(outrisetime);
- subarray_out_wire->set_in_rise_time(outrisetime);
- outrisetime = subarray_out_wire->signal_rise_time();
-
- delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
- if (dp.is_tag == true && dp.fully_assoc == false)
- {
- compute_comparator_delay(0);
- }
-
- if (row_dec->exist == false)
- {
- delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
+
+ if (row_dec->exist == false) {
+ delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
}
- return outrisetime;
+ return outrisetime;
}
-double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
-{
-
- double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
- compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry
-
- if (deg_bl_muxing > 1)
- {
- height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
- // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
- }
-
- height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
-
- if (dp.Ndsam_lev_1 > 1)
- {
- height += compute_tr_width_after_folding(
- g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
- //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
- }
-
- if (dp.Ndsam_lev_2 > 1)
- {
- height += compute_tr_width_after_folding(
- g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
- //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
-
- // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
- height += 2 * compute_tr_width_after_folding(
- pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
- height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
- }
-
- // TODO: this should be uncommented...
- /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
- {
- //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
- double width_write_driver_write_mux = width_write_driver_or_write_mux();
- double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
- cell.w *
- // deg_bl_muxing *
- dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
- height += height_write_driver_write_mux;
- }*/
-
- return height;
+double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() {
+
+ double height =
+ compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge,
+ camFlag ? cam_cell.w :
+ cell.w / (2 * (RWP + ERP + SCHP))) +
+ // precharge circuitry
+ compute_tr_width_after_folding(g_tp.w_pmos_bl_eq,
+ camFlag ? cam_cell.w :
+ cell.w / (RWP + ERP + SCHP));
+
+ if (deg_bl_muxing > 1) {
+ // col mux tr height
+ height +=
+ compute_tr_width_after_folding(g_tp.w_nmos_b_mux,
+ cell.w / (2 * (RWP + ERP)));
+ // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
+ }
+
+ height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
+
+ if (dp.Ndsam_lev_1 > 1) {
+ height += compute_tr_width_after_folding(
+ g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
+ //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+ }
+
+ if (dp.Ndsam_lev_2 > 1) {
+ height += compute_tr_width_after_folding(
+ g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
+ //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+
+ // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
+ height += 2 * compute_tr_width_after_folding(
+ pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+ height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+ }
+
+ // TODO: this should be uncommented...
+ /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
+ {
+ //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
+ double width_write_driver_write_mux = width_write_driver_or_write_mux();
+ double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
+ cell.w *
+ // deg_bl_muxing *
+ dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
+ height += height_write_driver_write_mux;
+ }*/
+
+ return height;
}
-double Mat::compute_cam_delay(double inrisetime)
-{
+double Mat::compute_cam_delay(double inrisetime) {
- double out_time_ramp, this_delay;
- double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
+ double out_time_ramp, this_delay;
+ double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
- double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
+ double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
- double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
- int Htagbits;
-
- double driver_c_gate_load;
- double driver_c_wire_load;
- double driver_r_wire_load;
- //double searchline_precharge_time;
-
- double leak_power_cc_inverters_sram_cell = 0;
- double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
- double leak_power_RD_port_sram_cell = 0;
- double leak_power_SCHP_port_sram_cell = 0;
- double leak_comparator_cam_cell =0;
-
- double gate_leak_comparator_cam_cell = 0;
- double gate_leak_power_cc_inverters_sram_cell = 0;
- double gate_leak_power_RD_port_sram_cell = 0;
- double gate_leak_power_SCHP_port_sram_cell = 0;
-
- c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
- c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
- r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
- r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
-
- dynSearchEng = 0.0;
- delay_matchchline = 0.0;
- double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
- bool linear_scaling = false;
-
- if (linear_scaling)
- {
- Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
- Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
- Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
- Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
- Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
- Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
- Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- W_hit_miss_n = Wdummyn;
- W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
- //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
- }
- else
- {
- Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
- Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
- Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
- Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
- Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
- Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
- Wdummyn = g_tp.cam.cell_nmos_w;
- Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
- Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- W_hit_miss_n = Wdummyn;
- W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
- }
-
- Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
-
- /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
- search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
- From the driver(am and an) to the comparators in all the rows including the dummy row,
- Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
-
- //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
- //Searchline precharge routes horizontally
- driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
-
- sl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
- //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
- driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
- driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
- driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
- sl_data_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- sl_precharge_eq_drv->compute_delay(0);
- double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
- double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
- double R_bl = (subarray.num_rows + 1) * r_b_metal;
- double C_bl = subarray.C_bl_cam;
- delay_cam_sl_restore = sl_precharge_eq_drv->delay
- + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
- out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
-
- //matchline ops delay
- delay_matchchline += sl_data_drv->delay;
-
- /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
- //matchline delay, matchline power, matchline_reset for cycle time computation,
-
- ////matchline precharge circuitry routes vertically
- //There are two matchline precharge driver chains per subarray.
- driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
- driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
- driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
-
- ml_precharge_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- ml_precharge_drv->compute_delay(0);
-
-
- rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
- c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
- + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
-
- Cwire = c_matchline_metal * Htagbits;
- Rwire = r_matchline_metal * Htagbits;
- c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
-
- double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
- //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- double R_ml = Rwire;
- double C_ml = Cwire + c_intrinsic;
- delay_cam_ml_reset = ml_precharge_drv->delay
- + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
-
- //matchline ops delay
- tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
- this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
- delay_matchchline += this_delay;
- out_time_ramp = this_delay / VTHFA3;
-
- dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
- * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
-
- /* third stage, from the NAND2 gates to the drivers in the dummy row */
- rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
- c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
- c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
- out_time_ramp = this_delay / (1 - VTHFA4);
- delay_matchchline += this_delay;
-
- //only the dummy row has the extra inverter between NAND and NOR gates
- dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
-
- /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
- rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
- c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
- Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
- c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
- tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
- this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
- out_time_ramp = this_delay / VTHFA5;
- delay_matchchline += this_delay;
-
- dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
- /*final statge from the NOR gate to drive the wordline of the data portion */
-
- //searchline data driver There are two matchline precharge driver chains per subarray.
- driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
- driver_c_wire_load = subarray.C_wl_ram;
- driver_r_wire_load = subarray.R_wl_ram;
-
- ml_to_ram_wl_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
-
-
- rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
- c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
- out_time_ramp = this_delay / (1-0.5);
- delay_matchchline += this_delay;
-
- out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
-
- //c_gate_load energy is computed in ml_to_ram_wl_drv
- dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
-
- /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
- /*Precharge the hitting logic */
- c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_searchline_metal * subarray.num_rows;
- Rwire = r_searchline_metal * subarray.num_rows;
- c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
- rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
- //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- double R_hit_miss = Rwire;
- double C_hit_miss = Cwire + c_intrinsic;
- delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
- dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
- /*hitting logic evaluation */
- c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_searchline_metal * subarray.num_rows;
- Rwire = r_searchline_metal * subarray.num_rows;
- c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
- rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
- tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
-
- delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
-
- if (is_fa)
- delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
-
- dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
- /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
-
- power_matchline.searchOp.dynamic = dynSearchEng;
-
- //leakage in one subarray
- double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
- double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
- double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
- double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
-
- leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
- leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
- leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
- leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
- leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
-
- power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
- leak_comparator_cam_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
- leak_power_RD_port_sram_cell * ERP +
- leak_power_SCHP_port_sram_cell*SCHP;
+ double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
+ int Htagbits;
+
+ double driver_c_gate_load;
+ double driver_c_wire_load;
+ double driver_r_wire_load;
+ //double searchline_precharge_time;
+
+ double leak_power_cc_inverters_sram_cell = 0;
+ double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+ double leak_power_RD_port_sram_cell = 0;
+ double leak_power_SCHP_port_sram_cell = 0;
+ double leak_comparator_cam_cell =0;
+
+ double gate_leak_comparator_cam_cell = 0;
+ double gate_leak_power_cc_inverters_sram_cell = 0;
+ double gate_leak_power_RD_port_sram_cell = 0;
+ double gate_leak_power_SCHP_port_sram_cell = 0;
+
+ c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
+ c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
+ r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
+ r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
+
+ dynSearchEng = 0.0;
+ delay_matchchline = 0.0;
+ double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
+ bool linear_scaling = false;
+
+ if (linear_scaling) {
+ Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+ Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+ Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+ Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+ Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+ Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
+ Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ W_hit_miss_n = Wdummyn;
+ W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
+ //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
+ } else {
+ Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+ Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+ Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+ Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+ Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+ Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
+ Wdummyn = g_tp.cam.cell_nmos_w;
+ Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
+ Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ W_hit_miss_n = Wdummyn;
+ W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
+ }
+
+ Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
+
+ /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
+ search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
+ From the driver(am and an) to the comparators in all the rows including the dummy row,
+ Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
+
+ //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
+ //Searchline precharge routes horizontally
+ driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
+
+ sl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
+ //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
+ driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
+ driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+ driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+ sl_data_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ sl_precharge_eq_drv->compute_delay(0);
+ double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
+ double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = (subarray.num_rows + 1) * r_b_metal;
+ double C_bl = subarray.C_bl_cam;
+ delay_cam_sl_restore = sl_precharge_eq_drv->delay
+ + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+ out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
+
+ //matchline ops delay
+ delay_matchchline += sl_data_drv->delay;
+
+ /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
+ //matchline delay, matchline power, matchline_reset for cycle time computation,
+
+ ////matchline precharge circuitry routes vertically
+ //There are two matchline precharge driver chains per subarray.
+ driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
+ driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+ driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+
+ ml_precharge_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ ml_precharge_drv->compute_delay(0);
+
+
+ rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
+ c_intrinsic = Htagbits *
+ (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def,
+ is_dram)//TODO: the cell_h_def should be revisit
+ + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) /
+ Htagbits);//since each halve only has one precharge tx per matchline
+
+ Cwire = c_matchline_metal * Htagbits;
+ Rwire = r_matchline_metal * Htagbits;
+ c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
+
+ double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
+ //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ double R_ml = Rwire;
+ double C_ml = Cwire + c_intrinsic;
+ //TODO: latest CAM has sense amps on matchlines too
+ delay_cam_ml_reset = ml_precharge_drv->delay
+ + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2);
+
+ //matchline ops delay
+ tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+ this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
+ delay_matchchline += this_delay;
+ out_time_ramp = this_delay / VTHFA3;
+
+ dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) *
+ (subarray.num_rows + 1)) //TODO: need to be precise
+ * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *
+ 2;//each subarry has two halves
+
+ /* third stage, from the NAND2 gates to the drivers in the dummy row */
+ rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
+ c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2;
+ c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
+ out_time_ramp = this_delay / (1 - VTHFA4);
+ delay_matchchline += this_delay;
+
+ //only the dummy row has the extra inverter between NAND and NOR gates
+ dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) *
+ g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
+
+ /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
+ rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
+ c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_matchline_metal * Htagbits + c_searchline_metal *
+ (subarray.num_rows + 1) / 2;
+ Rwire = r_matchline_metal * Htagbits + r_searchline_metal *
+ (subarray.num_rows + 1) / 2;
+ c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
+ tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+ this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
+ out_time_ramp = this_delay / VTHFA5;
+ delay_matchchline += this_delay;
+
+ dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) *
+ g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+ /*final statge from the NOR gate to drive the wordline of the data portion */
+
+ //searchline data driver There are two matchline precharge driver chains per subarray.
+ driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
+ driver_c_wire_load = subarray.C_wl_ram;
+ driver_r_wire_load = subarray.R_wl_ram;
+
+ ml_to_ram_wl_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+
+
+ rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
+ c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
+ out_time_ramp = this_delay / (1 - 0.5);
+ delay_matchchline += this_delay;
+
+ out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
+
+ //c_gate_load energy is computed in ml_to_ram_wl_drv
+ dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+
+ /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
+ /*Precharge the hitting logic */
+ c_intrinsic = 2 *
+ drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_searchline_metal * subarray.num_rows;
+ Rwire = r_searchline_metal * subarray.num_rows;
+ c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
+ subarray.num_rows;
+
+ rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
+ //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ double R_hit_miss = Rwire;
+ double C_hit_miss = Cwire + c_intrinsic;
+ delay_hit_miss_reset = log(g_tp.cam.Vbitpre) *
+ (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
+ dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+ /*hitting logic evaluation */
+ c_intrinsic = 2 *
+ drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_searchline_metal * subarray.num_rows;
+ Rwire = r_searchline_metal * subarray.num_rows;
+ c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
+ subarray.num_rows;
+
+ rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
+ tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+
+ delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
+
+ if (is_fa)
+ delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
+
+ dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+ /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
+
+ power_matchline.searchOp.dynamic = dynSearchEng;
+
+ //leakage in one subarray
+ double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
+ double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
+ double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
+ 1, inv, false, true) * 2;
+ //approx XOR with Inv
+ double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv,
+ false, true) * 2;
+
+ leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
+ leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
+ leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
+ leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
+ leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
+
+ power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
+ leak_comparator_cam_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+ leak_power_RD_port_sram_cell * ERP +
+ leak_power_SCHP_port_sram_cell * SCHP;
// power_matchline.searchOp.leakage += leak_comparator_cam_cell;
- power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
- //In idle states, the hit/miss txs are closed (on) therefore no Isub
- power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
+ power_matchline.searchOp.leakage *= (subarray.num_rows + 1) *
+ subarray.num_cols_fa_cam;//TODO:dumy line precise
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+ //In idle states, the hit/miss txs are closed (on) therefore no Isub
+ power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
// + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
- //in idle state, Ig_on only possibly exist in access transistors of read only ports
- double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
- double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
- double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
-
- gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
- gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
- gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
- gate_leak_power_SCHP_port_sram_cell = 0;
-
- //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
-
- power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
- power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
- power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
- power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
- + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
-
-
- return out_time_ramp;
+ //in idle state, Ig_on only possibly exist in access transistors of read only ports
+ double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
+ double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
+ 1, inv, false, true) * 2;
+ double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv,
+ false, true) * 2;
+
+ gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd;
+ gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd;
+ gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
+ gate_leak_power_SCHP_port_sram_cell = 0;
+
+ //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
+
+ power_matchline.searchOp.gate_leakage +=
+ gate_leak_power_cc_inverters_sram_cell;
+ power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
+ power_matchline.searchOp.gate_leakage +=
+ gate_leak_power_SCHP_port_sram_cell * SCHP +
+ gate_leak_power_RD_port_sram_cell * ERP;
+ power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) *
+ subarray.num_cols_fa_cam;//TODO:dumy line precise
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += subarray.num_rows *
+ cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd +
+ + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd;
+
+
+ return out_time_ramp;
}
// Mat::width_write_driver_or_write_mux -- shown here as a diff: the "-" lines
// are the removed rendering and the "+" lines the added rendering of the same
// body. The statements visible in both renderings are token-for-token the
// same; only the placement of the opening brace differs in this view.
//
// Takes no arguments. Returns the width produced by
// R_to_w(target_R_write_driver_and_mux, NCH, is_dram), where the target
// resistance is (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2.
//
// NOTE(review): R_sram_cell_pull_up_tr is evaluated on g_tp.sram.cell_pmos_w
// but with the NCH device type, although the existing comment calls it a
// pull-up PMOS transistor -- confirm whether PCH was intended.
// NOTE(review): nothing here guards against the target resistance being zero
// or negative (R_access_tr >= 2 * R_sram_cell_pull_up_tr); how R_to_w handles
// that is not visible from this block -- confirm.
-double Mat::width_write_driver_or_write_mux()
-{
- // calculate resistance of SRAM cell pull-up PMOS transistor
- // cam and sram have same cell trasistor properties
- double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
- double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
- double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
- double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
+double Mat::width_write_driver_or_write_mux() {
+ // calculate resistance of SRAM cell pull-up PMOS transistor
+ // cam and sram have same cell trasistor properties
+ double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
+ double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
+ double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
+ double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
- return width_write_driver_nmos;
+ return width_write_driver_nmos;
}
@@ -1007,134 +1032,164 @@ double Mat::width_write_driver_or_write_mux()
// Mat::compute_comparators_height -- shown here as a diff: the unprefixed lines
// are unchanged context, the "-" lines the removed rendering and the "+" lines
// the added rendering of the same body. The statements are identical in both;
// only the placement of the opening brace differs in this view.
//
// Parameters:
//   tagbits                       - multiplied into the cumulative area.
//   number_ways_in_mat            - multiplied into the cumulative area.
//   subarray_mem_cell_area_width  - divisor that turns the area into a height.
//
// Returns cumulative_area / subarray_mem_cell_area_width, where
// cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4 and
// nand2_area comes from compute_gate_area(NAND, 2, 0, g_tp.w_comp_n,
// g_tp.cell_h_def).
//
// NOTE(review): the origin of the "/ 4" factor is not explained in this
// block -- confirm against the comparator layout assumptions.
// NOTE(review): there is no guard for subarray_mem_cell_area_width == 0; the
// double division would then yield inf or NaN rather than fail.
double Mat::compute_comparators_height(
int tagbits,
int number_ways_in_mat,
- double subarray_mem_cell_area_width)
-{
- double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
- double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
- return cumulative_area / subarray_mem_cell_area_width;
+ double subarray_mem_cell_area_width) {
+ double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
+ double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
+ return cumulative_area / subarray_mem_cell_area_width;
}
-double Mat::compute_bitline_delay(double inrisetime)
-{
- double V_b_pre, v_th_mem_cell, V_wl;
- double tstep;
- double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
- double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
- int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
-
- double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
- double R_bl = subarray.num_rows * R_b_metal;
- double C_bl = subarray.C_bl;
-
- // TODO: no leakage for DRAMs?
- double leak_power_cc_inverters_sram_cell = 0;
- double gate_leak_power_cc_inverters_sram_cell = 0;
- double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
- double leak_power_RD_port_sram_cell = 0;
- double gate_leak_power_RD_port_sram_cell = 0;
-
- if (is_dram == true)
- {
- V_b_pre = g_tp.dram.Vbitpre;
- v_th_mem_cell = g_tp.dram_acc.Vth;
- V_wl = g_tp.vpp;
- //The access transistor is not folded. So we just need to specify a threshold value for the
- //folding width that is equal to or greater than Wmemcella.
- R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
- r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
- }
- else
- { //SRAM
- V_b_pre = g_tp.sram.Vbitpre;
- v_th_mem_cell = g_tp.sram_cell.Vth;
- V_wl = g_tp.sram_cell.Vdd;
- R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
- R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
-
- //Leakage current of an SRAM cell
- double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2?
- double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true);
- double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
-
- leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
- leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
- leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
+double Mat::compute_bitline_delay(double inrisetime) {
+ double V_b_pre, v_th_mem_cell, V_wl;
+ double tstep;
+ double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
+ double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0;
+ int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
+
+ double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = subarray.num_rows * R_b_metal;
+ double C_bl = subarray.C_bl;
+
+ // TODO: no leakage for DRAMs?
+ double leak_power_cc_inverters_sram_cell = 0;
+ double gate_leak_power_cc_inverters_sram_cell = 0;
+ double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+ double leak_power_RD_port_sram_cell = 0;
+ double gate_leak_power_RD_port_sram_cell = 0;
+
+ if (is_dram == true) {
+ V_b_pre = g_tp.dram.Vbitpre;
+ v_th_mem_cell = g_tp.dram_acc.Vth;
+ V_wl = g_tp.vpp;
+ //The access transistor is not folded. So we just need to specify a
+ // threshold value for the folding width that is equal to or greater
+ // than Wmemcella.
+ R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
+ r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
+ } else { //SRAM
+ V_b_pre = g_tp.sram.Vbitpre;
+ v_th_mem_cell = g_tp.sram_cell.Vth;
+ V_wl = g_tp.sram_cell.Vdd;
+ R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
+ R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
+
+ //Leakage current of an SRAM cell
+ //TODO: how much is the idle time? just by *2?
+ double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
+ false, true);
+ double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,
+ false, true);
+ double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w,
+ g_tp.sram.cell_pmos_w, 1, inv, false,
+ true) * 2;//two invs per cell
+
+ leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
+ leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
+ leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
+
+
+ //in idle state, Ig_on only possibly exist in access transistors of read only ports
+ double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
+ false, true);
+ double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w,
+ g_tp.sram.cell_pmos_w, 1, inv, false,
+ true);
+
+ gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd;
+ gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
+ }
- //in idle state, Ig_on only possibly exist in access transistors of read only ports
- double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
- double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
-
- gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
- gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
- }
-
-
- double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
- double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
- double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
- double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
-
- if (is_dram)
- {
- double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
- tstep = 2.3 * fraction * r_dev *
- (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
- (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
- delay_writeback = tstep;
- dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
- dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
- per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
- }
- else
- {
- double tau;
-
- if (deg_bl_muxing > 1)
- {
- tau = (R_cell_pull_down + R_cell_acc) *
- (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
- dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
- subarray.num_cols * num_subarrays_per_mat*/;
- dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
- dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
- num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
- //Write Ops are differential for SRAM
- }
- else
- {
- tau = (R_cell_pull_down + R_cell_acc) *
- (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
- R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
- dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
- dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
- num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
+ double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0,
+ camFlag ? cam_cell.w : cell.w /
+ (2 * (RWP + ERP + SCHP)), is_dram);
+ double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
+ double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0,
+ camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram);
+ double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
+ double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0,
+ is_dram) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
+ double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
+ camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram);
+
+ if (is_dram) {
+ double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_C /
+ (g_tp.dram_cell_C + C_bl));
+ tstep = 2.3 * fraction * r_dev *
+ (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux)) /
+ (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux));
+ delay_writeback = tstep;
+ dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) *
+ (g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+ dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) *
+ (g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ *
+ num_act_mats_hor_dir * 100;
+ per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) *
+ (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
+ } else {
+ double tau;
+
+ if (deg_bl_muxing > 1) {
+ tau = (R_cell_pull_down + R_cell_acc) *
+ (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 *
+ C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) +
+ R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux);
+ dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense *
+ g_tp.sram_cell.Vdd;
+ dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) *
+ 2 * dp.V_b_sense * g_tp.sram_cell.Vdd *
+ (1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
+ deg_bl_muxing);
+ dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ /
+ deg_bl_muxing) / deg_senseamp_muxing) *
+ num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) *
+ g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
+ //Write Ops are differential for SRAM
+ } else {
+ tau = (R_cell_pull_down + R_cell_acc) *
+ (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
+ R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
+ dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
+ 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+ dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
+ deg_bl_muxing) / deg_senseamp_muxing) *
+ num_act_mats_hor_dir * C_bl) *
+ g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
+
+ }
+ tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
+ power_bitline.readOp.leakage =
+ leak_power_cc_inverters_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+ leak_power_RD_port_sram_cell * ERP;
+ power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
+ gate_leak_power_RD_port_sram_cell * ERP;
}
- tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
- power_bitline.readOp.leakage =
- leak_power_cc_inverters_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
- leak_power_RD_port_sram_cell * ERP;
- power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
- gate_leak_power_RD_port_sram_cell * ERP;
-
- }
// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
@@ -1142,607 +1197,684 @@ double Mat::compute_bitline_delay(double inrisetime)
// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
- /* take input rise time into account */
- double m = V_wl / inrisetime;
- if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
- {
- delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
- }
- else
- {
- delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
- }
+ /* take input rise time into account */
+ double m = V_wl / inrisetime;
+ if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) {
+ delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m);
+ } else {
+ delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
+ }
- bool is_fa = (dp.fully_assoc) ? true : false;
+ bool is_fa = (dp.fully_assoc) ? true : false;
- if (dp.is_tag == false || is_fa == false)
- {
- power_bitline.readOp.dynamic = dynRdEnergy;
- power_bitline.writeOp.dynamic = dynWriteEnergy;
- }
+ if (dp.is_tag == false || is_fa == false) {
+ power_bitline.readOp.dynamic = dynRdEnergy;
+ power_bitline.writeOp.dynamic = dynWriteEnergy;
+ }
- double outrisetime = 0;
- return outrisetime;
+ double outrisetime = 0;
+ return outrisetime;
}
-double Mat::compute_sa_delay(double inrisetime)
-{
- //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
-
- //Bitline circuitry leakage.
- double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
- double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
- double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
- double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
-
- double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
- //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
- double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
- //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
- // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
- double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
- leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
- leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
-
- // sense amplifier has to drive logic in "data out driver" and sense precharge load.
- // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
- //constant as well as the magnitude of input differential voltage.
- double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double tau = C_ld / g_tp.gm_sense_amp_latch;
- delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
- power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
+double Mat::compute_sa_delay(double inrisetime) {
+ //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
+
+ //Bitline circuitry leakage.
+ double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
+ double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
+ double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
+ double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
+
+ double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
+ //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
+ double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
+ //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
+ // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
+ double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
+ leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+ leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+
+ // sense amplifier has to drive logic in "data out driver" and sense precharge load.
+ // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
+ //constant as well as the magnitude of input differential voltage.
+ double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 0,
+ camFlag ? cam_cell.w : cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram);
+ double tau = C_ld / g_tp.gm_sense_amp_latch;
+ delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
+ power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
num_subarrays_per_mat * num_act_mats_hor_dir*/;
- power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
+ power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
- double outrisetime = 0;
- return outrisetime;
+ double outrisetime = 0;
+ return outrisetime;
}
-double Mat::compute_subarray_out_drv(double inrisetime)
-{
- double C_ld, rd, tf, this_delay;
- double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
-
- // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
- rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
- C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
- // delay of signal through inverter-buffer to second level of sense-amp mux.
- // internal delay of buffer
- rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
- C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-
- // inverter driving drain of pass transistor of second level of sense-amp mux.
- rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
- C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-
-
- // delay of signal through pass-transistor to input of subarray output driver.
- rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
- C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
- //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
- gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
-
-
- return inrisetime;
+double Mat::compute_subarray_out_drv(double inrisetime) {
+ double C_ld, rd, tf, this_delay;
+ double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
+
+ // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+ C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
+ camFlag ? cam_cell.w : cell.w *
+ deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
+ // delay of signal through inverter-buffer to second level of sense-amp mux.
+ // internal delay of buffer
+ rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+ C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage +=
+ cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv, is_dram) * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd;
+
+ // inverter driving drain of pass transistor of second level of sense-amp mux.
+ rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+ C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def,
+ is_dram) +
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 /
+ (RWP + ERP + SCHP), is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage +=
+ cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd;
+
+
+ // delay of signal through pass-transistor to input of subarray output driver.
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+ C_ld = dp.Ndsam_lev_2 *
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP),
+ is_dram) +
+ //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
+ gate_C(subarray_out_wire->repeater_size *
+ (subarray_out_wire->wire_length /
+ subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ *
+ (1 + p_to_n_sz_r), 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
+
+
+ return inrisetime;
}
-double Mat::compute_comparator_delay(double inrisetime)
-{
- int A = g_ip->tag_assoc;
-
- int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
- // a multiple of 4.
-
- /* First Inverter */
- double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
- double tf = Req*Ceq;
- double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
- double nextinputtime = st1del/VTHCOMPINV;
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
-
- //For each degree of associativity
- //there are 4 such quarter comparators
- double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
- double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
- /* Second Inverter */
- Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
- tf = Req*Ceq;
- double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
- nextinputtime = st2del/(1.0-VTHCOMPINV);
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
-
- /* Third Inverter */
- Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
- tf = Req*Ceq;
- double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
- nextinputtime = st3del/(VTHEVALINV);
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
-
- /* Final Inverter (virtual ground driver) discharging compare part */
- double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
- double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
- double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
- drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
- double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
- drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
- gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
- power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
- lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
-
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
-
- /* time to go to threshold of mux driver */
- double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
- /* take into account non-zero input rise time */
- double m = g_tp.peri_global.Vdd/nextinputtime;
- double Tcomparatorni;
-
- if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
- {
- double a = m;
- double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
- double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
- Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
- }
- else
- {
- Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
- }
- delay_comparator = Tcomparatorni+st1del+st2del+st3del;
- power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
- power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
-
- return Tcomparatorni / (1.0 - VTHMUXNAND);;
+double Mat::compute_comparator_delay(double inrisetime) {
+ int A = g_ip->tag_assoc;
+
+ int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
+ // a multiple of 4.
+
+ /* First Inverter */
+ double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
+ double tf = Req * Ceq;
+ double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL);
+ double nextinputtime = st1del / VTHCOMPINV;
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+
+ //For each degree of associativity
+ //there are 4 such quarter comparators
+ double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1,
+ g_tp.w_comp_inv_p1, 1, inv,
+ is_dram) * 4 * A;
+ double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1,
+ g_tp.w_comp_inv_p1, 1, inv,
+ is_dram) * 4 * A;
+ /* Second Inverter */
+ Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
+ tf = Req * Ceq;
+ double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE);
+ nextinputtime = st2del / (1.0 - VTHCOMPINV);
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
+ inv, is_dram) * 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
+ inv, is_dram) * 4 * A;
+
+ /* Third Inverter */
+ Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
+ tf = Req * Ceq;
+ double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL);
+ nextinputtime = st3del / (VTHEVALINV);
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1,
+ inv, is_dram) * 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3,
+ 1, inv, is_dram) * 4 * A;
+
+ /* Final Inverter (virtual ground driver) discharging compare part */
+ double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram);
+ double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */
+ double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
+ g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_n, NCH, 2, 1,
+ g_tp.cell_h_def, is_dram)) +
+ drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
+ g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_n, NCH, 2, 1,
+ g_tp.cell_h_def, is_dram)) +
+ drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram);
+ power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
+ inv, is_dram) * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
+ is_dram) * 4 * A; // stack factor of 0.2
+
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
+ inv, is_dram) * 4 * A;
+ //for gate leakage this equals to a inverter
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
+ is_dram) * 4 * A;
+
+ /* time to go to threshold of mux driver */
+ double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND);
+ /* take into account non-zero input rise time */
+ double m = g_tp.peri_global.Vdd / nextinputtime;
+ double Tcomparatorni;
+
+ if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) {
+ double a = m;
+ double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) -
+ g_tp.peri_global.Vth);
+ double c = -2 * (tstep) * (g_tp.peri_global.Vdd -
+ g_tp.peri_global.Vth) + 1 / m *
+ ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) *
+ ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth);
+ Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a);
+ } else {
+ Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd +
+ g_tp.peri_global.Vth) / (2 * m) -
+ (g_tp.peri_global.Vdd * VTHEVALINV) / m;
+ }
+ delay_comparator = Tcomparatorni + st1del + st2del + st3del;
+ power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
+ power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
+
+ return Tcomparatorni / (1.0 - VTHMUXNAND);;
}
-void Mat::compute_power_energy()
-{
- //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
+void Mat::compute_power_energy() {
+ //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
//when search all subarrays and all mats are fully active
- //when plain read/write only one subarray in a single mat is active.
+ //when plain read/write only one subarray in a single mat is active.
// add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
- power.readOp.dynamic += r_predec->power.readOp.dynamic +
- b_mux_predec->power.readOp.dynamic +
- sa_mux_lev_1_predec->power.readOp.dynamic +
- sa_mux_lev_2_predec->power.readOp.dynamic;
-
- // add energy consumed in decoders
- power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
- if (!(is_fa||pure_cam))
- power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
-
- // add energy consumed in bitline prechagers, SAs, and bitlines
- if (!(is_fa||pure_cam))
- {
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
- power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
-
- //Add sense amps energy
- num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
-
- // add energy consumed in bitlines
- //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
- power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
- power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
- //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
- //Add subarray output energy
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
- }
-
- else if (is_fa)
- {
- //for plain read/write only one subarray in a mat is active
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
- + cam_bl_precharge_eq_drv->power.readOp.dynamic;
- power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
-
- //Add sense amps energy
- num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
- num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
- power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
- power_sa.readOp.dynamic *= num_sa_subarray;
-
-
- // add energy consumed in bitlines
- power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
- power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
- power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
- power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
-
- //Add subarray output energy
- power_subarray_out_drv.searchOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
-
- //add energy consumed inside cam
- power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
- power_searchline_precharge = sl_precharge_eq_drv->power;
- power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
- power_searchline = sl_data_drv->power;
- power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
- power_matchline_precharge = ml_precharge_drv->power;
- power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
- power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
- power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
- power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
- //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
- }
- else
- {
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
- //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
- //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
- //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
-
- //Add sense amps energy
- num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
- power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
- power_sa.searchOp.dynamic = 0;
-
- power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
- power_bitline.searchOp.dynamic = 0;
- power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
-
- power_subarray_out_drv.searchOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
-
-
- ////add energy consumed inside cam
- power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
- power_searchline_precharge = sl_precharge_eq_drv->power;
- power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
- power_searchline = sl_data_drv->power;
- power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
- power_matchline_precharge = ml_precharge_drv->power;
- power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
- power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
- power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
- power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
- //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
- }
-
-
-
- // calculate leakage power
- if (!(is_fa || pure_cam))
- {
- int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
- //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power.readOp.leakage += power_bitline.readOp.leakage +
- power_bl_precharge_eq_drv.readOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
- //cout<<"leakage"<<power.readOp.leakage<<endl;
-
- power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
- power.readOp.leakage += power_comparator.readOp.leakage;
-
- //cout<<"leakage1"<<power.readOp.leakage<<endl;
-
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
-
- power.readOp.leakage += r_predec->power.readOp.leakage +
- b_mux_predec->power.readOp.leakage +
- sa_mux_lev_1_predec->power.readOp.leakage +
- sa_mux_lev_2_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage +
- power_bit_mux_decoders.readOp.leakage +
- power_sa_mux_lev_1_decoders.readOp.leakage +
- power_sa_mux_lev_2_decoders.readOp.leakage;
- //cout<<"leakage2"<<power.readOp.leakage<<endl;
-
- //++++Below is gate leakage
- power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
- //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
- power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
- //cout<<"leakage"<<power.readOp.leakage<<endl;
-
- power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
- power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
-
- //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
-
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
-
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- b_mux_predec->power.readOp.gate_leakage +
- sa_mux_lev_1_predec->power.readOp.gate_leakage +
- sa_mux_lev_2_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage +
- power_bit_mux_decoders.readOp.gate_leakage +
- power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- power_sa_mux_lev_2_decoders.readOp.gate_leakage;
- }
- else if (is_fa)
- {
- int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
- //cout<<"leakage3"<<power.readOp.leakage<<endl;
-
-
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.leakage += power_bitline.readOp.leakage +
- power_bl_precharge_eq_drv.readOp.leakage +
- power_bl_precharge_eq_drv.searchOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
-
- //cout<<"leakage4"<<power.readOp.leakage<<endl;
-
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
- power.readOp.leakage += r_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage;
-
- //cout<<"leakage5"<<power.readOp.leakage<<endl;
-
- //inside cam
- power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
-
- power.readOp.leakage += power_cam_all_active.searchOp.leakage;
-
-// cout<<"leakage6"<<power.readOp.leakage<<endl;
-
- //+++Below is gate leakage
- power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
- //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
-
-
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
- power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_bl_precharge_eq_drv.searchOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
-
- //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
-
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage;
+ power.readOp.dynamic += r_predec->power.readOp.dynamic +
+ b_mux_predec->power.readOp.dynamic +
+ sa_mux_lev_1_predec->power.readOp.dynamic +
+ sa_mux_lev_2_predec->power.readOp.dynamic;
+
+ // add energy consumed in decoders
+ power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
+ if (!(is_fa || pure_cam))
+ power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
+
+ // add energy consumed in bitline prechagers, SAs, and bitlines
+ if (!(is_fa || pure_cam)) {
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+ power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+
+ //Add sense amps energy
+ num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ;
+
+ // add energy consumed in bitlines
+ //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
+ power_bitline.readOp.dynamic *= num_subarrays_per_mat *
+ subarray.num_cols;
+ power_bitline.writeOp.dynamic *= num_subarrays_per_mat *
+ subarray.num_cols;
+ //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
+ //Add subarray output energy
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+ }
- //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+ else if (is_fa) {
+ //for plain read/write only one subarray in a mat is active
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
+ + cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+
+ //Add sense amps energy
+ num_sa_subarray = (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram) / deg_bl_muxing;
+ num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing;
+ power_sa.searchOp.dynamic = power_sa.readOp.dynamic *
+ num_sa_subarray_search;
+ power_sa.readOp.dynamic *= num_sa_subarray;
+
+
+ // add energy consumed in bitlines
+ power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
+ power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram);
+ power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram);
+ power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
+
+ //Add subarray output energy
+ power_subarray_out_drv.searchOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+
+ //add energy consumed inside cam
+ power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+ power_searchline_precharge = sl_precharge_eq_drv->power;
+ power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_searchline = sl_data_drv->power;
+ power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
+ subarray.num_cols_fa_cam * num_subarrays_per_mat;;
+ power_matchline_precharge = ml_precharge_drv->power;
+ power_matchline_precharge.searchOp.dynamic =
+ power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
+ power_ml_to_ram_wl_drv.searchOp.dynamic =
+ ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline_precharge.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_matchline_precharge.searchOp.dynamic;
+
+ power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+ //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ } else {
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+ //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
+
+ //Add sense amps energy
+ num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing;
+ power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
+ power_sa.searchOp.dynamic = 0;
+
+ power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
+ power_bitline.searchOp.dynamic = 0;
+ power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
+
+ power_subarray_out_drv.searchOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+
+
+ ////add energy consumed inside cam
+ power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+ power_searchline_precharge = sl_precharge_eq_drv->power;
+ power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_searchline = sl_data_drv->power;
+ power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
+ subarray.num_cols_fa_cam * num_subarrays_per_mat;;
+ power_matchline_precharge = ml_precharge_drv->power;
+ power_matchline_precharge.searchOp.dynamic =
+ power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
+ power_ml_to_ram_wl_drv.searchOp.dynamic =
+ ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power_cam_all_active.searchOp.dynamic =
+ power_matchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline_precharge.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_matchline_precharge.searchOp.dynamic;
+
+ power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+ //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
- //inside cam
- power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+ }
- power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
- }
- else
- {
- int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
- //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+ // calculate leakage power
+ if (!(is_fa || pure_cam)) {
+ int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+ power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP);
+
+ //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+ power.readOp.leakage += power_bitline.readOp.leakage +
+ power_bl_precharge_eq_drv.readOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+ //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+ power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
+ power.readOp.leakage += power_comparator.readOp.leakage;
+
+ //cout<<"leakage1"<<power.readOp.leakage<<endl;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+ power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
+ power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
+ power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
+
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ b_mux_predec->power.readOp.leakage +
+ sa_mux_lev_1_predec->power.readOp.leakage +
+ sa_mux_lev_2_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage +
+ power_bit_mux_decoders.readOp.leakage +
+ power_sa_mux_lev_1_decoders.readOp.leakage +
+ power_sa_mux_lev_2_decoders.readOp.leakage;
+ //cout<<"leakage2"<<power.readOp.leakage<<endl;
+
+ //++++Below is gate leakage
+ power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP);
+
+ //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+ power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+ //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+ power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
+ power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
+
+ //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+ power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
+ power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
+ power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
+
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ b_mux_predec->power.readOp.gate_leakage +
+ sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage +
+ power_bit_mux_decoders.readOp.gate_leakage +
+ power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ power_sa_mux_lev_2_decoders.readOp.gate_leakage;
+ } else if (is_fa) {
+ int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+ power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP + SCHP);
- power.readOp.leakage += //power_bitline.readOp.leakage +
- //power_bl_precharge_eq_drv.readOp.leakage +
- power_bl_precharge_eq_drv.searchOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
+ //cout<<"leakage3"<<power.readOp.leakage<<endl;
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
- power.readOp.leakage += r_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage;
- //inside cam
- power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
- power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+ power.readOp.leakage += power_bitline.readOp.leakage +
+ power_bl_precharge_eq_drv.readOp.leakage +
+ power_bl_precharge_eq_drv.searchOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
- //+++Below is gate leakage
- power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+ //cout<<"leakage4"<<power.readOp.leakage<<endl;
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage;
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+ //cout<<"leakage5"<<power.readOp.leakage<<endl;
- power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
- //power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_bl_precharge_eq_drv.searchOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
+ //inside cam
+ power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_precharge_eq_drv->power.readOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.leakage *=
+ num_subarrays_per_mat;
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage;
+ power.readOp.leakage += power_cam_all_active.searchOp.leakage;
- //inside cam
- power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+// cout<<"leakage6"<<power.readOp.leakage<<endl;
- power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
- }
+ //+++Below is gate leakage
+ power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
+
+
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.searchOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+
+ //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage;
+
+ //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+
+ //inside cam
+ power_cam_all_active.searchOp.gate_leakage =
+ power_matchline.searchOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_precharge_eq_drv->power.readOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.gate_leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+
+ power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+
+ } else {
+ int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+
+ //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP + SCHP);
+
+
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.leakage += //power_bitline.readOp.leakage +
+ //power_bl_precharge_eq_drv.readOp.leakage +
+ power_bl_precharge_eq_drv.searchOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage *
+ subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP);
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage;
+
+ //inside cam
+ power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_precharge_eq_drv->power.readOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+
+ power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+
+ //+++Below is gate leakage
+ power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
+ //power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.searchOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage =
+ row_dec->power.readOp.gate_leakage * subarray.num_rows *
+ num_subarrays_per_mat * (RWP + ERP + EWP);
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage;
+
+ //inside cam
+ power_cam_all_active.searchOp.gate_leakage =
+ power_matchline.searchOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_precharge_eq_drv->power.readOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.gate_leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.gate_leakage *=
+ num_subarrays_per_mat;
+
+ power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+ }
}
diff --git a/ext/mcpat/cacti/mat.h b/ext/mcpat/cacti/mat.h
index 8d038be8b..38200107c 100755
--- a/ext/mcpat/cacti/mat.h
+++ b/ext/mcpat/cacti/mat.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,9 +40,8 @@
#include "subarray.h"
#include "wire.h"
-class Mat : public Component
-{
- public:
+class Mat : public Component {
+public:
Mat(const DynamicParameter & dyn_p);
~Mat();
double compute_delays(double inrisetime); // return outrisetime
@@ -106,8 +106,8 @@ class Mat : public Component
int deg_bl_muxing;
int num_act_mats_hor_dir;
double delay_writeback;
- Area cell,cam_cell;
- bool is_dram,is_fa, pure_cam, camFlag;
+ Area cell, cam_cell;
+ bool is_dram, is_fa, pure_cam, camFlag;
int num_mats;
powerDef power_sa;
double delay_sa;
@@ -127,7 +127,7 @@ class Mat : public Component
uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat
- private:
+private:
double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
double width_write_driver_or_write_mux();
double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w);
diff --git a/ext/mcpat/cacti/nuca.cc b/ext/mcpat/cacti/nuca.cc
index 2aabe843f..e0b4dcdaf 100644
--- a/ext/mcpat/cacti/nuca.cc
+++ b/ext/mcpat/cacti/nuca.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,89 +37,86 @@
#include "Ucache.h"
#include "nuca.h"
-unsigned int MIN_BANKSIZE=65536;
+unsigned int MIN_BANKSIZE = 65536; // lower bound that sizes the bank-size sweep in sim_nuca(), which doubles it while scaling for associativity > 2
#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */
#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */
#define CONTR_2_BANK_LAT 0
int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */];
- Nuca::Nuca(
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
- ):deviceType(dt)
-{
- init_cont();
+Nuca::Nuca( // stores the device type and loads the contention table
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
+): deviceType(dt) {
+ init_cont(); // reads contention.dat into cont_stats; exits the process if the file cannot be opened
}
void
-Nuca::init_cont()
-{
- FILE *cont;
- char line[5000];
- char jk[5000];
- cont = fopen("contention.dat", "r");
- if (!cont) {
- cout << "contention.dat file is missing!\n";
- exit(0);
- }
-
- for(int i=0; i<2; i++) {
- for(int j=2; j<5; j++) {
- for(int k=0; k<ROUTER_TYPES; k++) {
- for(int l=0;l<7; l++) {
- int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
- assert(fscanf(cont, "%[^\n]\n", line) != EOF);
- sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d",jk, &temp[0], &temp[1], &temp[2], &temp[3],
- &temp[4], &temp[5], &temp[6], &temp[7]);
+Nuca::init_cont() { // fills cont_stats from contention.dat, one text line per (level, core, router, bank) row
+ FILE *cont;
+ char line[5000];
+ char jk[5000]; // receives the text before ':' on each line; not used afterwards
+ cont = fopen("contention.dat", "r");
+ if (!cont) {
+ cout << "contention.dat file is missing!\n";
+ exit(1); // a failed start-up must not report success to the caller
+ }
+
+ for (int i = 0; i < 2; i++) {
+ for (int j = 2; j < 5; j++) {
+ for (int k = 0; k < ROUTER_TYPES; k++) {
+ for (int l = 0; l < 7; l++) {
+ int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
+ if (fscanf(cont, "%4999[^\n]\n", line) == EOF) { cout << "contention.dat is truncated!\n"; exit(1); } // read kept outside assert() so NDEBUG builds still consume the line; width bounds the write to line[]
+ sscanf(line, "%4999[^:]: %d %d %d %d %d %d %d %d", jk,
+ &temp[0], &temp[1], &temp[2], &temp[3],
+ &temp[4], &temp[5], &temp[6], &temp[7]);
+ }
+ }
}
- }
}
- }
- fclose(cont);
+ fclose(cont);
}
- void
-Nuca::print_cont_stats()
-{
- for(int i=0; i<2; i++) {
- for(int j=2; j<5; j++) {
- for(int k=0; k<ROUTER_TYPES; k++) {
- for(int l=0;l<7; l++) {
- for(int m=0;l<7; l++) {
- cout << cont_stats[i][j][k][l][m] << " ";
- }
- cout << endl;
+void
+Nuca::print_cont_stats() { // dumps the contention table to stdout, one bank row per output line
+ for (int i = 0; i < 2; i++) {
+ for (int j = 2; j < 5; j++) {
+ for (int k = 0; k < ROUTER_TYPES; k++) {
+ for (int l = 0; l < 7; l++) {
+ for (int m = 0; m < 8; m++) { // was "l < 7; l++", which never advanced m; walk the 8 cycle-time columns of row l
+ cout << cont_stats[i][j][k][l][m] << " ";
+ }
+ cout << endl;
+ }
+ }
}
- }
}
- }
- cout << endl;
+ cout << endl;
}
-Nuca::~Nuca(){
- for (int i = wt_min; i <= wt_max; i++) {
- delete wire_vertical[i];
- delete wire_horizontal[i];
- }
+Nuca::~Nuca() { // releases the Wire objects currently held in the two wire arrays
+ for (int i = wt_min; i <= wt_max; i++) { // NOTE(review): wt_min/wt_max are assigned in sim_nuca(); confirm they are initialised if sim_nuca() never ran
+ delete wire_vertical[i]; // allocated with new in sim_nuca()
+ delete wire_horizontal[i];
+ }
}
/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
- int
-Nuca::calc_cycles(double lat, double oper_freq)
-{
- //TODO: convert latch delay to FO4 */
- double cycle_time = (1.0/(oper_freq*1e9)); /*s*/
- cycle_time -= LATCH_DELAY;
- cycle_time -= FIXED_OVERHEAD;
-
- return (int)ceil(lat/cycle_time);
+int
+Nuca::calc_cycles(double lat, double oper_freq) { // lat in s, oper_freq in GHz; returns the latency in whole cycles, rounded up
+ //TODO: convert latch delay to FO4
+ double cycle_time = (1.0 / (oper_freq * 1e9)); /*s*/
+ cycle_time -= LATCH_DELAY; // usable time per cycle excludes the latch delay...
+ cycle_time -= FIXED_OVERHEAD; // ...and the fixed clock skew/jitter overhead
+
+ return (int)ceil(lat / cycle_time); // NOTE(review): cycle_time is <= 0 once oper_freq reaches 1/(83 ps), about 12 GHz — confirm callers stay below that
}
nuca_org_t::~nuca_org_t() {
- // if(h_wire) delete h_wire;
- // if(v_wire) delete v_wire;
- // if(router) delete router;
+ // if(h_wire) delete h_wire; -- kept disabled: the wire arrays are deleted by ~Nuca()
+ // if(v_wire) delete v_wire; -- kept disabled: the wire arrays are deleted by ~Nuca()
+ // if(router) delete router; -- kept disabled: sim_nuca() deletes the shared routers itself
}
/*
@@ -137,476 +135,477 @@ nuca_org_t::~nuca_org_t() {
* Finally include contention statistics and find the optimal
* NUCA configuration
*/
- void
-Nuca::sim_nuca()
-{
- /* temp variables */
- int it, ro, wr;
- int num_cyc;
- unsigned int i, j, k;
- unsigned int r, c;
- int l2_c;
- int bank_count = 0;
- uca_org_t ures;
- nuca_org_t *opt_n;
- mem_array tag, data;
- list<nuca_org_t *> nuca_list;
- Router *router_s[ROUTER_TYPES];
- router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
- router_s[0]->print_router();
- router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
- router_s[1]->print_router();
- router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
- router_s[2]->print_router();
-
- int core_in; // to store no. of cores
-
- /* to search diff grid organizations */
- double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
- curr_acclat;
- double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
- avg_leakage_power;
-
- double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
- int opt_rows = 0;
- int opt_columns = 0;
- double opt_totno_hops = 0;
- double opt_avg_hop = 0;
- double opt_dyn_power = 0, opt_leakage_power = 0;
- min_values_t minval;
-
- int bank_start = 0;
-
- int flit_width = 0;
-
- /* vertical and horizontal hop latency values */
- int ver_hop_lat, hor_hop_lat; /* in cycles */
-
-
- /* no. of different bank sizes to consider */
- int iterations;
-
-
- g_ip->nuca_cache_sz = g_ip->cache_sz;
- nuca_list.push_back(new nuca_org_t());
-
- if (g_ip->cache_level == 0) l2_c = 1;
- else l2_c = 0;
-
- if (g_ip->cores <= 4) core_in = 2;
- else if (g_ip->cores <= 8) core_in = 3;
- else if (g_ip->cores <= 16) core_in = 4;
- else {cout << "Number of cores should be <= 16!\n"; exit(0);}
-
-
- // set the lower bound to an appropriate value. this depends on cache associativity
- if (g_ip->assoc > 2) {
- i = 2;
- while (i != g_ip->assoc) {
- MIN_BANKSIZE *= 2;
- i *= 2;
- }
- }
-
- iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE);
-
- if (g_ip->force_wiretype)
- {
- if (g_ip->wt == Low_swing) {
- wt_min = Low_swing;
- wt_max = Low_swing;
- }
+void
+Nuca::sim_nuca() {
+ /* temp variables */
+ int it, ro, wr;
+ int num_cyc;
+ unsigned int i, j, k;
+ unsigned int r, c;
+ int l2_c;
+ int bank_count = 0;
+ uca_org_t ures;
+ nuca_org_t *opt_n;
+ mem_array tag, data;
+ list<nuca_org_t *> nuca_list;
+ Router *router_s[ROUTER_TYPES];
+ router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
+ router_s[0]->print_router();
+ router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
+ router_s[1]->print_router();
+ router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
+ router_s[2]->print_router();
+
+ int core_in; // to store no. of cores
+
+ /* to search diff grid organizations */
+ double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
+ curr_acclat;
+ double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
+ avg_leakage_power;
+
+ double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
+ int opt_rows = 0;
+ int opt_columns = 0;
+ double opt_totno_hops = 0;
+ double opt_avg_hop = 0;
+ double opt_dyn_power = 0, opt_leakage_power = 0;
+ min_values_t minval;
+
+ int bank_start = 0;
+
+ int flit_width = 0;
+
+ /* vertical and horizontal hop latency values */
+ int ver_hop_lat, hor_hop_lat; /* in cycles */
+
+
+ /* no. of different bank sizes to consider */
+ int iterations;
+
+
+ g_ip->nuca_cache_sz = g_ip->cache_sz;
+ nuca_list.push_back(new nuca_org_t());
+
+ if (g_ip->cache_level == 0) l2_c = 1;
+ else l2_c = 0;
+
+ if (g_ip->cores <= 4) core_in = 2;
+ else if (g_ip->cores <= 8) core_in = 3;
+ else if (g_ip->cores <= 16) core_in = 4;
else {
- wt_min = Global;
- wt_max = Low_swing-1;
+ cout << "Number of cores should be <= 16!\n";
+ exit(0);
}
- }
- else {
- wt_min = Global;
- wt_max = Low_swing;
- }
- if (g_ip->nuca_bank_count != 0) { // simulate just one bank
- if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
- g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
- g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
- fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n");
- }
- bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
- iterations = bank_start+1;
- g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count;
- }
- cout << "Simulating various NUCA configurations\n";
- for (it=bank_start; it<iterations; it++) { /* different bank count values */
- ures.tag_array2 = &tag;
- ures.data_array2 = &data;
- /*
- * find the optimal bank organization
- */
- solve(&ures);
-// output_UCA(&ures);
- bank_count = g_ip->nuca_cache_sz/g_ip->cache_sz;
- cout << "====" << g_ip->cache_sz << "\n";
-
- for (wr=wt_min; wr<=wt_max; wr++) {
-
- for (ro=0; ro<ROUTER_TYPES; ro++)
- {
- flit_width = (int) router_s[ro]->flit_size; //initialize router
- nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
-
- /* calculate router and wire parameters */
-
- double vlength = ures.cache_ht; /* length of the wire (u)*/
- double hlength = ures.cache_len; // u
- /* find delay, area, and power for wires */
- wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
- wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
+ // set the lower bound to an appropriate value. this depends on cache associativity
+ if (g_ip->assoc > 2) {
+ i = 2;
+ while (i != g_ip->assoc) {
+ MIN_BANKSIZE *= 2;
+ i *= 2;
+ }
+ }
- hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
- ver_hop_lat = calc_cycles(wire_vertical[wr]->delay,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
+ iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE);
+ if (g_ip->force_wiretype) {
+ if (g_ip->wt == Low_swing) {
+ wt_min = Low_swing;
+ wt_max = Low_swing;
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing - 1;
+ }
+ } else {
+ wt_min = Global;
+ wt_max = Low_swing;
+ }
+ if (g_ip->nuca_bank_count != 0) { // simulate just one bank
+ if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
+ g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
+ g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
+ fprintf(stderr, "Incorrect bank count value! Please fix the " // adjacent literals, no comma: a comma made the tail an unused argument and cut the message short
+ "value in cache.cfg\n"); // warning only; execution continues with the given value
+ }
+ bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
+ iterations = bank_start + 1; // restricts the sweep below to a single pass
+ g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count;
+ }
+ cout << "Simulating various NUCA configurations\n";
+ for (it = bank_start; it < iterations; it++) {
+ /* different bank count values */
+ ures.tag_array2 = &tag;
+ ures.data_array2 = &data;
/*
- * assume a grid like topology and explore for optimal network
- * configuration using different row and column count values.
+ * find the optimal bank organization
*/
- for (c=1; c<=(unsigned int)bank_count; c++) {
- while (bank_count%c != 0) c++;
- r = bank_count/c;
-
- /*
- * to find the avg access latency of a NUCA cache, uncontended
- * access time to each bank from the
- * cache controller is calculated.
- * avg latency =
- * sum of the access latencies to individual banks)/bank
- * count value.
- */
- totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
- k = 1;
- for (i=0; i<r; i++) {
- for (j=0; j<c; j++) {
- /*
- * vertical hops including the
- * first hop from the cache controller
- */
- curr_hop = i + 1;
- curr_hop += j; /* horizontal hops */
- totno_hhops += j;
- totno_vhops += (i+1);
- curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
- j * hor_hop_lat);
-
- tot_lat += curr_acclat;
- totno_hops += curr_hop;
+ solve(&ures);
+// output_UCA(&ures);
+ bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz;
+ cout << "====" << g_ip->cache_sz << "\n";
+
+ for (wr = wt_min; wr <= wt_max; wr++) {
+
+ for (ro = 0; ro < ROUTER_TYPES; ro++) {
+ flit_width = (int) router_s[ro]->flit_size; //initialize router
+ nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
+
+ /* calculate router and wire parameters */
+
+ double vlength = ures.cache_ht; /* length of the wire (u)*/
+ double hlength = ures.cache_len; // u
+
+ /* find delay, area, and power for wires */
+ wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
+ wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
+
+
+ hor_hop_lat =
+ calc_cycles(wire_horizontal[wr]->delay,
+ 1 /(nuca_list.back()->nuca_pda.cycle_time *
+ .001));
+ ver_hop_lat =
+ calc_cycles(wire_vertical[wr]->delay,
+ 1 / (nuca_list.back()->nuca_pda.cycle_time *
+ .001));
+
+ /*
+ * assume a grid like topology and explore for optimal network
+ * configuration using different row and column count values.
+ */
+ for (c = 1; c <= (unsigned int)bank_count; c++) {
+ while (bank_count % c != 0) c++;
+ r = bank_count / c;
+
+ /*
+ * to find the avg access latency of a NUCA cache, uncontended
+ * access time to each bank from the
+ * cache controller is calculated.
+ * avg latency =
+ * sum of the access latencies to individual banks)/bank
+ * count value.
+ */
+ totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
+ k = 1;
+ for (i = 0; i < r; i++) {
+ for (j = 0; j < c; j++) {
+ /*
+ * vertical hops including the
+ * first hop from the cache controller
+ */
+ curr_hop = i + 1;
+ curr_hop += j; /* horizontal hops */
+ totno_hhops += j;
+ totno_vhops += (i + 1);
+ curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
+ j * hor_hop_lat);
+
+ tot_lat += curr_acclat;
+ totno_hops += curr_hop;
+ }
+ }
+ avg_lat = tot_lat / bank_count;
+ avg_hop = totno_hops / bank_count;
+ avg_hhop = totno_hhops / bank_count;
+ avg_vhop = totno_vhops / bank_count;
+
+ /* net access latency */
+ curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay *
+ avg_hop) +
+ calc_cycles(ures.access_time,
+ 1 /
+ (nuca_list.back()->nuca_pda.cycle_time *
+ .001));
+
+ /* avg access lat of nuca */
+ avg_dyn_power =
+ avg_hop *
+ (router_s[ro]->power.readOp.dynamic) + avg_hhop *
+ (wire_horizontal[wr]->power.readOp.dynamic) *
+ (g_ip->block_sz * 8 + 64) + avg_vhop *
+ (wire_vertical[wr]->power.readOp.dynamic) *
+ (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic;
+
+ avg_leakage_power =
+ bank_count * router_s[ro]->power.readOp.leakage +
+ avg_hhop * (wire_horizontal[wr]->power.readOp.leakage *
+ wire_horizontal[wr]->delay) * flit_width +
+ avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
+ wire_vertical[wr]->delay); // was wire_horizontal: pair the vertical leakage with the vertical wire's own delay. NOTE(review): this term has no flit_width factor, unlike the horizontal one — confirm intent
+
+ if (curr_acclat < opt_acclat) {
+ opt_acclat = curr_acclat;
+ opt_tot_lat = tot_lat;
+ opt_avg_lat = avg_lat;
+ opt_totno_hops = totno_hops;
+ opt_avg_hop = avg_hop;
+ opt_rows = r;
+ opt_columns = c;
+ opt_dyn_power = avg_dyn_power;
+ opt_leakage_power = avg_leakage_power;
+ }
+ totno_hops = 0;
+ tot_lat = 0;
+ totno_hhops = 0;
+ totno_vhops = 0;
+ }
+ nuca_list.back()->wire_pda.power.readOp.dynamic =
+ opt_avg_hop * flit_width *
+ (wire_horizontal[wr]->power.readOp.dynamic +
+ wire_vertical[wr]->power.readOp.dynamic);
+ nuca_list.back()->avg_hops = opt_avg_hop;
+ /* network delay/power */
+ nuca_list.back()->h_wire = wire_horizontal[wr];
+ nuca_list.back()->v_wire = wire_vertical[wr];
+ nuca_list.back()->router = router_s[ro];
+ /* bank delay/power */
+
+ nuca_list.back()->bank_pda.delay = ures.access_time;
+ nuca_list.back()->bank_pda.power = ures.power;
+ nuca_list.back()->bank_pda.area.h = ures.cache_ht;
+ nuca_list.back()->bank_pda.area.w = ures.cache_len;
+ nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
+
+ num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
+ 1 /
+ (nuca_list.back()->nuca_pda.cycle_time *
+ .001/*GHz*/));
+ if (num_cyc % 2 != 0) num_cyc++;
+ if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
+
+ if (it < 7) { // contention data exists for bank rows 0..6 only
+ nuca_list.back()->nuca_pda.delay = opt_acclat +
+ cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
+ nuca_list.back()->contention =
+ cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
+ } else { // beyond the table: reuse the last row (index 6); row 7 is past the end of that dimension
+ nuca_list.back()->nuca_pda.delay = opt_acclat +
+ cont_stats[l2_c][core_in][ro][6][num_cyc/2-1];
+ nuca_list.back()->contention =
+ cont_stats[l2_c][core_in][ro][6][num_cyc/2-1];
+ }
+ nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
+ nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
+
+ /* array organization */
+ nuca_list.back()->bank_count = bank_count;
+ nuca_list.back()->rows = opt_rows;
+ nuca_list.back()->columns = opt_columns;
+ calculate_nuca_area (nuca_list.back());
+
+ minval.update_min_values(nuca_list.back());
+ nuca_list.push_back(new nuca_org_t());
+ opt_acclat = BIGNUM;
+
}
- }
- avg_lat = tot_lat/bank_count;
- avg_hop = totno_hops/bank_count;
- avg_hhop = totno_hhops/bank_count;
- avg_vhop = totno_vhops/bank_count;
-
- /* net access latency */
- curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) +
- calc_cycles(ures.access_time,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
-
- /* avg access lat of nuca */
- avg_dyn_power =
- avg_hop *
- (router_s[ro]->power.readOp.dynamic) + avg_hhop *
- (wire_horizontal[wr]->power.readOp.dynamic) *
- (g_ip->block_sz*8 + 64) + avg_vhop *
- (wire_vertical[wr]->power.readOp.dynamic) *
- (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic;
-
- avg_leakage_power =
- bank_count * router_s[ro]->power.readOp.leakage +
- avg_hhop * (wire_horizontal[wr]->power.readOp.leakage*
- wire_horizontal[wr]->delay) * flit_width +
- avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
- wire_horizontal[wr]->delay);
-
- if (curr_acclat < opt_acclat) {
- opt_acclat = curr_acclat;
- opt_tot_lat = tot_lat;
- opt_avg_lat = avg_lat;
- opt_totno_hops = totno_hops;
- opt_avg_hop = avg_hop;
- opt_rows = r;
- opt_columns = c;
- opt_dyn_power = avg_dyn_power;
- opt_leakage_power = avg_leakage_power;
- }
- totno_hops = 0;
- tot_lat = 0;
- totno_hhops = 0;
- totno_vhops = 0;
}
- nuca_list.back()->wire_pda.power.readOp.dynamic =
- opt_avg_hop * flit_width *
- (wire_horizontal[wr]->power.readOp.dynamic +
- wire_vertical[wr]->power.readOp.dynamic);
- nuca_list.back()->avg_hops = opt_avg_hop;
- /* network delay/power */
- nuca_list.back()->h_wire = wire_horizontal[wr];
- nuca_list.back()->v_wire = wire_vertical[wr];
- nuca_list.back()->router = router_s[ro];
- /* bank delay/power */
-
- nuca_list.back()->bank_pda.delay = ures.access_time;
- nuca_list.back()->bank_pda.power = ures.power;
- nuca_list.back()->bank_pda.area.h = ures.cache_ht;
- nuca_list.back()->bank_pda.area.w = ures.cache_len;
- nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
-
- num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
- 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/));
- if(num_cyc%2 != 0) num_cyc++;
- if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
-
- if (it < 7) {
- nuca_list.back()->nuca_pda.delay = opt_acclat +
- cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
- nuca_list.back()->contention =
- cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
- }
- else {
- nuca_list.back()->nuca_pda.delay = opt_acclat +
- cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
- nuca_list.back()->contention =
- cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
- }
- nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
- nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
+ g_ip->cache_sz /= 2;
+ }
- /* array organization */
- nuca_list.back()->bank_count = bank_count;
- nuca_list.back()->rows = opt_rows;
- nuca_list.back()->columns = opt_columns;
- calculate_nuca_area (nuca_list.back());
+ delete(nuca_list.back());
+ nuca_list.pop_back();
+ opt_n = find_optimal_nuca(&nuca_list, &minval);
+ print_nuca(opt_n);
+ g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count;
- minval.update_min_values(nuca_list.back());
- nuca_list.push_back(new nuca_org_t());
- opt_acclat = BIGNUM;
+ list<nuca_org_t *>::iterator niter;
+ for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) {
+ delete *niter;
+ }
+ nuca_list.clear();
- }
+ for (int i = 0; i < ROUTER_TYPES; i++) {
+ delete router_s[i];
}
- g_ip->cache_sz /= 2;
- }
-
- delete(nuca_list.back());
- nuca_list.pop_back();
- opt_n = find_optimal_nuca(&nuca_list, &minval);
- print_nuca(opt_n);
- g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count;
-
- list<nuca_org_t *>::iterator niter;
- for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter)
- {
- delete *niter;
- }
- nuca_list.clear();
-
- for(int i=0; i < ROUTER_TYPES; i++)
- {
- delete router_s[i];
- }
- g_ip->display_ip();
- // g_ip->force_cache_config = true;
- // g_ip->ndwl = 8;
- // g_ip->ndbl = 16;
- // g_ip->nspd = 4;
- // g_ip->ndcm = 1;
- // g_ip->ndsam1 = 8;
- // g_ip->ndsam2 = 32;
+ g_ip->display_ip();
+ // g_ip->force_cache_config = true;
+ // g_ip->ndwl = 8;
+ // g_ip->ndbl = 16;
+ // g_ip->nspd = 4;
+ // g_ip->ndcm = 1;
+ // g_ip->ndsam1 = 8;
+ // g_ip->ndsam2 = 32;
}
- void
-Nuca::print_nuca (nuca_org_t *fr)
-{
- printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
- "----------\n\n");
- printf("Optimal number of banks - %d\n", fr->bank_count);
- printf("Grid organization rows x columns - %d x %d\n",
- fr->rows, fr->columns);
- printf("Network frequency - %g GHz\n",
- (1/fr->nuca_pda.cycle_time)*1e3);
- printf("Cache dimension (mm x mm) - %g x %g\n",
- fr->nuca_pda.area.h,
- fr->nuca_pda.area.w);
-
- fr->router->print_router();
-
- printf("\n\nWire stats:\n");
- if (fr->h_wire->wt == Global) {
- printf("\tWire type - Full swing global wires with least "
- "possible delay\n");
- }
- else if (fr->h_wire->wt == Global_5) {
- printf("\tWire type - Full swing global wires with "
- "5%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_10) {
- printf("\tWire type - Full swing global wires with "
- "10%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_20) {
- printf("\tWire type - Full swing global wires with "
- "20%% delay penalty\n");
- }
- else if (fr->h_wire->wt == Global_30) {
- printf("\tWire type - Full swing global wires with "
- "30%% delay penalty\n");
- }
- else if(fr->h_wire->wt == Low_swing) {
- printf("\tWire type - Low swing wires\n");
- }
-
- printf("\tHorizontal link delay - %g (ns)\n",
- fr->h_wire->delay*1e9);
- printf("\tVertical link delay - %g (ns)\n",
- fr->v_wire->delay*1e9);
- printf("\tDelay/length - %g (ns/mm)\n",
- fr->h_wire->delay*1e9/fr->bank_pda.area.w);
- printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
- "\t -leakage %g (nW)\n\n",
- fr->h_wire->power.readOp.dynamic*1e9,
- fr->h_wire->power.readOp.leakage*1e9);
- printf("\tVertical link energy -dynamic/access %g (nJ)\n"
- "\t -leakage %g (nW)\n\n",
- fr->v_wire->power.readOp.dynamic*1e9,
- fr->v_wire->power.readOp.leakage*1e9);
- printf("\n\n");
- fr->v_wire->print_wire();
- printf("\n\nBank stats:\n");
+void
+Nuca::print_nuca (nuca_org_t *fr) { // prints the organization in fr to stdout: grid shape, router stats, then wire stats
+ printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
+ "----------\n\n");
+ printf("Optimal number of banks - %d\n", fr->bank_count);
+ printf("Grid organization rows x columns - %d x %d\n",
+ fr->rows, fr->columns);
+ printf("Network frequency - %g GHz\n",
+ (1 / fr->nuca_pda.cycle_time)*1e3); // presumably cycle_time is in ps, so 1/ps * 1e3 gives GHz — TODO confirm
+ printf("Cache dimension (mm x mm) - %g x %g\n",
+ fr->nuca_pda.area.h,
+ fr->nuca_pda.area.w);
+
+ fr->router->print_router();
+
+ printf("\n\nWire stats:\n");
+ if (fr->h_wire->wt == Global) { // type is read from the horizontal wire; sim_nuca() creates both wires with the same type
+ printf("\tWire type - Full swing global wires with least "
+ "possible delay\n");
+ } else if (fr->h_wire->wt == Global_5) {
+ printf("\tWire type - Full swing global wires with "
+ "5%% delay penalty\n");
+ } else if (fr->h_wire->wt == Global_10) {
+ printf("\tWire type - Full swing global wires with "
+ "10%% delay penalty\n");
+ } else if (fr->h_wire->wt == Global_20) {
+ printf("\tWire type - Full swing global wires with "
+ "20%% delay penalty\n");
+ } else if (fr->h_wire->wt == Global_30) {
+ printf("\tWire type - Full swing global wires with "
+ "30%% delay penalty\n");
+ } else if (fr->h_wire->wt == Low_swing) {
+ printf("\tWire type - Low swing wires\n");
+ } // any other wire type prints no "Wire type" line
+
+ printf("\tHorizontal link delay - %g (ns)\n",
+ fr->h_wire->delay*1e9); // delay is scaled by 1e9 for the ns label
+ printf("\tVertical link delay - %g (ns)\n",
+ fr->v_wire->delay*1e9);
+ printf("\tDelay/length - %g (ns/mm)\n",
+ fr->h_wire->delay*1e9 / fr->bank_pda.area.w); // NOTE(review): sim_nuca() sets area.w from ures.cache_len, commented there as "u" — confirm the mm label
+ printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
+ "\t -leakage %g (nW)\n\n",
+ fr->h_wire->power.readOp.dynamic*1e9,
+ fr->h_wire->power.readOp.leakage*1e9);
+ printf("\tVertical link energy -dynamic/access %g (nJ)\n"
+ "\t -leakage %g (nW)\n\n",
+ fr->v_wire->power.readOp.dynamic*1e9,
+ fr->v_wire->power.readOp.leakage*1e9);
+ printf("\n\n");
+ fr->v_wire->print_wire();
+ printf("\n\nBank stats:\n"); // heading only; this function prints nothing after it
}
- nuca_org_t *
-Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
-{
- double cost = 0;
- double min_cost = BIGNUM;
- nuca_org_t *res = NULL;
- float d, a, dp, lp, c;
- int v;
- dp = g_ip->dynamic_power_wt_nuca;
- lp = g_ip->leakage_power_wt_nuca;
- a = g_ip->area_wt_nuca;
- d = g_ip->delay_wt_nuca;
- c = g_ip->cycle_time_wt_nuca;
-
- list<nuca_org_t *>::iterator niter;
-
-
- for (niter = n->begin(); niter != n->end(); niter++) {
- fprintf(stderr, "\n-----------------------------"
- "---------------\n");
-
-
- printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
- "bank_dpower = %g \tleak = %g \tcycle = %g\n",
- (*niter)->bank_count,
- (*niter)->nuca_pda.delay,
- (*niter)->nuca_pda.power.readOp.dynamic,
- (*niter)->h_wire->wt,
- (*niter)->bank_pda.power.readOp.dynamic,
- (*niter)->nuca_pda.power.readOp.leakage,
- (*niter)->nuca_pda.cycle_time);
-
-
- if (g_ip->ed == 1) {
- cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
- }
- }
- else if (g_ip->ed == 2) {
- cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.delay/minval->min_delay)*
- ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
- }
- }
- else {
- /*
- * check whether the current organization
- * meets the input deviation constraints
- */
- v = check_nuca_org((*niter), minval);
- if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
-
- if (v) {
- cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) +
- c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) +
- dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) +
- lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) +
- a * ((*niter)->nuca_pda.area.get_area()/minval->min_area));
- fprintf(stderr, "cost = %g\n", cost);
-
- if (min_cost > cost) {
- min_cost = cost;
- res = ((*niter));
+nuca_org_t *
+Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) {
+ double cost = 0;
+ double min_cost = BIGNUM;
+ nuca_org_t *res = NULL;
+ float d, a, dp, lp, c;
+ int v;
+ dp = g_ip->dynamic_power_wt_nuca;
+ lp = g_ip->leakage_power_wt_nuca;
+ a = g_ip->area_wt_nuca;
+ d = g_ip->delay_wt_nuca;
+ c = g_ip->cycle_time_wt_nuca;
+
+ list<nuca_org_t *>::iterator niter;
+
+
+ for (niter = n->begin(); niter != n->end(); niter++) {
+ fprintf(stderr, "\n-----------------------------"
+ "---------------\n");
+
+
+ printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
+ "bank_dpower = %g \tleak = %g \tcycle = %g\n",
+ (*niter)->bank_count,
+ (*niter)->nuca_pda.delay,
+ (*niter)->nuca_pda.power.readOp.dynamic,
+ (*niter)->h_wire->wt,
+ (*niter)->bank_pda.power.readOp.dynamic,
+ (*niter)->nuca_pda.power.readOp.leakage,
+ (*niter)->nuca_pda.cycle_time);
+
+
+ if (g_ip->ed == 1) {
+ cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
+ ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ } else if (g_ip->ed == 2) {
+ cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
+ ((*niter)->nuca_pda.delay / minval->min_delay) *
+ ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ } else {
+ /*
+ * check whether the current organization
+ * meets the input deviation constraints
+ */
+ v = check_nuca_org((*niter), minval);
+ if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
+
+ if (v) {
+ cost = (d * ((*niter)->nuca_pda.delay / minval->min_delay) +
+ c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) +
+ dp * ((*niter)->nuca_pda.power.readOp.dynamic /
+ minval->min_dyn) +
+ lp * ((*niter)->nuca_pda.power.readOp.leakage /
+ minval->min_leakage) +
+ a * ((*niter)->nuca_pda.area.get_area() /
+ minval->min_area));
+ fprintf(stderr, "cost = %g\n", cost);
+
+ if (min_cost > cost) {
+ min_cost = cost;
+ res = ((*niter));
+ }
+ } else {
+ niter = n->erase(niter);
+ if (niter != n->begin())
+ niter --;
+ }
}
- }
- else {
- niter = n->erase(niter);
- if (niter !=n->begin())
- niter --;
- }
}
- }
- return res;
+ return res;
}
- int
-Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
-{
- if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev_nuca) {
- return 0;
- }
- if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev_nuca) {
- return 0;
- }
- return 1;
+int
+Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) {
+ if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) >
+ g_ip->delay_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) /
+ minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) /
+ minval->min_leakage)*100 >
+ g_ip->leakage_power_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
+ g_ip->cycle_time_dev_nuca) {
+ return 0;
+ }
+ if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) *
+ 100 >
+ g_ip->area_dev_nuca) {
+ return 0;
+ }
+ return 1;
}
- void
-Nuca::calculate_nuca_area (nuca_org_t *nuca)
-{
- nuca->nuca_pda.area.h=
- nuca->rows * ((nuca->h_wire->wire_width +
- nuca->h_wire->wire_spacing)
- * nuca->router->flit_size +
- nuca->bank_pda.area.h);
-
- nuca->nuca_pda.area.w =
- nuca->columns * ((nuca->v_wire->wire_width +
- nuca->v_wire->wire_spacing)
- * nuca->router->flit_size +
- nuca->bank_pda.area.w);
+void
+Nuca::calculate_nuca_area (nuca_org_t *nuca) {
+ nuca->nuca_pda.area.h =
+ nuca->rows * ((nuca->h_wire->wire_width +
+ nuca->h_wire->wire_spacing)
+ * nuca->router->flit_size +
+ nuca->bank_pda.area.h);
+
+ nuca->nuca_pda.area.w =
+ nuca->columns * ((nuca->v_wire->wire_width +
+ nuca->v_wire->wire_spacing)
+ * nuca->router->flit_size +
+ nuca->bank_pda.area.w);
}
diff --git a/ext/mcpat/cacti/nuca.h b/ext/mcpat/cacti/nuca.h
index adfe32564..38cca6f70 100644
--- a/ext/mcpat/cacti/nuca.h
+++ b/ext/mcpat/cacti/nuca.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -46,8 +47,8 @@
#include "wire.h"
class nuca_org_t {
- public:
- ~nuca_org_t();
+public:
+ ~nuca_org_t();
// int size;
/* area, power, access time, and cycle time stats */
Component nuca_pda;
@@ -71,9 +72,8 @@ class nuca_org_t {
-class Nuca : public Component
-{
- public:
+class Nuca : public Component {
+public:
Nuca(
TechnologyParameter::DeviceType *dt);
void print_router();
@@ -87,12 +87,12 @@ class Nuca : public Component
void print_nuca(nuca_org_t *n);
void print_cont_stats();
- private:
+private:
TechnologyParameter::DeviceType *deviceType;
int wt_min, wt_max;
Wire *wire_vertical[WIRE_TYPES],
- *wire_horizontal[WIRE_TYPES];
+ *wire_horizontal[WIRE_TYPES];
};
diff --git a/ext/mcpat/cacti/parameter.cc b/ext/mcpat/cacti/parameter.cc
index b71640c19..f7184d8a9 100644
--- a/ext/mcpat/cacti/parameter.cc
+++ b/ext/mcpat/cacti/parameter.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -46,147 +47,141 @@ TechnologyParameter g_tp;
-void TechnologyParameter::DeviceType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
- cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl;
- cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
- cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl;
- cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl;
- cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl;
- cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl;
- cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl;
- cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl;
- cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl;
- cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl;
- cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl;
- cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl;
- cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl;
- cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl;
- cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl;
- cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
+void TechnologyParameter::DeviceType::display(uint32_t indent) {
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
+ cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl;
+ cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
+ cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl;
+ cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl;
+ cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl;
+ cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl;
+ cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl;
+ cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl;
+ cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl;
+ cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl;
+ cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl;
+ cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl;
+ cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl;
+ cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl;
+ cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl;
+ cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
}
-void TechnologyParameter::InterconnectType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
+void TechnologyParameter::InterconnectType::display(uint32_t indent) {
+ string indent_str(indent, ' ');
- cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
- cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
- cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
+ cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
+ cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
+ cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
}
-void TechnologyParameter::ScalingFactor::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
+void TechnologyParameter::ScalingFactor::display(uint32_t indent) {
+ string indent_str(indent, ' ');
- cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
- cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
+ cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
+ cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
}
-void TechnologyParameter::MemoryType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
+void TechnologyParameter::MemoryType::display(uint32_t indent) {
+ string indent_str(indent, ' ');
- cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
- cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl;
- cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl;
- cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
- cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
- cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl;
+ cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
+ cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl;
+ cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl;
+ cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
+ cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
+ cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl;
}
-void TechnologyParameter::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
- cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl;
- cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl;
- cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl;
- cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl;
- cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl;
- cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl;
- cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl;
- cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl;
- cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl;
- cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl;
- cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl;
- cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl;
- cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl;
- cout << endl;
- cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl;
- cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl;
- cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl;
- cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl;
- cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl;
- cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl;
- cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl;
- cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl;
- cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl;
- cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl;
- cout << endl;
- cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl;
- cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl;
- cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
- cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl;
- cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl;
- cout << endl;
- cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl;
- cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl;
- cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl;
- cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl;
- cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
- cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl;
- cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl;
-
- cout << endl;
- cout << indent_str << "SRAM cell transistor: " << endl;
- sram_cell.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM access transistor: " << endl;
- dram_acc.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM wordline transistor: " << endl;
- dram_wl.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "peripheral global transistor: " << endl;
- peri_global.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire local" << endl;
- wire_local.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire inside mat" << endl;
- wire_inside_mat.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire outside mat" << endl;
- wire_outside_mat.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "SRAM" << endl;
- sram.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM" << endl;
- dram.display(indent + 2);
+void TechnologyParameter::display(uint32_t indent) {
+ string indent_str(indent, ' ');
+
+ cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
+ cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl;
+ cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl;
+ cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl;
+ cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl;
+ cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl;
+ cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl;
+ cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl;
+ cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl;
+ cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl;
+ cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl;
+ cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl;
+ cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl;
+ cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl;
+ cout << endl;
+ cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl;
+ cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl;
+ cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl;
+ cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl;
+ cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl;
+ cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl;
+ cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl;
+ cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl;
+ cout << endl;
+ cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl;
+ cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl;
+ cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
+ cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl;
+ cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl;
+ cout << endl;
+ cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl;
+ cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl;
+ cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl;
+ cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl;
+ cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
+ cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl;
+ cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl;
+
+ cout << endl;
+ cout << indent_str << "SRAM cell transistor: " << endl;
+ sram_cell.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM access transistor: " << endl;
+ dram_acc.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM wordline transistor: " << endl;
+ dram_wl.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "peripheral global transistor: " << endl;
+ peri_global.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire local" << endl;
+ wire_local.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire inside mat" << endl;
+ wire_inside_mat.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "wire outside mat" << endl;
+ wire_outside_mat.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "SRAM" << endl;
+ sram.display(indent + 2);
+
+ cout << endl;
+ cout << indent_str << "DRAM" << endl;
+ dram.display(indent + 2);
}
DynamicParameter::DynamicParameter():
- use_inp_params(0), cell(), is_valid(true)
-{
+ use_inp_params(0), cell(), is_valid(true) {
}
@@ -202,512 +197,433 @@ DynamicParameter::DynamicParameter(
unsigned int Ndsam_lev_1_,
unsigned int Ndsam_lev_2_,
bool is_main_mem_):
- is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_),
- Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
- number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
- is_main_mem(is_main_mem_), cell(), is_valid(false)
-{
- ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
- is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
-
- unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer
- const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
- fully_assoc = (g_ip->fully_assoc) ? true : false;
-
- if (fully_assoc || pure_cam)
- { // fully-assocative cache -- ref: CACTi 2.0 report
- if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
- Ndcm != 1 || //Ndcm is fixed to 1 for FA
- Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
- Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
- Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
- Ndbl < 2)
- {
- return;
- }
- }
-
- if ((is_dram) && (!is_tag) && (Ndcm > 1))
- {
- return; // For a DRAM array, each bitline has its own sense-amp
- }
-
- // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
- // at least two because an array is assumed to have at least one mat. And a mat
- // is formed out of two horizontal subarrays and two vertical subarrays
- if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1))
- {
- return;
- }
-
- //***********compute row, col of an subarray
- if (!(fully_assoc || pure_cam))//Not fully_asso nor cam
- {
- // if data array, let tagbits = 0
- if (is_tag)
- {
- if (g_ip->specific_tag)
- {
- tagbits = g_ip->tag_w;
- }
- else
- {
- tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
- _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks);
-
- }
- tagbits = (((tagbits + 3) >> 2) << 2);
-
- num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
- g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
- num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
- //burst_length = 1;
- }
- else
- {
- num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
- g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
- num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
- // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
- }
-
- if (num_r_subarray < MINSUBARRAYROWS) return;
- if (num_r_subarray == 0) return;
- if (num_r_subarray > MAXSUBARRAYROWS) return;
- if (num_c_subarray < MINSUBARRAYCOLS) return;
- if (num_c_subarray > MAXSUBARRAYCOLS) return;
-
- }
-
- else
- {//either fully-asso or cam
- if (pure_cam)
- {
- if (g_ip->specific_tag)
- {
- tagbits = int(ceil(g_ip->tag_w/8.0)*8);
- }
- else
- {
- tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8);
+ is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0),
+ Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_),
+ Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
+ number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
+ is_main_mem(is_main_mem_), cell(), is_valid(false) {
+ ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
+
+ unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer
+ const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
+ fully_assoc = (g_ip->fully_assoc) ? true : false;
+
+ // fully-assocative cache -- ref: CACTi 2.0 report
+ if (fully_assoc || pure_cam) {
+ if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
+ Ndcm != 1 || //Ndcm is fixed to 1 for FA
+ Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
+ Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
+ Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
+ Ndbl < 2) {
+ return;
+ }
+ }
+
+ if ((is_dram) && (!is_tag) && (Ndcm > 1)) {
+ return; // For a DRAM array, each bitline has its own sense-amp
+ }
+
+ // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
+ // at least two because an array is assumed to have at least one mat. And a mat
+ // is formed out of two horizontal subarrays and two vertical subarrays
+ if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) {
+ return;
+ }
+
+ //***********compute row, col of an subarray
+ if (!(fully_assoc || pure_cam)) {
+ //Not fully_asso nor cam
+ // if data array, let tagbits = 0
+ if (is_tag) {
+ if (g_ip->specific_tag) {
+ tagbits = g_ip->tag_w;
+ } else {
+ tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
+ _log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks);
+
+ }
+ tagbits = (((tagbits + 3) >> 2) << 2);
+
+ num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
+ g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
+ num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
+ //burst_length = 1;
+ } else {
+ num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
+ g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
+ num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
+ // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
+ }
+
+ if (num_r_subarray < MINSUBARRAYROWS) return;
+ if (num_r_subarray == 0) return;
+ if (num_r_subarray > MAXSUBARRAYROWS) return;
+ if (num_c_subarray < MINSUBARRAYCOLS) return;
+ if (num_c_subarray > MAXSUBARRAYCOLS) return;
+
+ }
+
+ else {//either fully-asso or cam
+ if (pure_cam) {
+ if (g_ip->specific_tag) {
+ tagbits = int(ceil(g_ip->tag_w / 8.0) * 8);
+ } else {
+ tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8);
// cout<<"Pure CAM needs tag width to be specified"<<endl;
// exit(0);
- }
- //tagbits = (((tagbits + 3) >> 2) << 2);
-
- tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries.
- //tag_num_c_subarray = (int)(tagbits + EPSILON);
- tag_num_c_subarray = tagbits;
- if (tag_num_r_subarray == 0) return;
- if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
- if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
- if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
- num_r_subarray = tag_num_r_subarray;
- }
- else //fully associative
- {
- if (g_ip->specific_tag)
- {
- tagbits = g_ip->tag_w;
- }
- else
- {
- tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
- }
- tagbits = (((tagbits + 3) >> 2) << 2);
-
- tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl));
- tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
- if (tag_num_r_subarray == 0) return;
- if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
- if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
- if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
-
- data_num_r_subarray = tag_num_r_subarray;
- data_num_c_subarray = 8 * g_ip->block_sz;
- if (data_num_r_subarray == 0) return;
- if (data_num_r_subarray > MAXSUBARRAYROWS) return;
- if (data_num_c_subarray < MINSUBARRAYCOLS) return;
- if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
- num_r_subarray = tag_num_r_subarray;
- }
- }
-
- num_subarrays = Ndwl * Ndbl;
- //****************end of computation of row, col of an subarray
-
- // calculate wire parameters
- if (fully_assoc || pure_cam)
- {
- cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
- cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
-
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)
- + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
- }
- else
- {
- if(is_tag)
- {
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
- g_ip->num_wr_ports);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
- (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
- wire_local.pitch * g_ip->num_se_rd_ports;
- }
- else
- {
- if (is_dram)
- {
- cell.h = g_tp.dram.b_h;
- cell.w = g_tp.dram.b_w;
- }
- else
- {
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
- g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
- (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
- g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
- }
- }
- }
-
- double c_b_metal = cell.h * wire_local.C_per_um;
- double C_bl;
-
- if (!(fully_assoc || pure_cam))
- {
- if (is_dram)
- {
- deg_bl_muxing = 1;
- if (ram_cell_tech_type == comm_dram)
- {
- C_bl = num_r_subarray * c_b_metal;
- V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl);
- if (V_b_sense < VBITSENSEMIN)
- {
- return;
- }
- V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
- dram_refresh_period = 64e-3;
- }
- else
- {
- double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl);
-
- if (V_b_sense < VBITSENSEMIN)
- {
- return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
- }
- V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
- //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
- //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
- dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
- }
- }
- else
- { //SRAM
- V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
- deg_bl_muxing = Ndcm;
- // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
- // contacts in a physical layout
- double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- dram_refresh_period = 0;
- }
- }
- else
- {
- c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
- V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
- deg_bl_muxing = 1;//FA fix as 1
- // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
- // contacts in a physical layout
- double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- dram_refresh_period = 0;
- }
-
-
- // do/di: data in/out, for fully associative they are the data width for normal read and write
- // so/si: search data in/out, for fully associative they are the data width for the search ops
- // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
- // so/si needs broadcase while do/di do not
-
- if (fully_assoc || pure_cam)
- {
- switch (Ndbl) {
- case (0):
- cout << " Invalid Ndbl \n"<<endl;
- exit(0);
- break;
- case (1):
- num_mats_h_dir = 1;//one subarray per mat
- num_mats_v_dir = 1;
- break;
- case (2):
- num_mats_h_dir = 1;//two subarrays per mat
- num_mats_v_dir = 1;
- break;
- default:
- num_mats_h_dir = int(floor(sqrt(Ndbl/4.0)));//4 subbarrys per mat
- num_mats_v_dir = int(Ndbl/4.0 / num_mats_h_dir);
}
- num_mats = num_mats_h_dir * num_mats_v_dir;
-
- if (fully_assoc)
- {
- num_so_b_mat = data_num_c_subarray;
- num_do_b_mat = data_num_c_subarray + tagbits;
+ //tagbits = (((tagbits + 3) >> 2) << 2);
+
+ //TODO: error check input of tagbits and blocksize
+ //TODO: for pure CAM, g_ip->block should be number of entries.
+ tag_num_r_subarray = (int)ceil(capacity_per_die /
+ (g_ip->nbanks * tagbits / 8.0 * Ndbl));
+ //tag_num_c_subarray = (int)(tagbits + EPSILON);
+ tag_num_c_subarray = tagbits;
+ if (tag_num_r_subarray == 0) return;
+ if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
+ num_r_subarray = tag_num_r_subarray;
+ } else { //fully associative
+ if (g_ip->specific_tag) {
+ tagbits = g_ip->tag_w;
+ } else {
+ tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
+ }
+ tagbits = (((tagbits + 3) >> 2) << 2);
+
+ tag_num_r_subarray = (int)(capacity_per_die /
+ (g_ip->nbanks * g_ip->block_sz * Ndbl));
+ tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
+ if (tag_num_r_subarray == 0) return;
+ if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
+
+ data_num_r_subarray = tag_num_r_subarray;
+ data_num_c_subarray = 8 * g_ip->block_sz;
+ if (data_num_r_subarray == 0) return;
+ if (data_num_r_subarray > MAXSUBARRAYROWS) return;
+ if (data_num_c_subarray < MINSUBARRAYCOLS) return;
+ if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
+ num_r_subarray = tag_num_r_subarray;
+ }
+ }
+
+ num_subarrays = Ndwl * Ndbl;
+ //****************end of computation of row, col of a subarray
+
+ // calculate wire parameters
+ if (fully_assoc || pure_cam) {
+ cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch *
+ (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
+ + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
+ wire_local.pitch * g_ip->num_se_rd_ports;
+ cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch *
+ (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
+ + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
+ wire_local.pitch * g_ip->num_se_rd_ports;
+
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch *
+ (g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports)
+ + 2 * wire_local.pitch * (g_ip->num_search_ports - 1);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch *
+ (g_ip->num_rw_ports - 1 + (g_ip->num_rd_ports -
+ g_ip->num_se_rd_ports)
+ + g_ip->num_wr_ports) + g_tp.wire_local.pitch *
+ g_ip->num_se_rd_ports + 2 * wire_local.pitch *
+ (g_ip->num_search_ports - 1);
+ } else {
+ if (is_tag) {
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
+ g_ip->num_wr_ports);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
+ (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
+ wire_local.pitch * g_ip->num_se_rd_ports;
+ } else {
+ if (is_dram) {
+ cell.h = g_tp.dram.b_h;
+ cell.w = g_tp.dram.b_w;
+ } else {
+ cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
+ g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
+ cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
+ (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
+ g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
}
- else
- {
- num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
- num_do_b_mat = tagbits;
+ }
+ }
+
+ double c_b_metal = cell.h * wire_local.C_per_um;
+ double C_bl;
+
+ if (!(fully_assoc || pure_cam)) {
+ if (is_dram) {
+ deg_bl_muxing = 1;
+ if (ram_cell_tech_type == comm_dram) {
+ C_bl = num_r_subarray * c_b_metal;
+ V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
+ (g_tp.dram_cell_C + C_bl);
+ if (V_b_sense < VBITSENSEMIN) {
+ return;
+ }
+ V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
+ dram_refresh_period = 64e-3;
+ } else {
+ double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
+ (g_tp.dram_cell_C + C_bl);
+
+ if (V_b_sense < VBITSENSEMIN) {
+ return; //Sense amp input signal is smaller than the minimum allowable sense amp input signal
+ }
+ V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
+ //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
+ //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
+ dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
}
- }
- else
- {
- num_mats_h_dir = MAX(Ndwl / 2, 1);
- num_mats_v_dir = MAX(Ndbl / 2, 1);
- num_mats = num_mats_h_dir * num_mats_v_dir;
- num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
- }
-
- if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats)))
- {
- return;
- }
-
-
- int deg_sa_mux_l1_non_assoc;
- //TODO:the i/o for subbank is not necessary and should be removed.
- if (!(fully_assoc || pure_cam))
- {
- if (!is_tag)
- {
- if (is_main_mem == true)
- {
- num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- }
- else
- {
- if (g_ip->fast_access == true)
- {
- num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- }
- else
- {
-
- num_do_b_subbank = g_ip->out_w;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
- if (deg_sa_mux_l1_non_assoc < 1)
- {
- return;
- }
-
- }
- }
- }
- else
- {
- num_do_b_subbank = tagbits * g_ip->tag_assoc;
- if (num_do_b_mat < tagbits)
- {
- return;
- }
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
- num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
- }
- else
- {
- num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
- num_do_b_subbank = tag_num_c_subarray;
- }
-
- deg_sa_mux_l1_non_assoc = 1;
- }
-
- deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
-
- if (fully_assoc || pure_cam)
- {
- num_act_mats_hor_dir = 1;
- num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
- }
- else
- {
- num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
- if (num_act_mats_hor_dir == 0)
- {
- return;
- }
- }
-
- //compute num_do_mat for tag
- if (is_tag)
- {
- if (!(fully_assoc || pure_cam))
- {
- num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
- num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
- }
- }
-
- if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram))
- {
- if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits)
- {
- return;
- }
- }
+ } else { //SRAM
+ V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
+ deg_bl_muxing = Ndcm;
+ // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
+ // contacts in a physical layout
+ double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ dram_refresh_period = 0;
+ }
+ } else {
+ c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
+ V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
+ deg_bl_muxing = 1;//FA fix as 1
+ // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
+ // contacts in a physical layout
+ double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
+ C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
+ dram_refresh_period = 0;
+ }
+
+
+ // do/di: data in/out, for fully associative they are the data width for normal read and write
+ // so/si: search data in/out, for fully associative they are the data width for the search ops
+ // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
+ // so/si need to be broadcast while do/di do not
+
+ if (fully_assoc || pure_cam) {
+ switch (Ndbl) {
+ case (0):
+ cout << " Invalid Ndbl \n" << endl;
+ exit(0);
+ break;
+ case (1):
+ num_mats_h_dir = 1;//one subarray per mat
+ num_mats_v_dir = 1;
+ break;
+ case (2):
+ num_mats_h_dir = 1;//two subarrays per mat
+ num_mats_v_dir = 1;
+ break;
+ default:
+ num_mats_h_dir = int(floor(sqrt(Ndbl / 4.0)));//4 subarrays per mat
+ num_mats_v_dir = int(Ndbl / 4.0 / num_mats_h_dir);
+ }
+ num_mats = num_mats_h_dir * num_mats_v_dir;
+
+ if (fully_assoc) {
+ num_so_b_mat = data_num_c_subarray;
+ num_do_b_mat = data_num_c_subarray + tagbits;
+ } else {
+ num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
+ num_do_b_mat = tagbits;
+ }
+ } else {
+ num_mats_h_dir = MAX(Ndwl / 2, 1);
+ num_mats_v_dir = MAX(Ndbl / 2, 1);
+ num_mats = num_mats_h_dir * num_mats_v_dir;
+ num_do_b_mat = MAX((num_subarrays / num_mats) * num_c_subarray /
+ (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
+ }
+
+ if (!(fully_assoc || pure_cam) && (num_do_b_mat <
+ (num_subarrays / num_mats))) {
+ return;
+ }
+
+
+ int deg_sa_mux_l1_non_assoc;
+ //TODO:the i/o for subbank is not necessary and should be removed.
+ if (!(fully_assoc || pure_cam)) {
+ if (!is_tag) {
+ if (is_main_mem == true) {
+ num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ } else {
+ if (g_ip->fast_access == true) {
+ num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ } else {
+
+ num_do_b_subbank = g_ip->out_w;
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
+ if (deg_sa_mux_l1_non_assoc < 1) {
+ return;
+ }
+
+ }
+ }
+ } else {
+ num_do_b_subbank = tagbits * g_ip->tag_assoc;
+ if (num_do_b_mat < tagbits) {
+ return;
+ }
+ deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
+ //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
+ }
+ } else {
+ if (fully_assoc) {
+ num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal prefetch should be considered also for fa
+ num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
+ } else {
+ num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
+ num_do_b_subbank = tag_num_c_subarray;
+ }
+
+ deg_sa_mux_l1_non_assoc = 1;
+ }
+
+ deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
+
+ if (fully_assoc || pure_cam) {
+ num_act_mats_hor_dir = 1;
+ num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
+ } else {
+ num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
+ if (num_act_mats_hor_dir == 0) {
+ return;
+ }
+ }
+
+ //compute num_do_mat for tag
+ if (is_tag) {
+ if (!(fully_assoc || pure_cam)) {
+ num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
+ num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
+ }
+ }
+
+ if ((g_ip->is_cache == false && is_main_mem == true) ||
+ (PAGE_MODE == 1 && is_dram)) {
+ if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 !=
+ (int)g_ip->page_sz_bits) {
+ return;
+ }
+ }
// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
- if (is_tag == false && g_ip->is_main_mem == true &&
- num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc))
- {
- return;
- }
-
- if (num_act_mats_hor_dir > num_mats_h_dir)
- {
- return;
- }
-
-
- //compute di for mat subbank and bank
- if (!(fully_assoc ||pure_cam))
- {
- if(!is_tag)
- {
- if(g_ip->fast_access == true)
- {
- num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
- }
- else
- {
- num_di_b_mat = num_do_b_mat;
- }
- }
- else
- {
- num_di_b_mat = tagbits;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_di_b_mat = num_do_b_mat;
- //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
- //but inside the mat wire tracks need to be reserved for search data bus
- num_si_b_mat = tagbits;
- }
- else
- {
- num_di_b_mat = tagbits;
- num_si_b_mat = tagbits;//*num_subarrays/num_mats;
- }
-
- }
-
- num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
- num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
-
- int num_addr_b_row_dec = _log2(num_r_subarray);
- if ((fully_assoc ||pure_cam))
- num_addr_b_row_dec +=_log2(num_subarrays/num_mats);
- int number_subbanks = num_mats / num_act_mats_hor_dir;
- number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
-
- num_rw_ports = g_ip->num_rw_ports;
- num_rd_ports = g_ip->num_rd_ports;
- num_wr_ports = g_ip->num_wr_ports;
- num_se_rd_ports = g_ip->num_se_rd_ports;
- num_search_ports = g_ip->num_search_ports;
-
- if (is_dram && is_main_mem)
- {
- number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
- _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
- }
- else
- {
- number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
- _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
- }
-
- if (!(fully_assoc ||pure_cam))
- {
- if (is_tag)
- {
- num_di_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = g_ip->data_assoc;
- }
- else
- {
- num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
- num_do_b_bank_per_port = g_ip->out_w;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
- num_si_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = g_ip->out_w + tagbits;
- num_so_b_bank_per_port = g_ip->out_w;
- }
- else
- {
- num_di_b_bank_per_port = tagbits;
- num_si_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = tagbits;
- num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
- }
- }
-
- if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access))
- {
- number_way_select_signals_mat = g_ip->data_assoc;
- }
-
- // add ECC adjustment to all data signals that traverse on H-trees.
- if (g_ip->add_ecc_b_ == true)
- {
- num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
- num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
- num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
- num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
- num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
- num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
-
- num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
- num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
- num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
- num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
- num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
- num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
- }
-
- is_valid = true;
+ if (is_tag == false && g_ip->is_main_mem == true &&
+ num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 <
+ ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) {
+ return;
+ }
+
+ if (num_act_mats_hor_dir > num_mats_h_dir) {
+ return;
+ }
+
+
+ //compute di for mat subbank and bank
+ if (!(fully_assoc || pure_cam)) {
+ if (!is_tag) {
+ if (g_ip->fast_access == true) {
+ num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
+ } else {
+ num_di_b_mat = num_do_b_mat;
+ }
+ } else {
+ num_di_b_mat = tagbits;
+ }
+ } else {
+ if (fully_assoc) {
+ num_di_b_mat = num_do_b_mat;
+ //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
+ //but inside the mat wire tracks need to be reserved for search data bus
+ num_si_b_mat = tagbits;
+ } else {
+ num_di_b_mat = tagbits;
+ num_si_b_mat = tagbits;//*num_subarrays/num_mats;
+ }
+
+ }
+
+ num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
+ num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
+
+ int num_addr_b_row_dec = _log2(num_r_subarray);
+ if ((fully_assoc || pure_cam))
+ num_addr_b_row_dec += _log2(num_subarrays / num_mats);
+ int number_subbanks = num_mats / num_act_mats_hor_dir;
+ number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
+
+ num_rw_ports = g_ip->num_rw_ports;
+ num_rd_ports = g_ip->num_rd_ports;
+ num_wr_ports = g_ip->num_wr_ports;
+ num_se_rd_ports = g_ip->num_se_rd_ports;
+ num_search_ports = g_ip->num_search_ports;
+
+ if (is_dram && is_main_mem) {
+ number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
+ _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
+ } else {
+ number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
+ _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
+ }
+
+ if (!(fully_assoc || pure_cam)) {
+ if (is_tag) {
+ num_di_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = g_ip->data_assoc;
+ } else {
+ num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
+ num_do_b_bank_per_port = g_ip->out_w;
+ }
+ } else {
+ if (fully_assoc) {
+ num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
+ num_si_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = g_ip->out_w + tagbits;
+ num_so_b_bank_per_port = g_ip->out_w;
+ } else {
+ num_di_b_bank_per_port = tagbits;
+ num_si_b_bank_per_port = tagbits;
+ num_do_b_bank_per_port = tagbits;
+ num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
+ }
+ }
+
+ if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) {
+ number_way_select_signals_mat = g_ip->data_assoc;
+ }
+
+ // add ECC adjustment to all data signals that traverse on H-trees.
+ if (g_ip->add_ecc_b_ == true) {
+ num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
+ num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
+ num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
+ num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
+ num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
+ num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
+
+ num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
+ num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
+ num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
+ num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
+ num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
+ num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
+ }
+
+ is_valid = true;
}
diff --git a/ext/mcpat/cacti/parameter.h b/ext/mcpat/cacti/parameter.h
index 9c827bbc8..573b726a6 100644
--- a/ext/mcpat/cacti/parameter.h
+++ b/ext/mcpat/cacti/parameter.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -40,251 +41,242 @@
#include "io.h"
// parameters which are functions of certain device technology
-class TechnologyParameter
-{
- public:
- class DeviceType
- {
- public:
- double C_g_ideal;
- double C_fringe;
- double C_overlap;
- double C_junc; // C_junc_area
- double C_junc_sidewall;
- double l_phy;
- double l_elec;
- double R_nch_on;
- double R_pch_on;
- double Vdd;
- double Vth;
- double I_on_n;
- double I_on_p;
- double I_off_n;
- double I_off_p;
- double I_g_on_n;
- double I_g_on_p;
- double C_ox;
- double t_ox;
- double n_to_p_eff_curr_drv_ratio;
- double long_channel_leakage_reduction;
-
- DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
- C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
- Vdd(0), Vth(0),
- I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0),
- C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0) { };
- void reset()
- {
- C_g_ideal = 0;
- C_fringe = 0;
- C_overlap = 0;
- C_junc = 0;
- l_phy = 0;
- l_elec = 0;
- R_nch_on = 0;
- R_pch_on = 0;
- Vdd = 0;
- Vth = 0;
- I_on_n = 0;
- I_on_p = 0;
- I_off_n = 0;
- I_off_p = 0;
- I_g_on_n = 0;
- I_g_on_p = 0;
- C_ox = 0;
- t_ox = 0;
- n_to_p_eff_curr_drv_ratio = 0;
- long_channel_leakage_reduction = 0;
- }
+class TechnologyParameter {
+public:
+ class DeviceType {
+ public:
+ double C_g_ideal;
+ double C_fringe;
+ double C_overlap;
+ double C_junc; // C_junc_area
+ double C_junc_sidewall;
+ double l_phy;
+ double l_elec;
+ double R_nch_on;
+ double R_pch_on;
+ double Vdd;
+ double Vth;
+ double I_on_n;
+ double I_on_p;
+ double I_off_n;
+ double I_off_p;
+ double I_g_on_n;
+ double I_g_on_p;
+ double C_ox;
+ double t_ox;
+ double n_to_p_eff_curr_drv_ratio;
+ double long_channel_leakage_reduction;
+
+ DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
+ C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
+ Vdd(0), Vth(0),
+ I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0), I_g_on_n(0),
+ I_g_on_p(0),
+ C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0),
+ long_channel_leakage_reduction(0) { };
+ void reset() {
+ C_g_ideal = 0;
+ C_fringe = 0;
+ C_overlap = 0;
+ C_junc = 0;
+ l_phy = 0;
+ l_elec = 0;
+ R_nch_on = 0;
+ R_pch_on = 0;
+ Vdd = 0;
+ Vth = 0;
+ I_on_n = 0;
+ I_on_p = 0;
+ I_off_n = 0;
+ I_off_p = 0;
+ I_g_on_n = 0;
+ I_g_on_p = 0;
+ C_ox = 0;
+ t_ox = 0;
+ n_to_p_eff_curr_drv_ratio = 0;
+ long_channel_leakage_reduction = 0;
+ }
+
+ void display(uint32_t indent = 0);
+ };
+ class InterconnectType {
+ public:
+ double pitch;
+ double R_per_um;
+ double C_per_um;
+ double horiz_dielectric_constant;
+ double vert_dielectric_constant;
+ double aspect_ratio;
+ double miller_value;
+ double ild_thickness;
+
+ InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
+
+ void reset() {
+ pitch = 0;
+ R_per_um = 0;
+ C_per_um = 0;
+ horiz_dielectric_constant = 0;
+ vert_dielectric_constant = 0;
+ aspect_ratio = 0;
+ miller_value = 0;
+ ild_thickness = 0;
+ }
+
+ void display(uint32_t indent = 0);
+ };
+ class MemoryType {
+ public:
+ double b_w;
+ double b_h;
+ double cell_a_w;
+ double cell_pmos_w;
+ double cell_nmos_w;
+ double Vbitpre;
+
+ void reset() {
+ b_w = 0;
+ b_h = 0;
+ cell_a_w = 0;
+ cell_pmos_w = 0;
+ cell_nmos_w = 0;
+ Vbitpre = 0;
+ }
+
+ void display(uint32_t indent = 0);
+ };
+
+ class ScalingFactor {
+ public:
+ double logic_scaling_co_eff;
+ double core_tx_density;
+ double long_channel_leakage_reduction;
+
+ ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
+ long_channel_leakage_reduction(0) { };
+
+ void reset() {
+ logic_scaling_co_eff = 0;
+ core_tx_density = 0;
+ long_channel_leakage_reduction = 0;
+ }
+
+ void display(uint32_t indent = 0);
+ };
+
+ double ram_wl_stitching_overhead_;
+ double min_w_nmos_;
+ double max_w_nmos_;
+ double max_w_nmos_dec;
+ double unit_len_wire_del;
+ double FO4;
+ double kinv;
+ double vpp;
+ double w_sense_en;
+ double w_sense_n;
+ double w_sense_p;
+ double sense_delay;
+ double sense_dy_power;
+ double w_iso;
+ double w_poly_contact;
+ double spacing_poly_to_poly;
+ double spacing_poly_to_contact;
+
+ double w_comp_inv_p1;
+ double w_comp_inv_p2;
+ double w_comp_inv_p3;
+ double w_comp_inv_n1;
+ double w_comp_inv_n2;
+ double w_comp_inv_n3;
+ double w_eval_inv_p;
+ double w_eval_inv_n;
+ double w_comp_n;
+ double w_comp_p;
+
+ double dram_cell_I_on;
+ double dram_cell_Vdd;
+ double dram_cell_I_off_worst_case_len_temp;
+ double dram_cell_C;
+ double gm_sense_amp_latch;
+
+ double w_nmos_b_mux;
+ double w_nmos_sa_mux;
+ double w_pmos_bl_precharge;
+ double w_pmos_bl_eq;
+ double MIN_GAP_BET_P_AND_N_DIFFS;
+ double MIN_GAP_BET_SAME_TYPE_DIFFS;
+ double HPOWERRAIL;
+ double cell_h_def;
+
+ double chip_layout_overhead;
+ double macro_layout_overhead;
+ double sckt_co_eff;
+
+ double fringe_cap;
+
+ uint64_t h_dec;
+
+ DeviceType sram_cell; // SRAM cell transistor
+ DeviceType dram_acc; // DRAM access transistor
+ DeviceType dram_wl; // DRAM wordline transistor
+ DeviceType peri_global; // peripheral global
+ DeviceType cam_cell; // CAM cell transistor
+
+ InterconnectType wire_local;
+ InterconnectType wire_inside_mat;
+ InterconnectType wire_outside_mat;
+
+ ScalingFactor scaling_factor;
+
+ MemoryType sram;
+ MemoryType dram;
+ MemoryType cam;
void display(uint32_t indent = 0);
- };
- class InterconnectType
- {
- public:
- double pitch;
- double R_per_um;
- double C_per_um;
- double horiz_dielectric_constant;
- double vert_dielectric_constant;
- double aspect_ratio;
- double miller_value;
- double ild_thickness;
-
- InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
-
- void reset()
- {
- pitch = 0;
- R_per_um = 0;
- C_per_um = 0;
- horiz_dielectric_constant = 0;
- vert_dielectric_constant = 0;
- aspect_ratio = 0;
- miller_value = 0;
- ild_thickness = 0;
- }
- void display(uint32_t indent = 0);
- };
- class MemoryType
- {
- public:
- double b_w;
- double b_h;
- double cell_a_w;
- double cell_pmos_w;
- double cell_nmos_w;
- double Vbitpre;
-
- void reset()
- {
- b_w = 0;
- b_h = 0;
- cell_a_w = 0;
- cell_pmos_w = 0;
- cell_nmos_w = 0;
- Vbitpre = 0;
- }
+ void reset() {
+ dram_cell_Vdd = 0;
+ dram_cell_I_on = 0;
+ dram_cell_C = 0;
+ vpp = 0;
- void display(uint32_t indent = 0);
- };
-
- class ScalingFactor
- {
- public:
- double logic_scaling_co_eff;
- double core_tx_density;
- double long_channel_leakage_reduction;
-
- ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
- long_channel_leakage_reduction(0) { };
-
- void reset()
- {
- logic_scaling_co_eff= 0;
- core_tx_density = 0;
- long_channel_leakage_reduction= 0;
- }
-
- void display(uint32_t indent = 0);
- };
-
- double ram_wl_stitching_overhead_;
- double min_w_nmos_;
- double max_w_nmos_;
- double max_w_nmos_dec;
- double unit_len_wire_del;
- double FO4;
- double kinv;
- double vpp;
- double w_sense_en;
- double w_sense_n;
- double w_sense_p;
- double sense_delay;
- double sense_dy_power;
- double w_iso;
- double w_poly_contact;
- double spacing_poly_to_poly;
- double spacing_poly_to_contact;
-
- double w_comp_inv_p1;
- double w_comp_inv_p2;
- double w_comp_inv_p3;
- double w_comp_inv_n1;
- double w_comp_inv_n2;
- double w_comp_inv_n3;
- double w_eval_inv_p;
- double w_eval_inv_n;
- double w_comp_n;
- double w_comp_p;
-
- double dram_cell_I_on;
- double dram_cell_Vdd;
- double dram_cell_I_off_worst_case_len_temp;
- double dram_cell_C;
- double gm_sense_amp_latch;
-
- double w_nmos_b_mux;
- double w_nmos_sa_mux;
- double w_pmos_bl_precharge;
- double w_pmos_bl_eq;
- double MIN_GAP_BET_P_AND_N_DIFFS;
- double MIN_GAP_BET_SAME_TYPE_DIFFS;
- double HPOWERRAIL;
- double cell_h_def;
-
- double chip_layout_overhead;
- double macro_layout_overhead;
- double sckt_co_eff;
-
- double fringe_cap;
-
- uint64_t h_dec;
-
- DeviceType sram_cell; // SRAM cell transistor
- DeviceType dram_acc; // DRAM access transistor
- DeviceType dram_wl; // DRAM wordline transistor
- DeviceType peri_global; // peripheral global
- DeviceType cam_cell; // SRAM cell transistor
-
- InterconnectType wire_local;
- InterconnectType wire_inside_mat;
- InterconnectType wire_outside_mat;
-
- ScalingFactor scaling_factor;
-
- MemoryType sram;
- MemoryType dram;
- MemoryType cam;
-
- void display(uint32_t indent = 0);
-
- void reset()
- {
- dram_cell_Vdd = 0;
- dram_cell_I_on = 0;
- dram_cell_C = 0;
- vpp = 0;
-
- sense_delay = 0;
- sense_dy_power = 0;
- fringe_cap = 0;
+ sense_delay = 0;
+ sense_dy_power = 0;
+ fringe_cap = 0;
// horiz_dielectric_constant = 0;
// vert_dielectric_constant = 0;
// aspect_ratio = 0;
// miller_value = 0;
// ild_thickness = 0;
- dram_cell_I_off_worst_case_len_temp = 0;
+ dram_cell_I_off_worst_case_len_temp = 0;
- sram_cell.reset();
- dram_acc.reset();
- dram_wl.reset();
- peri_global.reset();
- cam_cell.reset();
+ sram_cell.reset();
+ dram_acc.reset();
+ dram_wl.reset();
+ peri_global.reset();
+ cam_cell.reset();
- scaling_factor.reset();
+ scaling_factor.reset();
- wire_local.reset();
- wire_inside_mat.reset();
- wire_outside_mat.reset();
+ wire_local.reset();
+ wire_inside_mat.reset();
+ wire_outside_mat.reset();
- sram.reset();
- dram.reset();
- cam.reset();
+ sram.reset();
+ dram.reset();
+ cam.reset();
- chip_layout_overhead = 0;
- macro_layout_overhead = 0;
- sckt_co_eff = 0;
- }
+ chip_layout_overhead = 0;
+ macro_layout_overhead = 0;
+ sckt_co_eff = 0;
+ }
};
-class DynamicParameter
-{
- public:
+class DynamicParameter {
+public:
bool is_tag;
bool pure_ram;
bool pure_cam;
@@ -313,8 +305,8 @@ class DynamicParameter
int num_so_b_mat;
int num_si_b_subbank;
int num_so_b_subbank;
- int num_si_b_bank_per_port;
- int num_so_b_bank_per_port;
+ int num_si_b_bank_per_port;
+ int num_so_b_bank_per_port;
int number_way_select_signals_mat;
int num_act_mats_hor_dir;
diff --git a/ext/mcpat/cacti/router.cc b/ext/mcpat/cacti/router.cc
index 06f170691..d3368d946 100644
--- a/ext/mcpat/cacti/router.cc
+++ b/ext/mcpat/cacti/router.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -41,57 +42,56 @@ Router::Router(
double I_,
double O_,
double M_
- ):flit_size(flit_size_),
- deviceType(dt),
- I(I_),
- O(O_),
- M(M_)
-{
- vc_buffer_size = vc_buf;
- vc_count = vc_c;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- double technology = g_ip->F_sz_um;
-
- Vdd = dt->Vdd;
-
- /*Crossbar parameters. Transmisson gate is employed for connector*/
- NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/
- PTtr = 20*technology*1e-6/2; /* pmos tr. length*/
- wt = 15*technology*1e-6/2; /*track width*/
- ht = 15*technology*1e-6/2; /*track height*/
+ ): flit_size(flit_size_),
+ deviceType(dt),
+ I(I_),
+ O(O_),
+ M(M_) {
+ vc_buffer_size = vc_buf;
+ vc_count = vc_c;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ double technology = g_ip->F_sz_um;
+
+ Vdd = dt->Vdd;
+
+ /*Crossbar parameters. Transmisson gate is employed for connector*/
+ NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/
+ PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. length*/
+ wt = 15 * technology * 1e-6 / 2; /*track width*/
+ ht = 15 * technology * 1e-6 / 2; /*track height*/
// I = 5; /*Number of crossbar input ports*/
// O = 5; /*Number of crossbar output ports*/
- NTi = 12.5*technology*1e-6/2;
- PTi = 25*technology*1e-6/2;
+ NTi = 12.5 * technology * 1e-6 / 2;
+ PTi = 25 * technology * 1e-6 / 2;
- NTid = 60*technology*1e-6/2; //m
- PTid = 120*technology*1e-6/2; // m
- NTod = 60*technology*1e-6/2; // m
- PTod = 120*technology*1e-6/2; // m
+ NTid = 60 * technology * 1e-6 / 2; //m
+ PTid = 120 * technology * 1e-6 / 2; // m
+ NTod = 60 * technology * 1e-6 / 2; // m
+ PTod = 120 * technology * 1e-6 / 2; // m
- calc_router_parameters();
+ calc_router_parameters();
}
-Router::~Router(){}
+Router::~Router() {}
double //wire cap with triple spacing
Router::Cw3(double length) {
- Wire wc(g_ip->wt, length, 1, 3, 3);
- return (wc.wire_cap(length));
+ Wire wc(g_ip->wt, length, 1, 3, 3);
+ return (wc.wire_cap(length));
}
/*Function to calculate the gate capacitance*/
double
Router::gate_cap(double w) {
- return (double) gate_C (w*1e6 /*u*/, 0);
+ return (double) gate_C (w*1e6 /*u*/, 0);
}
/*Function to calculate the diffusion capacitance*/
double
Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
- double s /*number of stacking transistors*/) {
- return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
+ double s /*number of stacking transistors*/) {
+ return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
}
@@ -100,212 +100,216 @@ Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
// Model for simple transmission gate
double
Router::transmission_buf_inpcap() {
- return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
+ return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1);
}
double
Router::transmission_buf_outcap() {
- return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
+ return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1);
}
double
Router::transmission_buf_ctrcap() {
- return gate_cap(NTtr)+gate_cap(PTtr);
+ return gate_cap(NTtr) + gate_cap(PTtr);
}
double
Router::crossbar_inpline() {
- return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
- gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
+ return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
+ gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
}
double
Router::crossbar_outline() {
- return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
- gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
+ return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
+ gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
}
double
Router::crossbar_ctrline() {
- return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
- diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
- gate_cap(NTi) + gate_cap(PTi));
+ return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
+ diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
+ gate_cap(NTi) + gate_cap(PTi));
}
double
Router::tr_crossbar_power() {
- return (crossbar_inpline()*Vdd*Vdd*flit_size/2 +
- crossbar_outline()*Vdd*Vdd*flit_size/2)*2;
+ return (crossbar_inpline()*Vdd*Vdd*flit_size / 2 +
+ crossbar_outline()*Vdd*Vdd*flit_size / 2) * 2;
}
-void Router::buffer_stats()
-{
- DynamicParameter dyn_p;
- dyn_p.is_tag = false;
- dyn_p.pure_cam = false;
- dyn_p.fully_assoc = false;
- dyn_p.pure_ram = true;
- dyn_p.is_dram = false;
- dyn_p.is_main_mem = false;
- dyn_p.num_subarrays = 1;
- dyn_p.num_mats = 1;
- dyn_p.Ndbl = 1;
- dyn_p.Ndwl = 1;
- dyn_p.Nspd = 1;
- dyn_p.deg_bl_muxing = 1;
- dyn_p.deg_senseamp_muxing_non_associativity = 1;
- dyn_p.Ndsam_lev_1 = 1;
- dyn_p.Ndsam_lev_2 = 1;
- dyn_p.Ndcm = 1;
- dyn_p.number_addr_bits_mat = 8;
- dyn_p.number_way_select_signals_mat = 1;
- dyn_p.number_subbanks_decode = 0;
- dyn_p.num_act_mats_hor_dir = 1;
- dyn_p.V_b_sense = Vdd; // FIXME check power calc.
- dyn_p.ram_cell_tech_type = 0;
- dyn_p.num_r_subarray = (int) vc_buffer_size;
- dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
- dyn_p.num_mats_h_dir = 1;
- dyn_p.num_mats_v_dir = 1;
- dyn_p.num_do_b_subbank = (int)flit_size;
- dyn_p.num_di_b_subbank = (int)flit_size;
- dyn_p.num_do_b_mat = (int) flit_size;
- dyn_p.num_di_b_mat = (int) flit_size;
- dyn_p.num_do_b_mat = (int) flit_size;
- dyn_p.num_di_b_mat = (int) flit_size;
- dyn_p.num_do_b_bank_per_port = (int) flit_size;
- dyn_p.num_di_b_bank_per_port = (int) flit_size;
- dyn_p.out_w = (int) flit_size;
-
- dyn_p.use_inp_params = 1;
- dyn_p.num_wr_ports = (unsigned int) vc_count;
- dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
- dyn_p.num_rw_ports = 0;
- dyn_p.num_se_rd_ports =0;
- dyn_p.num_search_ports =0;
-
-
-
- dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
- dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
- dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
- (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
- dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
-
- Mat buff(dyn_p);
- buff.compute_delays(0);
- buff.compute_power_energy();
- buffer.power.readOp = buff.power.readOp;
- buffer.power.writeOp = buffer.power.readOp; //FIXME
- buffer.area = buff.area;
+void Router::buffer_stats() {
+ DynamicParameter dyn_p;
+ dyn_p.is_tag = false;
+ dyn_p.pure_cam = false;
+ dyn_p.fully_assoc = false;
+ dyn_p.pure_ram = true;
+ dyn_p.is_dram = false;
+ dyn_p.is_main_mem = false;
+ dyn_p.num_subarrays = 1;
+ dyn_p.num_mats = 1;
+ dyn_p.Ndbl = 1;
+ dyn_p.Ndwl = 1;
+ dyn_p.Nspd = 1;
+ dyn_p.deg_bl_muxing = 1;
+ dyn_p.deg_senseamp_muxing_non_associativity = 1;
+ dyn_p.Ndsam_lev_1 = 1;
+ dyn_p.Ndsam_lev_2 = 1;
+ dyn_p.Ndcm = 1;
+ dyn_p.number_addr_bits_mat = 8;
+ dyn_p.number_way_select_signals_mat = 1;
+ dyn_p.number_subbanks_decode = 0;
+ dyn_p.num_act_mats_hor_dir = 1;
+ dyn_p.V_b_sense = Vdd; // FIXME check power calc.
+ dyn_p.ram_cell_tech_type = 0;
+ dyn_p.num_r_subarray = (int) vc_buffer_size;
+ dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
+ dyn_p.num_mats_h_dir = 1;
+ dyn_p.num_mats_v_dir = 1;
+ dyn_p.num_do_b_subbank = (int)flit_size;
+ dyn_p.num_di_b_subbank = (int)flit_size;
+ dyn_p.num_do_b_mat = (int) flit_size;
+ dyn_p.num_di_b_mat = (int) flit_size;
+ dyn_p.num_do_b_mat = (int) flit_size;
+ dyn_p.num_di_b_mat = (int) flit_size;
+ dyn_p.num_do_b_bank_per_port = (int) flit_size;
+ dyn_p.num_di_b_bank_per_port = (int) flit_size;
+ dyn_p.out_w = (int) flit_size;
+
+ dyn_p.use_inp_params = 1;
+ dyn_p.num_wr_ports = (unsigned int) vc_count;
+ dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
+ dyn_p.num_rw_ports = 0;
+ dyn_p.num_se_rd_ports = 0;
+ dyn_p.num_search_ports = 0;
+
+
+
+ dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
+ dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
+ dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
+ (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
+ dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
+
+ Mat buff(dyn_p);
+ buff.compute_delays(0);
+ buff.compute_power_energy();
+ buffer.power.readOp = buff.power.readOp;
+ buffer.power.writeOp = buffer.power.readOp; //FIXME
+ buffer.area = buff.area;
}
- void
-Router::cb_stats ()
-{
- if (1) {
- Crossbar c_b(I, O, flit_size);
- c_b.compute_power();
- crossbar.delay = c_b.delay;
- crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
- crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
- crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
- crossbar.area = c_b.area;
+void
+Router::cb_stats () {
+ if (1) {
+ Crossbar c_b(I, O, flit_size);
+ c_b.compute_power();
+ crossbar.delay = c_b.delay;
+ crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
+ crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
+ crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
+ crossbar.area = c_b.area;
// c_b.print_crossbar();
- }
- else {
- crossbar.power.readOp.dynamic = tr_crossbar_power();
- crossbar.power.readOp.leakage = flit_size * I * O *
- cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
- crossbar.power.readOp.gate_leakage = flit_size * I * O *
- cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
- }
+ } else {
+ crossbar.power.readOp.dynamic = tr_crossbar_power();
+ crossbar.power.readOp.leakage = flit_size * I * O *
+ cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
+ crossbar.power.readOp.gate_leakage = flit_size * I * O *
+ cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
+ }
}
void
-Router::get_router_power()
-{
- /* calculate buffer stats */
- buffer_stats();
-
- /* calculate cross-bar stats */
- cb_stats();
-
- /* calculate arbiter stats */
- Arbiter vcarb(vc_count, flit_size, buffer.area.w);
- Arbiter cbarb(I, flit_size, crossbar.area.w);
- vcarb.compute_power();
- cbarb.compute_power();
- arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
- cbarb.power.readOp.dynamic * O;
- arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
- cbarb.power.readOp.leakage * O;
- arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
- cbarb.power.readOp.gate_leakage * O;
+Router::get_router_power() {
+ /* calculate buffer stats */
+ buffer_stats();
+
+ /* calculate cross-bar stats */
+ cb_stats();
+
+ /* calculate arbiter stats */
+ Arbiter vcarb(vc_count, flit_size, buffer.area.w);
+ Arbiter cbarb(I, flit_size, crossbar.area.w);
+ vcarb.compute_power();
+ cbarb.compute_power();
+ arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
+ cbarb.power.readOp.dynamic * O;
+ arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
+ cbarb.power.readOp.leakage * O;
+ arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
+ cbarb.power.readOp.gate_leakage * O;
// arb_stats();
- power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
- crossbar.power.readOp.dynamic +
- arbiter.power.readOp.dynamic)*MIN(I, O)*M;
- double pppm_t[4] = {1,I,I,1};
- power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
+ power.readOp.dynamic = ((buffer.power.readOp.dynamic +
+ buffer.power.writeOp.dynamic) +
+ crossbar.power.readOp.dynamic +
+ arbiter.power.readOp.dynamic) * MIN(I, O) * M;
+ double pppm_t[4] = {1, I, I, 1};
+ power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) *
+ pppm_lkg;
}
- void
-Router::get_router_delay ()
-{
- FREQUENCY=5; // move this to config file --TODO
- cycle_time = (1/(double)FREQUENCY)*1e3; //ps
- delay = 4;
- max_cyc = 17 * g_tp.FO4; //s
- max_cyc *= 1e12; //ps
- if (cycle_time < max_cyc) {
- FREQUENCY = (1/max_cyc)*1e3; //GHz
- }
+void
+Router::get_router_delay () {
+ FREQUENCY = 5; // move this to config file --TODO
+ cycle_time = (1 / (double)FREQUENCY) * 1e3; //ps
+ delay = 4;
+ max_cyc = 17 * g_tp.FO4; //s
+ max_cyc *= 1e12; //ps
+ if (cycle_time < max_cyc) {
+ FREQUENCY = (1 / max_cyc) * 1e3; //GHz
+ }
}
- void
-Router::get_router_area()
-{
- area.h = I*buffer.area.h;
- area.w = buffer.area.w+crossbar.area.w;
+void
+Router::get_router_area() {
+ area.h = I * buffer.area.h;
+ area.w = buffer.area.w + crossbar.area.w;
}
- void
-Router::calc_router_parameters()
-{
- /* calculate router frequency and pipeline cycles */
- get_router_delay();
+void
+Router::calc_router_parameters() {
+ /* calculate router frequency and pipeline cycles */
+ get_router_delay();
- /* router power stats */
- get_router_power();
+ /* router power stats */
+ get_router_power();
- /* area stats */
- get_router_area();
+ /* area stats */
+ get_router_area();
}
- void
-Router::print_router()
-{
- cout << "\n\nRouter stats:\n";
- cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n";
- cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n";
- cout << "\tNo. of Virtual channels - " << vc_count << "\n";
- cout << "\tNo. of pipeline stages - " << delay << endl;
- cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
- cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n";
- cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n";
- cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n";
- cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n";
- cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n";
- cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<<arbiter.power.readOp.dynamic * 1e9 <<" (nJ)\n";
- cout << "\tArbiter leakage (VC arb + Crossbar arb) - "<<arbiter.power.readOp.leakage * 1e3 <<" (mW)\n";
+void
+Router::print_router() {
+ cout << "\n\nRouter stats:\n";
+ cout << "\tRouter Area - " << area.get_area()*1e-6 << "(mm^2)\n";
+ cout << "\tMaximum possible network frequency - " << (1 / max_cyc)*1e3
+ << "GHz\n";
+ cout << "\tNetwork frequency - " << FREQUENCY << " GHz\n";
+ cout << "\tNo. of Virtual channels - " << vc_count << "\n";
+ cout << "\tNo. of pipeline stages - " << delay << endl;
+ cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
+ cout << "\tNo. of buffer entries per virtual channel - "
+ << vc_buffer_size << "\n";
+ cout << "\tSimple buffer Area - " << buffer.area.get_area()*1e-6
+ << "(mm^2)\n";
+ cout << "\tSimple buffer access (Read) - "
+ << buffer.power.readOp.dynamic * 1e9 << " (nJ)\n";
+ cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3
+ << " (mW)\n";
+ cout << "\tCrossbar Area - " << crossbar.area.get_area()*1e-6
+ << "(mm^2)\n";
+ cout << "\tCross bar access energy - "
+ << crossbar.power.readOp.dynamic * 1e9 << " (nJ)\n";
+ cout << "\tCross bar leakage power - "
+ << crossbar.power.readOp.leakage * 1e3 << " (mW)\n";
+ cout << "\tArbiter access energy (VC arb + Crossbar arb) - "
+ << arbiter.power.readOp.dynamic * 1e9 << " (nJ)\n";
+ cout << "\tArbiter leakage (VC arb + Crossbar arb) - "
+ << arbiter.power.readOp.leakage * 1e3 << " (mW)\n";
}
diff --git a/ext/mcpat/cacti/router.h b/ext/mcpat/cacti/router.h
index 72ef44939..b7c4b7e69 100644
--- a/ext/mcpat/cacti/router.h
+++ b/ext/mcpat/cacti/router.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -47,9 +48,8 @@
#include "parameter.h"
#include "wire.h"
-class Router : public Component
-{
- public:
+class Router : public Component {
+public:
Router(
double flit_size_,
double vc_buf, /* vc size = vc_buffer_size * flit_size */
@@ -70,9 +70,9 @@ class Router : public Component
double vc_count;
double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
- private:
- TechnologyParameter::DeviceType *deviceType;
- double FREQUENCY; // move this to config file --TODO
+private:
+ TechnologyParameter::DeviceType *deviceType;
+ double FREQUENCY; // move this to config file --TODO
double Cw3(double len);
double gate_cap(double w);
double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
diff --git a/ext/mcpat/cacti/subarray.cc b/ext/mcpat/cacti/subarray.cc
index 7cbf7d990..f4b7de79b 100755
--- a/ext/mcpat/cacti/subarray.cc
+++ b/ext/mcpat/cacti/subarray.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -39,158 +40,152 @@
#include "subarray.h"
Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
- dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
- num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
- cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_)
-{
- //num_cols=7;
- //cout<<"num_cols ="<< num_cols <<endl;
- if (!(is_fa || dp.pure_cam))
- {
- num_cols +=(g_ip->add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead
- uint32_t ram_num_cells_wl_stitching =
- (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
- (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
-
- area.h = cell.h * num_rows;
-
- area.w = cell.w * num_cols +
- ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
- }
- else //cam fa
- {
-
- //should not add dummy row here since the dummy row do not need decoder
- if (is_fa)// fully associative cache
- {
- num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
- num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
- num_cols = num_cols_fa_cam + num_cols_fa_ram;
- }
- else
- {
- num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
- num_cols_fa_ram = 0;
- num_cols = num_cols_fa_cam;
- }
-
- area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
- area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
- + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_
- + 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves
- + 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM
- }
-
- assert(area.h>0);
- assert(area.w>0);
- compute_C();
+ dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
+ num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
+ cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) {
+ //num_cols=7;
+ //cout<<"num_cols ="<< num_cols <<endl;
+ if (!(is_fa || dp.pure_cam)) {
+ // ECC overhead
+ num_cols += (g_ip->add_ecc_b_ ? (int)ceil(num_cols /
+ num_bits_per_ecc_b_) : 0);
+ uint32_t ram_num_cells_wl_stitching =
+ (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
+ (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
+
+ area.h = cell.h * num_rows;
+
+ area.w = cell.w * num_cols +
+ ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
+ } else { //cam fa
+
+ //should not add dummy row here since the dummy row do not need decoder
+ if (is_fa) { // fully associative cache
+ num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
+ num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
+ num_cols = num_cols_fa_cam + num_cols_fa_ram;
+ } else {
+ num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
+ num_cols_fa_ram = 0;
+ num_cols = num_cols_fa_cam;
+ }
+
+ area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
+ area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
+ + ceil((num_cols_fa_cam + num_cols_fa_ram) /
+ sram_num_cells_wl_stitching_) *
+ g_tp.ram_wl_stitching_overhead_
+ //the overhead for the NAND gate to connect the two halves
+ + 16 * g_tp.wire_local.pitch
+ //the overhead for the drivers from matchline to wordline of RAM
+ + 128 * g_tp.wire_local.pitch;
+ }
+
+ assert(area.h > 0);
+ assert(area.w > 0);
+ compute_C();
}
-Subarray::~Subarray()
-{
+Subarray::~Subarray() {
}
-double Subarray::get_total_cell_area()
-{
+double Subarray::get_total_cell_area() {
// return (is_fa==false? cell.get_area() * num_rows * num_cols
// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
if (!(is_fa || dp.pure_cam))
- return (cell.get_area() * num_rows * num_cols);
- else if (is_fa)
- { //for FA, this area includes the dummy cells in SRAM arrays.
- //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
- //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
- return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
+ return (cell.get_area() * num_rows * num_cols);
+ else if (is_fa) {
+ //for FA, this area includes the dummy cells in SRAM arrays.
+ //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
+ //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
+ return (cam_cell.h * (num_rows + 1) *
+ (cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
+ } else {
+ return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam );
}
- else
- return (cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam );
}
-void Subarray::compute_C()
-{
- double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
- double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
- double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
- double C_b_row_drain_C;
-
- if (dp.is_dram)
- {
- C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
-
- if (dp.ram_cell_tech_type == comm_dram)
- {
- C_bl = num_rows * C_b_metal;
- }
- else
- {
- C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
- C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
+void Subarray::compute_C() {
+ double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
+ double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
+ double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
+ double C_b_row_drain_C;
+
+ if (dp.is_dram) {
+ C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
+
+ if (dp.ram_cell_tech_type == comm_dram) {
+ C_bl = num_rows * C_b_metal;
+ } else {
+ C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
+ C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
+ }
+ } else {
+ if (!(is_fa || dp.pure_cam)) {
+ C_wl = (gate_C_pass(g_tp.sram.cell_a_w,
+ (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0,
+ false, true) * 2 +
+ c_w_metal) * num_cols;
+ C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
+ C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
+ } else {
+ //Following is wordline not matchline
+ //CAM portion
+ c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
+ r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w,
+ (g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) /
+ 2.0, false, true) * 2 +
+ c_w_metal) * num_cols_fa_cam;
+ R_wl_cam = (r_w_metal) * num_cols_fa_cam;
+
+ if (!dp.pure_cam) {
+ //RAM portion
+ c_w_metal = cell.w * g_tp.wire_local.C_per_um;
+ r_w_metal = cell.w * g_tp.wire_local.R_per_um;
+ C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w,
+ (g_tp.sram.b_w - 2 *
+ g_tp.sram.cell_a_w) / 2.0, false,
+ true) * 2 +
+ c_w_metal) * num_cols_fa_ram;
+ R_wl_ram = (r_w_metal) * num_cols_fa_ram;
+ } else {
+ C_wl_ram = R_wl_ram = 0;
+ }
+ C_wl = C_wl_cam + C_wl_ram;
+ C_wl += (16 + 128) * g_tp.wire_local.pitch *
+ g_tp.wire_local.C_per_um;
+
+ R_wl = R_wl_cam + R_wl_ram;
+ R_wl += (16 + 128) * g_tp.wire_local.pitch *
+ g_tp.wire_local.R_per_um;
+
+ //there are two ways to write to a FA,
+ //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
+ //2) using separate wordline for read/write and search in RAM.
+ //We are using the second approach.
+
+ //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
+ C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
+ C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
+ C_bl_cam = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
+ //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
+ C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
+ C_bl = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
+
+ }
}
- }
- else
- {
- if (!(is_fa ||dp.pure_cam))
- {
- C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
- c_w_metal) * num_cols;
- C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
- C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
- }
- else
- {
- //Following is wordline not matchline
- //CAM portion
- c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
- r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 +
- c_w_metal) * num_cols_fa_cam;
- R_wl_cam = (r_w_metal) * num_cols_fa_cam;
-
- if (!dp.pure_cam)
- {
- //RAM portion
- c_w_metal = cell.w * g_tp.wire_local.C_per_um;
- r_w_metal = cell.w * g_tp.wire_local.R_per_um;
- C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
- c_w_metal) * num_cols_fa_ram;
- R_wl_ram = (r_w_metal) * num_cols_fa_ram;
- }
- else
- {
- C_wl_ram = R_wl_ram =0;
- }
- C_wl = C_wl_cam + C_wl_ram;
- C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um;
-
- R_wl = R_wl_cam + R_wl_ram;
- R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um;
-
- //there are two ways to write to a FA,
- //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
- //2) using separate wordline for read/write and search in RAM.
- //We are using the second approach.
-
- //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
- C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
- C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
- C_bl_cam = (num_rows+1) * (C_b_row_drain_C + C_b_metal);
- //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
- C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
- C_bl = (num_rows +1) * (C_b_row_drain_C + C_b_metal);
-
- }
- }
}
diff --git a/ext/mcpat/cacti/subarray.h b/ext/mcpat/cacti/subarray.h
index 5fb062420..50560a101 100755
--- a/ext/mcpat/cacti/subarray.h
+++ b/ext/mcpat/cacti/subarray.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -41,9 +42,8 @@
using namespace std;
-class Subarray : public Component
-{
- public:
+class Subarray : public Component {
+public:
Subarray(const DynamicParameter & dp, bool is_fa_);
~Subarray();
@@ -59,7 +59,7 @@ class Subarray : public Component
double C_wl, C_wl_cam, C_wl_ram;
double R_wl, R_wl_cam, R_wl_ram;
double C_bl, C_bl_cam;
- private:
+private:
void compute_C(); // compute bitline and wordline capacitance
};
diff --git a/ext/mcpat/cacti/technology.cc b/ext/mcpat/cacti/technology.cc
index a40c6eb44..f2e0ef196 100644
--- a/ext/mcpat/cacti/technology.cc
+++ b/ext/mcpat/cacti/technology.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -34,1445 +35,1381 @@
#include "parameter.h"
-double wire_resistance(double resistivity, double wire_width, double wire_thickness,
- double barrier_thickness, double dishing_thickness, double alpha_scatter)
-{
- double resistance;
- resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness));
- return(resistance);
+double wire_resistance(double resistivity, double wire_width,
+ double wire_thickness,
+ double barrier_thickness, double dishing_thickness,
+ double alpha_scatter) {
+ double resistance;
+ resistance = alpha_scatter * resistivity /
+ ((wire_thickness - barrier_thickness - dishing_thickness) *
+ (wire_width - 2 * barrier_thickness));
+ return(resistance);
}
-double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing,
- double ild_thickness, double miller_value, double horiz_dielectric_constant,
- double vert_dielectric_constant, double fringe_cap)
-{
- double vertical_cap, sidewall_cap, total_cap;
- vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness;
- sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing;
- total_cap = vertical_cap + sidewall_cap + fringe_cap;
- return(total_cap);
+double wire_capacitance(double wire_width, double wire_thickness,
+ double wire_spacing,
+ double ild_thickness, double miller_value,
+ double horiz_dielectric_constant,
+ double vert_dielectric_constant, double fringe_cap) {
+ double vertical_cap, sidewall_cap, total_cap;
+ vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness;
+ sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing;
+ total_cap = vertical_cap + sidewall_cap + fringe_cap;
+ return(total_cap);
}
-void init_tech_params(double technology, bool is_tag)
-{
- int iter, tech, tech_lo, tech_hi;
- double curr_alpha, curr_vpp;
- double wire_width, wire_thickness, wire_spacing,
- fringe_cap, pmos_to_nmos_sizing_r;
+void init_tech_params(double technology, bool is_tag) {
+ int iter, tech, tech_lo, tech_hi;
+ double curr_alpha, curr_vpp;
+ double wire_width, wire_thickness, wire_spacing,
+ fringe_cap, pmos_to_nmos_sizing_r;
// double aspect_ratio,ild_thickness, miller_value = 1.5, horiz_dielectric_constant, vert_dielectric_constant;
- double barrier_thickness, dishing_thickness, alpha_scatter;
- double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell;
-
- uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
- uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type;
-
- technology = technology * 1000.0; // in the unit of nm
-
- // initialize parameters
- g_tp.reset();
- double gmp_to_gmn_multiplier_periph_global = 0;
-
- double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram,
- curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram,
- curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram,
- curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp;
- double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data
- curr_asp_ratio_cell_cam;
- double SENSE_AMP_D, SENSE_AMP_P; // J
- double area_cell_dram = 0;
- double asp_ratio_cell_dram = 0;
- double area_cell_sram = 0;
- double asp_ratio_cell_sram = 0;
- double area_cell_cam = 0;
- double asp_ratio_cell_cam = 0;
- double mobility_eff_periph_global = 0;
- double Vdsat_periph_global = 0;
- double nmos_effective_resistance_multiplier;
- double width_dram_access_transistor;
-
- double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date
- double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn
- double curr_chip_layout_overhead = 0;
- double curr_macro_layout_overhead = 0;
- double curr_sckt_co_eff = 0;
-
- if (technology < 181 && technology > 179)
- {
+ double barrier_thickness, dishing_thickness, alpha_scatter;
+ double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell;
+
+ uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
+ uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type;
+
+ technology = technology * 1000.0; // in the unit of nm
+
+ // initialize parameters
+ g_tp.reset();
+ double gmp_to_gmn_multiplier_periph_global = 0;
+
+ double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram,
+ curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram,
+ curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram,
+ curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp;
+ double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data
+ curr_asp_ratio_cell_cam;
+ double SENSE_AMP_D, SENSE_AMP_P; // J
+ double area_cell_dram = 0;
+ double asp_ratio_cell_dram = 0;
+ double area_cell_sram = 0;
+ double asp_ratio_cell_sram = 0;
+ double area_cell_cam = 0;
+ double asp_ratio_cell_cam = 0;
+ double mobility_eff_periph_global = 0;
+ double Vdsat_periph_global = 0;
+ double nmos_effective_resistance_multiplier;
+ double width_dram_access_transistor;
+
+ double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date
+ double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn
+ double curr_chip_layout_overhead = 0;
+ double curr_macro_layout_overhead = 0;
+ double curr_sckt_co_eff = 0;
+
+ if (technology < 181 && technology > 179) {
tech_lo = 180;
tech_hi = 180;
- }
- else if (technology < 91 && technology > 89)
- {
- tech_lo = 90;
- tech_hi = 90;
- }
- else if (technology < 66 && technology > 64)
- {
- tech_lo = 65;
- tech_hi = 65;
- }
- else if (technology < 46 && technology > 44)
- {
- tech_lo = 45;
- tech_hi = 45;
- }
- else if (technology < 33 && technology > 31)
- {
- tech_lo = 32;
- tech_hi = 32;
- }
- else if (technology < 23 && technology > 21)
- {
- tech_lo = 22;
- tech_hi = 22;
- if (ram_cell_tech_type == 3 )
- {
- cout<<"current version does not support eDRAM technologies at 22nm"<<endl;
- exit(0);
- }
- }
-// else if (technology < 17 && technology > 15)
-// {
-// tech_lo = 16;
-// tech_hi = 16;
-// }
- else if (technology < 180 && technology > 90)
- {
- tech_lo = 180;
- tech_hi = 90;
- }
- else if (technology < 90 && technology > 65)
- {
- tech_lo = 90;
- tech_hi = 65;
- }
- else if (technology < 65 && technology > 45)
- {
- tech_lo = 65;
- tech_hi = 45;
- }
- else if (technology < 45 && technology > 32)
- {
- tech_lo = 45;
- tech_hi = 32;
- }
- else if (technology < 32 && technology > 22)
- {
- tech_lo = 32;
- tech_hi = 22;
+ } else if (technology < 91 && technology > 89) {
+ tech_lo = 90;
+ tech_hi = 90;
+ } else if (technology < 66 && technology > 64) {
+ tech_lo = 65;
+ tech_hi = 65;
+ } else if (technology < 46 && technology > 44) {
+ tech_lo = 45;
+ tech_hi = 45;
+ } else if (technology < 33 && technology > 31) {
+ tech_lo = 32;
+ tech_hi = 32;
+ } else if (technology < 23 && technology > 21) {
+ tech_lo = 22;
+ tech_hi = 22;
+ if (ram_cell_tech_type == 3 ) {
+ cout << "current version does not support eDRAM technologies at "
+ << "22nm" << endl;
+ exit(0);
+ }
+ } else if (technology < 180 && technology > 90) {
+ tech_lo = 180;
+ tech_hi = 90;
+ } else if (technology < 90 && technology > 65) {
+ tech_lo = 90;
+ tech_hi = 65;
+ } else if (technology < 65 && technology > 45) {
+ tech_lo = 65;
+ tech_hi = 45;
+ } else if (technology < 45 && technology > 32) {
+ tech_lo = 45;
+ tech_hi = 32;
+ } else if (technology < 32 && technology > 22) {
+ tech_lo = 32;
+ tech_hi = 22;
}
// else if (technology < 22 && technology > 16)
// {
// tech_lo = 22;
// tech_hi = 16;
// }
- else
- {
- cout<<"Invalid technology nodes"<<endl;
- exit(0);
+ else {
+ cout << "Invalid technology nodes" << endl;
+ exit(0);
}
- double vdd[NUMBER_TECH_FLAVORS];
- double Lphy[NUMBER_TECH_FLAVORS];
- double Lelec[NUMBER_TECH_FLAVORS];
- double t_ox[NUMBER_TECH_FLAVORS];
- double v_th[NUMBER_TECH_FLAVORS];
- double c_ox[NUMBER_TECH_FLAVORS];
- double mobility_eff[NUMBER_TECH_FLAVORS];
- double Vdsat[NUMBER_TECH_FLAVORS];
- double c_g_ideal[NUMBER_TECH_FLAVORS];
- double c_fringe[NUMBER_TECH_FLAVORS];
- double c_junc[NUMBER_TECH_FLAVORS];
- double I_on_n[NUMBER_TECH_FLAVORS];
- double I_on_p[NUMBER_TECH_FLAVORS];
- double Rnchannelon[NUMBER_TECH_FLAVORS];
- double Rpchannelon[NUMBER_TECH_FLAVORS];
- double n_to_p_eff_curr_drv_ratio[NUMBER_TECH_FLAVORS];
- double I_off_n[NUMBER_TECH_FLAVORS][101];
- double I_g_on_n[NUMBER_TECH_FLAVORS][101];
- //double I_off_p[NUMBER_TECH_FLAVORS][101];
- double gmp_to_gmn_multiplier[NUMBER_TECH_FLAVORS];
- //double curr_sckt_co_eff[NUMBER_TECH_FLAVORS];
- double long_channel_leakage_reduction[NUMBER_TECH_FLAVORS];
-
- for (iter = 0; iter <= 1; ++iter)
- {
- // linear interpolation
- if (iter == 0)
- {
- tech = tech_lo;
- if (tech_lo == tech_hi)
- {
- curr_alpha = 1;
- }
- else
- {
- curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
- }
- }
- else
- {
- tech = tech_hi;
- if (tech_lo == tech_hi)
- {
- break;
- }
- else
- {
- curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
- }
- }
+ double vdd[NUMBER_TECH_FLAVORS];
+ double Lphy[NUMBER_TECH_FLAVORS];
+ double Lelec[NUMBER_TECH_FLAVORS];
+ double t_ox[NUMBER_TECH_FLAVORS];
+ double v_th[NUMBER_TECH_FLAVORS];
+ double c_ox[NUMBER_TECH_FLAVORS];
+ double mobility_eff[NUMBER_TECH_FLAVORS];
+ double Vdsat[NUMBER_TECH_FLAVORS];
+ double c_g_ideal[NUMBER_TECH_FLAVORS];
+ double c_fringe[NUMBER_TECH_FLAVORS];
+ double c_junc[NUMBER_TECH_FLAVORS];
+ double I_on_n[NUMBER_TECH_FLAVORS];
+ double I_on_p[NUMBER_TECH_FLAVORS];
+ double Rnchannelon[NUMBER_TECH_FLAVORS];
+ double Rpchannelon[NUMBER_TECH_FLAVORS];
+ double n_to_p_eff_curr_drv_ratio[NUMBER_TECH_FLAVORS];
+ double I_off_n[NUMBER_TECH_FLAVORS][101];
+ double I_g_on_n[NUMBER_TECH_FLAVORS][101];
+ double gmp_to_gmn_multiplier[NUMBER_TECH_FLAVORS];
+ double long_channel_leakage_reduction[NUMBER_TECH_FLAVORS];
+
+ for (iter = 0; iter <= 1; ++iter) {
+ // linear interpolation
+ if (iter == 0) {
+ tech = tech_lo;
+ if (tech_lo == tech_hi) {
+ curr_alpha = 1;
+ } else {
+ curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi);
+ }
+ } else {
+ tech = tech_hi;
+ if (tech_lo == tech_hi) {
+ break;
+ } else {
+ curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi);
+ }
+ }
- if (tech == 180)
- {
- //180nm technology-node. Corresponds to year 1999 in ITRS
- //Only HP transistor was of interest that 180nm since leakage power was not a big issue. Performance was the king
- //MASTAR does not contain data for 0.18um process. The following parameters are projected based on ITRS 2000 update and IBM 0.18 Cu Spice input
- bool Aggre_proj = false;
- SENSE_AMP_D = .28e-9; // s
- SENSE_AMP_P = 14.7e-15; // J
- vdd[0] = 1.5;
- Lphy[0] = 0.12;//Lphy is the physical gate-length. micron
- Lelec[0] = 0.10;//Lelec is the electrical gate-length. micron
- t_ox[0] = 1.2e-3*(Aggre_proj? 1.9/1.2:2);//micron
- v_th[0] = Aggre_proj? 0.36 : 0.4407;//V
- c_ox[0] = 1.79e-14*(Aggre_proj? 1.9/1.2:2);//F/micron2
- mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 0.128*2; //V
- c_g_ideal[0] = (Aggre_proj? 1.9/1.2:2)*6.64e-16;//F/micron
- c_fringe[0] = (Aggre_proj? 1.9/1.2:2)*0.08e-15;//F/micron
- c_junc[0] = (Aggre_proj? 1.9/1.2:2)*1e-15;//F/micron2
- I_on_n[0] = 750e-6;//A/micron
- I_on_p[0] = 350e-6;//A/micron
- //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
- nmos_effective_resistance_multiplier = 1.54;
- n_to_p_eff_curr_drv_ratio[0] = 2.45;
- gmp_to_gmn_multiplier[0] = 1.22;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1;
- I_off_n[0][0] = 7e-10;//A/micron
- I_off_n[0][10] = 8.26e-10;
- I_off_n[0][20] = 9.74e-10;
- I_off_n[0][30] = 1.15e-9;
- I_off_n[0][40] = 1.35e-9;
- I_off_n[0][50] = 1.60e-9;
- I_off_n[0][60] = 1.88e-9;
- I_off_n[0][70] = 2.29e-9;
- I_off_n[0][80] = 2.70e-9;
- I_off_n[0][90] = 3.19e-9;
- I_off_n[0][100] = 3.76e-9;
-
- I_g_on_n[0][0] = 1.65e-10;//A/micron
- I_g_on_n[0][10] = 1.65e-10;
- I_g_on_n[0][20] = 1.65e-10;
- I_g_on_n[0][30] = 1.65e-10;
- I_g_on_n[0][40] = 1.65e-10;
- I_g_on_n[0][50] = 1.65e-10;
- I_g_on_n[0][60] = 1.65e-10;
- I_g_on_n[0][70] = 1.65e-10;
- I_g_on_n[0][80] = 1.65e-10;
- I_g_on_n[0][90] = 1.65e-10;
- I_g_on_n[0][100] = 1.65e-10;
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
- curr_asp_ratio_cell_cam = 2.92;//2.5
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm
- curr_core_tx_density = 1.25*0.7*0.7*0.4;
- curr_sckt_co_eff = 1.11;
- curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb
+ if (tech == 180) {
+ //180nm technology-node. Corresponds to year 1999 in ITRS
+ //Only HP transistor was of interest that 180nm since leakage power was not a big issue. Performance was the king
+ //MASTAR does not contain data for 0.18um process. The following parameters are projected based on ITRS 2000 update and IBM 0.18 Cu Spice input
+ bool Aggre_proj = false;
+ SENSE_AMP_D = .28e-9; // s
+ SENSE_AMP_P = 14.7e-15; // J
+ vdd[0] = 1.5;
+ Lphy[0] = 0.12;//Lphy is the physical gate-length. micron
+ Lelec[0] = 0.10;//Lelec is the electrical gate-length. micron
+ t_ox[0] = 1.2e-3 * (Aggre_proj ? 1.9 / 1.2 : 2);//micron
+ v_th[0] = Aggre_proj ? 0.36 : 0.4407;//V
+ c_ox[0] = 1.79e-14 * (Aggre_proj ? 1.9 / 1.2 : 2);//F/micron2
+ mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 0.128 * 2; //V
+ c_g_ideal[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 6.64e-16;//F/micron
+ c_fringe[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 0.08e-15;//F/micron
+ c_junc[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 1e-15;//F/micron2
+ I_on_n[0] = 750e-6;//A/micron
+ I_on_p[0] = 350e-6;//A/micron
+ //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
+ nmos_effective_resistance_multiplier = 1.54;
+ n_to_p_eff_curr_drv_ratio[0] = 2.45;
+ gmp_to_gmn_multiplier[0] = 1.22;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1;
+ I_off_n[0][0] = 7e-10;//A/micron
+ I_off_n[0][10] = 8.26e-10;
+ I_off_n[0][20] = 9.74e-10;
+ I_off_n[0][30] = 1.15e-9;
+ I_off_n[0][40] = 1.35e-9;
+ I_off_n[0][50] = 1.60e-9;
+ I_off_n[0][60] = 1.88e-9;
+ I_off_n[0][70] = 2.29e-9;
+ I_off_n[0][80] = 2.70e-9;
+ I_off_n[0][90] = 3.19e-9;
+ I_off_n[0][100] = 3.76e-9;
+
+ I_g_on_n[0][0] = 1.65e-10;//A/micron
+ I_g_on_n[0][10] = 1.65e-10;
+ I_g_on_n[0][20] = 1.65e-10;
+ I_g_on_n[0][30] = 1.65e-10;
+ I_g_on_n[0][40] = 1.65e-10;
+ I_g_on_n[0][50] = 1.65e-10;
+ I_g_on_n[0][60] = 1.65e-10;
+ I_g_on_n[0][70] = 1.65e-10;
+ I_g_on_n[0][80] = 1.65e-10;
+ I_g_on_n[0][90] = 1.65e-10;
+ I_g_on_n[0][100] = 1.65e-10;
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
+ curr_asp_ratio_cell_cam = 2.92;//2.5
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm
+ curr_core_tx_density = 1.25 * 0.7 * 0.7 * 0.4;
+ curr_sckt_co_eff = 1.11;
+ curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb
- }
+ }
- if (tech == 90)
- {
- SENSE_AMP_D = .28e-9; // s
- SENSE_AMP_P = 14.7e-15; // J
- //90nm technology-node. Corresponds to year 2004 in ITRS
- //ITRS HP device type
- vdd[0] = 1.2;
- Lphy[0] = 0.037;//Lphy is the physical gate-length. micron
- Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron
- t_ox[0] = 1.2e-3;//micron
- v_th[0] = 0.23707;//V
- c_ox[0] = 1.79e-14;//F/micron2
- mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 0.128; //V
- c_g_ideal[0] = 6.64e-16;//F/micron
- c_fringe[0] = 0.08e-15;//F/micron
- c_junc[0] = 1e-15;//F/micron2
- I_on_n[0] = 1076.9e-6;//A/micron
- I_on_p[0] = 712.6e-6;//A/micron
- //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
- nmos_effective_resistance_multiplier = 1.54;
- n_to_p_eff_curr_drv_ratio[0] = 2.45;
- gmp_to_gmn_multiplier[0] = 1.22;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1;
- I_off_n[0][0] = 3.24e-8;//A/micron
- I_off_n[0][10] = 4.01e-8;
- I_off_n[0][20] = 4.90e-8;
- I_off_n[0][30] = 5.92e-8;
- I_off_n[0][40] = 7.08e-8;
- I_off_n[0][50] = 8.38e-8;
- I_off_n[0][60] = 9.82e-8;
- I_off_n[0][70] = 1.14e-7;
- I_off_n[0][80] = 1.29e-7;
- I_off_n[0][90] = 1.43e-7;
- I_off_n[0][100] = 1.54e-7;
-
- I_g_on_n[0][0] = 1.65e-8;//A/micron
- I_g_on_n[0][10] = 1.65e-8;
- I_g_on_n[0][20] = 1.65e-8;
- I_g_on_n[0][30] = 1.65e-8;
- I_g_on_n[0][40] = 1.65e-8;
- I_g_on_n[0][50] = 1.65e-8;
- I_g_on_n[0][60] = 1.65e-8;
- I_g_on_n[0][70] = 1.65e-8;
- I_g_on_n[0][80] = 1.65e-8;
- I_g_on_n[0][90] = 1.65e-8;
- I_g_on_n[0][100] = 1.65e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.3;
- Lphy[1] = 0.075;
- Lelec[1] = 0.0486;
- t_ox[1] = 2.2e-3;
- v_th[1] = 0.48203;
- c_ox[1] = 1.22e-14;
- mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 0.373;
- c_g_ideal[1] = 9.15e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 503.6e-6;
- I_on_p[1] = 235.1e-6;
- nmos_effective_resistance_multiplier = 1.92;
- n_to_p_eff_curr_drv_ratio[1] = 2.44;
- gmp_to_gmn_multiplier[1] =0.88;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1;
- I_off_n[1][0] = 2.81e-12;
- I_off_n[1][10] = 4.76e-12;
- I_off_n[1][20] = 7.82e-12;
- I_off_n[1][30] = 1.25e-11;
- I_off_n[1][40] = 1.94e-11;
- I_off_n[1][50] = 2.94e-11;
- I_off_n[1][60] = 4.36e-11;
- I_off_n[1][70] = 6.32e-11;
- I_off_n[1][80] = 8.95e-11;
- I_off_n[1][90] = 1.25e-10;
- I_off_n[1][100] = 1.7e-10;
-
- I_g_on_n[1][0] = 3.87e-11;//A/micron
- I_g_on_n[1][10] = 3.87e-11;
- I_g_on_n[1][20] = 3.87e-11;
- I_g_on_n[1][30] = 3.87e-11;
- I_g_on_n[1][40] = 3.87e-11;
- I_g_on_n[1][50] = 3.87e-11;
- I_g_on_n[1][60] = 3.87e-11;
- I_g_on_n[1][70] = 3.87e-11;
- I_g_on_n[1][80] = 3.87e-11;
- I_g_on_n[1][90] = 3.87e-11;
- I_g_on_n[1][100] = 3.87e-11;
-
- //ITRS LOP device type
- vdd[2] = 0.9;
- Lphy[2] = 0.053;
- Lelec[2] = 0.0354;
- t_ox[2] = 1.5e-3;
- v_th[2] = 0.30764;
- c_ox[2] = 1.59e-14;
- mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 0.113;
- c_g_ideal[2] = 8.45e-16;
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 386.6e-6;
- I_on_p[2] = 209.7e-6;
- nmos_effective_resistance_multiplier = 1.77;
- n_to_p_eff_curr_drv_ratio[2] = 2.54;
- gmp_to_gmn_multiplier[2] = 0.98;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1;
- I_off_n[2][0] = 2.14e-9;
- I_off_n[2][10] = 2.9e-9;
- I_off_n[2][20] = 3.87e-9;
- I_off_n[2][30] = 5.07e-9;
- I_off_n[2][40] = 6.54e-9;
- I_off_n[2][50] = 8.27e-8;
- I_off_n[2][60] = 1.02e-7;
- I_off_n[2][70] = 1.20e-7;
- I_off_n[2][80] = 1.36e-8;
- I_off_n[2][90] = 1.52e-8;
- I_off_n[2][100] = 1.73e-8;
-
- I_g_on_n[2][0] = 4.31e-8;//A/micron
- I_g_on_n[2][10] = 4.31e-8;
- I_g_on_n[2][20] = 4.31e-8;
- I_g_on_n[2][30] = 4.31e-8;
- I_g_on_n[2][40] = 4.31e-8;
- I_g_on_n[2][50] = 4.31e-8;
- I_g_on_n[2][60] = 4.31e-8;
- I_g_on_n[2][70] = 4.31e-8;
- I_g_on_n[2][80] = 4.31e-8;
- I_g_on_n[2][90] = 4.31e-8;
- I_g_on_n[2][100] = 4.31e-8;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.2;
- Lphy[3] = 0.12;
- Lelec[3] = 0.0756;
- curr_v_th_dram_access_transistor = 0.4545;
- width_dram_access_transistor = 0.14;
- curr_I_on_dram_cell = 45e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 0.168;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.6;
- t_ox[3] = 2.2e-3;
- v_th[3] = 0.4545;
- c_ox[3] = 1.22e-14;
- mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.3;
- c_g_ideal[3] = 1.47e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 321.6e-6;
- I_on_p[3] = 203.3e-6;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.42e-11;
- I_off_n[3][10] = 2.25e-11;
- I_off_n[3][20] = 3.46e-11;
- I_off_n[3][30] = 5.18e-11;
- I_off_n[3][40] = 7.58e-11;
- I_off_n[3][50] = 1.08e-10;
- I_off_n[3][60] = 1.51e-10;
- I_off_n[3][70] = 2.02e-10;
- I_off_n[3][80] = 2.57e-10;
- I_off_n[3][90] = 3.14e-10;
- I_off_n[3][100] = 3.85e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.6;
- Lphy[3] = 0.09;
- Lelec[3] = 0.0576;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.09;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.09*0.09;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 3.7;
- t_ox[3] = 5.5e-3;
- v_th[3] = 1.0;
- c_ox[3] = 5.65e-15;
- mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.32;
- c_g_ideal[3] = 5.08e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1094.3e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.62;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 5.80e-15;
- I_off_n[3][10] = 1.21e-14;
- I_off_n[3][20] = 2.42e-14;
- I_off_n[3][30] = 4.65e-14;
- I_off_n[3][40] = 8.60e-14;
- I_off_n[3][50] = 1.54e-13;
- I_off_n[3][60] = 2.66e-13;
- I_off_n[3][70] = 4.45e-13;
- I_off_n[3][80] = 7.17e-13;
- I_off_n[3][90] = 1.11e-12;
- I_off_n[3][100] = 1.67e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
- curr_asp_ratio_cell_cam = 2.92;//2.5
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 1;
- curr_core_tx_density = 1.25*0.7*0.7;
- curr_sckt_co_eff = 1.1539;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ if (tech == 90) {
+ SENSE_AMP_D = .28e-9; // s
+ SENSE_AMP_P = 14.7e-15; // J
+ //90nm technology-node. Corresponds to year 2004 in ITRS
+ //ITRS HP device type
+ vdd[0] = 1.2;
+ Lphy[0] = 0.037;//Lphy is the physical gate-length. micron
+ Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron
+ t_ox[0] = 1.2e-3;//micron
+ v_th[0] = 0.23707;//V
+ c_ox[0] = 1.79e-14;//F/micron2
+ mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 0.128; //V
+ c_g_ideal[0] = 6.64e-16;//F/micron
+ c_fringe[0] = 0.08e-15;//F/micron
+ c_junc[0] = 1e-15;//F/micron2
+ I_on_n[0] = 1076.9e-6;//A/micron
+ I_on_p[0] = 712.6e-6;//A/micron
+ //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
+ nmos_effective_resistance_multiplier = 1.54;
+ n_to_p_eff_curr_drv_ratio[0] = 2.45;
+ gmp_to_gmn_multiplier[0] = 1.22;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1;
+ I_off_n[0][0] = 3.24e-8;//A/micron
+ I_off_n[0][10] = 4.01e-8;
+ I_off_n[0][20] = 4.90e-8;
+ I_off_n[0][30] = 5.92e-8;
+ I_off_n[0][40] = 7.08e-8;
+ I_off_n[0][50] = 8.38e-8;
+ I_off_n[0][60] = 9.82e-8;
+ I_off_n[0][70] = 1.14e-7;
+ I_off_n[0][80] = 1.29e-7;
+ I_off_n[0][90] = 1.43e-7;
+ I_off_n[0][100] = 1.54e-7;
+
+ I_g_on_n[0][0] = 1.65e-8;//A/micron
+ I_g_on_n[0][10] = 1.65e-8;
+ I_g_on_n[0][20] = 1.65e-8;
+ I_g_on_n[0][30] = 1.65e-8;
+ I_g_on_n[0][40] = 1.65e-8;
+ I_g_on_n[0][50] = 1.65e-8;
+ I_g_on_n[0][60] = 1.65e-8;
+ I_g_on_n[0][70] = 1.65e-8;
+ I_g_on_n[0][80] = 1.65e-8;
+ I_g_on_n[0][90] = 1.65e-8;
+ I_g_on_n[0][100] = 1.65e-8;
+
+ //ITRS LSTP device type
+ vdd[1] = 1.3;
+ Lphy[1] = 0.075;
+ Lelec[1] = 0.0486;
+ t_ox[1] = 2.2e-3;
+ v_th[1] = 0.48203;
+ c_ox[1] = 1.22e-14;
+ mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 0.373;
+ c_g_ideal[1] = 9.15e-16;
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 503.6e-6;
+ I_on_p[1] = 235.1e-6;
+ nmos_effective_resistance_multiplier = 1.92;
+ n_to_p_eff_curr_drv_ratio[1] = 2.44;
+ gmp_to_gmn_multiplier[1] = 0.88;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1;
+ I_off_n[1][0] = 2.81e-12;
+ I_off_n[1][10] = 4.76e-12;
+ I_off_n[1][20] = 7.82e-12;
+ I_off_n[1][30] = 1.25e-11;
+ I_off_n[1][40] = 1.94e-11;
+ I_off_n[1][50] = 2.94e-11;
+ I_off_n[1][60] = 4.36e-11;
+ I_off_n[1][70] = 6.32e-11;
+ I_off_n[1][80] = 8.95e-11;
+ I_off_n[1][90] = 1.25e-10;
+ I_off_n[1][100] = 1.7e-10;
+
+ I_g_on_n[1][0] = 3.87e-11;//A/micron
+ I_g_on_n[1][10] = 3.87e-11;
+ I_g_on_n[1][20] = 3.87e-11;
+ I_g_on_n[1][30] = 3.87e-11;
+ I_g_on_n[1][40] = 3.87e-11;
+ I_g_on_n[1][50] = 3.87e-11;
+ I_g_on_n[1][60] = 3.87e-11;
+ I_g_on_n[1][70] = 3.87e-11;
+ I_g_on_n[1][80] = 3.87e-11;
+ I_g_on_n[1][90] = 3.87e-11;
+ I_g_on_n[1][100] = 3.87e-11;
+
+ //ITRS LOP device type
+ vdd[2] = 0.9;
+ Lphy[2] = 0.053;
+ Lelec[2] = 0.0354;
+ t_ox[2] = 1.5e-3;
+ v_th[2] = 0.30764;
+ c_ox[2] = 1.59e-14;
+ mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 0.113;
+ c_g_ideal[2] = 8.45e-16;
+ c_fringe[2] = 0.08e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 386.6e-6;
+ I_on_p[2] = 209.7e-6;
+ nmos_effective_resistance_multiplier = 1.77;
+ n_to_p_eff_curr_drv_ratio[2] = 2.54;
+ gmp_to_gmn_multiplier[2] = 0.98;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1;
+ I_off_n[2][0] = 2.14e-9;
+ I_off_n[2][10] = 2.9e-9;
+ I_off_n[2][20] = 3.87e-9;
+ I_off_n[2][30] = 5.07e-9;
+ I_off_n[2][40] = 6.54e-9;
+ I_off_n[2][50] = 8.27e-8;
+ I_off_n[2][60] = 1.02e-7;
+ I_off_n[2][70] = 1.20e-7;
+ I_off_n[2][80] = 1.36e-8;
+ I_off_n[2][90] = 1.52e-8;
+ I_off_n[2][100] = 1.73e-8;
+
+ I_g_on_n[2][0] = 4.31e-8;//A/micron
+ I_g_on_n[2][10] = 4.31e-8;
+ I_g_on_n[2][20] = 4.31e-8;
+ I_g_on_n[2][30] = 4.31e-8;
+ I_g_on_n[2][40] = 4.31e-8;
+ I_g_on_n[2][50] = 4.31e-8;
+ I_g_on_n[2][60] = 4.31e-8;
+ I_g_on_n[2][70] = 4.31e-8;
+ I_g_on_n[2][80] = 4.31e-8;
+ I_g_on_n[2][90] = 4.31e-8;
+ I_g_on_n[2][100] = 4.31e-8;
+
+ if (ram_cell_tech_type == lp_dram) {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.2;
+ Lphy[3] = 0.12;
+ Lelec[3] = 0.0756;
+ curr_v_th_dram_access_transistor = 0.4545;
+ width_dram_access_transistor = 0.14;
+ curr_I_on_dram_cell = 45e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 0.168;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.6;
+ t_ox[3] = 2.2e-3;
+ v_th[3] = 0.4545;
+ c_ox[3] = 1.22e-14;
+ mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.3;
+ c_g_ideal[3] = 1.47e-15;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 321.6e-6;
+ I_on_p[3] = 203.3e-6;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.42e-11;
+ I_off_n[3][10] = 2.25e-11;
+ I_off_n[3][20] = 3.46e-11;
+ I_off_n[3][30] = 5.18e-11;
+ I_off_n[3][40] = 7.58e-11;
+ I_off_n[3][50] = 1.08e-10;
+ I_off_n[3][60] = 1.51e-10;
+ I_off_n[3][70] = 2.02e-10;
+ I_off_n[3][80] = 2.57e-10;
+ I_off_n[3][90] = 3.14e-10;
+ I_off_n[3][100] = 3.85e-10;
+ } else if (ram_cell_tech_type == comm_dram) {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.6;
+ Lphy[3] = 0.09;
+ Lelec[3] = 0.0576;
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.09;
+ curr_I_on_dram_cell = 20e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6 * 0.09 * 0.09;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 3.7;
+ t_ox[3] = 5.5e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 5.65e-15;
+ mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.32;
+ c_g_ideal[3] = 5.08e-16;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 1094.3e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.62;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 5.80e-15;
+ I_off_n[3][10] = 1.21e-14;
+ I_off_n[3][20] = 2.42e-14;
+ I_off_n[3][30] = 4.65e-14;
+ I_off_n[3][40] = 8.60e-14;
+ I_off_n[3][50] = 1.54e-13;
+ I_off_n[3][60] = 2.66e-13;
+ I_off_n[3][70] = 4.45e-13;
+ I_off_n[3][80] = 7.17e-13;
+ I_off_n[3][90] = 1.11e-12;
+ I_off_n[3][100] = 1.67e-12;
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
+ curr_asp_ratio_cell_cam = 2.92;//2.5
+ //Empirical undifferentiated core/FU coefficient
+ curr_logic_scaling_co_eff = 1;
+ curr_core_tx_density = 1.25 * 0.7 * 0.7;
+ curr_sckt_co_eff = 1.1539;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ }
- if (tech == 65)
- { //65nm technology-node. Corresponds to year 2007 in ITRS
- //ITRS HP device type
- SENSE_AMP_D = .2e-9; // s
- SENSE_AMP_P = 5.7e-15; // J
- vdd[0] = 1.1;
- Lphy[0] = 0.025;
- Lelec[0] = 0.019;
- t_ox[0] = 1.1e-3;
- v_th[0] = .19491;
- c_ox[0] = 1.88e-14;
- mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 7.71e-2;
- c_g_ideal[0] = 4.69e-16;
- c_fringe[0] = 0.077e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 1197.2e-6;
- I_on_p[0] = 870.8e-6;
- nmos_effective_resistance_multiplier = 1.50;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
- long_channel_leakage_reduction[0] = 1/3.74;
- //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first
- //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74.
- I_off_n[0][0] = 1.96e-7;
- I_off_n[0][10] = 2.29e-7;
- I_off_n[0][20] = 2.66e-7;
- I_off_n[0][30] = 3.05e-7;
- I_off_n[0][40] = 3.49e-7;
- I_off_n[0][50] = 3.95e-7;
- I_off_n[0][60] = 4.45e-7;
- I_off_n[0][70] = 4.97e-7;
- I_off_n[0][80] = 5.48e-7;
- I_off_n[0][90] = 5.94e-7;
- I_off_n[0][100] = 6.3e-7;
- I_g_on_n[0][0] = 4.09e-8;//A/micron
- I_g_on_n[0][10] = 4.09e-8;
- I_g_on_n[0][20] = 4.09e-8;
- I_g_on_n[0][30] = 4.09e-8;
- I_g_on_n[0][40] = 4.09e-8;
- I_g_on_n[0][50] = 4.09e-8;
- I_g_on_n[0][60] = 4.09e-8;
- I_g_on_n[0][70] = 4.09e-8;
- I_g_on_n[0][80] = 4.09e-8;
- I_g_on_n[0][90] = 4.09e-8;
- I_g_on_n[0][100] = 4.09e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.2;
- Lphy[1] = 0.045;
- Lelec[1] = 0.0298;
- t_ox[1] = 1.9e-3;
- v_th[1] = 0.52354;
- c_ox[1] = 1.36e-14;
- mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 0.128;
- c_g_ideal[1] = 6.14e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 519.2e-6;
- I_on_p[1] = 266e-6;
- nmos_effective_resistance_multiplier = 1.96;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/2.82;
- I_off_n[1][0] = 9.12e-12;
- I_off_n[1][10] = 1.49e-11;
- I_off_n[1][20] = 2.36e-11;
- I_off_n[1][30] = 3.64e-11;
- I_off_n[1][40] = 5.48e-11;
- I_off_n[1][50] = 8.05e-11;
- I_off_n[1][60] = 1.15e-10;
- I_off_n[1][70] = 1.59e-10;
- I_off_n[1][80] = 2.1e-10;
- I_off_n[1][90] = 2.62e-10;
- I_off_n[1][100] = 3.21e-10;
-
- I_g_on_n[1][0] = 1.09e-10;//A/micron
- I_g_on_n[1][10] = 1.09e-10;
- I_g_on_n[1][20] = 1.09e-10;
- I_g_on_n[1][30] = 1.09e-10;
- I_g_on_n[1][40] = 1.09e-10;
- I_g_on_n[1][50] = 1.09e-10;
- I_g_on_n[1][60] = 1.09e-10;
- I_g_on_n[1][70] = 1.09e-10;
- I_g_on_n[1][80] = 1.09e-10;
- I_g_on_n[1][90] = 1.09e-10;
- I_g_on_n[1][100] = 1.09e-10;
-
- //ITRS LOP device type
- vdd[2] = 0.8;
- Lphy[2] = 0.032;
- Lelec[2] = 0.0216;
- t_ox[2] = 1.2e-3;
- v_th[2] = 0.28512;
- c_ox[2] = 1.87e-14;
- mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 0.292;
- c_g_ideal[2] = 6e-16;
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 573.1e-6;
- I_on_p[2] = 340.6e-6;
- nmos_effective_resistance_multiplier = 1.82;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/2.05;
- I_off_n[2][0] = 4.9e-9;
- I_off_n[2][10] = 6.49e-9;
- I_off_n[2][20] = 8.45e-9;
- I_off_n[2][30] = 1.08e-8;
- I_off_n[2][40] = 1.37e-8;
- I_off_n[2][50] = 1.71e-8;
- I_off_n[2][60] = 2.09e-8;
- I_off_n[2][70] = 2.48e-8;
- I_off_n[2][80] = 2.84e-8;
- I_off_n[2][90] = 3.13e-8;
- I_off_n[2][100] = 3.42e-8;
-
- I_g_on_n[2][0] = 9.61e-9;//A/micron
- I_g_on_n[2][10] = 9.61e-9;
- I_g_on_n[2][20] = 9.61e-9;
- I_g_on_n[2][30] = 9.61e-9;
- I_g_on_n[2][40] = 9.61e-9;
- I_g_on_n[2][50] = 9.61e-9;
- I_g_on_n[2][60] = 9.61e-9;
- I_g_on_n[2][70] = 9.61e-9;
- I_g_on_n[2][80] = 9.61e-9;
- I_g_on_n[2][90] = 9.61e-9;
- I_g_on_n[2][100] = 9.61e-9;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.2;
- Lphy[3] = 0.12;
- Lelec[3] = 0.0756;
- curr_v_th_dram_access_transistor = 0.43806;
- width_dram_access_transistor = 0.09;
- curr_I_on_dram_cell = 36e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 0.11;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.6;
- t_ox[3] = 2.2e-3;
- v_th[3] = 0.43806;
- c_ox[3] = 1.22e-14;
- mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.43806;
- c_g_ideal[3] = 1.46e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15 ;
- I_on_n[3] = 399.8e-6;
- I_on_p[3] = 243.4e-6;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 2.23e-11;
- I_off_n[3][10] = 3.46e-11;
- I_off_n[3][20] = 5.24e-11;
- I_off_n[3][30] = 7.75e-11;
- I_off_n[3][40] = 1.12e-10;
- I_off_n[3][50] = 1.58e-10;
- I_off_n[3][60] = 2.18e-10;
- I_off_n[3][70] = 2.88e-10;
- I_off_n[3][80] = 3.63e-10;
- I_off_n[3][90] = 4.41e-10;
- I_off_n[3][100] = 5.36e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.3;
- Lphy[3] = 0.065;
- Lelec[3] = 0.0426;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.065;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.065*0.065;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 3.3;
- t_ox[3] = 5e-3;
- v_th[3] = 1.0;
- c_ox[3] = 6.16e-15;
- mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.385;
- c_g_ideal[3] = 4e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15 ;
- I_on_n[3] = 1031e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 2.39;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.80e-14;
- I_off_n[3][10] = 3.64e-14;
- I_off_n[3][20] = 7.03e-14;
- I_off_n[3][30] = 1.31e-13;
- I_off_n[3][40] = 2.35e-13;
- I_off_n[3][50] = 4.09e-13;
- I_off_n[3][60] = 6.89e-13;
- I_off_n[3][70] = 1.13e-12;
- I_off_n[3][80] = 1.78e-12;
- I_off_n[3][90] = 2.71e-12;
- I_off_n[3][100] = 3.99e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor
- curr_core_tx_density = 1.25*0.7;
- curr_sckt_co_eff = 1.1359;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ if (tech == 65) {
+ //65nm technology-node. Corresponds to year 2007 in ITRS
+ //ITRS HP device type
+ SENSE_AMP_D = .2e-9; // s
+ SENSE_AMP_P = 5.7e-15; // J
+ vdd[0] = 1.1;
+ Lphy[0] = 0.025;
+ Lelec[0] = 0.019;
+ t_ox[0] = 1.1e-3;
+ v_th[0] = .19491;
+ c_ox[0] = 1.88e-14;
+ mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[0] = 7.71e-2;
+ c_g_ideal[0] = 4.69e-16;
+ c_fringe[0] = 0.077e-15;
+ c_junc[0] = 1e-15;
+ I_on_n[0] = 1197.2e-6;
+ I_on_p[0] = 870.8e-6;
+ nmos_effective_resistance_multiplier = 1.50;
+ n_to_p_eff_curr_drv_ratio[0] = 2.41;
+ gmp_to_gmn_multiplier[0] = 1.38;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
+ long_channel_leakage_reduction[0] = 1 / 3.74;
+ //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first
+ //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74.
+ I_off_n[0][0] = 1.96e-7;
+ I_off_n[0][10] = 2.29e-7;
+ I_off_n[0][20] = 2.66e-7;
+ I_off_n[0][30] = 3.05e-7;
+ I_off_n[0][40] = 3.49e-7;
+ I_off_n[0][50] = 3.95e-7;
+ I_off_n[0][60] = 4.45e-7;
+ I_off_n[0][70] = 4.97e-7;
+ I_off_n[0][80] = 5.48e-7;
+ I_off_n[0][90] = 5.94e-7;
+ I_off_n[0][100] = 6.3e-7;
+ I_g_on_n[0][0] = 4.09e-8;//A/micron
+ I_g_on_n[0][10] = 4.09e-8;
+ I_g_on_n[0][20] = 4.09e-8;
+ I_g_on_n[0][30] = 4.09e-8;
+ I_g_on_n[0][40] = 4.09e-8;
+ I_g_on_n[0][50] = 4.09e-8;
+ I_g_on_n[0][60] = 4.09e-8;
+ I_g_on_n[0][70] = 4.09e-8;
+ I_g_on_n[0][80] = 4.09e-8;
+ I_g_on_n[0][90] = 4.09e-8;
+ I_g_on_n[0][100] = 4.09e-8;
+
+ //ITRS LSTP device type
+ vdd[1] = 1.2;
+ Lphy[1] = 0.045;
+ Lelec[1] = 0.0298;
+ t_ox[1] = 1.9e-3;
+ v_th[1] = 0.52354;
+ c_ox[1] = 1.36e-14;
+ mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 0.128;
+ c_g_ideal[1] = 6.14e-16;
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 519.2e-6;
+ I_on_p[1] = 266e-6;
+ nmos_effective_resistance_multiplier = 1.96;
+ n_to_p_eff_curr_drv_ratio[1] = 2.23;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1 / 2.82;
+ I_off_n[1][0] = 9.12e-12;
+ I_off_n[1][10] = 1.49e-11;
+ I_off_n[1][20] = 2.36e-11;
+ I_off_n[1][30] = 3.64e-11;
+ I_off_n[1][40] = 5.48e-11;
+ I_off_n[1][50] = 8.05e-11;
+ I_off_n[1][60] = 1.15e-10;
+ I_off_n[1][70] = 1.59e-10;
+ I_off_n[1][80] = 2.1e-10;
+ I_off_n[1][90] = 2.62e-10;
+ I_off_n[1][100] = 3.21e-10;
+
+ I_g_on_n[1][0] = 1.09e-10;//A/micron
+ I_g_on_n[1][10] = 1.09e-10;
+ I_g_on_n[1][20] = 1.09e-10;
+ I_g_on_n[1][30] = 1.09e-10;
+ I_g_on_n[1][40] = 1.09e-10;
+ I_g_on_n[1][50] = 1.09e-10;
+ I_g_on_n[1][60] = 1.09e-10;
+ I_g_on_n[1][70] = 1.09e-10;
+ I_g_on_n[1][80] = 1.09e-10;
+ I_g_on_n[1][90] = 1.09e-10;
+ I_g_on_n[1][100] = 1.09e-10;
+
+ //ITRS LOP device type
+ vdd[2] = 0.8;
+ Lphy[2] = 0.032;
+ Lelec[2] = 0.0216;
+ t_ox[2] = 1.2e-3;
+ v_th[2] = 0.28512;
+ c_ox[2] = 1.87e-14;
+ mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 0.292;
+ c_g_ideal[2] = 6e-16;
+ c_fringe[2] = 0.08e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 573.1e-6;
+ I_on_p[2] = 340.6e-6;
+ nmos_effective_resistance_multiplier = 1.82;
+ n_to_p_eff_curr_drv_ratio[2] = 2.28;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1 / 2.05;
+ I_off_n[2][0] = 4.9e-9;
+ I_off_n[2][10] = 6.49e-9;
+ I_off_n[2][20] = 8.45e-9;
+ I_off_n[2][30] = 1.08e-8;
+ I_off_n[2][40] = 1.37e-8;
+ I_off_n[2][50] = 1.71e-8;
+ I_off_n[2][60] = 2.09e-8;
+ I_off_n[2][70] = 2.48e-8;
+ I_off_n[2][80] = 2.84e-8;
+ I_off_n[2][90] = 3.13e-8;
+ I_off_n[2][100] = 3.42e-8;
+
+ I_g_on_n[2][0] = 9.61e-9;//A/micron
+ I_g_on_n[2][10] = 9.61e-9;
+ I_g_on_n[2][20] = 9.61e-9;
+ I_g_on_n[2][30] = 9.61e-9;
+ I_g_on_n[2][40] = 9.61e-9;
+ I_g_on_n[2][50] = 9.61e-9;
+ I_g_on_n[2][60] = 9.61e-9;
+ I_g_on_n[2][70] = 9.61e-9;
+ I_g_on_n[2][80] = 9.61e-9;
+ I_g_on_n[2][90] = 9.61e-9;
+ I_g_on_n[2][100] = 9.61e-9;
+
+ if (ram_cell_tech_type == lp_dram) {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.2;
+ Lphy[3] = 0.12;
+ Lelec[3] = 0.0756;
+ curr_v_th_dram_access_transistor = 0.43806;
+ width_dram_access_transistor = 0.09;
+ curr_I_on_dram_cell = 36e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 0.11;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.6;
+ t_ox[3] = 2.2e-3;
+ v_th[3] = 0.43806;
+ c_ox[3] = 1.22e-14;
+ mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.43806;
+ c_g_ideal[3] = 1.46e-15;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15 ;
+ I_on_n[3] = 399.8e-6;
+ I_on_p[3] = 243.4e-6;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 2.23e-11;
+ I_off_n[3][10] = 3.46e-11;
+ I_off_n[3][20] = 5.24e-11;
+ I_off_n[3][30] = 7.75e-11;
+ I_off_n[3][40] = 1.12e-10;
+ I_off_n[3][50] = 1.58e-10;
+ I_off_n[3][60] = 2.18e-10;
+ I_off_n[3][70] = 2.88e-10;
+ I_off_n[3][80] = 3.63e-10;
+ I_off_n[3][90] = 4.41e-10;
+ I_off_n[3][100] = 5.36e-10;
+ } else if (ram_cell_tech_type == comm_dram) {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.3;
+ Lphy[3] = 0.065;
+ Lelec[3] = 0.0426;
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.065;
+ curr_I_on_dram_cell = 20e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6 * 0.065 * 0.065;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 3.3;
+ t_ox[3] = 5e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 6.16e-15;
+ mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.385;
+ c_g_ideal[3] = 4e-16;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15 ;
+ I_on_n[3] = 1031e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.69;
+ n_to_p_eff_curr_drv_ratio[3] = 2.39;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.80e-14;
+ I_off_n[3][10] = 3.64e-14;
+ I_off_n[3][20] = 7.03e-14;
+ I_off_n[3][30] = 1.31e-13;
+ I_off_n[3][40] = 2.35e-13;
+ I_off_n[3][50] = 4.09e-13;
+ I_off_n[3][60] = 6.89e-13;
+ I_off_n[3][70] = 1.13e-12;
+ I_off_n[3][80] = 1.78e-12;
+ I_off_n[3][90] = 2.71e-12;
+ I_off_n[3][100] = 3.99e-12;
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferentiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor
+ curr_core_tx_density = 1.25 * 0.7;
+ curr_sckt_co_eff = 1.1359;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
- if (tech == 45)
- { //45nm technology-node. Corresponds to year 2010 in ITRS
- //ITRS HP device type
- SENSE_AMP_D = .04e-9; // s
- SENSE_AMP_P = 2.7e-15; // J
- vdd[0] = 1.0;
- Lphy[0] = 0.018;
- Lelec[0] = 0.01345;
- t_ox[0] = 0.65e-3;
- v_th[0] = .18035;
- c_ox[0] = 3.77e-14;
- mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 9.38E-2;
- c_g_ideal[0] = 6.78e-16;
- c_fringe[0] = 0.05e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 2046.6e-6;
- //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of
- //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm
- I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI
- nmos_effective_resistance_multiplier = 1.51;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
- long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74
- I_off_n[0][0] = 2.8e-7;
- I_off_n[0][10] = 3.28e-7;
- I_off_n[0][20] = 3.81e-7;
- I_off_n[0][30] = 4.39e-7;
- I_off_n[0][40] = 5.02e-7;
- I_off_n[0][50] = 5.69e-7;
- I_off_n[0][60] = 6.42e-7;
- I_off_n[0][70] = 7.2e-7;
- I_off_n[0][80] = 8.03e-7;
- I_off_n[0][90] = 8.91e-7;
- I_off_n[0][100] = 9.84e-7;
-
- I_g_on_n[0][0] = 3.59e-8;//A/micron
- I_g_on_n[0][10] = 3.59e-8;
- I_g_on_n[0][20] = 3.59e-8;
- I_g_on_n[0][30] = 3.59e-8;
- I_g_on_n[0][40] = 3.59e-8;
- I_g_on_n[0][50] = 3.59e-8;
- I_g_on_n[0][60] = 3.59e-8;
- I_g_on_n[0][70] = 3.59e-8;
- I_g_on_n[0][80] = 3.59e-8;
- I_g_on_n[0][90] = 3.59e-8;
- I_g_on_n[0][100] = 3.59e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.1;
- Lphy[1] = 0.028;
- Lelec[1] = 0.0212;
- t_ox[1] = 1.4e-3;
- v_th[1] = 0.50245;
- c_ox[1] = 2.01e-14;
- mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 9.12e-2;
- c_g_ideal[1] = 5.18e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 666.2e-6;
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/2.08;
- I_off_n[1][0] = 1.01e-11;
- I_off_n[1][10] = 1.65e-11;
- I_off_n[1][20] = 2.62e-11;
- I_off_n[1][30] = 4.06e-11;
- I_off_n[1][40] = 6.12e-11;
- I_off_n[1][50] = 9.02e-11;
- I_off_n[1][60] = 1.3e-10;
- I_off_n[1][70] = 1.83e-10;
- I_off_n[1][80] = 2.51e-10;
- I_off_n[1][90] = 3.29e-10;
- I_off_n[1][100] = 4.1e-10;
-
- I_g_on_n[1][0] = 9.47e-12;//A/micron
- I_g_on_n[1][10] = 9.47e-12;
- I_g_on_n[1][20] = 9.47e-12;
- I_g_on_n[1][30] = 9.47e-12;
- I_g_on_n[1][40] = 9.47e-12;
- I_g_on_n[1][50] = 9.47e-12;
- I_g_on_n[1][60] = 9.47e-12;
- I_g_on_n[1][70] = 9.47e-12;
- I_g_on_n[1][80] = 9.47e-12;
- I_g_on_n[1][90] = 9.47e-12;
- I_g_on_n[1][100] = 9.47e-12;
-
- //ITRS LOP device type
- vdd[2] = 0.7;
- Lphy[2] = 0.022;
- Lelec[2] = 0.016;
- t_ox[2] = 0.9e-3;
- v_th[2] = 0.22599;
- c_ox[2] = 2.82e-14;//F/micron2
- mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 5.71e-2;
- c_g_ideal[2] = 6.2e-16;
- c_fringe[2] = 0.073e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 748.9e-6;
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.76;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/1.92;
- I_off_n[2][0] = 4.03e-9;
- I_off_n[2][10] = 5.02e-9;
- I_off_n[2][20] = 6.18e-9;
- I_off_n[2][30] = 7.51e-9;
- I_off_n[2][40] = 9.04e-9;
- I_off_n[2][50] = 1.08e-8;
- I_off_n[2][60] = 1.27e-8;
- I_off_n[2][70] = 1.47e-8;
- I_off_n[2][80] = 1.66e-8;
- I_off_n[2][90] = 1.84e-8;
- I_off_n[2][100] = 2.03e-8;
-
- I_g_on_n[2][0] = 3.24e-8;//A/micron
- I_g_on_n[2][10] = 4.01e-8;
- I_g_on_n[2][20] = 4.90e-8;
- I_g_on_n[2][30] = 5.92e-8;
- I_g_on_n[2][40] = 7.08e-8;
- I_g_on_n[2][50] = 8.38e-8;
- I_g_on_n[2][60] = 9.82e-8;
- I_g_on_n[2][70] = 1.14e-7;
- I_g_on_n[2][80] = 1.29e-7;
- I_g_on_n[2][90] = 1.43e-7;
- I_g_on_n[2][100] = 1.54e-7;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.1;
- Lphy[3] = 0.078;
- Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 0.44559;
- width_dram_access_transistor = 0.079;
- curr_I_on_dram_cell = 36e-6;//A
- curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.5;
- t_ox[3] = 2.1e-3;
- v_th[3] = 0.44559;
- c_ox[3] = 1.41e-14;
- mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.181;
- c_g_ideal[3] = 1.10e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 456e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 2.54e-11;
- I_off_n[3][10] = 3.94e-11;
- I_off_n[3][20] = 5.95e-11;
- I_off_n[3][30] = 8.79e-11;
- I_off_n[3][40] = 1.27e-10;
- I_off_n[3][50] = 1.79e-10;
- I_off_n[3][60] = 2.47e-10;
- I_off_n[3][70] = 3.31e-10;
- I_off_n[3][80] = 4.26e-10;
- I_off_n[3][90] = 5.27e-10;
- I_off_n[3][100] = 6.46e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.1;
- Lphy[3] = 0.045;
- Lelec[3] = 0.0298;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.045;
- curr_I_on_dram_cell = 20e-6;//A
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.045*0.045;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 2.7;
- t_ox[3] = 4e-3;
- v_th[3] = 1.0;
- c_ox[3] = 7.98e-15;
- mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.147;
- c_g_ideal[3] = 3.59e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 999.4e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.31e-14;
- I_off_n[3][10] = 2.68e-14;
- I_off_n[3][20] = 5.25e-14;
- I_off_n[3][30] = 9.88e-14;
- I_off_n[3][40] = 1.79e-13;
- I_off_n[3][50] = 3.15e-13;
- I_off_n[3][60] = 5.36e-13;
- I_off_n[3][70] = 8.86e-13;
- I_off_n[3][80] = 1.42e-12;
- I_off_n[3][90] = 2.20e-12;
- I_off_n[3][100] = 3.29e-12;
- }
-
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7;
- curr_core_tx_density = 1.25;
- curr_sckt_co_eff = 1.1387;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ if (tech == 45) {
+ //45nm technology-node. Corresponds to year 2010 in ITRS
+ //ITRS HP device type
+ SENSE_AMP_D = .04e-9; // s
+ SENSE_AMP_P = 2.7e-15; // J
+ vdd[0] = 1.0;
+ Lphy[0] = 0.018;
+ Lelec[0] = 0.01345;
+ t_ox[0] = 0.65e-3;
+ v_th[0] = .18035;
+ c_ox[0] = 3.77e-14;
+ mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[0] = 9.38E-2;
+ c_g_ideal[0] = 6.78e-16;
+ c_fringe[0] = 0.05e-15;
+ c_junc[0] = 1e-15;
+ I_on_n[0] = 2046.6e-6;
+ //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of
+ //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm
+ I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI
+ nmos_effective_resistance_multiplier = 1.51;
+ n_to_p_eff_curr_drv_ratio[0] = 2.41;
+ gmp_to_gmn_multiplier[0] = 1.38;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
+ //Using MASTAR, @380K, increase Lgate until Ion reduces to 90%,
+ //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74
+ long_channel_leakage_reduction[0] = 1 / 3.546;
+ I_off_n[0][0] = 2.8e-7;
+ I_off_n[0][10] = 3.28e-7;
+ I_off_n[0][20] = 3.81e-7;
+ I_off_n[0][30] = 4.39e-7;
+ I_off_n[0][40] = 5.02e-7;
+ I_off_n[0][50] = 5.69e-7;
+ I_off_n[0][60] = 6.42e-7;
+ I_off_n[0][70] = 7.2e-7;
+ I_off_n[0][80] = 8.03e-7;
+ I_off_n[0][90] = 8.91e-7;
+ I_off_n[0][100] = 9.84e-7;
+
+ I_g_on_n[0][0] = 3.59e-8;//A/micron
+ I_g_on_n[0][10] = 3.59e-8;
+ I_g_on_n[0][20] = 3.59e-8;
+ I_g_on_n[0][30] = 3.59e-8;
+ I_g_on_n[0][40] = 3.59e-8;
+ I_g_on_n[0][50] = 3.59e-8;
+ I_g_on_n[0][60] = 3.59e-8;
+ I_g_on_n[0][70] = 3.59e-8;
+ I_g_on_n[0][80] = 3.59e-8;
+ I_g_on_n[0][90] = 3.59e-8;
+ I_g_on_n[0][100] = 3.59e-8;
+
+ //ITRS LSTP device type
+ vdd[1] = 1.1;
+ Lphy[1] = 0.028;
+ Lelec[1] = 0.0212;
+ t_ox[1] = 1.4e-3;
+ v_th[1] = 0.50245;
+ c_ox[1] = 2.01e-14;
+ mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 9.12e-2;
+ c_g_ideal[1] = 5.18e-16;
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 666.2e-6;
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2.23;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1 / 2.08;
+ I_off_n[1][0] = 1.01e-11;
+ I_off_n[1][10] = 1.65e-11;
+ I_off_n[1][20] = 2.62e-11;
+ I_off_n[1][30] = 4.06e-11;
+ I_off_n[1][40] = 6.12e-11;
+ I_off_n[1][50] = 9.02e-11;
+ I_off_n[1][60] = 1.3e-10;
+ I_off_n[1][70] = 1.83e-10;
+ I_off_n[1][80] = 2.51e-10;
+ I_off_n[1][90] = 3.29e-10;
+ I_off_n[1][100] = 4.1e-10;
+
+ I_g_on_n[1][0] = 9.47e-12;//A/micron
+ I_g_on_n[1][10] = 9.47e-12;
+ I_g_on_n[1][20] = 9.47e-12;
+ I_g_on_n[1][30] = 9.47e-12;
+ I_g_on_n[1][40] = 9.47e-12;
+ I_g_on_n[1][50] = 9.47e-12;
+ I_g_on_n[1][60] = 9.47e-12;
+ I_g_on_n[1][70] = 9.47e-12;
+ I_g_on_n[1][80] = 9.47e-12;
+ I_g_on_n[1][90] = 9.47e-12;
+ I_g_on_n[1][100] = 9.47e-12;
+
+ //ITRS LOP device type
+ vdd[2] = 0.7;
+ Lphy[2] = 0.022;
+ Lelec[2] = 0.016;
+ t_ox[2] = 0.9e-3;
+ v_th[2] = 0.22599;
+ c_ox[2] = 2.82e-14;//F/micron2
+ mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 5.71e-2;
+ c_g_ideal[2] = 6.2e-16;
+ c_fringe[2] = 0.073e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 748.9e-6;
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.76;
+ n_to_p_eff_curr_drv_ratio[2] = 2.28;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1 / 1.92;
+ I_off_n[2][0] = 4.03e-9;
+ I_off_n[2][10] = 5.02e-9;
+ I_off_n[2][20] = 6.18e-9;
+ I_off_n[2][30] = 7.51e-9;
+ I_off_n[2][40] = 9.04e-9;
+ I_off_n[2][50] = 1.08e-8;
+ I_off_n[2][60] = 1.27e-8;
+ I_off_n[2][70] = 1.47e-8;
+ I_off_n[2][80] = 1.66e-8;
+ I_off_n[2][90] = 1.84e-8;
+ I_off_n[2][100] = 2.03e-8;
+
+ I_g_on_n[2][0] = 3.24e-8;//A/micron
+ I_g_on_n[2][10] = 4.01e-8;
+ I_g_on_n[2][20] = 4.90e-8;
+ I_g_on_n[2][30] = 5.92e-8;
+ I_g_on_n[2][40] = 7.08e-8;
+ I_g_on_n[2][50] = 8.38e-8;
+ I_g_on_n[2][60] = 9.82e-8;
+ I_g_on_n[2][70] = 1.14e-7;
+ I_g_on_n[2][80] = 1.29e-7;
+ I_g_on_n[2][90] = 1.43e-7;
+ I_g_on_n[2][100] = 1.54e-7;
+
+ if (ram_cell_tech_type == lp_dram) {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.1;
+ Lphy[3] = 0.078;
+ Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 0.44559;
+ width_dram_access_transistor = 0.079;
+ curr_I_on_dram_cell = 36e-6;//A
+ curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.5;
+ t_ox[3] = 2.1e-3;
+ v_th[3] = 0.44559;
+ c_ox[3] = 1.41e-14;
+ mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.181;
+ c_g_ideal[3] = 1.10e-15;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 456e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 2.54e-11;
+ I_off_n[3][10] = 3.94e-11;
+ I_off_n[3][20] = 5.95e-11;
+ I_off_n[3][30] = 8.79e-11;
+ I_off_n[3][40] = 1.27e-10;
+ I_off_n[3][50] = 1.79e-10;
+ I_off_n[3][60] = 2.47e-10;
+ I_off_n[3][70] = 3.31e-10;
+ I_off_n[3][80] = 4.26e-10;
+ I_off_n[3][90] = 5.27e-10;
+ I_off_n[3][100] = 6.46e-10;
+ } else if (ram_cell_tech_type == comm_dram) {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.1;
+ Lphy[3] = 0.045;
+ Lelec[3] = 0.0298;
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.045;
+ curr_I_on_dram_cell = 20e-6;//A
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6 * 0.045 * 0.045;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 2.7;
+ t_ox[3] = 4e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 7.98e-15;
+ mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.147;
+ c_g_ideal[3] = 3.59e-16;
+ c_fringe[3] = 0.08e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 999.4e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.69;
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 1.31e-14;
+ I_off_n[3][10] = 2.68e-14;
+ I_off_n[3][20] = 5.25e-14;
+ I_off_n[3][30] = 9.88e-14;
+ I_off_n[3][40] = 1.79e-13;
+ I_off_n[3][50] = 3.15e-13;
+ I_off_n[3][60] = 5.36e-13;
+ I_off_n[3][70] = 8.86e-13;
+ I_off_n[3][80] = 1.42e-12;
+ I_off_n[3][90] = 2.20e-12;
+ I_off_n[3][100] = 3.29e-12;
+ }
+
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferentiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7 * 0.7;
+ curr_core_tx_density = 1.25;
+ curr_sckt_co_eff = 1.1387;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
- if (tech == 32)
- {
- SENSE_AMP_D = .03e-9; // s
- SENSE_AMP_P = 2.16e-15; // J
- //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm
- //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for
- //HP and LSTP.
- vdd[0] = 0.9;
- Lphy[0] = 0.013;
- Lelec[0] = 0.01013;
- t_ox[0] = 0.5e-3;
- v_th[0] = 0.21835;
- c_ox[0] = 4.11e-14;
- mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 5.09E-2;
- c_g_ideal[0] = 5.34e-16;
- c_fringe[0] = 0.04e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 2211.7e-6;
- I_on_p[0] = I_on_n[0] / 2;
- nmos_effective_resistance_multiplier = 1.49;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/3.706;
- //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%),
- //whichever comes first
- I_off_n[0][0] = 1.52e-7;
- I_off_n[0][10] = 1.55e-7;
- I_off_n[0][20] = 1.59e-7;
- I_off_n[0][30] = 1.68e-7;
- I_off_n[0][40] = 1.90e-7;
- I_off_n[0][50] = 2.69e-7;
- I_off_n[0][60] = 5.32e-7;
- I_off_n[0][70] = 1.02e-6;
- I_off_n[0][80] = 1.62e-6;
- I_off_n[0][90] = 2.73e-6;
- I_off_n[0][100] = 6.1e-6;
-
- I_g_on_n[0][0] = 6.55e-8;//A/micron
- I_g_on_n[0][10] = 6.55e-8;
- I_g_on_n[0][20] = 6.55e-8;
- I_g_on_n[0][30] = 6.55e-8;
- I_g_on_n[0][40] = 6.55e-8;
- I_g_on_n[0][50] = 6.55e-8;
- I_g_on_n[0][60] = 6.55e-8;
- I_g_on_n[0][70] = 6.55e-8;
- I_g_on_n[0][80] = 6.55e-8;
- I_g_on_n[0][90] = 6.55e-8;
- I_g_on_n[0][100] = 6.55e-8;
-
-// 32 DG
-// I_g_on_n[0][0] = 2.71e-9;//A/micron
-// I_g_on_n[0][10] = 2.71e-9;
-// I_g_on_n[0][20] = 2.71e-9;
-// I_g_on_n[0][30] = 2.71e-9;
-// I_g_on_n[0][40] = 2.71e-9;
-// I_g_on_n[0][50] = 2.71e-9;
-// I_g_on_n[0][60] = 2.71e-9;
-// I_g_on_n[0][70] = 2.71e-9;
-// I_g_on_n[0][80] = 2.71e-9;
-// I_g_on_n[0][90] = 2.71e-9;
-// I_g_on_n[0][100] = 2.71e-9;
-
- //LSTP device type
- vdd[1] = 1;
- Lphy[1] = 0.020;
- Lelec[1] = 0.0173;
- t_ox[1] = 1.2e-3;
- v_th[1] = 0.513;
- c_ox[1] = 2.29e-14;
- mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 8.64e-2;
- c_g_ideal[1] = 4.58e-16;
- c_fringe[1] = 0.053e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 683.6e-6;
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/1.93;
- I_off_n[1][0] = 2.06e-11;
- I_off_n[1][10] = 3.30e-11;
- I_off_n[1][20] = 5.15e-11;
- I_off_n[1][30] = 7.83e-11;
- I_off_n[1][40] = 1.16e-10;
- I_off_n[1][50] = 1.69e-10;
- I_off_n[1][60] = 2.40e-10;
- I_off_n[1][70] = 3.34e-10;
- I_off_n[1][80] = 4.54e-10;
- I_off_n[1][90] = 5.96e-10;
- I_off_n[1][100] = 7.44e-10;
-
- I_g_on_n[1][0] = 3.73e-11;//A/micron
- I_g_on_n[1][10] = 3.73e-11;
- I_g_on_n[1][20] = 3.73e-11;
- I_g_on_n[1][30] = 3.73e-11;
- I_g_on_n[1][40] = 3.73e-11;
- I_g_on_n[1][50] = 3.73e-11;
- I_g_on_n[1][60] = 3.73e-11;
- I_g_on_n[1][70] = 3.73e-11;
- I_g_on_n[1][80] = 3.73e-11;
- I_g_on_n[1][90] = 3.73e-11;
- I_g_on_n[1][100] = 3.73e-11;
-
-
- //LOP device type
- vdd[2] = 0.6;
- Lphy[2] = 0.016;
- Lelec[2] = 0.01232;
- t_ox[2] = 0.9e-3;
- v_th[2] = 0.24227;
- c_ox[2] = 2.84e-14;
- mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 4.64e-2;
- c_g_ideal[2] = 4.54e-16;
- c_fringe[2] = 0.057e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 827.8e-6;
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.73;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/1.89;
- I_off_n[2][0] = 5.94e-8;
- I_off_n[2][10] = 7.23e-8;
- I_off_n[2][20] = 8.7e-8;
- I_off_n[2][30] = 1.04e-7;
- I_off_n[2][40] = 1.22e-7;
- I_off_n[2][50] = 1.43e-7;
- I_off_n[2][60] = 1.65e-7;
- I_off_n[2][70] = 1.90e-7;
- I_off_n[2][80] = 2.15e-7;
- I_off_n[2][90] = 2.39e-7;
- I_off_n[2][100] = 2.63e-7;
-
- I_g_on_n[2][0] = 2.93e-9;//A/micron
- I_g_on_n[2][10] = 2.93e-9;
- I_g_on_n[2][20] = 2.93e-9;
- I_g_on_n[2][30] = 2.93e-9;
- I_g_on_n[2][40] = 2.93e-9;
- I_g_on_n[2][50] = 2.93e-9;
- I_g_on_n[2][60] = 2.93e-9;
- I_g_on_n[2][70] = 2.93e-9;
- I_g_on_n[2][80] = 2.93e-9;
- I_g_on_n[2][90] = 2.93e-9;
- I_g_on_n[2][100] = 2.93e-9;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.0;
- Lphy[3] = 0.056;
- Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 0.44129;
- width_dram_access_transistor = 0.056;
- curr_I_on_dram_cell = 36e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.5;
- t_ox[3] = 2e-3;
- v_th[3] = 0.44467;
- c_ox[3] = 1.48e-14;
- mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.174;
- c_g_ideal[3] = 7.45e-16;
- c_fringe[3] = 0.053e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1055.4e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 3.57e-11;
- I_off_n[3][10] = 5.51e-11;
- I_off_n[3][20] = 8.27e-11;
- I_off_n[3][30] = 1.21e-10;
- I_off_n[3][40] = 1.74e-10;
- I_off_n[3][50] = 2.45e-10;
- I_off_n[3][60] = 3.38e-10;
- I_off_n[3][70] = 4.53e-10;
- I_off_n[3][80] = 5.87e-10;
- I_off_n[3][90] = 7.29e-10;
- I_off_n[3][100] = 8.87e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.0;
- Lphy[3] = 0.032;
- Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.032;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.032*0.032;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 2.6;
- t_ox[3] = 4e-3;
- v_th[3] = 1.0;
- c_ox[3] = 7.99e-15;
- mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.129;
- c_g_ideal[3] = 2.56e-16;
- c_fringe[3] = 0.053e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1024.5e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 3.63e-14;
- I_off_n[3][10] = 7.18e-14;
- I_off_n[3][20] = 1.36e-13;
- I_off_n[3][30] = 2.49e-13;
- I_off_n[3][40] = 4.41e-13;
- I_off_n[3][50] = 7.55e-13;
- I_off_n[3][60] = 1.26e-12;
- I_off_n[3][70] = 2.03e-12;
- I_off_n[3][80] = 3.19e-12;
- I_off_n[3][90] = 4.87e-12;
- I_off_n[3][100] = 7.16e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7;
- curr_sckt_co_eff = 1.1111;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ if (tech == 32) {
+ SENSE_AMP_D = .03e-9; // s
+ SENSE_AMP_P = 2.16e-15; // J
+ //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm
+ //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for
+ //HP and LSTP.
+ vdd[0] = 0.9;
+ Lphy[0] = 0.013;
+ Lelec[0] = 0.01013;
+ t_ox[0] = 0.5e-3;
+ v_th[0] = 0.21835;
+ c_ox[0] = 4.11e-14;
+ mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[0] = 5.09E-2;
+ c_g_ideal[0] = 5.34e-16;
+ c_fringe[0] = 0.04e-15;
+ c_junc[0] = 1e-15;
+ I_on_n[0] = 2211.7e-6;
+ I_on_p[0] = I_on_n[0] / 2;
+ nmos_effective_resistance_multiplier = 1.49;
+ n_to_p_eff_curr_drv_ratio[0] = 2.41;
+ gmp_to_gmn_multiplier[0] = 1.38;
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1 / 3.706;
+ //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%),
+ //whichever comes first
+ I_off_n[0][0] = 1.52e-7;
+ I_off_n[0][10] = 1.55e-7;
+ I_off_n[0][20] = 1.59e-7;
+ I_off_n[0][30] = 1.68e-7;
+ I_off_n[0][40] = 1.90e-7;
+ I_off_n[0][50] = 2.69e-7;
+ I_off_n[0][60] = 5.32e-7;
+ I_off_n[0][70] = 1.02e-6;
+ I_off_n[0][80] = 1.62e-6;
+ I_off_n[0][90] = 2.73e-6;
+ I_off_n[0][100] = 6.1e-6;
+
+ I_g_on_n[0][0] = 6.55e-8;//A/micron
+ I_g_on_n[0][10] = 6.55e-8;
+ I_g_on_n[0][20] = 6.55e-8;
+ I_g_on_n[0][30] = 6.55e-8;
+ I_g_on_n[0][40] = 6.55e-8;
+ I_g_on_n[0][50] = 6.55e-8;
+ I_g_on_n[0][60] = 6.55e-8;
+ I_g_on_n[0][70] = 6.55e-8;
+ I_g_on_n[0][80] = 6.55e-8;
+ I_g_on_n[0][90] = 6.55e-8;
+ I_g_on_n[0][100] = 6.55e-8;
+
+ //LSTP device type
+ vdd[1] = 1;
+ Lphy[1] = 0.020;
+ Lelec[1] = 0.0173;
+ t_ox[1] = 1.2e-3;
+ v_th[1] = 0.513;
+ c_ox[1] = 2.29e-14;
+ mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[1] = 8.64e-2;
+ c_g_ideal[1] = 4.58e-16;
+ c_fringe[1] = 0.053e-15;
+ c_junc[1] = 1e-15;
+ I_on_n[1] = 683.6e-6;
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2.23;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
+ long_channel_leakage_reduction[1] = 1 / 1.93;
+ I_off_n[1][0] = 2.06e-11;
+ I_off_n[1][10] = 3.30e-11;
+ I_off_n[1][20] = 5.15e-11;
+ I_off_n[1][30] = 7.83e-11;
+ I_off_n[1][40] = 1.16e-10;
+ I_off_n[1][50] = 1.69e-10;
+ I_off_n[1][60] = 2.40e-10;
+ I_off_n[1][70] = 3.34e-10;
+ I_off_n[1][80] = 4.54e-10;
+ I_off_n[1][90] = 5.96e-10;
+ I_off_n[1][100] = 7.44e-10;
+
+ I_g_on_n[1][0] = 3.73e-11;//A/micron
+ I_g_on_n[1][10] = 3.73e-11;
+ I_g_on_n[1][20] = 3.73e-11;
+ I_g_on_n[1][30] = 3.73e-11;
+ I_g_on_n[1][40] = 3.73e-11;
+ I_g_on_n[1][50] = 3.73e-11;
+ I_g_on_n[1][60] = 3.73e-11;
+ I_g_on_n[1][70] = 3.73e-11;
+ I_g_on_n[1][80] = 3.73e-11;
+ I_g_on_n[1][90] = 3.73e-11;
+ I_g_on_n[1][100] = 3.73e-11;
+
+ //LOP device type
+ vdd[2] = 0.6;
+ Lphy[2] = 0.016;
+ Lelec[2] = 0.01232;
+ t_ox[2] = 0.9e-3;
+ v_th[2] = 0.24227;
+ c_ox[2] = 2.84e-14;
+ mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[2] = 4.64e-2;
+ c_g_ideal[2] = 4.54e-16;
+ c_fringe[2] = 0.057e-15;
+ c_junc[2] = 1e-15;
+ I_on_n[2] = 827.8e-6;
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.73;
+ n_to_p_eff_curr_drv_ratio[2] = 2.28;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
+ long_channel_leakage_reduction[2] = 1 / 1.89;
+ I_off_n[2][0] = 5.94e-8;
+ I_off_n[2][10] = 7.23e-8;
+ I_off_n[2][20] = 8.7e-8;
+ I_off_n[2][30] = 1.04e-7;
+ I_off_n[2][40] = 1.22e-7;
+ I_off_n[2][50] = 1.43e-7;
+ I_off_n[2][60] = 1.65e-7;
+ I_off_n[2][70] = 1.90e-7;
+ I_off_n[2][80] = 2.15e-7;
+ I_off_n[2][90] = 2.39e-7;
+ I_off_n[2][100] = 2.63e-7;
+
+ I_g_on_n[2][0] = 2.93e-9;//A/micron
+ I_g_on_n[2][10] = 2.93e-9;
+ I_g_on_n[2][20] = 2.93e-9;
+ I_g_on_n[2][30] = 2.93e-9;
+ I_g_on_n[2][40] = 2.93e-9;
+ I_g_on_n[2][50] = 2.93e-9;
+ I_g_on_n[2][60] = 2.93e-9;
+ I_g_on_n[2][70] = 2.93e-9;
+ I_g_on_n[2][80] = 2.93e-9;
+ I_g_on_n[2][90] = 2.93e-9;
+ I_g_on_n[2][100] = 2.93e-9;
+
+ if (ram_cell_tech_type == lp_dram) {
+ //LP-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.0;
+ Lphy[3] = 0.056;
+ Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 0.44129;
+ width_dram_access_transistor = 0.056;
+ curr_I_on_dram_cell = 36e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
+ curr_asp_ratio_cell_dram = 1.46;
+ curr_c_dram_cell = 20e-15;
+
+ //LP-DRAM wordline transistor parameters
+ curr_vpp = 1.5;
+ t_ox[3] = 2e-3;
+ v_th[3] = 0.44467;
+ c_ox[3] = 1.48e-14;
+ mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.174;
+ c_g_ideal[3] = 7.45e-16;
+ c_fringe[3] = 0.053e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 1055.4e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.65;
+ n_to_p_eff_curr_drv_ratio[3] = 2.05;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 3.57e-11;
+ I_off_n[3][10] = 5.51e-11;
+ I_off_n[3][20] = 8.27e-11;
+ I_off_n[3][30] = 1.21e-10;
+ I_off_n[3][40] = 1.74e-10;
+ I_off_n[3][50] = 2.45e-10;
+ I_off_n[3][60] = 3.38e-10;
+ I_off_n[3][70] = 4.53e-10;
+ I_off_n[3][80] = 5.87e-10;
+ I_off_n[3][90] = 7.29e-10;
+ I_off_n[3][100] = 8.87e-10;
+ } else if (ram_cell_tech_type == comm_dram) {
+ //COMM-DRAM cell access transistor technology parameters
+ curr_vdd_dram_cell = 1.0;
+ Lphy[3] = 0.032;
+ Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
+ curr_v_th_dram_access_transistor = 1;
+ width_dram_access_transistor = 0.032;
+ curr_I_on_dram_cell = 20e-6;
+ curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
+ curr_Wmemcella_dram = width_dram_access_transistor;
+ curr_Wmemcellpmos_dram = 0;
+ curr_Wmemcellnmos_dram = 0;
+ curr_area_cell_dram = 6 * 0.032 * 0.032;
+ curr_asp_ratio_cell_dram = 1.5;
+ curr_c_dram_cell = 30e-15;
+
+ //COMM-DRAM wordline transistor parameters
+ curr_vpp = 2.6;
+ t_ox[3] = 4e-3;
+ v_th[3] = 1.0;
+ c_ox[3] = 7.99e-15;
+ mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
+ Vdsat[3] = 0.129;
+ c_g_ideal[3] = 2.56e-16;
+ c_fringe[3] = 0.053e-15;
+ c_junc[3] = 1e-15;
+ I_on_n[3] = 1024.5e-6;
+ I_on_p[3] = I_on_n[3] / 2;
+ nmos_effective_resistance_multiplier = 1.69;
+ n_to_p_eff_curr_drv_ratio[3] = 1.95;
+ gmp_to_gmn_multiplier[3] = 0.90;
+ Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
+ Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
+ long_channel_leakage_reduction[3] = 1;
+ I_off_n[3][0] = 3.63e-14;
+ I_off_n[3][10] = 7.18e-14;
+ I_off_n[3][20] = 1.36e-13;
+ I_off_n[3][30] = 2.49e-13;
+ I_off_n[3][40] = 4.41e-13;
+ I_off_n[3][50] = 7.55e-13;
+ I_off_n[3][60] = 1.26e-12;
+ I_off_n[3][70] = 2.03e-12;
+ I_off_n[3][80] = 3.19e-12;
+ I_off_n[3][90] = 4.87e-12;
+ I_off_n[3][100] = 7.16e-12;
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferentiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7;
+ curr_core_tx_density = 1.25 / 0.7;
+ curr_sckt_co_eff = 1.1111;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
+ }
- if(tech == 22){
- SENSE_AMP_D = .03e-9; // s
- SENSE_AMP_P = 2.16e-15; // J
- //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm
- //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP.
- //22 nm HP
- vdd[0] = 0.8;
- Lphy[0] = 0.009;//Lphy is the physical gate-length.
- Lelec[0] = 0.00468;//Lelec is the electrical gate-length.
- t_ox[0] = 0.55e-3;//micron
- v_th[0] = 0.1395;//V
- c_ox[0] = 3.63e-14;//F/micron2
- mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 2.33e-2; //V/micron
- c_g_ideal[0] = 3.27e-16;//F/micron
- c_fringe[0] = 0.06e-15;//F/micron
- c_junc[0] = 0;//F/micron2
- I_on_n[0] = 2626.4e-6;//A/micron
- I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.45;
- n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
- //"Dynamic" tab of Device workspace.
- gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/3.274;
- I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there.
- I_off_n[0][10] = 1.55e-7/1.5*1.2;
- I_off_n[0][20] = 1.59e-7/1.5*1.2;
- I_off_n[0][30] = 1.68e-7/1.5*1.2;
- I_off_n[0][40] = 1.90e-7/1.5*1.2;
- I_off_n[0][50] = 2.69e-7/1.5*1.2;
- I_off_n[0][60] = 5.32e-7/1.5*1.2;
- I_off_n[0][70] = 1.02e-6/1.5*1.2;
- I_off_n[0][80] = 1.62e-6/1.5*1.2;
- I_off_n[0][90] = 2.73e-6/1.5*1.2;
- I_off_n[0][100] = 6.1e-6/1.5*1.2;
- //for 22nm DG HP
- I_g_on_n[0][0] = 1.81e-9;//A/micron
- I_g_on_n[0][10] = 1.81e-9;
- I_g_on_n[0][20] = 1.81e-9;
- I_g_on_n[0][30] = 1.81e-9;
- I_g_on_n[0][40] = 1.81e-9;
- I_g_on_n[0][50] = 1.81e-9;
- I_g_on_n[0][60] = 1.81e-9;
- I_g_on_n[0][70] = 1.81e-9;
- I_g_on_n[0][80] = 1.81e-9;
- I_g_on_n[0][90] = 1.81e-9;
- I_g_on_n[0][100] = 1.81e-9;
-
- //22 nm LSTP DG
- vdd[1] = 0.8;
- Lphy[1] = 0.014;
- Lelec[1] = 0.008;//Lelec is the electrical gate-length.
- t_ox[1] = 1.1e-3;//micron
- v_th[1] = 0.40126;//V
- c_ox[1] = 2.30e-14;//F/micron2
- mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[1] = 6.64e-2; //V/micron
- c_g_ideal[1] = 3.22e-16;//F/micron
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 0;//F/micron2
- I_on_n[1] = 727.6e-6;//A/micron
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
- long_channel_leakage_reduction[1] = 1/1.89;
- I_off_n[1][0] = 2.43e-11;
- I_off_n[1][10] = 4.85e-11;
- I_off_n[1][20] = 9.68e-11;
- I_off_n[1][30] = 1.94e-10;
- I_off_n[1][40] = 3.87e-10;
- I_off_n[1][50] = 7.73e-10;
- I_off_n[1][60] = 3.55e-10;
- I_off_n[1][70] = 3.09e-9;
- I_off_n[1][80] = 6.19e-9;
- I_off_n[1][90] = 1.24e-8;
- I_off_n[1][100]= 2.48e-8;
-
- I_g_on_n[1][0] = 4.51e-10;//A/micron
- I_g_on_n[1][10] = 4.51e-10;
- I_g_on_n[1][20] = 4.51e-10;
- I_g_on_n[1][30] = 4.51e-10;
- I_g_on_n[1][40] = 4.51e-10;
- I_g_on_n[1][50] = 4.51e-10;
- I_g_on_n[1][60] = 4.51e-10;
- I_g_on_n[1][70] = 4.51e-10;
- I_g_on_n[1][80] = 4.51e-10;
- I_g_on_n[1][90] = 4.51e-10;
- I_g_on_n[1][100] = 4.51e-10;
-
- //22 nm LOP
- vdd[2] = 0.6;
- Lphy[2] = 0.011;
- Lelec[2] = 0.00604;//Lelec is the electrical gate-length.
- t_ox[2] = 0.8e-3;//micron
- v_th[2] = 0.2315;//V
- c_ox[2] = 2.87e-14;//F/micron2
- mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[2] = 1.81e-2; //V/micron
- c_g_ideal[2] = 3.16e-16;//F/micron
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab
- I_on_n[2] = 916.1e-6;//A/micron
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.73;
- n_to_p_eff_curr_drv_ratio[2] = 2;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron
- long_channel_leakage_reduction[2] = 1/2.38;
-
- I_off_n[2][0] = 1.31e-8;
- I_off_n[2][10] = 2.60e-8;
- I_off_n[2][20] = 5.14e-8;
- I_off_n[2][30] = 1.02e-7;
- I_off_n[2][40] = 2.02e-7;
- I_off_n[2][50] = 3.99e-7;
- I_off_n[2][60] = 7.91e-7;
- I_off_n[2][70] = 1.09e-6;
- I_off_n[2][80] = 2.09e-6;
- I_off_n[2][90] = 4.04e-6;
- I_off_n[2][100]= 4.48e-6;
-
- I_g_on_n[2][0] = 2.74e-9;//A/micron
- I_g_on_n[2][10] = 2.74e-9;
- I_g_on_n[2][20] = 2.74e-9;
- I_g_on_n[2][30] = 2.74e-9;
- I_g_on_n[2][40] = 2.74e-9;
- I_g_on_n[2][50] = 2.74e-9;
- I_g_on_n[2][60] = 2.74e-9;
- I_g_on_n[2][70] = 2.74e-9;
- I_g_on_n[2][80] = 2.74e-9;
- I_g_on_n[2][90] = 2.74e-9;
- I_g_on_n[2][100] = 2.74e-9;
-
-
-
- if (ram_cell_tech_type == 3)
- {}
- else if (ram_cell_tech_type == 4)
- {
- //22 nm commodity DRAM cell access transistor technology parameters.
+ if (tech == 22) {
+ SENSE_AMP_D = .03e-9; // s
+ SENSE_AMP_P = 2.16e-15; // J
+ //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm
+ //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP.
+ //22 nm HP
+ vdd[0] = 0.8;
+ Lphy[0] = 0.009;//Lphy is the physical gate-length.
+ Lelec[0] = 0.00468;//Lelec is the electrical gate-length.
+ t_ox[0] = 0.55e-3;//micron
+ v_th[0] = 0.1395;//V
+ c_ox[0] = 3.63e-14;//F/micron2
+ mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 2.33e-2; //V/micron
+ c_g_ideal[0] = 3.27e-16;//F/micron
+ c_fringe[0] = 0.06e-15;//F/micron
+ c_junc[0] = 0;//F/micron2
+ I_on_n[0] = 2626.4e-6;//A/micron
+ I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
+ nmos_effective_resistance_multiplier = 1.45;
+ n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
+ //"Dynamic" tab of Device workspace.
+ gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1 / 3.274;
+ //From 22nm, leakage currents are taken directly from the ITRS report rather
+ //than MASTAR, since MASTAR has serious bugs there.
+ I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2;
+ I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2;
+ I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2;
+ I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2;
+ I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2;
+ I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2;
+ I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2;
+ I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2;
+ I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2;
+ I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2;
+ I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2;
+ //for 22nm DG HP
+ I_g_on_n[0][0] = 1.81e-9;//A/micron
+ I_g_on_n[0][10] = 1.81e-9;
+ I_g_on_n[0][20] = 1.81e-9;
+ I_g_on_n[0][30] = 1.81e-9;
+ I_g_on_n[0][40] = 1.81e-9;
+ I_g_on_n[0][50] = 1.81e-9;
+ I_g_on_n[0][60] = 1.81e-9;
+ I_g_on_n[0][70] = 1.81e-9;
+ I_g_on_n[0][80] = 1.81e-9;
+ I_g_on_n[0][90] = 1.81e-9;
+ I_g_on_n[0][100] = 1.81e-9;
+
+ //22 nm LSTP DG
+ vdd[1] = 0.8;
+ Lphy[1] = 0.014;
+ Lelec[1] = 0.008;//Lelec is the electrical gate-length.
+ t_ox[1] = 1.1e-3;//micron
+ v_th[1] = 0.40126;//V
+ c_ox[1] = 2.30e-14;//F/micron2
+ mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[1] = 6.64e-2; //V/micron
+ c_g_ideal[1] = 3.22e-16;//F/micron
+ c_fringe[1] = 0.08e-15;
+ c_junc[1] = 0;//F/micron2
+ I_on_n[1] = 727.6e-6;//A/micron
+ I_on_p[1] = I_on_n[1] / 2;
+ nmos_effective_resistance_multiplier = 1.99;
+ n_to_p_eff_curr_drv_ratio[1] = 2;
+ gmp_to_gmn_multiplier[1] = 0.99;
+ Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
+ Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
+ long_channel_leakage_reduction[1] = 1 / 1.89;
+ I_off_n[1][0] = 2.43e-11;
+ I_off_n[1][10] = 4.85e-11;
+ I_off_n[1][20] = 9.68e-11;
+ I_off_n[1][30] = 1.94e-10;
+ I_off_n[1][40] = 3.87e-10;
+ I_off_n[1][50] = 7.73e-10;
+ I_off_n[1][60] = 3.55e-10;
+ I_off_n[1][70] = 3.09e-9;
+ I_off_n[1][80] = 6.19e-9;
+ I_off_n[1][90] = 1.24e-8;
+ I_off_n[1][100] = 2.48e-8;
+
+ I_g_on_n[1][0] = 4.51e-10;//A/micron
+ I_g_on_n[1][10] = 4.51e-10;
+ I_g_on_n[1][20] = 4.51e-10;
+ I_g_on_n[1][30] = 4.51e-10;
+ I_g_on_n[1][40] = 4.51e-10;
+ I_g_on_n[1][50] = 4.51e-10;
+ I_g_on_n[1][60] = 4.51e-10;
+ I_g_on_n[1][70] = 4.51e-10;
+ I_g_on_n[1][80] = 4.51e-10;
+ I_g_on_n[1][90] = 4.51e-10;
+ I_g_on_n[1][100] = 4.51e-10;
+
+ //22 nm LOP
+ vdd[2] = 0.6;
+ Lphy[2] = 0.011;
+ Lelec[2] = 0.00604;//Lelec is the electrical gate-length.
+ t_ox[2] = 0.8e-3;//micron
+ v_th[2] = 0.2315;//V
+ c_ox[2] = 2.87e-14;//F/micron2
+ mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[2] = 1.81e-2; //V/micron
+ c_g_ideal[2] = 3.16e-16;//F/micron
+ c_fringe[2] = 0.08e-15;
+ c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab
+ I_on_n[2] = 916.1e-6;//A/micron
+ I_on_p[2] = I_on_n[2] / 2;
+ nmos_effective_resistance_multiplier = 1.73;
+ n_to_p_eff_curr_drv_ratio[2] = 2;
+ gmp_to_gmn_multiplier[2] = 1.11;
+ Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron
+ Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron
+ long_channel_leakage_reduction[2] = 1 / 2.38;
+
+ I_off_n[2][0] = 1.31e-8;
+ I_off_n[2][10] = 2.60e-8;
+ I_off_n[2][20] = 5.14e-8;
+ I_off_n[2][30] = 1.02e-7;
+ I_off_n[2][40] = 2.02e-7;
+ I_off_n[2][50] = 3.99e-7;
+ I_off_n[2][60] = 7.91e-7;
+ I_off_n[2][70] = 1.09e-6;
+ I_off_n[2][80] = 2.09e-6;
+ I_off_n[2][90] = 4.04e-6;
+ I_off_n[2][100] = 4.48e-6;
+
+ I_g_on_n[2][0] = 2.74e-9;//A/micron
+ I_g_on_n[2][10] = 2.74e-9;
+ I_g_on_n[2][20] = 2.74e-9;
+ I_g_on_n[2][30] = 2.74e-9;
+ I_g_on_n[2][40] = 2.74e-9;
+ I_g_on_n[2][50] = 2.74e-9;
+ I_g_on_n[2][60] = 2.74e-9;
+ I_g_on_n[2][70] = 2.74e-9;
+ I_g_on_n[2][80] = 2.74e-9;
+ I_g_on_n[2][90] = 2.74e-9;
+ I_g_on_n[2][100] = 2.74e-9;
+
+
+
+ if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) {
+ //22 nm commodity DRAM cell access transistor technology parameters.
//parameters
curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
//2005 ITRS, the value was about twice the value in 2007 ITRS
@@ -1486,12 +1423,12 @@ void init_tech_params(double technology, bool is_tag)
curr_Wmemcella_dram = width_dram_access_transistor;
curr_Wmemcellpmos_dram = 0;
curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.022*0.022;//micron2.
+ curr_area_cell_dram = 6 * 0.022 * 0.022;//micron2.
curr_asp_ratio_cell_dram = 0.667;
curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
//kept constant.
- //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
+ //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
curr_vpp = 2.3;//vpp. V
t_ox[3] = 3.5e-3;//micron
v_th[3] = 1.0;//V
@@ -1522,130 +1459,80 @@ void init_tech_params(double technology, bool is_tag)
I_off_n[3][90] = 1.18e-11;
I_off_n[3][100] = 1.72e-11;
- }
- else
- {
- //some error handler
+ } else {
+ //some error handler
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7;
+ curr_core_tx_density = 1.25 / 0.7 / 0.7;
+ curr_sckt_co_eff = 1.1296;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
}
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7/0.7;
- curr_sckt_co_eff = 1.1296;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if(tech == 16){
- //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm
- //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP.
- //16 nm HP
- vdd[0] = 0.7;
- Lphy[0] = 0.006;//Lphy is the physical gate-length.
- Lelec[0] = 0.00315;//Lelec is the electrical gate-length.
- t_ox[0] = 0.5e-3;//micron
- v_th[0] = 0.1489;//V
- c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR
- mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet
- c_g_ideal[0] = 2.30e-16;//F/micron
- c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3
- c_junc[0] = 0;//F/micron2 MASTAR result dynamic
- I_on_n[0] = 2768.4e-6;//A/micron
- I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current.
- n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
- //"Dynamic" tab of Device workspace.
- gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/2.655;
- I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07;
- I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07;
- I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07;
- I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07;
- I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07;
- I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07;
- I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07;
- I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07;
- I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07;
- I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07;
- I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07;
- //for 16nm DG HP
- I_g_on_n[0][0] = 1.07e-9;//A/micron
- I_g_on_n[0][10] = 1.07e-9;
- I_g_on_n[0][20] = 1.07e-9;
- I_g_on_n[0][30] = 1.07e-9;
- I_g_on_n[0][40] = 1.07e-9;
- I_g_on_n[0][50] = 1.07e-9;
- I_g_on_n[0][60] = 1.07e-9;
- I_g_on_n[0][70] = 1.07e-9;
- I_g_on_n[0][80] = 1.07e-9;
- I_g_on_n[0][90] = 1.07e-9;
- I_g_on_n[0][100] = 1.07e-9;
-
-// //16 nm LSTP DG
-// vdd[1] = 0.8;
-// Lphy[1] = 0.014;
-// Lelec[1] = 0.008;//Lelec is the electrical gate-length.
-// t_ox[1] = 1.1e-3;//micron
-// v_th[1] = 0.40126;//V
-// c_ox[1] = 2.30e-14;//F/micron2
-// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
-// Vdsat[1] = 6.64e-2; //V/micron
-// c_g_ideal[1] = 3.22e-16;//F/micron
-// c_fringe[1] = 0.008e-15;
-// c_junc[1] = 0;//F/micron2
-// I_on_n[1] = 727.6e-6;//A/micron
-// I_on_p[1] = I_on_n[1] / 2;
-// nmos_effective_resistance_multiplier = 1.99;
-// n_to_p_eff_curr_drv_ratio[1] = 2;
-// gmp_to_gmn_multiplier[1] = 0.99;
-// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
-// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
-// I_off_n[1][0] = 2.43e-11;
-// I_off_n[1][10] = 4.85e-11;
-// I_off_n[1][20] = 9.68e-11;
-// I_off_n[1][30] = 1.94e-10;
-// I_off_n[1][40] = 3.87e-10;
-// I_off_n[1][50] = 7.73e-10;
-// I_off_n[1][60] = 3.55e-10;
-// I_off_n[1][70] = 3.09e-9;
-// I_off_n[1][80] = 6.19e-9;
-// I_off_n[1][90] = 1.24e-8;
-// I_off_n[1][100]= 2.48e-8;
-//
-// // for 22nm LSTP HP
-// I_g_on_n[1][0] = 4.51e-10;//A/micron
-// I_g_on_n[1][10] = 4.51e-10;
-// I_g_on_n[1][20] = 4.51e-10;
-// I_g_on_n[1][30] = 4.51e-10;
-// I_g_on_n[1][40] = 4.51e-10;
-// I_g_on_n[1][50] = 4.51e-10;
-// I_g_on_n[1][60] = 4.51e-10;
-// I_g_on_n[1][70] = 4.51e-10;
-// I_g_on_n[1][80] = 4.51e-10;
-// I_g_on_n[1][90] = 4.51e-10;
-// I_g_on_n[1][100] = 4.51e-10;
-
-
- if (ram_cell_tech_type == 3)
- {}
- else if (ram_cell_tech_type == 4)
- {
- //22 nm commodity DRAM cell access transistor technology parameters.
+ if (tech == 16) {
+ //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm
+ //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP.
+ //16 nm HP
+ vdd[0] = 0.7;
+ Lphy[0] = 0.006;//Lphy is the physical gate-length.
+ Lelec[0] = 0.00315;//Lelec is the electrical gate-length.
+ t_ox[0] = 0.5e-3;//micron
+ v_th[0] = 0.1489;//V
+ c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR
+ mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
+ Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet
+ c_g_ideal[0] = 2.30e-16;//F/micron
+ c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3
+ c_junc[0] = 0;//F/micron2 MASTAR result dynamic
+ I_on_n[0] = 2768.4e-6;//A/micron
+ I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
+ nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current.
+ n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
+ //"Dynamic" tab of Device workspace.
+ gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
+ Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
+ long_channel_leakage_reduction[0] = 1 / 2.655;
+ I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2 * 1.07;
+ I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2 * 1.07;
+ //for 16nm DG HP
+ I_g_on_n[0][0] = 1.07e-9;//A/micron
+ I_g_on_n[0][10] = 1.07e-9;
+ I_g_on_n[0][20] = 1.07e-9;
+ I_g_on_n[0][30] = 1.07e-9;
+ I_g_on_n[0][40] = 1.07e-9;
+ I_g_on_n[0][50] = 1.07e-9;
+ I_g_on_n[0][60] = 1.07e-9;
+ I_g_on_n[0][70] = 1.07e-9;
+ I_g_on_n[0][80] = 1.07e-9;
+ I_g_on_n[0][90] = 1.07e-9;
+ I_g_on_n[0][100] = 1.07e-9;
+
+ if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) {
+ //22 nm commodity DRAM cell access transistor technology parameters.
//parameters
curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
//2005 ITRS, the value was about twice the value in 2007 ITRS
@@ -1659,12 +1546,12 @@ void init_tech_params(double technology, bool is_tag)
curr_Wmemcella_dram = width_dram_access_transistor;
curr_Wmemcellpmos_dram = 0;
curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.022*0.022;//micron2.
+ curr_area_cell_dram = 6 * 0.022 * 0.022;//micron2.
curr_asp_ratio_cell_dram = 0.667;
curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
//kept constant.
- //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
+ //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
curr_vpp = 2.3;//vpp. V
t_ox[3] = 3.5e-3;//micron
v_th[3] = 1.0;//V
@@ -1695,930 +1582,766 @@ void init_tech_params(double technology, bool is_tag)
I_off_n[3][90] = 1.18e-11;
I_off_n[3][100] = 1.72e-11;
- }
- else
- {
- //some error handler
+ } else {
+ //some error handler
+ }
+
+ //SRAM cell properties
+ curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_sram = 1.46;
+ //CAM cell properties //TODO: data need to be revisited
+ curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
+ curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
+ curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
+ curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
+ curr_asp_ratio_cell_cam = 2.92;
+ //Empirical undifferetiated core/FU coefficient
+ curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7 * 0.7;
+ curr_core_tx_density = 1.25 / 0.7 / 0.7 / 0.7;
+ curr_sckt_co_eff = 1.1296;
+ curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
+ curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
}
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7/0.7/0.7;
- curr_sckt_co_eff = 1.1296;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
+ g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type];
+ g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type];
+ g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type];
+ g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type];
+ g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type];
+ g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type];
+ g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type];
+ g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type];
+ g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type];
+ g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type];
+ g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type];
+ g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type];
+ g_tp.peri_global.n_to_p_eff_curr_drv_ratio
+ += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type];
+ g_tp.peri_global.long_channel_leakage_reduction
+ += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type];
+ g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
+ g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
+ gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type];
+
+ g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
+ g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
+ g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
+ g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
+ g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
+ g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
+ g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
+ g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
+ g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
+ g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
+ g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
+ g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
+ g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
+ g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+
+ g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell;
+ g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor;
+ g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
+ g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
+ g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell;
+ g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp;
+ g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
+ g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell;
+ g_tp.vpp += curr_alpha * curr_vpp;
+ g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
+ g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
+ g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
+ g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor];
+ g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor];
+ g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor];
+ g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor];
+ g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
+ g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
+
+ g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
+ g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
+ g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
+ g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
+ g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
+ g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
+ g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
+ g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
+ g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron
+ g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
+ g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
+ g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
+ g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
+ g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
+ g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+ g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
+
+ g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram;
+ g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram;
+ g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram;
+ area_cell_dram += curr_alpha * curr_area_cell_dram;
+ asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram;
+
+ g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram;
+ g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram;
+ g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram;
+ area_cell_sram += curr_alpha * curr_area_cell_sram;
+ asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram;
+
+ g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng
+ g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam;
+ g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam;
+ area_cell_cam += curr_alpha * curr_area_cell_cam;
+ asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam;
+
+ //Sense amplifier latch Gm calculation
+ mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type];
+ Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type];
- g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type];
- g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type];
- g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type];
- g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type];
- g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type];
- g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type];
- g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type];
- g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type];
- g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type];
- g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type];
- g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type];
- g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type];
- g_tp.peri_global.n_to_p_eff_curr_drv_ratio
- += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type];
- g_tp.peri_global.long_channel_leakage_reduction
- += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type];
- g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
- gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type];
-
- g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
- g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
- g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
- g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
- g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
- g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
- g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
- g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
- g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
- g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
- g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
- g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
- g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
- g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
-
- g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell;
- g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor;
- g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
- g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
- g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
- g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
- g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
- g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell;
- g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp;
- g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
- g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell;
- g_tp.vpp += curr_alpha * curr_vpp;
- g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
- g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
- g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
- g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
- g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
- g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
- g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor];
- g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor];
- g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor];
- g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor];
- g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
- g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
-
- g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
- g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
- g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
- g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
- g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
- g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
- g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
- g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
- g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
- g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
- g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
- g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
- g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
- g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
-
- g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram;
- g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram;
- g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram;
- area_cell_dram += curr_alpha * curr_area_cell_dram;
- asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram;
-
- g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram;
- g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram;
- g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram;
- area_cell_sram += curr_alpha * curr_area_cell_sram;
- asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram;
-
- g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng
- g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam;
- g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam;
- area_cell_cam += curr_alpha * curr_area_cell_cam;
- asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam;
-
- //Sense amplifier latch Gm calculation
- mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type];
- Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type];
-
- //Empirical undifferetiated core/FU coefficient
- g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff;
- g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density;
- g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead;
- g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead;
- g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff;
- }
-
-
- //Currently we are not modeling the resistance/capacitance of poly anywhere.
- //Continuous function (or date have been processed) does not need linear interpolation
- g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process
- g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process
- g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um;
- g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um;
- g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um;
- g_tp.cell_h_def = 50 * g_ip->F_sz_um;
- g_tp.w_poly_contact = g_ip->F_sz_um;
- g_tp.spacing_poly_to_contact = g_ip->F_sz_um;
- g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um;
- g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um;
-
- g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2;
- g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um;
- g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process
- g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process
- g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process
- g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process
- g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_;
- g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_;
-
- if (ram_cell_tech_type == comm_dram)
- {
- g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um;
- g_tp.h_dec = 8; // in the unit of memory cell height
- }
- else
- {
- g_tp.max_w_nmos_dec = g_tp.max_w_nmos_;
- g_tp.h_dec = 4; // in the unit of memory cell height
- }
-
- g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal;
- g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal;
- g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal;
-
- g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal;
- g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n;
- //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p;
-
- g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal;
-
- double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global;
- double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch;
- g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch;
-
- g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram));
- g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w;
- g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram));
- g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w;
- g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng
- g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w;
-
- g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd;
- g_tp.sram.Vbitpre = vdd[ram_cell_tech_type];
- g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng
- pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
- g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
-
-
- double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES];
-
- for (iter=0; iter<=1; ++iter)
- {
- // linear interpolation
- if (iter == 0)
- {
- tech = tech_lo;
- if (tech_lo == tech_hi)
- {
- curr_alpha = 1;
- }
- else
- {
- curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
- }
- }
- else
- {
- tech = tech_hi;
- if (tech_lo == tech_hi)
- {
- break;
- }
- else
- {
- curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
- }
+ //Empirical undifferetiated core/FU coefficient
+ g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff;
+ g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density;
+ g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead;
+ g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead;
+ g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff;
}
- if (tech == 180)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
- aspect_ratio[0][0] = 2.0;
- wire_width = wire_pitch[0][0] / 2; //micron
- wire_thickness = aspect_ratio[0][0] * wire_width;//micron
- wire_spacing = wire_pitch[0][0] - wire_width;//micron
- barrier_thickness = 0.017;//micron
- dishing_thickness = 0;//micron
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
- ild_thickness[0][0] = 0.75;//micron
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.709;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
- vert_dielectric_constant[0][0],
- fringe_cap);//F/micron.
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.4;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.75;//micron
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.709;
- vert_dielectric_constant[0][1] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.2;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 1.5;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.709;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0]= 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.017;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.75;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 3.038;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
- vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.75;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 3.038;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
- vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 1.98;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 3.038;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.18;
- wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18);
- wire_r_per_micron[1][3] = 12 / 0.18;
- }
- else if (tech == 90)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
- aspect_ratio[0][0] = 2.4;
- wire_width = wire_pitch[0][0] / 2; //micron
- wire_thickness = aspect_ratio[0][0] * wire_width;//micron
- wire_spacing = wire_pitch[0][0] - wire_width;//micron
- barrier_thickness = 0.01;//micron
- dishing_thickness = 0;//micron
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
- ild_thickness[0][0] = 0.48;//micron
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.709;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
- vert_dielectric_constant[0][0],
- fringe_cap);//F/micron.
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.4;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.48;//micron
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.709;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.7;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.96;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.709;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.008;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.48;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 3.038;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
- vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.48;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 3.038;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
- vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 1.1;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 3.038;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.09;
- wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09);
- wire_r_per_micron[1][3] = 12 / 0.09;
- }
- else if (tech == 65)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 2.7;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.405;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.303;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.7;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.405;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.303;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.8;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.81;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.303;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.006;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.405;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.734;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.405;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.734;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.77;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.734;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.065;
- wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065);
- wire_r_per_micron[1][3] = 12 / 0.065;
- }
- else if (tech == 45)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.315;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.958;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.315;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.958;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.63;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.958;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.004;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.315;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.46;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.315;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.46;
- vert_dielectric_constant[1][1] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.55;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.46;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.045;
- wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045);
- wire_r_per_micron[1][3] = 12 / 0.045;
- }
- else if (tech == 32)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.21;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.664;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.21;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.664;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.42;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.664;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.003;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.21;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.214;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- aspect_ratio[1][1] = 2.0;
- wire_width = wire_pitch[1][1] / 2;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.21;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.214;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.385;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.214;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.032;//micron
- wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron
- wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron
+
+ //Currently we are not modeling the resistance/capacitance of poly anywhere.
+ //Continuous function (or data have been processed) does not need linear interpolation
+ g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process
+ g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process
+ g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um;
+ g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um;
+ g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um;
+ g_tp.cell_h_def = 50 * g_ip->F_sz_um;
+ g_tp.w_poly_contact = g_ip->F_sz_um;
+ g_tp.spacing_poly_to_contact = g_ip->F_sz_um;
+ g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um;
+ g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um;
+
+ g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2;
+ g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um;
+ //was 10 micron for the 0.8 micron process
+ g_tp.w_iso = 12.5 * g_ip->F_sz_um;
+ // sense amplifier N-trans; was 3 micron for the 0.8 micron process
+ g_tp.w_sense_n = 3.75 * g_ip->F_sz_um;
+ // sense amplifier P-trans; was 6 micron for the 0.8 micron process
+ g_tp.w_sense_p = 7.5 * g_ip->F_sz_um;
+ // Sense enable transistor of the sense amplifier; was 4 micron for the
+ //0.8 micron process
+ g_tp.w_sense_en = 5 * g_ip->F_sz_um;
+ g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_;
+ g_tp.w_nmos_sa_mux= 6 * g_tp.min_w_nmos_;
+
+ if (ram_cell_tech_type == comm_dram) {
+ g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um;
+ g_tp.h_dec = 8; // in the unit of memory cell height
+ } else {
+ g_tp.max_w_nmos_dec = g_tp.max_w_nmos_;
+ g_tp.h_dec = 4; // in the unit of memory cell height
}
- else if (tech == 22)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.15;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.414;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.15;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.414;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.3;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.414;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
-// //*************************
-// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][4] - wire_width;
-// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][5] - wire_width;
-// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][6] - wire_width;
-// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- //*************************
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.003;
- dishing_thickness = 0;
- alpha_scatter = 1.05;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.15;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.104;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.15;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.104;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
+
+ g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal;
+ g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal;
+ g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal;
+
+ g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal;
+ g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n;
+ //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p;
+
+ g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal;
+
+ double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global;
+ double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch;
+ g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch;
+
+ g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram));
+ g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w;
+ g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram));
+ g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w;
+ g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng
+ g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w;
+
+ g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd;
+ g_tp.sram.Vbitpre = vdd[ram_cell_tech_type];
+ g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng
+ pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+ g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
+ g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
+
+
+ double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
+ ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES];
+
+ for (iter = 0; iter <= 1; ++iter) {
+ // linear interpolation
+ if (iter == 0) {
+ tech = tech_lo;
+ if (tech_lo == tech_hi) {
+ curr_alpha = 1;
+ } else {
+ curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi);
+ }
+ } else {
+ tech = tech_hi;
+ if (tech_lo == tech_hi) {
+ break;
+ } else {
+ curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi);
+ }
+ }
+
+ if (tech == 180) {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
+ aspect_ratio[0][0] = 2.0;
+ wire_width = wire_pitch[0][0] / 2; //micron
+ wire_thickness = aspect_ratio[0][0] * wire_width;//micron
+ wire_spacing = wire_pitch[0][0] - wire_width;//micron
+ barrier_thickness = 0.017;//micron
+ dishing_thickness = 0;//micron
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
+ ild_thickness[0][0] = 0.75;//micron
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.709;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
+ vert_dielectric_constant[0][0],
+ fringe_cap);//F/micron.
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.4;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.75;//micron
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.709;
+ vert_dielectric_constant[0][1] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.2;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 1.5;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.709;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.017;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.75;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 3.038;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
+ vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.75;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 3.038;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
+ vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 1.98;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 3.038;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.18;
+ wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18);
+ wire_r_per_micron[1][3] = 12 / 0.18;
+ } else if (tech == 90) {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
+ aspect_ratio[0][0] = 2.4;
+ wire_width = wire_pitch[0][0] / 2; //micron
+ wire_thickness = aspect_ratio[0][0] * wire_width;//micron
+ wire_spacing = wire_pitch[0][0] - wire_width;//micron
+ barrier_thickness = 0.01;//micron
+ dishing_thickness = 0;//micron
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
+ ild_thickness[0][0] = 0.48;//micron
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.709;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15; //F/micron
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
+ vert_dielectric_constant[0][0],
+ fringe_cap);//F/micron.
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.4;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.48;//micron
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.709;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.7;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.96;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.709;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.008;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.48;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 3.038;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
+ vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.48;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 3.038;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
+ vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 1.1;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 3.038;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.09;
+ wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09);
+ wire_r_per_micron[1][3] = 12 / 0.09;
+ } else if (tech == 65) {
+ //Aggressive projections
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 2.7;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.405;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 2.303;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 2.7;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.405;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 2.303;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
+ vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 2.8;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.81;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 2.303;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.006;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.405;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.734;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.405;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.734;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.77;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.734;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.065;
+ wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065);
+ wire_r_per_micron[1][3] = 12 / 0.065;
+ } else if (tech == 45) {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.315;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.958;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.315;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.958;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.63;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.958;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.004;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.315;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.46;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.315;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.46;
+ vert_dielectric_constant[1][1] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.55;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.46;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.045;
+ wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045);
+ wire_r_per_micron[1][3] = 12 / 0.045;
+ } else if (tech == 32) {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.21;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.664;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.21;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.664;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.42;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.664;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.003;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.21;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.214;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ aspect_ratio[1][1] = 2.0;
+ wire_width = wire_pitch[1][1] / 2;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.21;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.214;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
wire_pitch[1][2] = 8 * g_ip->F_sz_um;
aspect_ratio[1][2] = 2.2;
@@ -2627,184 +2350,210 @@ void init_tech_params(double technology, bool is_tag)
wire_spacing = wire_pitch[1][2] - wire_width;
dishing_thickness = 0.1 * wire_thickness;
wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][2] = 0.385;
+ miller_value[1][2] = 1.5;
+ horiz_dielectric_constant[1][2] = 2.214;
+ vert_dielectric_constant[1][2] = 3.9;
+ wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
+ //Nominal projections for commodity DRAM wordline/bitline
+ wire_pitch[1][3] = 2 * 0.032;//micron
+ wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron
+ wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron
+ } else if (tech == 22) {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.15;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.414;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
+ wire_width = wire_pitch[0][1] / 2;
+ aspect_ratio[0][1] = 3.0;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.15;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.414;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.3;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.414;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.003;
+ dishing_thickness = 0;
+ alpha_scatter = 1.05;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.15;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 2.104;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.15;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 2.104;
+ vert_dielectric_constant[1][1] = 3.9;
+ wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
+
+ wire_pitch[1][2] = 8 * g_ip->F_sz_um;
+ aspect_ratio[1][2] = 2.2;
+ wire_width = wire_pitch[1][2] / 2;
+ wire_thickness = aspect_ratio[1][2] * wire_width;
+ wire_spacing = wire_pitch[1][2] - wire_width;
+ dishing_thickness = 0.1 * wire_thickness;
+ wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
ild_thickness[1][2] = 0.275;
miller_value[1][2] = 1.5;
horiz_dielectric_constant[1][2] = 2.104;
vert_dielectric_constant[1][2] = 3.9;
wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
//Nominal projections for commodity DRAM wordline/bitline
wire_pitch[1][3] = 2 * 0.022;//micron
wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron
wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron
-
- //******************
-// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][4] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][5] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][6] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
}
- else if (tech == 16)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.108;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.202;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
- aspect_ratio[0][1] = 3.0;
- wire_width = wire_pitch[0][1] / 2;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.108;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.202;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.216;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.202;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
-// //*************************
-// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][4] - wire_width;
-// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][5] - wire_width;
-// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][6] - wire_width;
-// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- //*************************
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.002;
- dishing_thickness = 0;
- alpha_scatter = 1.05;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.108;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 1.998;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.108;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 1.998;
- vert_dielectric_constant[1][1] = 3.9;
+ else if (tech == 16) {
+ //Aggressive projections.
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
+ aspect_ratio[0][0] = 3.0;
+ wire_width = wire_pitch[0][0] / 2;
+ wire_thickness = aspect_ratio[0][0] * wire_width;
+ wire_spacing = wire_pitch[0][0] - wire_width;
+ barrier_thickness = 0;
+ dishing_thickness = 0;
+ alpha_scatter = 1;
+ wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][0] = 0.108;
+ miller_value[0][0] = 1.5;
+ horiz_dielectric_constant[0][0] = 1.202;
+ vert_dielectric_constant[0][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
+ fringe_cap);
+
+ wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
+ aspect_ratio[0][1] = 3.0;
+ wire_width = wire_pitch[0][1] / 2;
+ wire_thickness = aspect_ratio[0][1] * wire_width;
+ wire_spacing = wire_pitch[0][1] - wire_width;
+ wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][1] = 0.108;
+ miller_value[0][1] = 1.5;
+ horiz_dielectric_constant[0][1] = 1.202;
+ vert_dielectric_constant[0][1] = 3.9;
+ wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
+ fringe_cap);
+
+ wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
+ aspect_ratio[0][2] = 3.0;
+ wire_width = wire_pitch[0][2] / 2;
+ wire_thickness = aspect_ratio[0][2] * wire_width;
+ wire_spacing = wire_pitch[0][2] - wire_width;
+ wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[0][2] = 0.216;
+ miller_value[0][2] = 1.5;
+ horiz_dielectric_constant[0][2] = 1.202;
+ vert_dielectric_constant[0][2] = 3.9;
+ wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
+ fringe_cap);
+
+ //Conservative projections
+ wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
+ aspect_ratio[1][0] = 2.0;
+ wire_width = wire_pitch[1][0] / 2;
+ wire_thickness = aspect_ratio[1][0] * wire_width;
+ wire_spacing = wire_pitch[1][0] - wire_width;
+ barrier_thickness = 0.002;
+ dishing_thickness = 0;
+ alpha_scatter = 1.05;
+ wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][0] = 0.108;
+ miller_value[1][0] = 1.5;
+ horiz_dielectric_constant[1][0] = 1.998;
+ vert_dielectric_constant[1][0] = 3.9;
+ fringe_cap = 0.115e-15;
+ wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
+ ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
+ fringe_cap);
+
+ wire_pitch[1][1] = 4 * g_ip->F_sz_um;
+ wire_width = wire_pitch[1][1] / 2;
+ aspect_ratio[1][1] = 2.0;
+ wire_thickness = aspect_ratio[1][1] * wire_width;
+ wire_spacing = wire_pitch[1][1] - wire_width;
+ wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ ild_thickness[1][1] = 0.108;
+ miller_value[1][1] = 1.5;
+ horiz_dielectric_constant[1][1] = 1.998;
+ vert_dielectric_constant[1][1] = 3.9;
wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
+ ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
+ fringe_cap);
wire_pitch[1][2] = 8 * g_ip->F_sz_um;
aspect_ratio[1][2] = 2.2;
@@ -2813,109 +2562,101 @@ void init_tech_params(double technology, bool is_tag)
wire_spacing = wire_pitch[1][2] - wire_width;
dishing_thickness = 0.1 * wire_thickness;
wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
+ wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
ild_thickness[1][2] = 0.198;
miller_value[1][2] = 1.5;
horiz_dielectric_constant[1][2] = 1.998;
vert_dielectric_constant[1][2] = 3.9;
wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
+ ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
+ fringe_cap);
//Nominal projections for commodity DRAM wordline/bitline
wire_pitch[1][3] = 2 * 0.016;//micron
wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron
wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron
-
- //******************
-// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][4] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][5] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][6] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
}
- g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
-
- g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type];
-
- g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type];
-
- g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2;
-
- g_tp.sense_delay += curr_alpha *SENSE_AMP_D;
- g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P;
-// g_tp.horiz_dielectric_constant += horiz_dielectric_constant;
-// g_tp.vert_dielectric_constant += vert_dielectric_constant;
-// g_tp.aspect_ratio += aspect_ratio;
-// g_tp.miller_value += miller_value;
-// g_tp.ild_thickness += ild_thickness;
-
- }
- g_tp.fringe_cap = fringe_cap;
-
- double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- double p_to_n_sizing_r = pmos_to_nmos_sz_ratio();
- double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0);
- double tf = rd * c_load;
- g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE);
- double KLOAD = 1;
- c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0));
- tf = rd * c_load;
- g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE);
+ g_tp.wire_local.pitch += curr_alpha *
+ wire_pitch[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.R_per_um += curr_alpha *
+ wire_r_per_micron[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.C_per_um += curr_alpha *
+ wire_c_per_micron[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.aspect_ratio += curr_alpha *
+ aspect_ratio[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.ild_thickness += curr_alpha *
+ ild_thickness[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.miller_value += curr_alpha *
+ miller_value[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.horiz_dielectric_constant += curr_alpha *
+ horiz_dielectric_constant[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+ g_tp.wire_local.vert_dielectric_constant += curr_alpha *
+ vert_dielectric_constant[g_ip->ic_proj_type]
+ [(ram_cell_tech_type == comm_dram) ? 3 : 0];
+
+ g_tp.wire_inside_mat.pitch += curr_alpha *
+ wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.R_per_um += curr_alpha *
+ wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.C_per_um += curr_alpha *
+ wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.aspect_ratio += curr_alpha *
+ aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.ild_thickness += curr_alpha *
+ ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.miller_value += curr_alpha *
+ miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha *
+ horiz_dielectric_constant[g_ip->ic_proj_type]
+ [g_ip->wire_is_mat_type];
+ g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha *
+ vert_dielectric_constant [g_ip->ic_proj_type]
+ [g_ip->wire_is_mat_type];
+
+ g_tp.wire_outside_mat.pitch += curr_alpha *
+ wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.R_per_um += curr_alpha *
+ wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.C_per_um += curr_alpha *
+ wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.aspect_ratio += curr_alpha *
+ aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.ild_thickness += curr_alpha *
+ ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.miller_value += curr_alpha *
+ miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha *
+ horiz_dielectric_constant[g_ip->ic_proj_type]
+ [g_ip->wire_os_mat_type];
+ g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha *
+ vert_dielectric_constant [g_ip->ic_proj_type]
+ [g_ip->wire_os_mat_type];
+
+ g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um *
+ g_tp.wire_inside_mat.C_per_um / 2;
+
+ g_tp.sense_delay += curr_alpha * SENSE_AMP_D;
+ g_tp.sense_dy_power += curr_alpha * SENSE_AMP_P;
+
+ }
+ g_tp.fringe_cap = fringe_cap;
+
+ double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ double p_to_n_sizing_r = pmos_to_nmos_sz_ratio();
+ double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0);
+ double tf = rd * c_load;
+ g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE);
+ double KLOAD = 1;
+ c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0));
+ tf = rd * c_load;
+ g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE);
}
diff --git a/ext/mcpat/cacti/uca.cc b/ext/mcpat/cacti/uca.cc
index 568cd9e44..703ad470f 100755
--- a/ext/mcpat/cacti/uca.cc
+++ b/ext/mcpat/cacti/uca.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -37,390 +38,390 @@
#include "uca.h"
// UCA constructor. Initializes dp from dyn_p, constructs bank from dp, copies
// g_ip->nbanks into nbanks and zeroes refresh_power. It then selects the port
// counts, sizes the per-bank address / data / search bit counts, allocates the
// Htree2 objects, records the overall area, and finally runs
// compute_delays() and compute_power_energy().
// The '-' lines are the removed body and the '+' lines are the reformatted
// body that replaces it.
UCA::UCA(const DynamicParameter & dyn_p)
- :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0)
-{
- int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2));
- int num_banks_hor_dir = nbanks/num_banks_ver_dir;
-
- if (dp.use_inp_params)
- {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else
- {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
- }
-
- num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
- num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
- num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
- num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
- num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
-
- if (!dp.fully_assoc && !dp.pure_cam)
- {
-
- if (g_ip->fast_access && dp.is_tag == false)
- {
- num_do_b_bank *= g_ip->data_assoc;
- }
-
- htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
- htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- }
-
- else
- {
-
- htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
- htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- }
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
-
- area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
+ : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) {
// Split the banks between the vertical and horizontal directions. When the
// bank is taller than it is wide the vertical count uses _log2(nbanks) / 2,
// otherwise it uses the remainder _log2(nbanks) - _log2(nbanks) / 2.
// NOTE(review): presumably _log2 returns an integer so "/ 2" truncates - confirm.
+ int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)
+ / 2 : (_log2(nbanks) - _log2(nbanks) / 2));
+ int num_banks_hor_dir = nbanks / num_banks_ver_dir;
+
// Port counts come from the dynamic parameters when dp.use_inp_params is
// set, otherwise from the global input parameters g_ip.
+ if (dp.use_inp_params) {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ } else {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+ }
+
// Per-bank bit counts: the per-port widths are scaled by the number of ports
// that use them (address: RWP+ERP+EWP, data-in: RWP+EWP, data-out: RWP+ERP,
// search in/out: SCHP).
+ num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) *
+ (RWP + ERP + EWP);
+ num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
+ num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
+ num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
+ num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
+
// Neither fully associative nor pure CAM: three H-trees are allocated and
// the search bit counts are passed as 0.
+ if (!dp.fully_assoc && !dp.pure_cam) {
+
// With fast access on a non-tag array the data-out width is multiplied by
// g_ip->data_assoc.
+ if (g_ip->fast_access && dp.is_tag == false) {
+ num_do_b_bank *= g_ip->data_assoc;
+ }
+
+ htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, 0,
+ num_do_b_bank, 0, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Add_htree, true);
+ htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, 0,
+ num_do_b_bank, 0, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_in_htree, true);
+ htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank, 0,
+ num_do_b_bank, 0, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_out_htree, true);
+ }
+
// Fully associative or pure CAM: the search bit counts are passed through
// and two additional search H-trees (htree_in_search, htree_out_search) are
// allocated.
+ else {
+
+ htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank, num_so_b_bank,
+ num_banks_ver_dir * 2, num_banks_hor_dir * 2,
+ Add_htree, true);
+ htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank, num_so_b_bank,
+ num_banks_ver_dir * 2, num_banks_hor_dir * 2,
+ Data_in_htree, true);
+ htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank,
+ num_so_b_bank, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_out_htree, true);
+ htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank,
+ num_so_b_bank, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_in_htree, true);
+ htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
+ num_addr_b_bank, num_di_b_bank,
+ num_si_b_bank, num_do_b_bank,
+ num_so_b_bank, num_banks_ver_dir * 2,
+ num_banks_hor_dir * 2, Data_out_htree,
+ true);
+ }
+
// The overall width and height are taken from the data-in H-tree.
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+
+ area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
// cout<<"area cell"<<area_all_dataramcells<<endl;
// cout<<area.get_area()<<endl;
- // delay calculation
- double inrisetime = 0.0;
- compute_delays(inrisetime);
- compute_power_energy();
+ // delay calculation
+ double inrisetime = 0.0;
+ compute_delays(inrisetime);
+ compute_power_energy();
}
-UCA::~UCA()
-{
- delete htree_in_add;
- delete htree_in_data;
- delete htree_out_data;
+UCA::~UCA() {
+ delete htree_in_add;
+ delete htree_in_data;
+ delete htree_out_data;
}
-double UCA::compute_delays(double inrisetime)
-{
- double outrisetime = bank.compute_delays(inrisetime);
-
- double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
- double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
- delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
- bank.mat.sa_mux_lev_1_predec->delay +
- bank.mat.sa_mux_lev_1_dec->delay;
- delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
- bank.mat.sa_mux_lev_2_predec->delay +
- bank.mat.sa_mux_lev_2_dec->delay;
- double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
-
- delay_before_subarray_output_driver =
- MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
- delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
- MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
- delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
- delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
- bank.htree_out_data->delay + htree_out_data->delay;
- access_time = bank.mat.delay_comparator;
-
- double ram_delay_inside_mat;
- if (dp.fully_assoc)
- {
- //delay of FA contains both CAM tag and RAM data
- { //delay of CAM
- ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
- access_time = htree_in_add->delay + bank.htree_in_add->delay;
- //delay of fully-associative data array
- access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
+double UCA::compute_delays(double inrisetime) {
+ double outrisetime = bank.compute_delays(inrisetime);
+
+ double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
+ double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
+ delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
+ bank.mat.sa_mux_lev_1_predec->delay +
+ bank.mat.sa_mux_lev_1_dec->delay;
+ delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
+ bank.mat.sa_mux_lev_2_predec->delay +
+ bank.mat.sa_mux_lev_2_dec->delay;
+ double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
+
+ delay_before_subarray_output_driver =
+ MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
+ delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
+ MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
+ delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
+ delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
+ bank.htree_out_data->delay + htree_out_data->delay;
+ access_time = bank.mat.delay_comparator;
+
+ double ram_delay_inside_mat;
+ if (dp.fully_assoc) {
+ //delay of FA contains both CAM tag and RAM data
+ { //delay of CAM
+ ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
+ access_time = htree_in_add->delay + bank.htree_in_add->delay;
+ //delay of fully-associative data array
+ access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
+ }
+ } else {
+ access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
}
- }
- else
- {
- access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
- }
-
- if (dp.is_main_mem)
- {
- double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
- double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
- delay_from_subarray_out_drv_to_out;
- access_time = t_rcd + cas_latency;
- }
-
- double temp;
-
- if (!dp.fully_assoc)
- {
- temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
- if (dp.is_dram)
- {
- temp += bank.mat.delay_writeback; // temp stores random cycle time
+
+ if (dp.is_main_mem) {
+ double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
+ double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
+ delay_from_subarray_out_drv_to_out;
+ access_time = t_rcd + cas_latency;
+ }
+
+ double temp;
+
+ if (!dp.fully_assoc) {
+ temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
+ if (dp.is_dram) {
+ temp += bank.mat.delay_writeback; // temp stores random cycle time
+ }
+
+
+ temp = MAX(temp, bank.mat.r_predec->delay);
+ temp = MAX(temp, bank.mat.b_mux_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
+ } else {
+ ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
+ temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
+ + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
+
+ temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
+ temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
+ temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
+ }
+
+ // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
+ if (g_ip->rpters_in_htree == false) {
+ temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
+ }
+ cycle_time = temp;
+
+ double delay_req_network = max_delay_before_row_decoder;
+ double delay_rep_network = delay_from_subarray_out_drv_to_out;
+ multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
+
+ if (dp.is_main_mem) {
+ multisubbank_interleave_cycle_time = htree_in_add->delay;
+ precharge_delay = htree_in_add->delay +
+ bank.htree_in_add->delay + bank.mat.delay_writeback +
+ bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
+ cycle_time = access_time + precharge_delay;
+ } else {
+ precharge_delay = 0;
}
+ double dram_array_availability = 0;
+ if (dp.is_dram) {
+ dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
+ }
- temp = MAX(temp, bank.mat.r_predec->delay);
- temp = MAX(temp, bank.mat.b_mux_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
- }
- else
- {
- ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
- temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
- + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
-
- temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
- temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
- }
-
- // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
- if (g_ip->rpters_in_htree == false)
- {
- temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
- }
- cycle_time = temp;
-
- double delay_req_network = max_delay_before_row_decoder;
- double delay_rep_network = delay_from_subarray_out_drv_to_out;
- multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
-
- if (dp.is_main_mem)
- {
- multisubbank_interleave_cycle_time = htree_in_add->delay;
- precharge_delay = htree_in_add->delay +
- bank.htree_in_add->delay + bank.mat.delay_writeback +
- bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
- cycle_time = access_time + precharge_delay;
- }
- else
- {
- precharge_delay = 0;
- }
-
- double dram_array_availability = 0;
- if (dp.is_dram)
- {
- dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
- }
-
- return outrisetime;
+ return outrisetime;
}
// note: currently, power numbers are for a bank of an array
-void UCA::compute_power_energy()
-{
- bank.compute_power_energy();
- power = bank.power;
-
- power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
- power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
- if (dp.fully_assoc || dp.pure_cam)
- power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
-
- power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
- htree_in_data->power.readOp.leakage +
- htree_out_data->power.readOp.leakage;
-
- power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
- htree_in_data->power.readOp.gate_leakage +
- htree_out_data->power.readOp.gate_leakage;
- if (dp.fully_assoc || dp.pure_cam)
- {
+void UCA::compute_power_energy() {
+ bank.compute_power_energy();
+ power = bank.power;
+
+ power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
+ power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
+ if (dp.fully_assoc || dp.pure_cam)
+ power_routing_to_bank.searchOp.dynamic =
+ htree_in_search->power.searchOp.dynamic +
+ htree_out_search->power.searchOp.dynamic;
+
+ power_routing_to_bank.readOp.leakage +=
+ htree_in_add->power.readOp.leakage +
+ htree_in_data->power.readOp.leakage +
+ htree_out_data->power.readOp.leakage;
+
+ power_routing_to_bank.readOp.gate_leakage +=
+ htree_in_add->power.readOp.gate_leakage +
+ htree_in_data->power.readOp.gate_leakage +
+ htree_out_data->power.readOp.gate_leakage;
+ if (dp.fully_assoc || dp.pure_cam) {
power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
- }
-
- power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
- power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
- power.readOp.leakage += power_routing_to_bank.readOp.leakage;
- power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
-
- // calculate total write energy per access
- power.writeOp.dynamic = power.readOp.dynamic
- - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
- + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
- - power_routing_to_bank.readOp.dynamic
- + power_routing_to_bank.writeOp.dynamic
- + bank.htree_in_data->power.readOp.dynamic
- - bank.htree_out_data->power.readOp.dynamic;
-
- if (dp.is_dram == false)
- {
- power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
- }
-
- dyn_read_energy_from_closed_page = power.readOp.dynamic;
- dyn_read_energy_from_open_page = power.readOp.dynamic -
- (bank.mat.r_predec->power.readOp.dynamic +
- bank.mat.power_row_decoders.readOp.dynamic +
- bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
- bank.mat.power_sa.readOp.dynamic +
- bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
-
- dyn_read_energy_remaining_words_in_burst =
- (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
- ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
- bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
- bank.htree_out_data->power.readOp.dynamic +
- power_routing_to_bank.readOp.dynamic);
- dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
- dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
-
- activate_energy = htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
- (bank.mat.r_predec->power.readOp.dynamic +
- bank.mat.power_row_decoders.readOp.dynamic +
- bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
- read_energy = (htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
- (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
- bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
- bank.htree_out_data->power.readOp.dynamic +
- htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
- write_energy = (htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
- htree_in_data->power.readOp.dynamic +
- bank.htree_in_data->power.readOp.dynamic +
- (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
- precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
- bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_closed_page =
- (bank.mat.r_predec->power.readOp.leakage +
- bank.mat.b_mux_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
- bank.mat.power_row_decoders.readOp.leakage +
- bank.mat.power_bit_mux_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
- bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_closed_page +=
- (bank.mat.r_predec->power.readOp.gate_leakage +
- bank.mat.b_mux_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
- bank.mat.power_row_decoders.readOp.gate_leakage +
- bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
- //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_open_page =
- (bank.mat.r_predec->power.readOp.leakage +
- bank.mat.b_mux_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
- bank.mat.power_row_decoders.readOp.leakage +
- bank.mat.power_bit_mux_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
- bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_open_page +=
- (bank.mat.r_predec->power.readOp.gate_leakage +
- bank.mat.b_mux_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
- bank.mat.power_row_decoders.readOp.gate_leakage +
- bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
- //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_request_and_reply_networks =
- power_routing_to_bank.readOp.leakage +
- bank.htree_in_add->power.readOp.leakage +
- bank.htree_in_data->power.readOp.leakage +
- bank.htree_out_data->power.readOp.leakage;
-
- leak_power_request_and_reply_networks +=
- power_routing_to_bank.readOp.gate_leakage +
- bank.htree_in_add->power.readOp.gate_leakage +
- bank.htree_in_data->power.readOp.gate_leakage +
- bank.htree_out_data->power.readOp.gate_leakage;
-
- if (dp.fully_assoc || dp.pure_cam)
- {
+ }
+
+ power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
+ power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
+ power.readOp.leakage += power_routing_to_bank.readOp.leakage;
+ power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
+
+ // calculate total write energy per access
+ power.writeOp.dynamic = power.readOp.dynamic
+ - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
+ - power_routing_to_bank.readOp.dynamic
+ + power_routing_to_bank.writeOp.dynamic
+ + bank.htree_in_data->power.readOp.dynamic
+ - bank.htree_out_data->power.readOp.dynamic;
+
+ if (dp.is_dram == false) {
+ power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ }
+
+ dyn_read_energy_from_closed_page = power.readOp.dynamic;
+ dyn_read_energy_from_open_page = power.readOp.dynamic -
+ (bank.mat.r_predec->power.readOp.dynamic +
+ bank.mat.power_row_decoders.readOp.dynamic +
+ bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
+ bank.mat.power_sa.readOp.dynamic +
+ bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
+
+ dyn_read_energy_remaining_words_in_burst =
+ (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
+ ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
+ bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
+ bank.htree_out_data->power.readOp.dynamic +
+ power_routing_to_bank.readOp.dynamic);
+ dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
+ dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
+
+ activate_energy = htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
+ (bank.mat.r_predec->power.readOp.dynamic +
+ bank.mat.power_row_decoders.readOp.dynamic +
+ bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
+ read_energy = (htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
+ (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
+ bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
+ bank.htree_out_data->power.readOp.dynamic +
+ htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
+ write_energy = (htree_in_add->power.readOp.dynamic +
+ bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
+ htree_in_data->power.readOp.dynamic +
+ bank.htree_in_data->power.readOp.dynamic +
+ (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
+ precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
+ bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_closed_page =
+ (bank.mat.r_predec->power.readOp.leakage +
+ bank.mat.b_mux_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
+ bank.mat.power_row_decoders.readOp.leakage +
+ bank.mat.power_bit_mux_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
+ bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_closed_page +=
+ (bank.mat.r_predec->power.readOp.gate_leakage +
+ bank.mat.b_mux_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ bank.mat.power_row_decoders.readOp.gate_leakage +
+ bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
+ //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_open_page =
+ (bank.mat.r_predec->power.readOp.leakage +
+ bank.mat.b_mux_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
+ bank.mat.power_row_decoders.readOp.leakage +
+ bank.mat.power_bit_mux_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
+ bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_subbank_open_page +=
+ (bank.mat.r_predec->power.readOp.gate_leakage +
+ bank.mat.b_mux_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ bank.mat.power_row_decoders.readOp.gate_leakage +
+ bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
+ //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
+
+ leak_power_request_and_reply_networks =
+ power_routing_to_bank.readOp.leakage +
+ bank.htree_in_add->power.readOp.leakage +
+ bank.htree_in_data->power.readOp.leakage +
+ bank.htree_out_data->power.readOp.leakage;
+
+ leak_power_request_and_reply_networks +=
+ power_routing_to_bank.readOp.gate_leakage +
+ bank.htree_in_add->power.readOp.gate_leakage +
+ bank.htree_in_data->power.readOp.gate_leakage +
+ bank.htree_out_data->power.readOp.gate_leakage;
+
+ if (dp.fully_assoc || dp.pure_cam) {
leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
- }
-
-
- if (dp.is_dram)
- { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
- refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
- bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
- refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
- refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
- refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
- refresh_power /= dp.dram_refresh_period;
- }
-
-
- if (dp.is_tag == false)
- {
- power.readOp.dynamic = dyn_read_energy_from_closed_page;
- power.writeOp.dynamic = dyn_read_energy_from_closed_page
- - dyn_read_energy_remaining_words_in_burst
- - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
- + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
- + (power_routing_to_bank.writeOp.dynamic -
- power_routing_to_bank.readOp.dynamic -
- bank.htree_out_data->power.readOp.dynamic +
- bank.htree_in_data->power.readOp.dynamic) *
- (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
+ }
- if (dp.is_dram == false)
- {
- power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+
+ // if DRAM, add contribution of power spent in row predecoder drivers,
+ // blocks and decoders to refresh power
+ if (dp.is_dram) {
+ refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
+ bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
+ refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
+ refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
+ refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ refresh_power /= dp.dram_refresh_period;
}
- }
-
- // if DRAM, add refresh power to total leakage
- if (dp.is_dram)
- {
- power.readOp.leakage += refresh_power;
- }
-
- // TODO: below should be avoided.
- /*if (dp.is_main_mem)
- {
- power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
- }*/
-
- assert(power.readOp.dynamic > 0);
- assert(power.writeOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
+
+
+ if (dp.is_tag == false) {
+ power.readOp.dynamic = dyn_read_energy_from_closed_page;
+ power.writeOp.dynamic = dyn_read_energy_from_closed_page
+ - dyn_read_energy_remaining_words_in_burst
+ - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
+ + (power_routing_to_bank.writeOp.dynamic -
+ power_routing_to_bank.readOp.dynamic -
+ bank.htree_out_data->power.readOp.dynamic +
+ bank.htree_in_data->power.readOp.dynamic) *
+ (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
+
+ if (dp.is_dram == false) {
+ power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
+ }
+ }
+
+ // if DRAM, add refresh power to total leakage
+ if (dp.is_dram) {
+ power.readOp.leakage += refresh_power;
+ }
+
+ // TODO: below should be avoided.
+ /*if (dp.is_main_mem)
+ {
+ power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
+ }*/
+
+ assert(power.readOp.dynamic > 0);
+ assert(power.writeOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
}
diff --git a/ext/mcpat/cacti/uca.h b/ext/mcpat/cacti/uca.h
index fdab14fc7..402035f9a 100755
--- a/ext/mcpat/cacti/uca.h
+++ b/ext/mcpat/cacti/uca.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -40,9 +41,8 @@
#include "htree2.h"
#include "parameter.h"
-class UCA : public Component
-{
- public:
+class UCA : public Component {
+public:
UCA(const DynamicParameter & dyn_p);
~UCA();
double compute_delays(double inrisetime); // returns outrisetime
@@ -66,7 +66,10 @@ class UCA : public Component
int num_do_b_bank;
int num_si_b_bank;
int num_so_b_bank;
- int RWP, ERP, EWP,SCHP;
+ int RWP;
+ int ERP;
+ int EWP;
+ int SCHP;
double area_all_dataramcells;
double dyn_read_energy_from_closed_page;
diff --git a/ext/mcpat/cacti/wire.cc b/ext/mcpat/cacti/wire.cc
index 742000c85..b7d9e34ce 100644
--- a/ext/mcpat/cacti/wire.cc
+++ b/ext/mcpat/cacti/wire.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -41,173 +42,173 @@ Wire::Wire(
enum Wire_placement wp,
double resistivity,
TechnologyParameter::DeviceType *dt
- ):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s),
- resistivity(resistivity), deviceType(dt)
-{
- wire_placement = wp;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- in_rise_time = 0;
- out_rise_time = 0;
- if (initialized != 1) {
- cout << "Wire not initialized. Initializing it with default values\n";
- Wire winit;
- }
- calculate_wire_stats();
- // change everything back to seconds, microns, and Joules
- repeater_spacing *= 1e6;
- wire_length *= 1e6;
- wire_width *= 1e6;
- wire_spacing *= 1e6;
- assert(wire_length > 0);
- assert(power.readOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
- assert(power.readOp.gate_leakage > 0);
+ ): wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s),
+ s_scale(s_s),
+ resistivity(resistivity), deviceType(dt) {
+ wire_placement = wp;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ in_rise_time = 0;
+ out_rise_time = 0;
+ if (initialized != 1) {
+ cout << "Wire not initialized. Initializing it with default values\n";
+ Wire winit;
+ }
+ calculate_wire_stats();
+ // change everything back to seconds, microns, and Joules
+ repeater_spacing *= 1e6;
+ wire_length *= 1e6;
+ wire_width *= 1e6;
+ wire_spacing *= 1e6;
+ assert(wire_length > 0);
+ assert(power.readOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
+ assert(power.readOp.gate_leakage > 0);
}
- // the following values are for peripheral global technology
- // specified in the input config file
- Component Wire::global;
- Component Wire::global_5;
- Component Wire::global_10;
- Component Wire::global_20;
- Component Wire::global_30;
- Component Wire::low_swing;
-
- int Wire::initialized;
- double Wire::wire_width_init;
- double Wire::wire_spacing_init;
-
-
-Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, TechnologyParameter::DeviceType *dt)
-{
- w_scale = w_s;
- s_scale = s_s;
- deviceType = dt;
- wire_placement = wp;
- resistivity = resis;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
- in_rise_time = 0;
- out_rise_time = 0;
-
- switch (wire_placement)
- {
- case outside_mat: wire_width = g_tp.wire_outside_mat.pitch; break;
- case inside_mat : wire_width = g_tp.wire_inside_mat.pitch; break;
- default: wire_width = g_tp.wire_local.pitch; break;
- }
-
- wire_spacing = wire_width;
-
- wire_width *= (w_scale * 1e-6/2) /* (m) */;
- wire_spacing *= (s_scale * 1e-6/2) /* (m) */;
-
- initialized = 1;
- init_wire();
- wire_width_init = wire_width;
- wire_spacing_init = wire_spacing;
-
- assert(power.readOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
- assert(power.readOp.gate_leakage > 0);
+// the following values are for peripheral global technology
+// specified in the input config file
+Component Wire::global;
+Component Wire::global_5;
+Component Wire::global_10;
+Component Wire::global_20;
+Component Wire::global_30;
+Component Wire::low_swing;
+
+int Wire::initialized;
+double Wire::wire_width_init;
+double Wire::wire_spacing_init;
+
+
+Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis,
+ TechnologyParameter::DeviceType *dt) {
+ w_scale = w_s;
+ s_scale = s_s;
+ deviceType = dt;
+ wire_placement = wp;
+ resistivity = resis;
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
+ in_rise_time = 0;
+ out_rise_time = 0;
+
+ switch (wire_placement) {
+ case outside_mat:
+ wire_width = g_tp.wire_outside_mat.pitch;
+ break;
+ case inside_mat :
+ wire_width = g_tp.wire_inside_mat.pitch;
+ break;
+ default:
+ wire_width = g_tp.wire_local.pitch;
+ break;
+ }
+
+ wire_spacing = wire_width;
+
+ wire_width *= (w_scale * 1e-6 / 2) /* (m) */;
+ wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */;
+
+ initialized = 1;
+ init_wire();
+ wire_width_init = wire_width;
+ wire_spacing_init = wire_spacing;
+
+ assert(power.readOp.dynamic > 0);
+ assert(power.readOp.leakage > 0);
+ assert(power.readOp.gate_leakage > 0);
}
-Wire::~Wire()
-{
+Wire::~Wire() {
}
void
-Wire::calculate_wire_stats()
-{
-
- if (wire_placement == outside_mat) {
- wire_width = g_tp.wire_outside_mat.pitch;
- }
- else if (wire_placement == inside_mat) {
- wire_width = g_tp.wire_inside_mat.pitch;
- }
- else {
- wire_width = g_tp.wire_local.pitch;
- }
-
- wire_spacing = wire_width;
-
- wire_width *= (w_scale * 1e-6/2) /* (m) */;
- wire_spacing *= (s_scale * 1e-6/2) /* (m) */;
-
-
- if (wt != Low_swing) {
-
- // delay_optimal_wire();
-
- if (wt == Global) {
- delay = global.delay * wire_length;
- power.readOp.dynamic = global.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global.area.w;
- repeater_size = global.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_5) {
- delay = global_5.delay * wire_length;
- power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_5.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_5.area.w;
- repeater_size = global_5.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_10) {
- delay = global_10.delay * wire_length;
- power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_10.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_10.area.w;
- repeater_size = global_10.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_20) {
- delay = global_20.delay * wire_length;
- power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_20.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_20.area.w;
- repeater_size = global_20.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_30) {
- delay = global_30.delay * wire_length;
- power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_30.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_30.area.w;
- repeater_size = global_30.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- out_rise_time = delay*repeater_spacing/deviceType->Vth;
- }
- else if (wt == Low_swing) {
- low_swing_model ();
- repeater_spacing = wire_length;
- repeater_size = 1;
- }
- else {
- assert(0);
- }
+Wire::calculate_wire_stats() {
+
+ if (wire_placement == outside_mat) {
+ wire_width = g_tp.wire_outside_mat.pitch;
+ } else if (wire_placement == inside_mat) {
+ wire_width = g_tp.wire_inside_mat.pitch;
+ } else {
+ wire_width = g_tp.wire_local.pitch;
+ }
+
+ wire_spacing = wire_width;
+
+ wire_width *= (w_scale * 1e-6 / 2) /* (m) */;
+ wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */;
+
+
+ if (wt != Low_swing) {
+
+ // delay_optimal_wire();
+
+ if (wt == Global) {
+ delay = global.delay * wire_length;
+ power.readOp.dynamic = global.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global.area.w;
+ repeater_size = global.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ } else if (wt == Global_5) {
+ delay = global_5.delay * wire_length;
+ power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_5.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_5.area.w;
+ repeater_size = global_5.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ } else if (wt == Global_10) {
+ delay = global_10.delay * wire_length;
+ power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_10.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_10.area.w;
+ repeater_size = global_10.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ } else if (wt == Global_20) {
+ delay = global_20.delay * wire_length;
+ power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_20.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_20.area.w;
+ repeater_size = global_20.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ } else if (wt == Global_30) {
+ delay = global_30.delay * wire_length;
+ power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length;
+ power.readOp.leakage = global_30.power.readOp.leakage * wire_length;
+ power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length;
+ repeater_spacing = global_30.area.w;
+ repeater_size = global_30.area.h;
+ area.set_area((wire_length / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_size,
+ g_tp.min_w_nmos_ * repeater_size,
+ g_tp.cell_h_def));
+ }
+ out_rise_time = delay * repeater_spacing / deviceType->Vth;
+ } else if (wt == Low_swing) {
+ low_swing_model ();
+ repeater_spacing = wire_length;
+ repeater_size = 1;
+ } else {
+ assert(0);
+ }
}
@@ -218,51 +219,55 @@ Wire::calculate_wire_stats()
* inverters connected in series (refer: CACTI 1 Technical report,
* section 6.1.3)
*/
- double
-Wire::signal_fall_time ()
-{
-
- /* rise time of inverter 1's output */
- double rt;
- /* fall time of inverter 2's output */
- double ft;
- double timeconst;
-
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(min_w_pmos, PCH, 1);
- rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
- return ft;
+double
+Wire::signal_fall_time () {
+
+ /* rise time of inverter 1's output */
+ double rt;
+ /* fall time of inverter 2's output */
+ double ft;
+ double timeconst;
+
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(min_w_pmos, PCH, 1);
+ rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, FALL) /
+ (deviceType->Vdd - deviceType->Vth);
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth;
+ return ft;
}
-double Wire::signal_rise_time ()
-{
-
- /* rise time of inverter 1's output */
- double ft;
- /* fall time of inverter 2's output */
- double rt;
- double timeconst;
-
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(min_w_pmos, PCH, 1);
- ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
- return ft; //sec
+double Wire::signal_rise_time () {
+
+ /* rise time of inverter 1's output */
+ double ft;
+ /* fall time of inverter 2's output */
+ double rt;
+ double timeconst;
+
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(g_tp.min_w_nmos_, NCH, 1);
+ rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth;
+ timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
+ tr_R_on(min_w_pmos, PCH, 1);
+ ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, FALL) /
+ (deviceType->Vdd - deviceType->Vth);
+ return ft; //sec
}
@@ -281,111 +286,110 @@ double Wire::signal_rise_time ()
*
*/
-double Wire::wire_cap (double len /* in m */, bool call_from_outside)
-{
- //TODO: this should be consistent with the wire_res in technology file
- double sidewall, adj, tot_cap;
- double wire_height;
- double epsilon0 = 8.8542e-12;
- double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness;
+double Wire::wire_cap (double len /* in m */, bool call_from_outside) {
+ //TODO: this should be consistent with the wire_res in technology file
+ double sidewall, adj, tot_cap;
+ double wire_height;
+ double epsilon0 = 8.8542e-12;
+ double aspect_ratio;
+ double horiz_dielectric_constant;
+ double vert_dielectric_constant;
+ double miller_value;
+ double ild_thickness;
+
+ switch (wire_placement) {
+ case outside_mat: {
+ aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant;
+ miller_value = g_tp.wire_outside_mat.miller_value;
+ ild_thickness = g_tp.wire_outside_mat.ild_thickness;
+ break;
+ }
+ case inside_mat : {
+ aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant;
+ miller_value = g_tp.wire_inside_mat.miller_value;
+ ild_thickness = g_tp.wire_inside_mat.ild_thickness;
+ break;
+ }
+ default: {
+ aspect_ratio = g_tp.wire_local.aspect_ratio;
+ horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant;
+ vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant;
+ miller_value = g_tp.wire_local.miller_value;
+ ild_thickness = g_tp.wire_local.ild_thickness;
+ break;
+ }
+ }
- switch (wire_placement)
- {
- case outside_mat:
- {
- aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant;
- miller_value = g_tp.wire_outside_mat.miller_value;
- ild_thickness = g_tp.wire_outside_mat.ild_thickness;
- break;
- }
- case inside_mat :
- {
- aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant;
- miller_value = g_tp.wire_inside_mat.miller_value;
- ild_thickness = g_tp.wire_inside_mat.ild_thickness;
- break;
- }
- default:
- {
- aspect_ratio = g_tp.wire_local.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant;
- miller_value = g_tp.wire_local.miller_value;
- ild_thickness = g_tp.wire_local.ild_thickness;
- break;
- }
- }
-
- if (call_from_outside)
- {
- wire_width *= 1e-6;
- wire_spacing *= 1e-6;
- }
- wire_height = wire_width/w_scale*aspect_ratio;
- /*
- * assuming height does not change. wire_width = width_original*w_scale
- * So wire_height does not change as wire width increases
- */
+ if (call_from_outside) {
+ wire_width *= 1e-6;
+ wire_spacing *= 1e-6;
+ }
+ wire_height = wire_width / w_scale * aspect_ratio;
+ /*
+ * assuming height does not change. wire_width = width_original*w_scale
+ * So wire_height does not change as wire width increases
+ */
// capacitance between wires in the same level
// sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
// * epsilon0;
- sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
- * epsilon0;
+ sidewall = miller_value * horiz_dielectric_constant *
+ (wire_height / wire_spacing)
+ * epsilon0;
- // capacitance between wires in adjacent levels
- //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0;
- //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
+ // capacitance between wires in adjacent levels
+ //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0;
+ //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
- adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
- //Change ild_thickness from micron to M
+ adj = miller_value * vert_dielectric_constant * wire_width /
+ (ild_thickness * 1e-6) * epsilon0;
+ //Change ild_thickness from micron to M
- //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m
- tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m
+ //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m
+ tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m
- if (call_from_outside)
- {
- wire_width *= 1e6;
- wire_spacing *= 1e6;
- }
- return (tot_cap*len); // (F)
+ if (call_from_outside) {
+ wire_width *= 1e6;
+ wire_spacing *= 1e6;
+ }
+ return (tot_cap*len); // (F)
}
- double
-Wire::wire_res (double len /*(in m)*/)
-{
-
- double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0;
- //TODO: this should be consistent with the wire_res in technology file
- //The whole computation should be consistent with the wire_res in technology.cc too!
-
- switch (wire_placement)
- {
- case outside_mat:
- {
- aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
- break;
- }
- case inside_mat :
- {
- aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
- break;
- }
- default:
- {
- aspect_ratio = g_tp.wire_local.aspect_ratio;
- break;
- }
- }
- return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)*
- (wire_width-2*barrier_thickness)));
+double
+Wire::wire_res (double len /*(in m)*/) {
+
+ double aspect_ratio;
+ double alpha_scatter = 1.05;
+ double dishing_thickness = 0;
+ double barrier_thickness = 0;
+ //TODO: this should be consistent with the wire_res in technology file
+ //The whole computation should be consistent with the wire_res in technology.cc too!
+
+ switch (wire_placement) {
+ case outside_mat: {
+ aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
+ break;
+ }
+ case inside_mat : {
+ aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
+ break;
+ }
+ default: {
+ aspect_ratio = g_tp.wire_local.aspect_ratio;
+ break;
+ }
+ }
+ return (alpha_scatter * resistivity * 1e-6 * len /
+ ((aspect_ratio*wire_width / w_scale - dishing_thickness -
+ barrier_thickness)*
+ (wire_width - 2*barrier_thickness)));
}
/*
@@ -395,438 +399,456 @@ Wire::wire_res (double len /*(in m)*/)
* low swing nmos delay, and the wire delay
* (ref: Technical report 6)
*/
- void
-Wire::low_swing_model()
-{
- double len = wire_length;
- double beta = pmos_to_nmos_sz_ratio();
-
-
- double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time;
-
- /* Final nmos low swing driver size calculation:
- * Try to size the driver such that the delay
- * is less than 8FO4.
- * If the driver size is greater than
- * the max allowable size, assume max size for the driver.
- * In either case, recalculate the delay using
- * the final driver size assuming slow input with
- * finite rise time instead of ideal step input
- *
- * (ref: Technical report 6)
- */
- double cwire = wire_cap(len); /* load capacitance */
- double rwire = wire_res(len);
+void
+Wire::low_swing_model() {
+ double len = wire_length;
+ double beta = pmos_to_nmos_sz_ratio();
+
+
+ double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time;
+
+ /* Final nmos low swing driver size calculation:
+ * Try to size the driver such that the delay
+ * is less than 8FO4.
+ * If the driver size is greater than
+ * the max allowable size, assume max size for the driver.
+ * In either case, recalculate the delay using
+ * the final driver size assuming slow input with
+ * finite rise time instead of ideal step input
+ *
+ * (ref: Technical report 6)
+ */
+ double cwire = wire_cap(len); /* load capacitance */
+ double rwire = wire_res(len);
#define RES_ADJ (8.6) // Increase in resistance due to low driving vol.
- double driver_res = (-8*g_tp.FO4/(log(0.5) * cwire))/RES_ADJ;
- double nsize = R_to_w(driver_res, NCH);
-
- nsize = MIN(nsize, g_tp.max_w_nmos_);
- nsize = MAX(nsize, g_tp.min_w_nmos_);
-
- if(rwire*cwire > 8*g_tp.FO4)
- {
- nsize = g_tp.max_w_nmos_;
- }
-
- // size the inverter appropriately to minimize the transmitter delay
- // Note - In order to minimize leakage, we are not adding a set of inverters to
- // bring down delay. Instead, we are sizing the single gate
- // based on the logical effort.
- double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0)
- + gate_C(2*min_w_pmos, 0)));
- double req_cin = ((2+beta/1+beta)*gate_C(nsize, 0))/st_eff;
- double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0));
- inv_size = MAX(inv_size, 1);
-
- /* nand gate delay */
- double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1));
- double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(inv_size*g_tp.min_w_nmos_, 0) +
- gate_C(inv_size*min_w_pmos, 0);
-
- double timeconst = res_eq * cap_eq;
-
- delay = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- deviceType->Vth/deviceType->Vdd, RISE);
- double temp_power = cap_eq*deviceType->Vdd*deviceType->Vdd;
-
- inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */
-
- /* Inverter delay:
- * The load capacitance of this inv depends on
- * the gate capacitance of the final stage nmos
- * transistor which in turn depends on nsize
- */
- res_eq = tr_R_on(inv_size*min_w_pmos, PCH, 1);
- cap_eq = drain_C_(inv_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(inv_size*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(nsize, 0);
- timeconst = res_eq * cap_eq;
-
- delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- deviceType->Vth/deviceType->Vdd, FALL);
- temp_power += cap_eq*deviceType->Vdd*deviceType->Vdd;
-
-
- transmitter.delay = delay;
- transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. model*/
- transmitter.power.readOp.leakage = deviceType->Vdd *
- (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
- 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
-
- transmitter.power.readOp.gate_leakage = deviceType->Vdd *
- (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
- 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
-
- inputrise = delay / deviceType->Vth;
-
- /* nmos delay + wire delay */
- cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2 +
- nsense * sense_amp_input_cap(); //+receiver cap
- /*
- * NOTE: nmos is used as both pull up and pull down transistor
- * in the transmitter. This is because for low voltage swing, drive
- * resistance of nmos is less than pmos
- * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency)
- */
- timeconst = (tr_R_on(nsize, NCH, 1)*RES_ADJ) * (cwire +
- drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2) +
- rwire*cwire/2 +
- (tr_R_on(nsize, NCH, 1)*RES_ADJ + rwire) *
- nsense * sense_amp_input_cap();
-
- /*
- * since we are pre-equalizing and overdriving the low
- * swing wires, the net time constant is less
- * than the actual value
- */
- delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0);
+ double driver_res = (-8 * g_tp.FO4 / (log(0.5) * cwire)) / RES_ADJ;
+ double nsize = R_to_w(driver_res, NCH);
+
+ nsize = MIN(nsize, g_tp.max_w_nmos_);
+ nsize = MAX(nsize, g_tp.min_w_nmos_);
+
+ if (rwire*cwire > 8*g_tp.FO4) {
+ nsize = g_tp.max_w_nmos_;
+ }
+
+ // size the inverter appropriately to minimize the transmitter delay
+ // Note - In order to minimize leakage, we are not adding a set of inverters to
+ // bring down delay. Instead, we are sizing the single gate
+ // based on the logical effort.
+ double st_eff = sqrt((2 + beta / 1 + beta) * gate_C(nsize, 0) /
+ (gate_C(2 * g_tp.min_w_nmos_, 0)
+ + gate_C(2 * min_w_pmos, 0)));
+ double req_cin = ((2 + beta / 1 + beta) * gate_C(nsize, 0)) / st_eff;
+ double inv_size = req_cin / (gate_C(min_w_pmos, 0) +
+ gate_C(g_tp.min_w_nmos_, 0));
+ inv_size = MAX(inv_size, 1);
+
+ /* nand gate delay */
+ double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1));
+ double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(2 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(inv_size * g_tp.min_w_nmos_, 0) +
+ gate_C(inv_size * min_w_pmos, 0);
+
+ double timeconst = res_eq * cap_eq;
+
+ delay = horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, RISE);
+ double temp_power = cap_eq * deviceType->Vdd * deviceType->Vdd;
+
+ inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */
+
+ /* Inverter delay:
+ * The load capacitance of this inv depends on
+ * the gate capacitance of the final stage nmos
+ * transistor which in turn depends on nsize
+ */
+ res_eq = tr_R_on(inv_size * min_w_pmos, PCH, 1);
+ cap_eq = drain_C_(inv_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(inv_size * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(nsize, 0);
+ timeconst = res_eq * cap_eq;
+
+ delay += horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd,
+ deviceType->Vth / deviceType->Vdd, FALL);
+ temp_power += cap_eq * deviceType->Vdd * deviceType->Vdd;
+
+
+ transmitter.delay = delay;
+ /* since it is a diff. model*/
+ transmitter.power.readOp.dynamic = temp_power * 2;
+ transmitter.power.readOp.leakage = deviceType->Vdd *
+ (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
+ 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
+
+ transmitter.power.readOp.gate_leakage = deviceType->Vdd *
+ (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
+ 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
+
+ inputrise = delay / deviceType->Vth;
+
+ /* nmos delay + wire delay */
+ cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2 +
+ nsense * sense_amp_input_cap(); //+receiver cap
+ /*
+ * NOTE: nmos is used as both pull up and pull down transistor
+ * in the transmitter. This is because for low voltage swing, drive
+ * resistance of nmos is less than pmos
+ * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency)
+ */
+ timeconst = (tr_R_on(nsize, NCH, 1) * RES_ADJ) * (cwire +
+ drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2) +
+ rwire * cwire / 2 +
+ (tr_R_on(nsize, NCH, 1) * RES_ADJ + rwire) *
+ nsense * sense_amp_input_cap();
+
+ /*
+ * since we are pre-equalizing and overdriving the low
+ * swing wires, the net time constant is less
+ * than the actual value
+ */
+ delay += horowitz(inputrise, timeconst, deviceType->Vth /
+ deviceType->Vdd, .25, 0);
#define VOL_SWING .1
- temp_power += cap_eq*VOL_SWING*.400; /* .4v is the over drive voltage */
- temp_power *= 2; /* differential wire */
-
- l_wire.delay = delay - transmitter.delay;
- l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic;
- l_wire.power.readOp.leakage = deviceType->Vdd*
- (4* cmos_Isub_leakage(nsize, 0, 1, nmos));
-
- l_wire.power.readOp.gate_leakage = deviceType->Vdd*
- (4* cmos_Ig_leakage(nsize, 0, 1, nmos));
-
- //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth;
-
- delay += g_tp.sense_delay;
-
- sense_amp.delay = g_tp.sense_delay;
- out_rise_time = g_tp.sense_delay/(deviceType->Vth);
- sense_amp.power.readOp.dynamic = g_tp.sense_dy_power;
- sense_amp.power.readOp.leakage = 0; //FIXME
- sense_amp.power.readOp.gate_leakage = 0;
-
- power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic;
- power.readOp.leakage = transmitter.power.readOp.leakage +
- l_wire.power.readOp.leakage +
- sense_amp.power.readOp.leakage;
- power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage +
- l_wire.power.readOp.gate_leakage +
- sense_amp.power.readOp.gate_leakage;
+ temp_power += cap_eq * VOL_SWING * .400; /* .4v is the over drive voltage */
+ temp_power *= 2; /* differential wire */
+
+ l_wire.delay = delay - transmitter.delay;
+ l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic;
+ l_wire.power.readOp.leakage = deviceType->Vdd *
+ (4 * cmos_Isub_leakage(nsize, 0, 1, nmos));
+
+ l_wire.power.readOp.gate_leakage = deviceType->Vdd *
+ (4 * cmos_Ig_leakage(nsize, 0, 1, nmos));
+
+ //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
+ // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth;
+
+ delay += g_tp.sense_delay;
+
+ sense_amp.delay = g_tp.sense_delay;
+ out_rise_time = g_tp.sense_delay / (deviceType->Vth);
+ sense_amp.power.readOp.dynamic = g_tp.sense_dy_power;
+ sense_amp.power.readOp.leakage = 0; //FIXME
+ sense_amp.power.readOp.gate_leakage = 0;
+
+ power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic;
+ power.readOp.leakage = transmitter.power.readOp.leakage +
+ l_wire.power.readOp.leakage +
+ sense_amp.power.readOp.leakage;
+ power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage +
+ l_wire.power.readOp.gate_leakage +
+ sense_amp.power.readOp.gate_leakage;
}
- double
-Wire::sense_amp_input_cap()
-{
- return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def);
+double
+Wire::sense_amp_input_cap() {
+ return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) +
+ gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def);
}
-void Wire::delay_optimal_wire ()
-{
- double len = wire_length;
- //double min_wire_width = wire_width; //m
- double beta = pmos_to_nmos_sz_ratio();
- double switching = 0; // switching energy
- double short_ckt = 0; // short-circuit energy
- double tc = 0; // time constant
- // input cap of min sized driver
- double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0);
+void Wire::delay_optimal_wire () {
+ double len = wire_length;
+ //double min_wire_width = wire_width; //m
+ double beta = pmos_to_nmos_sz_ratio();
+ double switching = 0; // switching energy
+ double short_ckt = 0; // short-circuit energy
+ double tc = 0; // time constant
+ // input cap of min sized driver
+ double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0);
- // output parasitic capacitance of
- // the min. sized driver
- double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
- // drive resistance
- double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
- tr_R_on(min_w_pmos, PCH, 1))/2;
- double wr = wire_res(len); //ohm
+ // output parasitic capacitance of
+ // the min. sized driver
+ double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
+ // drive resistance
+ double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
+ tr_R_on(min_w_pmos, PCH, 1)) / 2;
+ double wr = wire_res(len); //ohm
- // wire cap /m
- double wc = wire_cap(len);
+ // wire cap /m
+ double wc = wire_cap(len);
- // size the repeater such that the delay of the wire is minimum
- double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel
+ // size the repeater such that the delay of the wire is minimum
+ // len will cancel
+ double repeater_scaling = sqrt(out_res * wc / (wr * input_cap));
- // calc the optimum spacing between the repeaters (m)
+ // calc the optimum spacing between the repeaters (m)
- repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap)/
- ((wr/len)*(wc/len)));
- repeater_size = repeater_scaling;
+ repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap) /
+ ((wr / len) * (wc / len)));
+ repeater_size = repeater_scaling;
- switching = (repeater_scaling * (input_cap + out_cap) +
- repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
+ switching = (repeater_scaling * (input_cap + out_cap) +
+ repeater_spacing * (wc / len)) * deviceType->Vdd *
+ deviceType->Vdd;
- tc = out_res * (input_cap + out_cap) +
- out_res * wc/len * repeater_spacing/repeater_scaling +
- wr/len * repeater_spacing * input_cap * repeater_scaling +
- 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing;
+ tc = out_res * (input_cap + out_cap) +
+ out_res * wc / len * repeater_spacing / repeater_scaling +
+ wr / len * repeater_spacing * input_cap * repeater_scaling +
+ 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing;
- delay = 0.693 * tc * len/repeater_spacing;
+ delay = 0.693 * tc * len / repeater_spacing;
#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */
- short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
- repeater_scaling * tc;
-
- area.set_area((len/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_scaling,
- g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def));
- power.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt));
- power.readOp.leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
- power.readOp.gate_leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
+ short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
+ repeater_scaling * tc;
+
+ area.set_area((len / repeater_spacing) *
+ compute_gate_area(INV, 1, min_w_pmos * repeater_scaling,
+ g_tp.min_w_nmos_ * repeater_scaling,
+ g_tp.cell_h_def));
+ power.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt));
+ power.readOp.leakage = ((len / repeater_spacing) *
+ deviceType->Vdd *
+ cmos_Isub_leakage(g_tp.min_w_nmos_ *
+ repeater_scaling, beta *
+ g_tp.min_w_nmos_ *
+ repeater_scaling, 1, inv));
+ power.readOp.gate_leakage = ((len / repeater_spacing) *
+ deviceType->Vdd *
+ cmos_Ig_leakage(g_tp.min_w_nmos_ *
+ repeater_scaling, beta *
+ g_tp.min_w_nmos_ *
+ repeater_scaling, 1, inv));
}
// calculate power/delay values for wires with suboptimal repeater sizing/spacing
void
-Wire::init_wire(){
- wire_length = 1;
- delay_optimal_wire();
+Wire::init_wire() {
+ wire_length = 1;
+ delay_optimal_wire();
double sp, si;
- powerDef pow;
- si = repeater_size;
- sp = repeater_spacing;
- sp *= 1e6; // in microns
-
- double i, j, del;
- repeated_wire.push_back(Component());
- for (j=sp; j < 4*sp; j+=100) {
- for (i = si; i > 1; i--) {
- pow = wire_model(j*1e-6, i, &del);
- if (j == sp && i == si) {
- global.delay = del;
- global.power = pow;
- global.area.h = si;
- global.area.w = sp*1e-6; // m
- }
+ powerDef pow;
+ si = repeater_size;
+ sp = repeater_spacing;
+ sp *= 1e6; // in microns
+
+ double i, j, del;
+ repeated_wire.push_back(Component());
+ for (j = sp; j < 4*sp; j += 100) {
+ for (i = si; i > 1; i--) {
+ pow = wire_model(j * 1e-6, i, &del);
+ if (j == sp && i == si) {
+ global.delay = del;
+ global.power = pow;
+ global.area.h = si;
+ global.area.w = sp * 1e-6; // m
+ }
// cout << "Repeater size - "<< i <<
// " Repeater spacing - " << j <<
// " Delay - " << del <<
// " PowerD - " << pow.readOp.dynamic <<
// " PowerL - " << pow.readOp.leakage <<endl;
- repeated_wire.back().delay = del;
- repeated_wire.back().power.readOp = pow.readOp;
- repeated_wire.back().area.w = j*1e-6; //m
- repeated_wire.back().area.h = i;
- repeated_wire.push_back(Component());
+ repeated_wire.back().delay = del;
+ repeated_wire.back().power.readOp = pow.readOp;
+ repeated_wire.back().area.w = j * 1e-6; //m
+ repeated_wire.back().area.h = i;
+ repeated_wire.push_back(Component());
+ }
}
- }
- repeated_wire.pop_back();
- update_fullswing();
- Wire *l_wire = new Wire(Low_swing, 0.001/* 1 mm*/, 1);
- low_swing.delay = l_wire->delay;
- low_swing.power = l_wire->power;
- delete l_wire;
+ repeated_wire.pop_back();
+ update_fullswing();
+ Wire *l_wire = new Wire(Low_swing, 0.001/* 1 mm*/, 1);
+ low_swing.delay = l_wire->delay;
+ low_swing.power = l_wire->power;
+ delete l_wire;
}
-void Wire::update_fullswing()
-{
-
- list<Component>::iterator citer;
- double del[4];
- del[3] = this->global.delay + this->global.delay*.3;
- del[2] = global.delay + global.delay*.2;
- del[1] = global.delay + global.delay*.1;
- del[0] = global.delay + global.delay*.05;
- double threshold;
- double ncost;
- double cost;
- int i = 4;
- while (i>0) {
- threshold = del[i-1];
- cost = BIGNUM;
- for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++)
- {
- if (citer->delay > threshold) {
- citer = repeated_wire.erase(citer);
- citer --;
- }
- else {
- ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic +
- citer->power.readOp.leakage/global.power.readOp.leakage;
- if(ncost < cost)
- {
- cost = ncost;
- if (i == 4) {
- global_30.delay = citer->delay;
- global_30.power = citer->power;
- global_30.area = citer->area;
- }
- else if (i==3) {
- global_20.delay = citer->delay;
- global_20.power = citer->power;
- global_20.area = citer->area;
- }
- else if(i==2) {
- global_10.delay = citer->delay;
- global_10.power = citer->power;
- global_10.area = citer->area;
- }
- else if(i==1) {
- global_5.delay = citer->delay;
- global_5.power = citer->power;
- global_5.area = citer->area;
- }
+void Wire::update_fullswing() {
+
+ list<Component>::iterator citer;
+ double del[4];
+ del[3] = this->global.delay + this->global.delay * .3;
+ del[2] = global.delay + global.delay * .2;
+ del[1] = global.delay + global.delay * .1;
+ del[0] = global.delay + global.delay * .05;
+ double threshold;
+ double ncost;
+ double cost;
+ int i = 4;
+ while (i > 0) {
+ threshold = del[i-1];
+ cost = BIGNUM;
+ for (citer = repeated_wire.begin(); citer != repeated_wire.end();
+ citer++) {
+ if (citer->delay > threshold) {
+ citer = repeated_wire.erase(citer);
+ citer --;
+ } else {
+ ncost = citer->power.readOp.dynamic /
+ global.power.readOp.dynamic +
+ citer->power.readOp.leakage / global.power.readOp.leakage;
+ if (ncost < cost) {
+ cost = ncost;
+ if (i == 4) {
+ global_30.delay = citer->delay;
+ global_30.power = citer->power;
+ global_30.area = citer->area;
+ } else if (i == 3) {
+ global_20.delay = citer->delay;
+ global_20.power = citer->power;
+ global_20.area = citer->area;
+ } else if (i == 2) {
+ global_10.delay = citer->delay;
+ global_10.power = citer->power;
+ global_10.area = citer->area;
+ } else if (i == 1) {
+ global_5.delay = citer->delay;
+ global_5.power = citer->power;
+ global_5.area = citer->area;
+ }
+ }
+ }
}
- }
+ i--;
}
- i--;
- }
}
-powerDef Wire::wire_model (double space, double size, double *delay)
-{
- powerDef ptemp;
- double len = 1;
- //double min_wire_width = wire_width; //m
- double beta = pmos_to_nmos_sz_ratio();
- // switching energy
- double switching = 0;
- // short-circuit energy
- double short_ckt = 0;
- // time constant
- double tc = 0;
- // input cap of min sized driver
- double input_cap = gate_C (g_tp.min_w_nmos_ +
- min_w_pmos, 0);
-
- // output parasitic capacitance of
- // the min. sized driver
- double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
- // drive resistance
- double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
- tr_R_on(min_w_pmos, PCH, 1))/2;
- double wr = wire_res(len); //ohm
-
- // wire cap /m
- double wc = wire_cap(len);
-
- repeater_spacing = space;
- repeater_size = size;
-
- switching = (repeater_size * (input_cap + out_cap) +
- repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
-
- tc = out_res * (input_cap + out_cap) +
- out_res * wc/len * repeater_spacing/repeater_size +
- wr/len * repeater_spacing * out_cap * repeater_size +
- 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing;
-
- *delay = 0.693 * tc * len/repeater_spacing;
+powerDef Wire::wire_model (double space, double size, double *delay) {
+ powerDef ptemp;
+ double len = 1;
+ //double min_wire_width = wire_width; //m
+ double beta = pmos_to_nmos_sz_ratio();
+ // switching energy
+ double switching = 0;
+ // short-circuit energy
+ double short_ckt = 0;
+ // time constant
+ double tc = 0;
+ // input cap of min sized driver
+ double input_cap = gate_C (g_tp.min_w_nmos_ +
+ min_w_pmos, 0);
+
+ // output parasitic capacitance of
+ // the min. sized driver
+ double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
+ // drive resistance
+ double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
+ tr_R_on(min_w_pmos, PCH, 1)) / 2;
+ double wr = wire_res(len); //ohm
+
+ // wire cap /m
+ double wc = wire_cap(len);
+
+ repeater_spacing = space;
+ repeater_size = size;
+
+ switching = (repeater_size * (input_cap + out_cap) +
+ repeater_spacing * (wc / len)) * deviceType->Vdd *
+ deviceType->Vdd;
+
+ tc = out_res * (input_cap + out_cap) +
+ out_res * wc / len * repeater_spacing / repeater_size +
+ wr / len * repeater_spacing * out_cap * repeater_size +
+ 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing;
+
+ *delay = 0.693 * tc * len / repeater_spacing;
#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */
- short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
- repeater_size * tc;
-
- ptemp.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt));
- ptemp.readOp.leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
-
- ptemp.readOp.gate_leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
-
- return ptemp;
+ short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
+ repeater_size * tc;
+
+ ptemp.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt));
+ ptemp.readOp.leakage = ((len / repeater_spacing) *
+ deviceType->Vdd *
+ cmos_Isub_leakage(g_tp.min_w_nmos_ *
+ repeater_size, beta *
+ g_tp.min_w_nmos_ *
+ repeater_size, 1, inv));
+
+ ptemp.readOp.gate_leakage = ((len / repeater_spacing) *
+ deviceType->Vdd *
+ cmos_Ig_leakage(g_tp.min_w_nmos_ *
+ repeater_size, beta *
+ g_tp.min_w_nmos_ *
+ repeater_size, 1, inv));
+
+ return ptemp;
}
void
-Wire::print_wire()
-{
-
- cout << "\nWire Properties:\n\n";
- cout << " Delay Optimal\n\tRepeater size - "<< global.area.h <<
- " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global.delay*1e6 << " (ns/mm)"
- " \n\tPowerD - " << global.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
-
- cout << " 5% Overhead\n\tRepeater size - "<< global_5.area.h <<
- " \n\tRepeater spacing - " << global_5.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global_5.delay *1e6<< " (ns/mm)"
- " \n\tPowerD - " << global_5.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global_5.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global_5.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
- cout << " 10% Overhead\n\tRepeater size - "<< global_10.area.h <<
- " \n\tRepeater spacing - " << global_10.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global_10.delay *1e6<< " (ns/mm)"
- " \n\tPowerD - " << global_10.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global_10.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global_10.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
- cout << " 20% Overhead\n\tRepeater size - "<< global_20.area.h <<
- " \n\tRepeater spacing - " << global_20.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global_20.delay *1e6<< " (ns/mm)"
- " \n\tPowerD - " << global_20.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global_20.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global_20.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
- cout << " 30% Overhead\n\tRepeater size - "<< global_30.area.h <<
- " \n\tRepeater spacing - " << global_30.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global_30.delay *1e6<< " (ns/mm)"
- " \n\tPowerD - " << global_30.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global_30.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global_30.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <<wire_width_init*1e6 << " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init*1e6 << " microns\n";
- cout <<endl;
- cout << " Low-swing wire (1 mm) - Note: Unlike repeated wires, \n\tdelay and power "
- "values of low-swing wires do not\n\thave a linear relationship with length." <<
- " \n\tdelay - " << low_swing.delay *1e9<< " (ns)"
- " \n\tpowerD - " << low_swing.power.readOp.dynamic *1e9<< " (nJ)"
- " \n\tPowerL - " << low_swing.power.readOp.leakage << " (mW)"
- " \n\tPowerLgate - " << low_swing.power.readOp.gate_leakage << " (mW)\n";
- cout << "\tWire width - " <<wire_width_init * 2 /* differential */<< " microns\n";
- cout << "\tWire spacing - " <<wire_spacing_init * 2 /* differential */<< " microns\n";
- cout <<endl;
- cout <<endl;
+Wire::print_wire() {
+
+ cout << "\nWire Properties:\n\n";
+ cout << " Delay Optimal\n\tRepeater size - " << global.area.h <<
+ " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global.delay*1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+
+ cout << " 5% Overhead\n\tRepeater size - " << global_5.area.h <<
+ " \n\tRepeater spacing - " << global_5.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_5.delay *1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global_5.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global_5.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_5.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+ cout << " 10% Overhead\n\tRepeater size - " << global_10.area.h <<
+ " \n\tRepeater spacing - " << global_10.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_10.delay *1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global_10.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global_10.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_10.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+ cout << " 20% Overhead\n\tRepeater size - " << global_20.area.h <<
+ " \n\tRepeater spacing - " << global_20.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_20.delay *1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global_20.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global_20.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_20.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+ cout << " 30% Overhead\n\tRepeater size - " << global_30.area.h <<
+ " \n\tRepeater spacing - " << global_30.area.w*1e3 << " (mm)"
+ " \n\tDelay - " << global_30.delay *1e6 << " (ns/mm)"
+ " \n\tPowerD - " << global_30.power.readOp.dynamic *1e6 << " (nJ/mm)"
+ " \n\tPowerL - " << global_30.power.readOp.leakage << " (mW/mm)"
+ " \n\tPowerLgate - " << global_30.power.readOp.gate_leakage <<
+ " (mW/mm)\n";
+ cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
+ cout << endl;
+ cout << " Low-swing wire (1 mm) - Note: Unlike repeated wires, \n\t" <<
+ "delay and power values of low-swing wires do not\n\t" <<
+ "have a linear relationship with length." <<
+ " \n\tdelay - " << low_swing.delay *1e9 << " (ns)"
+ " \n\tpowerD - " << low_swing.power.readOp.dynamic *1e9 << " (nJ)"
+ " \n\tPowerL - " << low_swing.power.readOp.leakage << " (mW)"
+ " \n\tPowerLgate - " << low_swing.power.readOp.gate_leakage <<
+ " (mW)\n";
+ cout << "\tWire width - " << wire_width_init * 2 /* differential */ <<
+ " microns\n";
+ cout << "\tWire spacing - " << wire_spacing_init * 2 /* differential */ <<
+ " microns\n";
+ cout << endl;
+ cout << endl;
}
diff --git a/ext/mcpat/cacti/wire.h b/ext/mcpat/cacti/wire.h
index 51d55afff..906030dde 100644
--- a/ext/mcpat/cacti/wire.h
+++ b/ext/mcpat/cacti/wire.h
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -43,9 +44,8 @@
#include "component.h"
#include "parameter.h"
-class Wire : public Component
-{
- public:
+class Wire : public Component {
+public:
Wire(enum Wire_type wire_model, double len /* in u*/,
int nsense = 1/* no. of sense amps connected to the low-swing wire */,
double width_scaling = 1,
@@ -56,16 +56,16 @@ class Wire : public Component
~Wire();
Wire( double width_scaling = 1,
- double spacing_scaling = 1,
- enum Wire_placement wire_placement = outside_mat,
- double resistivity = CU_RESISTIVITY,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
- ); // should be used only once for initializing static members
+ double spacing_scaling = 1,
+ enum Wire_placement wire_placement = outside_mat,
+ double resistivity = CU_RESISTIVITY,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
+ ); // should be used only once for initializing static members
void init_wire();
void calculate_wire_stats();
void delay_optimal_wire();
- double wire_cap(double len, bool call_from_outside=false);
+ double wire_cap(double len, bool call_from_outside = false);
double wire_res(double len);
void low_swing_model();
double signal_fall_time();
@@ -81,9 +81,8 @@ class Wire : public Component
double wire_length;
double in_rise_time, out_rise_time;
- void set_in_rise_time(double rt)
- {
- in_rise_time = rt;
+ void set_in_rise_time(double rt) {
+ in_rise_time = rt;
}
static Component global;
static Component global_5;
@@ -95,10 +94,10 @@ class Wire : public Component
static double wire_spacing_init;
void print_wire();
- private:
+private:
int nsense; // no. of sense amps connected to a low-swing wire if it
- // is broadcasting data to multiple destinations
+ // is broadcasting data to multiple destinations
// width and spacing scaling factor can be used
// to model low level wires or special
// fat wires
diff --git a/ext/mcpat/common.h b/ext/mcpat/common.h
new file mode 100644
index 000000000..bd401b7ed
--- /dev/null
+++ b/ext/mcpat/common.h
@@ -0,0 +1,65 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+#include <string>
+
+#include "xmlParser.h"
+
+// Macro definitions to do string comparson to specific parameter/stat.
+// Note: These macros assume node_name and value variables of type XMLCSTR
+// to exist already.
+#define STRCMP(var, str) else if (strcmp(var, str) == 0)
+
+#define ASSIGN_INT_IF(str, lhs) STRCMP(node_name, str) \
+lhs = atoi(value)
+
+#define ASSIGN_FP_IF(str, lhs) STRCMP(node_name, str) \
+lhs = atof(value)
+
+#define ASSIGN_STR_IF(str, lhs) STRCMP(node_name, str) \
+lhs = string(value)
+
+#define ASSIGN_ENUM_IF(str, lhs, etype) STRCMP(node_name, str) \
+lhs = (etype)atoi(value)
+
+
+// Constants shared across many system components
+#define BITS_PER_BYTE 8.0
+#define MIN_BUFFER_SIZE 64
+// CAM structures do not have any associativity
+#define CAM_ASSOC 0
+
+#endif // __COMMON_H__
diff --git a/ext/mcpat/core.cc b/ext/mcpat/core.cc
index ba9106061..b25c23cac 100644
--- a/ext/mcpat/core.cc
+++ b/ext/mcpat/core.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -33,491 +34,570 @@
#include <cassert>
#include <cmath>
#include <iostream>
+#include <sstream>
#include <string>
-#include "XML_Parse.h"
#include "basic_circuit.h"
+#include "basic_components.h"
+#include "common.h"
#include "const.h"
#include "core.h"
#include "io.h"
#include "parameter.h"
-//#include "globalvar.h"
-
-InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- IB (0),
- BTB (0),
- ID_inst (0),
- ID_operand (0),
- ID_misc (0),
- exist(exist_)
-{
- if (!exist) return;
- int idx, tag, data, size, line, assoc, banks;
- bool debug= false, is_default = true;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7];
- //Assuming all L1 caches are virtually idxed physically tagged.
- //cache
-
- size = (int)XML->sys.core[ithCore].icache.icache_config[0];
- line = (int)XML->sys.core[ithCore].icache.icache_config[1];
- assoc = (int)XML->sys.core[ithCore].icache.icache_config[2];
- banks = (int)XML->sys.core[ithCore].icache.icache_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0];
- interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1];
- interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2];
- interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- // interface_ip.obj_func_dyn_energy = 0;
- // interface_ip.obj_func_dyn_power = 0;
- // interface_ip.obj_func_leak_power = 0;
- // interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty);
- scktRatio = g_tp.sckt_co_eff;
- chip_PR_overhead = g_tp.chip_layout_overhead;
- macro_PR_overhead = g_tp.macro_layout_overhead;
- icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area);
- area.set_area(area.get_area()+ icache.caches->local_result.area);
- //output_data_csv(icache.caches.local_result);
-
-
- /*
- *iCache controllers
- *miss buffer Each MSHR contains enough state
- *to handle one or more accesses of any type to a single memory line.
- *Due to the generality of the MSHR mechanism,
- *the amount of state involved is non-trivial:
- *including the address, pointers to the cache entry and destination register,
- *written data, and various other pieces of state.
- */
- interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area);
- area.set_area(area.get_area()+ icache.missb->local_result.area);
- //output_data_csv(icache.missb.local_result);
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = icache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area);
- area.set_area(area.get_area()+ icache.ifb->local_result.area);
- //output_data_csv(icache.ifb.local_result);
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ icache.prefetchb->local_result.area);
- //output_data_csv(icache.prefetchb.local_result);
-
- //Instruction buffer
- data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer.
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
- interface_ip.pure_cam = false;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64?
- XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions.
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- IB->area.set_area(IB->area.get_area()+ IB->local_result.area);
- area.set_area(area.get_area()+ IB->local_result.area);
- //output_data_csv(IB.IB.local_result);
-
- // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width;
- // inst_decoder.init_decoder(is_default, &interface_ip);
- // inst_decoder.full_decoder_power();
-
- if (coredynp.predictionW>0)
- {
- /*
- * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
- * It is only a cache without all the buffers in the cache controller since it is more like a
- * look up table than a cache with cache controller. When access miss, no load from other places
- * such as main memory (not actively fill the misses), it is passively updated under two circumstances:
- * 1) when BPT@ID stage finds out current is a taken branch while BTB missed
- * 2) When BPT@ID stage predicts differently than BTB
- * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
- * 4) when EXEU find out wrong target has been provided from BTB.
- *
- */
- size = XML->sys.core[ithCore].BTB.BTB_config[0];
- line = XML->sys.core[ithCore].BTB.BTB_config[1];
- assoc = XML->sys.core[ithCore].BTB.BTB_config[2];
- banks = XML->sys.core[ithCore].BTB.BTB_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
-// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
- tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- interface_ip.pure_cam = false;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:size;
- interface_ip.line_sz = debug?64:line;
- interface_ip.assoc = debug?8:assoc;
- interface_ip.nbanks = debug?1:banks;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area);
- area.set_area(area.get_area()+ BTB->local_result.area);
- ///cout<<"area="<<area<<endl;
-
- BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp);
- area.set_area(area.get_area()+ BPT->area.get_area());
- }
-
- ID_inst = new inst_decoder(is_default, &interface_ip,
- coredynp.opcode_length, 1/*Decoder should not know how many by itself*/,
- coredynp.x86,
- Core_device, coredynp.core_ty);
-
- ID_operand = new inst_decoder(is_default, &interface_ip,
- coredynp.arch_ireg_width, 1,
- coredynp.x86,
- Core_device, coredynp.core_ty);
-
- ID_misc = new inst_decoder(is_default, &interface_ip,
- 8/* Prefix field etc upto 14B*/, 1,
- coredynp.x86,
- Core_device, coredynp.core_ty);
- //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer.
- //So the dynamic power should be multiplied by a few times.
- area.set_area(area.get_area()+ (ID_inst->area.get_area()
- +ID_operand->area.get_area()
- +ID_misc->area.get_area())*coredynp.decodeW);
-}
+int RegFU::RFWIN_ACCESS_MULTIPLIER = 16;  // NOTE(review): the rationale for 16 is not stated in this part of the file — confirm against RegFU
+
+// The five ROB status bits are: busy, issued, finished, speculative, valid
+int SchedulerU::ROB_STATUS_BITS = 5;
+
+InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,  // Builds the fetch unit's sub-arrays and decoders and sums their areas into this unit's area.
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)  // exist_ == false: return immediately, leaving every sub-component pointer NULL
+ : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL),
+ BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL),
+ interface_ip(*interface_ip_),  // presumably a by-value copy of the caller's InputParameter — confirm the member is not a reference
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;  // nothing is built for a unit that does not exist
+ int idx, tag, data, size, line, assoc, banks;
+ bool is_default = true;
+
+ clockRate = core_params.clockRate;
+ name = "Instruction Fetch Unit";
+ // Check if there is an icache child:
+ int i;
+ icache = NULL;  // already NULL from the initializer list
+ for( i = 0; i < xml_data->nChildNode("component"); i++ ) {
+ XMLNode* childXML = xml_data->getChildNodePtr("component", &i);  // i is passed by address; presumably the parser may update it — confirm
+ XMLCSTR type = childXML->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(childXML->getAttribute("id"));  // NOTE(review): type is still handed to STRCMP below; presumably this helper aborts — confirm
+
+ STRCMP(type, "CacheUnit") {
+ XMLCSTR name = childXML->getAttribute("name");  // local that shadows the 'name' assigned above; NOTE(review): not NULL-checked before strcmp
+ if (strcmp(name, "Instruction Cache") == 0 ||
+ strcmp(name, "icache") == 0) {
+ icache = new CacheUnit(childXML, &interface_ip);
+ children.push_back(icache);  // the icache is also registered in the children list
+ }
+ }
+ }
+ set_params_stats();  // fills inst_fetch_params / inst_fetch_stats from the "BranchTargetBuffer" child
-BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- globalBPT(0),
- localBPT(0),
- L1_localBPT(0),
- L2_localBPT(0),
- chooser(0),
- RAS(0),
- exist(exist_)
-{
+ //Instruction buffer
+ data = core_params.instruction_length * core_params.peak_issueW;  // width of one buffer line, presumably in bits (divided by BITS_PER_BYTE below)
+ line = int(ceil(data / BITS_PER_BYTE));  // NOTE(review): ceil only rounds up if BITS_PER_BYTE is floating-point — confirm its definition
+ size = core_params.num_hthreads * core_params.instruction_buffer_size *
+ line;
+ if (size < MIN_BUFFER_SIZE) {  // clamp the array size to MIN_BUFFER_SIZE
+ size = MIN_BUFFER_SIZE;
+ }
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.instruction_buffer_assoc;
+ interface_ip.nbanks = core_params.instruction_buffer_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0;  // set only when a positive tag width is configured
+ interface_ip.tag_w = core_params.instruction_buffer_tag_width;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports =
+ core_params.number_instruction_fetch_ports;
+ interface_ip.num_rd_ports = 0;
+ interface_ip.num_wr_ports = 0;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_ram = true;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+
+ IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ IB->area.set_area(IB->area.get_area() + IB->local_result.area);
+ area.set_area(area.get_area() + IB->local_result.area);  // accumulate into the fetch unit's total area
+
+ if (core_params.predictionW > 0) {  // BTB and branch predictor are only built when predictionW is positive
 /*
- * Branch Predictor, accessed during ID stage.
- * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264,
- * including global predictor, local two level predictor, and Chooser.
- * The Branch predictor also includes a RAS (return address stack) for function calls
- * Branch predictors are tagged by thread ID and modeled as 1-way associative $
- * However RAS return address stacks are duplicated for each thread.
- * TODO:Data Width need to be computed more precisely *
+ * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
+ * It is only a cache without all the buffers in the cache controller since it is more like a
+ * look up table than a cache with cache controller. When access miss, no load from other places
+ * such as main memory (not actively fill the misses), it is passively updated under the following circumstances:
+ * 1) when BPT@ID stage finds out current is a taken branch while BTB missed
+ * 2) When BPT@ID stage predicts differently than BTB
+ * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
+ * 4) when EXEU find out wrong target has been provided from BTB.
+ *
 */
- if (!exist) return;
- int tag, data;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- interface_ip.assoc = 1;
- interface_ip.pure_cam = false;
- if (coredynp.multithreaded)
- {
-
- tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS);
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
-
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- }
- else
- {
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
-
- }
- //Global predictor
- data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ size = inst_fetch_params.btb_size;
+ line = inst_fetch_params.btb_block_size;
+ assoc = inst_fetch_params.btb_assoc;
+ banks = inst_fetch_params.btb_num_banks;
+ idx = int(ceil(log2(size / line / assoc)));  // NOTE(review): idx is computed but not used anywhere else in this constructor
+ tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads)))  // full virtual address width plus thread-ID bits plus EXTRA_TAG_BITS
+ + EXTRA_TAG_BITS;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = assoc;
+ interface_ip.nbanks = banks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
 interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
 interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 1;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
 interface_ip.num_se_rd_ports = 0;
- globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area);
- area.set_area(area.get_area()+ globalBPT->local_result.area);
-
- //Local BPT (Level 1)
- data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area);
- area.set_area(area.get_area()+ L1_localBPT->local_result.area);
-
- //Local BPT (Level 2)
- data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area);
- area.set_area(area.get_area()+ L2_localBPT->local_result.area);
-
- //Chooser
- data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty);
- chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area);
- area.set_area(area.get_area()+ chooser->local_result.area);
-
- //RAS return address stacks are Duplicated for each thread.
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
- data = int(ceil(coredynp.pc_width/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate;  // presumably a cycle count converted to time — confirm units
+ interface_ip.latency = inst_fetch_params.btb_latency / clockRate;
+
+ BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + BTB->local_result.area);  // only the unit total is updated; BTB->area is not touched here (unlike IB above)
+
+ BPT = new BranchPredictor(xml_data, &interface_ip,  // the predictor receives this unit's interface_ip as configured for the BTB above
+ core_params, core_stats);
+ area.set_area(area.get_area() + BPT->area.get_area());
+ }
+
+ ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder",  // three decoders follow, differing only in name and decoded field width
+ is_default, &interface_ip,
+ core_params.opcode_width,
+ core_params.decodeW,
+ core_params.x86, clockRate,
+ Core_device, core_params.core_ty);
+
+ ID_operand = new InstructionDecoder(xml_data,
+ "Instruction Operand Decoder",
+ is_default, &interface_ip,
+ core_params.arch_ireg_width,
+ core_params.decodeW,
+ core_params.x86, clockRate,
+ Core_device, core_params.core_ty);
+
+ ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder",
+ is_default, &interface_ip,
+ core_params.micro_opcode_length,
+ core_params.decodeW,
+ core_params.x86, clockRate,
+ Core_device, core_params.core_ty);
+ area.set_area(area.get_area()+ (ID_inst->area.get_area()  // NOTE(review): decodeW is also passed to each decoder's constructor above — confirm the area is not scaled twice
+ + ID_operand->area.get_area()
+ + ID_misc->area.get_area())
+ * core_params.decodeW);
+}
+
+void
+InstFetchU::set_params_stats() {  // Loads BTB parameters and statistics from the "BranchTargetBuffer" child of xml_data, then checks the sizing parameters.
+ int num_children = xml_data->nChildNode("component");
+ int i;
+ memset(&inst_fetch_params,0,sizeof(InstFetchParameters));  // zero every parameter so one missing from the XML is caught by the <= 0 checks below
+ for (i = 0; i < num_children; i++) {
+ XMLNode* child = xml_data->getChildNodePtr("component", &i);  // i is passed by address; presumably the parser may update it — confirm
+ XMLCSTR type = child->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(child->getAttribute("id"));  // NOTE(review): type is still handed to STRCMP below; presumably this helper aborts — confirm
+
+ STRCMP(type, "BranchTargetBuffer") {
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");  // not referenced directly here; presumably consumed by the ASSIGN_*_IF macros
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("size", inst_fetch_params.btb_size);  // the ASSIGN_*_IF macros presumably expand to an if / else-if chain, closed by the bare else below
+ ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size);
+ ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc);
+ ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks);
+ ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency);
+ ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput);
+ ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports);  // NOTE(review): parsed here, but the InstFetchU constructor sets num_rw_ports = 1 for the BTB
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
+
+ sub_num_children = child->nChildNode("stat");  // second pass over the same child: access counters
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* statNode = child->getChildNodePtr("stat", &j);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("read_accesses",
+ inst_fetch_stats.btb_read_accesses);
+ ASSIGN_FP_IF("write_accesses",
+ inst_fetch_stats.btb_write_accesses);
+ else {
+ warnUnrecognizedStat(node_name);
+ }
+ }
+ }
+ }
+
+ // Parameter sanity check (btb_latency, btb_throughput and btb_rw_ports are not validated here)
+ if (inst_fetch_params.btb_size <= 0) {
+ errorNonPositiveParam("size");
+ }
+
+ if (inst_fetch_params.btb_block_size <= 0) {
+ errorNonPositiveParam("block_size");
+ }
+
+ if (inst_fetch_params.btb_assoc <= 0) {
+ errorNonPositiveParam("assoc");
+ }
+
+ if (inst_fetch_params.btb_num_banks <= 0) {
+ errorNonPositiveParam("num_banks");
+ }
+}
+
+BranchPredictor::BranchPredictor(XMLNode* _xml_data,  // Builds the predictor arrays (global, local L1/L2, chooser, RAS) and sums their areas into this component's area.
+ InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats,
+ bool exist_)  // exist_ == false: return immediately, leaving every array pointer NULL
+ : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL),  // NOTE(review): localBPT is initialised to NULL and never allocated in this constructor
+ L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL),
+ interface_ip(*interface_ip_),  // presumably a by-value copy of the caller's InputParameter — confirm the member is not a reference
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ int tag;
+ int data;
+ int size;
+
+ clockRate = core_params.clockRate;
+ name = "Branch Predictor";
+
+ // Common interface parameters for the branch predictor structures
+ interface_ip.pure_cam = false;
+
+ if (core_params.multithreaded) {  // multithreaded: tables are configured as tagged caches; otherwise as untagged RAM
+ tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS);  // thread-ID bits plus EXTRA_TAG_BITS, truncated to int
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ } else {
+ interface_ip.specific_tag = 0;
+ interface_ip.tag_w = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_ram = true;
+ }
+
+ // Parse params and stats from XML
+ set_params_stats();
+
+ // Common interface parameters for the branch predictor structures
+ interface_ip.assoc = branch_pred_params.assoc;
+ interface_ip.nbanks = branch_pred_params.nbanks;
+
+ //Global predictor
+ data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE));  // NOTE(review): ceil only rounds up if BITS_PER_BYTE is floating-point — confirm its definition
+ size = data * branch_pred_params.global_predictor_entries;  // per-entry width times entry count
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + globalBPT->local_result.area);  // accumulate into the predictor's total area
+
+ //Local BPT (Level 1)
+ data = int(ceil(branch_pred_params.local_l1_predictor_size /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.local_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ L1_localBPT = new ArrayST(xml_data, &interface_ip,
+ "Local Predictor, Level 1",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ L1_localBPT->area.set_area(L1_localBPT->area.get_area() +  // NOTE(review): only L1_localBPT has its own area member updated; the other tables just add to the total
+ L1_localBPT->local_result.area);
+ area.set_area(area.get_area()+ L1_localBPT->local_result.area);
+
+ //Local BPT (Level 2)
+ data = int(ceil(branch_pred_params.local_l2_predictor_size /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.local_predictor_entries;  // same entry count as level 1
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ L2_localBPT = new ArrayST(xml_data, &interface_ip,
+ "Local Predictor, Level 2",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + L2_localBPT->local_result.area);
+
+ //Chooser
+ data = int(ceil(branch_pred_params.chooser_predictor_bits /
+ BITS_PER_BYTE));
+ size = data * branch_pred_params.chooser_predictor_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ area.set_area(area.get_area() + chooser->local_result.area);
+
+ //RAS return address stacks are Duplicated for each thread.
+ data = int(ceil(core_params.pc_width / BITS_PER_BYTE));  // one stack entry is sized from pc_width
+ size = data * core_params.RAS_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.predictionW;
+ interface_ip.num_wr_ports = core_params.predictionW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;  // the RAS is always configured as plain RAM, regardless of multithreading; specific_tag / tag_w keep their earlier values
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate,
+ core_params.opt_local, core_params.core_ty);
+ RAS->output_data.area *= core_params.num_hthreads;  // scale the reported RAS area by the hardware thread count
+ area.set_area(area.get_area() + RAS->local_result.area *
+ core_params.num_hthreads);
+
+}
+
+void
+BranchPredictor::set_params_stats() {  // Reads predictor sizing parameters from the "BranchPredictor" child of xml_data into branch_pred_params.
+ int num_children = xml_data->nChildNode("component");  // NOTE(review): branch_pred_params is neither cleared nor validated in this function — confirm it is initialised elsewhere
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* child = xml_data->getChildNodePtr("component", &i);  // i is passed by address; presumably the parser may update it — confirm
+ XMLCSTR type = child->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(child->getAttribute("id"));  // NOTE(review): type is still handed to STRCMP below; presumably this helper aborts — confirm
+
+ STRCMP(type, "BranchPredictor") {
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");  // not referenced directly here; presumably consumed by the ASSIGN_INT_IF macros
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("assoc", branch_pred_params.assoc);  // the ASSIGN_INT_IF macros presumably expand to an if / else-if chain, closed by the bare else below
+ ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks);
+ ASSIGN_INT_IF("local_l1_predictor_size",
+ branch_pred_params.local_l1_predictor_size);
+ ASSIGN_INT_IF("local_l2_predictor_size",
+ branch_pred_params.local_l2_predictor_size);
+ ASSIGN_INT_IF("local_predictor_entries",
+ branch_pred_params.local_predictor_entries);
+ ASSIGN_INT_IF("global_predictor_entries",
+ branch_pred_params.global_predictor_entries);
+ ASSIGN_INT_IF("global_predictor_bits",
+ branch_pred_params.global_predictor_bits);
+ ASSIGN_INT_IF("chooser_predictor_entries",
+ branch_pred_params.chooser_predictor_entries);
+ ASSIGN_INT_IF("chooser_predictor_bits",
+ branch_pred_params.chooser_predictor_bits);
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ // The core reads in the number of branches and the number of
+ // function calls and these values are passed through the
+ // core_stats variable, so we don't need to read them in here
+ }
+ }
+}
+
+SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), int_inst_window(NULL),
+ fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL),
+ fp_instruction_selection(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ int tag;
+ int data;
+ int size;
+ int line;
+ bool is_default = true;
+ string tmp_name;
+
+ clockRate = core_params.clockRate;
+ name = "Instruction Scheduler";
+ if ((core_params.core_ty == Inorder && core_params.multithreaded)) {
+ //Instruction issue queue, in-order multi-issue or multithreaded
+ //processor also has this structure. Unified window for Inorder
+ //processors
+ //This tag width is the normal thread state bits based on
+ //Niagara Design
+ tag = int(log2(core_params.num_hthreads) * core_params.perThreadState);
+ data = core_params.instruction_length;
+ line = int(ceil(data / BITS_PER_BYTE));
+ size = core_params.instruction_window_size * line;
+ if (size < MIN_BUFFER_SIZE) {
+ size = MIN_BUFFER_SIZE;
+ }
+
+ //NOTE: x86 inst can be very lengthy, up to 15B.
+ //Source: Intel® 64 and IA-32 Architectures
+ //Software Developer’s Manual
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_issueW;
+ interface_ip.num_wr_ports = core_params.peak_issueW;
interface_ip.num_se_rd_ports = 0;
- RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty);
- RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
- area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
+ interface_ip.num_search_ports = core_params.peak_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ int_inst_window = new ArrayST(xml_data, &interface_ip,
+ "InstFetchQueue", Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ int_inst_window->output_data.area *= core_params.num_pipelines;
+ area.set_area(area.get_area() + int_inst_window->local_result.area *
+ core_params.num_pipelines);
+ Iw_height = int_inst_window->local_result.cache_ht;
-}
+ /*
+ * selection logic
+ * In a single-issue in-order multithreaded processor like Niagara, issue width = 1 * number_of_threads, since the processor does need to pick
+ * instructions from multiple ready ones (although these ready ones are from different threads), while SMT processors do not distinguish which thread an instruction belongs to
+ * at the issue stage.
+ */
-SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- int_inst_window(0),
- fp_inst_window(0),
- ROB(0),
- instruction_selection(0),
- exist(exist_)
- {
- if (!exist) return;
- int tag, data;
- bool is_default=true;
- string tmp_name;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- if ((coredynp.core_ty==Inorder && coredynp.multithreaded))
- {
- //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors
- tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design
- data = XML->sys.core[ithCore].instruction_length;
- //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures
- //Software Developer’s Manual
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.peak_issueW;
- int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
- int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- //output_data_csv(iRS.RS.local_result);
- Iw_height =int_inst_window->local_result.cache_ht;
-
- /*
- * selection logic
- * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
- * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
- * at the issue stage.
- */
-
- instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
- coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads,
- &interface_ip, Core_device, coredynp.core_ty);
+ int_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.instruction_window_size,
+ core_params.peak_issueW *
+ core_params.num_hthreads,
+ &interface_ip,
+ "Int Instruction Selection Logic",
+ core_stats.inst_window_wakeup_accesses,
+ clockRate, Core_device, core_params.core_ty);
+
+ if (core_params.fp_instruction_window_size > 0) {
+ fp_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.fp_instruction_window_size,
+ core_params.fp_issueW *
+ core_params.num_hthreads,
+ &interface_ip,
+ "FP Instruction Selection Logic",
+ core_stats.fp_inst_window_wakeup_accesses,
+ clockRate, Core_device,
+ core_params.core_ty);
}
+ }
- if (coredynp.core_ty==OOO)
- {
+ if (core_params.core_ty == OOO) {
/*
* CAM based instruction window
* For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored
@@ -525,3611 +605,3405 @@ SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* in
* It is written once and read twice(two operands) before an instruction can be issued.
* X86 instruction can be very long up to 15B. add instruction length in XML
*/
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- tag = coredynp.phy_ireg_width;
- // Each time only half of the tag is compared, but two tag should be stored.
- // This underestimate the search power
- data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0));
- //Data width being divided by 2 means only after both operands available the whole data will be read out.
- //This is modeled using two equivalent readouts with half of the data width
- tmp_name = "InstIssueQueue";
- }
- else
- {
- tag = coredynp.phy_ireg_width;
- // Each time only half of the tag is compared, but two tag should be stored.
- // This underestimate the search power
- data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+
- 2*coredynp.int_data_width)/2.0)/8.0));
- //Data width being divided by 2 means only after both operands available the whole data will be read out.
- //This is modeled using two equivalent readouts with half of the data width
-
- tmp_name = "IntReservationStation";
- }
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 2*1.0/clockRate;
- interface_ip.latency = 2*1.0/clockRate;
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ tag = core_params.phy_ireg_width;
+ data = int((ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_ireg_width -
+ core_params.arch_ireg_width)) /
+ (double)NUM_SOURCE_OPERANDS) /
+ BITS_PER_BYTE));
+ tmp_name = "Integer Instruction Window";
+ } else {
+ tag = core_params.phy_ireg_width;
+ data = int(ceil(((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_ireg_width -
+ core_params.arch_ireg_width) +
+ 2 * core_params.int_data_width) /
+ (double)NUM_SOURCE_OPERANDS) /
+ BITS_PER_BYTE));
+ tmp_name = "Integer Reservation Station";
+ }
+
+ size = data * core_params.instruction_window_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_issueW;
+ interface_ip.num_wr_ports = core_params.peak_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.peak_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate;
+ interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate;
+ int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name,
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ int_inst_window->output_data.area *= core_params.num_pipelines;
+ area.set_area(area.get_area() + int_inst_window->local_result.area *
+ core_params.num_pipelines);
+ Iw_height = int_inst_window->local_result.cache_ht;
+
+ //FU inst window
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width;
+ data = int(ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_freg_width -
+ core_params.arch_freg_width)) / BITS_PER_BYTE));
+ tmp_name = "FP Instruction Window";
+ } else {
+ tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width;
+ data = int(ceil((core_params.instruction_length +
+ NUM_SOURCE_OPERANDS *
+ (core_params.phy_freg_width -
+ core_params.arch_freg_width) +
+ NUM_SOURCE_OPERANDS * core_params.fp_data_width) /
+ BITS_PER_BYTE));
+ tmp_name = "FP Reservation Station";
+ }
+
+ size = data * core_params.fp_instruction_window_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.scheduler_assoc;
+ interface_ip.nbanks = core_params.scheduler_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.peak_issueW;
- int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
- int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- Iw_height =int_inst_window->local_result.cache_ht;
- //FU inst window
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared
- data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0));
- tmp_name = "FPIssueQueue";
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.fp_issueW;
+ interface_ip.num_wr_ports = core_params.fp_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.fp_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fp_inst_window =
+ new ArrayST(xml_data, &interface_ip, tmp_name, Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ fp_inst_window->output_data.area *= core_params.num_fp_pipelines;
+ area.set_area(area.get_area() + fp_inst_window->local_result.area
+ *core_params.num_fp_pipelines);
+ fp_Iw_height = fp_inst_window->local_result.cache_ht;
+
+ if (core_params.ROB_size > 0) {
+ /*
+ * If ROB_size = 0, the target processor does not support hardware-based
+ * speculation, i.e., the processor allows OOO issue as well as OOO completion, which
+ * means a branch must be resolved before instructions are issued into the instruction window, since
+ * there is no chance to flush a mispredicted branch path after instructions are issued in this situation.
+ *
+ * ROB size = number of in-flight instructions. The ROB is unified for int and fp instructions.
+ * One old approach is to combine the RAT and ROB as a huge CAM structure, as in the AMD K7.
+ * However, this approach was abandoned due to its high power and poor scalability.
+ * McPAT models the current implementation of the ROB as a circular buffer.
+ * The ROB is written once when an instruction is issued and read once when the instruction is committed.
+ */
+ int robExtra = int(ceil(ROB_STATUS_BITS +
+ log2(core_params.num_hthreads)));
+
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ //PC is to id the instruction for recover exception.
+ //inst is used to map the renamed dest. registers. so that
+ //commit stage can know which reg/RRAT to update
+ data = int(ceil((robExtra + core_params.pc_width +
+ core_params.phy_ireg_width) / BITS_PER_BYTE));
+ } else {
+ //in RS based OOO, ROB also contains value of destination reg
+ data = int(ceil((robExtra + core_params.pc_width +
+ core_params.phy_ireg_width +
+ core_params.fp_data_width) / BITS_PER_BYTE));
+ }
+
+ interface_ip.cache_sz = data * core_params.ROB_size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = core_params.ROB_assoc;
+ interface_ip.nbanks = core_params.ROB_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.ROB_tag_width > 0;
+ interface_ip.tag_w = core_params.ROB_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.peak_commitW;
+ interface_ip.num_wr_ports = core_params.peak_issueW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ROB->output_data.area *= core_params.num_pipelines;
+ area.set_area(area.get_area() + ROB->local_result.area *
+ core_params.num_pipelines);
+ ROB_height = ROB->local_result.cache_ht;
+ }
+
+ int_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.instruction_window_size,
+ core_params.peak_issueW, &interface_ip,
+ "Int Instruction Selection Logic",
+ core_stats.inst_window_wakeup_accesses,
+ clockRate, Core_device, core_params.core_ty);
+
+ if (core_params.fp_instruction_window_size > 0) {
+ fp_instruction_selection =
+ new selection_logic(xml_data, is_default,
+ core_params.fp_instruction_window_size,
+ core_params.fp_issueW, &interface_ip,
+ "FP Instruction Selection Logic",
+ core_stats.fp_inst_window_wakeup_accesses,
+ clockRate, Core_device,
+ core_params.core_ty);
}
- else
- {
- tag = 2*coredynp.phy_ireg_width;
- data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+
- 2*coredynp.fp_data_width)/8.0));
- tmp_name = "FPReservationStation";
+
+ }
+}
+
+LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {  // LoadStoreU ctor: looks for a data cache child, then builds the store queue and, for OOO cores, the load queue
+ if (!exist) return;  // unit not present: dcache, LSQ and LoadQ keep the NULL set in the initializer list
+ int tag;
+ int line;
+ int size;
+ int ldst_opcode = core_params.opcode_width;  // opcode bits, counted into the queue tag width below
+
+ clockRate = core_params.clockRate;
+ name = "Load/Store Unit";
+
+ // Scan the "component" children of this XML node for a data cache:
+ int i;
+ dcache = NULL;  // already NULL from the initializer list
+ for( i = 0; i < xml_data->nChildNode("component"); i++ ) {  // NOTE(review): i is also passed by address to getChildNodePtr below; confirm that call does not advance i itself
+ XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = childXML->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(childXML->getAttribute("id"));  // NOTE(review): type is still handed to STRCMP below; presumably this call does not return - confirm
+
+ STRCMP(type, "CacheUnit") {  // STRCMP is a project macro defined elsewhere; presumably an if on string equality - confirm
+ XMLCSTR name = childXML->getAttribute("name");  // local that hides the outer name assigned above; NOTE(review): not NULL-checked before strcmp
+ if (strcmp(name, "Data Cache") == 0 ||
+ strcmp(name, "dcache") == 0) {
+ dcache = new CacheUnit(childXML, &interface_ip);  // the loop does not break, so with several matches dcache ends up pointing at the last one
+ children.push_back(dcache);
+ }
 }
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ }
+
+ /*
+ * LSU -- in-order processors do not have a separate load queue: a unified LSQ,
+ * partitioned among threads.
+ * It is actually the store queue, but for in-order processors it serves as both LoadQ and StoreQ.
+ */
+ tag = ldst_opcode + virtual_address_width +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;  // opcode + virtual address + log2(hardware threads) + extra tag bits
+ line = int(ceil(data_path_width / BITS_PER_BYTE));  // queue entry size: data_path_width / BITS_PER_BYTE, passed through ceil
+ size = core_params.store_buffer_size * line * core_params.num_hthreads;  // store_buffer_size * line, scaled by the number of hardware threads
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.store_buffer_assoc;
+ interface_ip.nbanks = core_params.store_buffer_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;  // true whenever the computed tag width is positive
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;  // no combined R/W ports; read, write and search port counts below each equal memory_ports
+ interface_ip.num_rd_ports = core_params.memory_ports;
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = 1.0 / clockRate;  // 1 / clockRate, i.e. one cycle if clockRate is a frequency
+ interface_ip.latency = 1.0 / clockRate;
+ LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + LSQ->local_result.area);  // add the store queue area to this unit
+ area.set_area(area.get_area()*cdb_overhead);  // then scale the accumulated area by cdb_overhead
+ lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead);
+
+ if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) {  // separate load queue only for OOO cores with load_buffer_size > 0
+ tag = ldst_opcode + virtual_address_width +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;  // same tag formula as for the store queue above
+ line = int(ceil(data_path_width / BITS_PER_BYTE));
+ size = core_params.load_buffer_size * line * core_params.num_hthreads;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.load_buffer_assoc;
+ interface_ip.nbanks = core_params.load_buffer_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Sequential;
 interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
 interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_issueW;
- interface_ip.num_wr_ports = coredynp.fp_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.fp_issueW;
- fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
- fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
- area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
- fp_Iw_height =fp_inst_window->local_result.cache_ht;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- /*
- * if ROB_size = 0, then the target processor does not support hardware-based
- * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which
- * means branch must be resolved before instruction issued into instruction window, since
- * there is no change to flush miss-predict branch path after instructions are issued in this situation.
- *
- * ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
- * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
- * However, this approach is abandoned due to its high power and poor scalablility.
- * McPAT uses current implementation of ROB as circular buffer.
- * ROB is written once when instruction is issued and read once when the instruction is committed. *
- */
- int robExtra = int(ceil(5 + log2(coredynp.num_hthreads)));
- //5 bits are: busy, Issued, Finished, speculative, valid
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- //PC is to id the instruction for recover exception.
- //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update
-// data = int(ceil((robExtra+coredynp.pc_width +
-// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0));
- data = int(ceil((robExtra+coredynp.pc_width +
- coredynp.phy_ireg_width)/8.0));
- }
- else
- {
- //in RS based OOO, ROB also contains value of destination reg
-// data = int(ceil((robExtra+coredynp.pc_width +
-// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
- data = int(ceil((robExtra + coredynp.pc_width +
- coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
- }
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_commitW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = 0;
- ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
- ROB_height =ROB->local_result.cache_ht;
- }
-
- instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
- coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty);
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.memory_ports;
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.is_cache = true;
+ interface_ip.pure_ram = false;
+ interface_ip.pure_cam = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device,
+ clockRate, core_params.opt_local,
+ core_params.core_ty);
+ LoadQ->area.set_area(LoadQ->area.get_area() +
+ LoadQ->local_result.area);  // updates the area stored on the LoadQ object itself
+ area.set_area(area.get_area()*cdb_overhead);  // NOTE(review): LoadQ->local_result.area is never added to this unit's area here (the store queue path above does add LSQ's), and cdb_overhead is applied a second time - confirm intended
+ lsq_height = (LSQ->local_result.cache_ht +
+ LoadQ->local_result.cache_ht) * sqrt(cdb_overhead);  // replaces the store-queue-only height computed above
 }
+
 }
-LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- LSQ(0),
- exist(exist_)
-{
- if (!exist) return;
- int idx, tag, data, size, line, assoc, banks;
- bool debug= false;
- int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7];
-
- interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- //Dcache
- size = (int)XML->sys.core[ithCore].dcache.dcache_config[0];
- line = (int)XML->sys.core[ithCore].dcache.dcache_config[1];
- assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2];
- banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0];
- interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1];
- interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2];
- interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area);
- area.set_area(area.get_area()+ dcache.caches->local_result.area);
- //output_data_csv(dcache.caches.local_result);
-
- //dCache controllers
- //miss buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area);
- area.set_area(area.get_area()+ dcache.missb->local_result.area);
- //output_data_csv(dcache.missb.local_result);
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = dcache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area);
- area.set_area(area.get_area()+ dcache.ifb->local_result.area);
- //output_data_csv(dcache.ifb.local_result);
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ dcache.prefetchb->local_result.area);
- //output_data_csv(dcache.prefetchb.local_result);
-
- //WBB
-
- if (cache_p==Write_back)
- {
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = dcache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area);
- area.set_area(area.get_area()+ dcache.wbb->local_result.area);
- //output_data_csv(dcache.wbb.local_result);
- }
-
- /*
- * LSU--in-order processors do not have separate load queue: unified lsq
- * partitioned among threads
- * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
- */
- tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS;
- data = XML->sys.machine_bits;
- interface_ip.is_cache = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports;
- LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty);
- LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area);
- area.set_area(area.get_area()+ LSQ->local_result.area);
- area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(LSQ.LSQ.local_result);
- lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
-
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports;
- LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
- LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area);
- area.set_area(area.get_area()+ LoadQ->local_result.area);
- area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(LoadQ.LoadQ.local_result);
- lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
- }
+MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {  // MemManU ctor: configures and builds the instruction TLB and data TLB arrays
+ if (!exist) return;  // unit not present: itlb and dtlb keep the NULL set in the initializer list
+ int tag;
+ int data;
+ int line;
+
+ clockRate = core_params.clockRate;
+ name = "Memory Management Unit";
+
+ set_params_stats();  // defined elsewhere; presumably fills mem_man_params read below - confirm
+
+ // These settings are shared between the ITLB and the DTLB
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ // ITLB. TLBs are partitioned among threads, as in Niagara and Nehalem
+ tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;  // virtual address width minus log2(page size), plus log2(hardware threads) and extra tag bits
+ data = physical_address_width - int(floor(log2(virtual_memory_page_size)));  // physical address width minus log2(page size)
+ line = int(ceil(data / BITS_PER_BYTE));  // entry size: data / BITS_PER_BYTE, passed through ceil
+
+ interface_ip.cache_sz = mem_man_params.itlb_number_entries * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = mem_man_params.itlb_assoc;
+ interface_ip.nbanks = mem_man_params.itlb_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;  // true whenever the computed tag width is positive
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports;  // ITLB: R/W and search port counts both equal the instruction fetch port count
+ interface_ip.num_rd_ports = 0;
+ interface_ip.num_wr_ports = 0;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.number_instruction_fetch_ports;
+ interface_ip.throughput = mem_man_params.itlb_throughput / clockRate;
+ interface_ip.latency = mem_man_params.itlb_latency / clockRate;
+ itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + itlb->local_result.area);  // add the ITLB area to this unit
+
+ // DTLB: same tag/data/line formulas as the ITLB above
+ tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) +
+ int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
+ data = physical_address_width - int(floor(log2(virtual_memory_page_size)));
+ line = int(ceil(data / BITS_PER_BYTE));
+
+ interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = mem_man_params.dtlb_assoc;
+ interface_ip.nbanks = mem_man_params.dtlb_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;  // DTLB: no combined R/W ports; read, write and search port counts each equal memory_ports
+ interface_ip.num_rd_ports = core_params.memory_ports;
+ interface_ip.num_wr_ports = core_params.memory_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.memory_ports;
+ interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate;
+ interface_ip.latency = mem_man_params.dtlb_latency / clockRate;
+ dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ area.set_area(area.get_area() + dtlb->local_result.area);  // add the DTLB area to this unit
 }
-MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- itlb(0),
- dtlb(0),
- exist(exist_)
-{
- if (!exist) return;
- int tag, data;
- bool debug= false;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.specific_tag = 1;
- //Itlb TLBs are partioned among threads according to Nigara and Nehalem
- tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
- data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty);
- itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area);
- area.set_area(area.get_area()+ itlb->local_result.area);
- //output_data_csv(itlb.tlb.local_result);
-
- //dtlb
- tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
- data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
- dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty);
- dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area);
- area.set_area(area.get_area()+ dtlb->local_result.area);
- //output_data_csv(dtlb.tlb.local_result);
+void
+MemManU::set_params_stats() {
+ memset(&mem_man_params, 0, sizeof(MemoryManagementParams));
+ memset(&mem_man_stats, 0, sizeof(MemoryManagementStats));
+ int num_children = xml_data->nChildNode("component");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* child = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = child->getAttribute("type");
+
+ if (!type)
+ warnMissingComponentType(child->getAttribute("id"));
+
+ STRCMP(type, "InstructionTLB") {
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("number_entries",
+ mem_man_params.itlb_number_entries);
+ ASSIGN_FP_IF("latency", mem_man_params.itlb_latency);
+ ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput);
+ ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc);
+ ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks);
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ sub_num_children = child->nChildNode("stat");
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* statNode = child->getChildNodePtr("stat", &j);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("total_accesses",
+ mem_man_stats.itlb_total_accesses);
+ ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses);
+ ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts);
+ else {
+ warnUnrecognizedStat(node_name);
+ }
+ }
+ } STRCMP(type, "DataTLB") {
+ int sub_num_children = child->nChildNode("param");
+ int j;
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* paramNode = child->getChildNodePtr("param", &j);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("number_entries",
+ mem_man_params.dtlb_number_entries);
+ ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency);
+ ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput);
+ ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc);
+ ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks);
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
+ sub_num_children = child->nChildNode("stat");
+ for (j = 0; j < sub_num_children; j++) {
+ XMLNode* statNode = child->getChildNodePtr("stat", &j);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("read_accesses",
+ mem_man_stats.dtlb_read_accesses);
+ ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses);
+ ASSIGN_FP_IF("write_accesses",
+ mem_man_stats.dtlb_write_accesses);
+ ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses);
+ ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts);
+
+ else {
+ warnUnrecognizedStat(node_name);
+ }
+ }
+ }
+ }
}
-RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- IRF (0),
- FRF (0),
- RFWIN (0),
- exist(exist_)
- {
- /*
- * processors have separate architectural register files for each thread.
- * therefore, the bypass buses need to travel across all the register files.
- */
- if (!exist) return;
- int data;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- //**********************************IRF***************************************
- data = coredynp.int_data_width;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 2*coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
- IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
- area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
- //area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(IRF.RF.local_result);
-
- //**********************************FRF***************************************
- data = coredynp.fp_data_width;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL),
+ interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ /*
+ * processors have separate architectural register files for each thread.
+ * therefore, the bypass buses need to travel across all the register files.
+ */
+ if (!exist) return;
+ int data;
+ int line;
+
+ clockRate = core_params.clockRate;
+ name = "Register File Unit";
+
+ //**********************************IRF************************************
+ data = core_params.int_data_width;
+ line = int(ceil(data / BITS_PER_BYTE));
+
+ interface_ip.cache_sz = core_params.num_IRF_entry * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.phy_Regs_IRF_assoc;
+ interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0;
+ interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports;
+ interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ IRF->output_data.area *= core_params.num_hthreads *
+ core_params.num_pipelines * cdb_overhead;
+ area.set_area(area.get_area() + IRF->local_result.area *
+ core_params.num_hthreads * core_params.num_pipelines *
+ cdb_overhead);
+
+ //**********************************FRF************************************
+ data = core_params.fp_data_width;
+ line = int(ceil(data / BITS_PER_BYTE));
+
+ interface_ip.cache_sz = core_params.num_FRF_entry * line;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.phy_Regs_FRF_assoc;
+ interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0;
+ interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports;
+ interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device,
+ clockRate, core_params.opt_local, core_params.core_ty);
+ FRF->output_data.area *= core_params.num_hthreads *
+ core_params.num_fp_pipelines * cdb_overhead;
+ area.set_area(area.get_area() + FRF->local_result.area *
+ core_params.num_hthreads * core_params.num_fp_pipelines *
+ cdb_overhead);
+ int_regfile_height = IRF->local_result.cache_ht *
+ core_params.num_hthreads * sqrt(cdb_overhead);
+ fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads *
+ sqrt(cdb_overhead);
+ //since an EXU is associated with each pipeline, the cdb should not have
+ //longer length.
+
+ if (core_params.regWindowing) {
+ //*********************************REG_WIN*****************************
+ //ECC, and usually 2 regs are transferred together during window
+ //shifting. Niagara Mega cell
+ data = core_params.int_data_width;
+ line = int(ceil(data / BITS_PER_BYTE));
+
+ interface_ip.cache_sz = core_params.register_window_size *
+ IRF->l_ip.cache_sz * core_params.num_hthreads;
+ interface_ip.line_sz = line;
+ interface_ip.assoc = core_params.register_window_assoc;
+ interface_ip.nbanks = core_params.register_window_nbanks;
+ interface_ip.out_w = line * BITS_PER_BYTE;
+ interface_ip.specific_tag = core_params.register_window_tag_width > 0;
+ interface_ip.tag_w = core_params.register_window_tag_width;
+ interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.register_window_rw_ports;
+ interface_ip.num_rd_ports = 0;
+ interface_ip.num_wr_ports = 0;
interface_ip.num_se_rd_ports = 0;
- FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
- FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
- area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
- //area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(FRF.RF.local_result);
- int_regfile_height= IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
- fp_regfile_height = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
- //since a EXU is associated with each pipeline, the cdb should not have longer length.
- if (coredynp.regWindowing)
- {
- //*********************************REG_WIN************************************
- data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 4.0/clockRate;
- interface_ip.latency = 4.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty);
- RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
- //output_data_csv(RFWIN.RF.local_result);
- }
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput =
+ core_params.register_window_throughput / clockRate;
+ interface_ip.latency =
+ core_params.register_window_latency / clockRate;
+ RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device,
+ clockRate, core_params.opt_local,
+ core_params.core_ty);
+ RFWIN->output_data.area *= core_params.num_pipelines;
+ area.set_area(area.get_area() + RFWIN->local_result.area *
+ core_params.num_pipelines);
+ }
+}
+EXECU::EXECU(XMLNode* _xml_data,
+ InputParameter* interface_ip_, double lsq_height_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL),
+ exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL),
+ int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL),
+ fpTagBypass(NULL), interface_ip(*interface_ip_),
+ lsq_height(lsq_height_), core_params(_core_params),
+ core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ double fu_height = 0.0;
+ clockRate = core_params.clockRate;
+ name = "Execution Unit";
+ rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats);
+ if (core_params.core_ty == OOO ||
+ (core_params.core_ty == Inorder && core_params.multithreaded)) {
+ scheu = new SchedulerU(xml_data, &interface_ip, core_params,
+ core_stats);
+ area.set_area(area.get_area() + scheu->area.get_area() );
+ }
+ exeu = new FunctionalUnit(xml_data, &interface_ip, core_params,
+ core_stats, ALU);
+ area.set_area(area.get_area() + exeu->area.get_area() +
+ rfu->area.get_area());
+ fu_height = exeu->FU_height;
+ if (core_params.num_fpus > 0) {
+ fp_u = new FunctionalUnit(xml_data, &interface_ip,
+ core_params, core_stats, FPU);
+ area.set_area(area.get_area() + fp_u->area.get_area());
+ }
+ if (core_params.num_muls > 0) {
+ mul = new FunctionalUnit(xml_data, &interface_ip,
+ core_params, core_stats, MUL);
+ area.set_area(area.get_area() + mul->area.get_area());
+ fu_height += mul->FU_height;
+ }
+ /*
+ * broadcast logic, including int-broadcast; int_tag-broadcast;
+ * fp-broadcast; fp_tag-broadcast
+ * integer bypass has two paths and fp has 3 paths.
+ * on the same bus there are multiple tri-state drivers and muxes that go
+ * to different components on the same bus
+ */
+ interface_ip.wt = core_params.execu_broadcast_wt;
+ interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type;
+ interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type;
+ interface_ip.throughput = core_params.broadcast_numerator / clockRate;
+ interface_ip.latency = core_params.broadcast_numerator / clockRate;
+ double scheu_Iw_height = 0.0;
+ double scheu_ROB_height = 0.0;
+ double scheu_fp_Iw_height = 0.0;
+ if (scheu) {
+ scheu_Iw_height = scheu->Iw_height;
+ scheu_ROB_height = scheu->ROB_height;
+ scheu_fp_Iw_height = scheu->fp_Iw_height;
+ }
- }
-
-EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- lsq_height(lsq_height_),
- coredynp(dyn_p_),
- rfu(0),
- scheu(0),
- fp_u(0),
- exeu(0),
- mul(0),
- int_bypass(0),
- intTagBypass(0),
- int_mul_bypass(0),
- intTag_mul_Bypass(0),
- fp_bypass(0),
- fpTagBypass(0),
- exist(exist_)
-{
- if (!exist) return;
- double fu_height = 0.0;
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- rfu = new RegFU(XML, ithCore, &interface_ip,coredynp);
- scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp);
- exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU);
- area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() );
- fu_height = exeu->FU_height;
- if (coredynp.num_fpus >0)
- {
- fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU);
- area.set_area(area.get_area()+ fp_u->area.get_area());
- }
- if (coredynp.num_muls >0)
- {
- mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL);
- area.set_area(area.get_area()+ mul->area.get_area());
- fu_height += mul->FU_height;
- }
- /*
- * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast
- * integer by pass has two paths and fp has 3 paths.
- * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus
- */
- if (XML->sys.Embedded)
- {
- interface_ip.wt =Global_30;
- interface_ip.wire_is_mat_type = 0;
- interface_ip.wire_os_mat_type = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- }
- else
- {
- interface_ip.wt =Global;
- interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used
- interface_ip.wire_os_mat_type = 2;
- interface_ip.throughput = 10.0/clockRate; //Do not care
- interface_ip.latency = 10.0/clockRate;
- }
-
- if (coredynp.core_ty==Inorder)
- {
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32),
- rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area());
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
-
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
- rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
- rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
- else
- {//OOO
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- /* For physical register based OOO,
- * data broadcast interconnects cover across functional units, lsq, inst windows and register files,
- * while tag broadcast interconnects also cover across ROB
- */
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
-
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
- rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
- else
- {
- /*
- * In RS based processor both data and tag are broadcast together,
- * covering functional units, lsq, nst windows, register files, and ROBs
- */
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
-
-
- }
- area.set_area(area.get_area()+ bypass.area.get_area());
-}
+ // Common bypass logic parameters
+ double base_w = core_params.execu_bypass_base_width;
+ double base_h = core_params.execu_bypass_base_height;
+ int level = core_params.execu_bypass_start_wiring_level;
+ double route_over_perc = core_params.execu_bypass_route_over_perc;
+ Wire_type wire_type = core_params.execu_bypass_wire_type;
+ int data_w;
+ double len;
+
+ if (core_params.core_ty == Inorder) {
+ data_w = int(ceil(data_path_width / 32.0)*32);
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height;
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate, false,
+ route_over_perc, core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height;
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate, false,
+ route_over_perc, core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ if (core_params.num_muls > 0) {
+ data_w = int(ceil(data_path_width / 32.0)*32*1.5);
+ len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height +
+ lsq_height;
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height +
+ lsq_height + scheu_Iw_height;
+ intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
+
+ if (core_params.num_fpus > 0) {
+ data_w = int(ceil(data_path_width / 32.0)*32*1.5);
+ len = rfu->fp_regfile_height + fp_u->FU_height;
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device,
+ base_w, base_h, data_w, len,
+ &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.perThreadState;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
+ scheu_Iw_height;
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ } else {//OOO
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ /* For physical register based OOO,
+ * data broadcast interconnects cover across functional units, lsq,
+ * inst windows and register files,
+ * while tag broadcast interconnects also cover across ROB
+ */
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height;
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height + scheu_ROB_height;
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ if (core_params.num_muls > 0) {
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height +
+ mul->FU_height + lsq_height;
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height +
+ mul->FU_height + lsq_height + scheu_Iw_height +
+ scheu_ROB_height;
+ intTag_mul_Bypass = new Interconnect(xml_data,
+ "Mul Bypass Tag",
+ Core_device, base_w,
+ base_h, data_w, len,
+ &interface_ip, level,
+ clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
-RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- iFRAT(0),
- fFRAT(0),
- iRRAT(0),
- fRRAT(0),
- ifreeL(0),
- ffreeL(0),
- idcl(0),
- fdcl(0),
- RAHT(0),
- exist(exist_)
- {
- /*
- * Although renaming logic maybe be used in in-order processors,
- * McPAT assumes no renaming logic is used since the performance gain is very limited and
- * the only major inorder processor with renaming logic is Itainium
- * that is a VLIW processor and different from current McPAT's model.
- * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT;
- * i,f prefix mean int and fp
- * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires.
- * FRAT will be read twice and written once per instruction;
- * RRAT will be write once per instruction when committing and reads out all when context switch
- * checkpointing is implicit
- * Renaming logic is duplicated for each different hardware threads
- *
- * No Dual-RAT is needed in RS-based OOO processors,
- * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
- * to make sure all the renamings associated with the ROB to be released are updated at the same time.
- * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag,
- * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag,
- *
- * Both RAM and CAM have same DCL
- */
- if (!exist) return;
- int tag, data, out_w;
-// interface_ip.wire_is_mat_type = 0;
-// interface_ip.wire_os_mat_type = 0;
-// interface_ip.wt = Global_30;
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- if (coredynp.core_ty==OOO)
- {
- //integer pipeline
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions
- data = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
-// data = int(ceil(coredynp.phy_ireg_width/8.0));
- out_w = 1;//int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
-// //RAHT According to Intel, combine GC with FRAT is very costly.
-// data = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry);
-// out_w = data;
-// interface_ip.is_cache = false;
-// interface_ip.pure_cam = false;
-// interface_ip.pure_ram = true;
-// interface_ip.line_sz = data;
-// interface_ip.cache_sz = data*coredynp.globalCheckpoint;
-// interface_ip.assoc = 1;
-// interface_ip.nbanks = 1;
-// interface_ip.out_w = out_w*8;
-// interface_ip.access_mode = 0;
-// interface_ip.throughput = 1.0/clockRate;
-// interface_ip.latency = 1.0/clockRate;
-// interface_ip.obj_func_dyn_energy = 0;
-// interface_ip.obj_func_dyn_power = 0;
-// interface_ip.obj_func_leak_power = 0;
-// interface_ip.obj_func_cycle_t = 1;
-// interface_ip.num_rw_ports = 1;//the extra one port is for GCs
-// interface_ip.num_rd_ports = 2*coredynp.decodeW;
-// interface_ip.num_wr_ports = coredynp.decodeW;
-// interface_ip.num_se_rd_ports = 0;
-// iFRAT = new ArrayST(&interface_ip, "Int FrontRAT");
-// iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
-// area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT floating point
- data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ if (core_params.num_fpus > 0) {
+ data_w = int(ceil(core_params.fp_data_width));
+ len = rfu->fp_regfile_height + fp_u->FU_height;
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_freg_width;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
+ scheu_fp_Iw_height + scheu_ROB_height;
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ } else {
+ /*
+ * In RS based processor both data and tag are broadcast together,
+ * covering functional units, lsq, nst windows, register files, and ROBs
+ */
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height + scheu_ROB_height;
+ int_bypass = new Interconnect(xml_data, "Int Bypass Data",
+ Core_device, base_w, base_h, data_w,
+ len, &interface_ip, level, clockRate,
+ false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
+ scheu_Iw_height + scheu_ROB_height;
+ intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ if (core_params.num_muls > 0) {
+ data_w = int(ceil(core_params.int_data_width));
+ len = rfu->int_regfile_height + exeu->FU_height +
+ mul->FU_height + lsq_height + scheu_Iw_height +
+ scheu_ROB_height;
+ int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+
+ data_w = core_params.phy_ireg_width;
+ len = rfu->int_regfile_height + exeu->FU_height +
+ mul->FU_height + lsq_height + scheu_Iw_height +
+ scheu_ROB_height;
+ intTag_mul_Bypass = new Interconnect(xml_data,
+ "Mul Bypass Tag",
+ Core_device, base_w,
+ base_h, data_w, len,
+ &interface_ip, level,
+ clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty,
+ wire_type);
+ }
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- //FRAT
- tag = coredynp.arch_ireg_width;
- data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.decodeW;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT for FP
- tag = coredynp.arch_freg_width;
- data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ if (core_params.num_fpus > 0) {
+ data_w = int(ceil(core_params.fp_data_width));
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
+ scheu_fp_Iw_height + scheu_ROB_height;
+ fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip, level,
+ clockRate, false, route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+
+ data_w = core_params.phy_freg_width;
+ len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
+ scheu_fp_Iw_height + scheu_ROB_height;
+ fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
+ Core_device, base_w, base_h,
+ data_w, len, &interface_ip,
+ level, clockRate, false,
+ route_over_perc,
+ core_params.opt_local,
+ core_params.core_ty, wire_type);
+ }
+ }
+ }
+ if (int_bypass) {
+ children.push_back(int_bypass);
+ }
+ if (intTagBypass) {
+ children.push_back(intTagBypass);
+ }
+ if (int_mul_bypass) {
+ children.push_back(int_mul_bypass);
+ }
+ if (intTag_mul_Bypass) {
+ children.push_back(intTag_mul_Bypass);
+ }
+ if (fp_bypass) {
+ children.push_back(fp_bypass);
+ }
+ if (fpTagBypass) {
+ children.push_back(fpTagBypass);
+ }
- }
+ area.set_area(area.get_area() + int_bypass->area.get_area() +
+ intTagBypass->area.get_area());
+ if (core_params.num_muls > 0) {
+ area.set_area(area.get_area() + int_mul_bypass->area.get_area() +
+ intTag_mul_Bypass->area.get_area());
+ }
+ if (core_params.num_fpus > 0) {
+ area.set_area(area.get_area() + fp_bypass->area.get_area() +
+ fpTagBypass->area.get_area());
+ }
+}
- //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_)
+ : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL),
+ fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL),
+ RAHT(NULL), interface_ip(*interface_ip_),
+ core_params(_core_params), core_stats(_core_stats), exist(exist_) {
+ if (!exist) return;
+ int tag;
+ int data;
+ int out_w;
+ int size;
+
+ // Assumption:
+ // We make an implicit design assumption based on the specific structure
+ // that is being modeled.
+ // 1. RAM-based RATs are direct mapped. However, if the associated
+ // scheduler is a reservation station style, the RATs are fully
+ // associative.
+ // 2. Non-CAM based RATs and free lists do not have tags.
+ // 3. Free lists are direct mapped.
+
+ const int RAM_BASED_RAT_ASSOC = 1;
+ const int RS_RAT_ASSOC = 0;
+ const int NON_CAM_BASED_TAG_WIDTH = 0;
+ const int FREELIST_ASSOC = 1;
+
+ clockRate = core_params.clockRate;
+ name = "Rename Unit";
+ if (core_params.core_ty == OOO) {
+ //integer pipeline
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ if (core_params.rm_ty == RAMbased) {
+ //FRAT with global checkpointing (GCs) please see paper tech
+ //report for detailed explaintions
+
+ data = int(ceil(core_params.phy_ireg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+
+ size = data * core_params.archi_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
- iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iRRAT->area.get_area());
-
- //RRAT for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT floating point
+ data = int(ceil(core_params.phy_freg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fRRAT->area.get_area());
-
- //Freelist of renaming unit always RAM based
- //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist
- // 2)When instruction commits the Phyregisters/ROB needed to be recycled.
- //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+
+ } else if ((core_params.rm_ty == CAMbased)) {
+ //IRAT
+ tag = core_params.arch_ireg_width;
+ //the address of CAM needed to be sent out
+ data = int(ceil((core_params.arch_ireg_width + 1 *
+ core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//TODO
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width;
- //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
- ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ifreeL->area.get_area());
-
- //freelist for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ffreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT for FP
+ tag = core_params.arch_freg_width;
+ //the address of CAM needed to be sent out
+ data = int(ceil((core_params.arch_freg_width + 1 *
+ core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ffreeL->area.get_area());
-
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
-
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased){
- /*
- * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
- * to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time.
- * RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than
- * CAM based RAT so that it is more scalable as number of ROB/physical regs increases.
- */
- tag = coredynp.phy_ireg_width;
- data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= coredynp.commitW;//TODO
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->local_result.adjust_area();
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FP
- tag = coredynp.phy_freg_width;
- data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->local_result.adjust_area();
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- //FRAT
- tag = coredynp.arch_ireg_width;
- data = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO
- interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT
- tag = coredynp.arch_freg_width;
- data = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
- fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+ }
- }
- //No RRAT for RS based OOO
- //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
+ //RRAT is always RAM based, does not have GCs, and is used only to
+ //record the latest non-speculative mapping
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_IRF_size *
+ NUM_SOURCE_OPERANDS;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.retire_rat_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.commitW;
+ interface_ip.num_wr_ports = core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ iRRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iRRAT->area.get_area());
+
+ //RRAT for FP
+ data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size *
+ NUM_SOURCE_OPERANDS;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RAM_BASED_RAT_ASSOC;
+ interface_ip.nbanks = core_params.retire_rat_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ fRRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fRRAT->area.get_area());
+
+ //Freelist of the renaming unit is always RAM based
+ //Recycling happens in two places: 1) when the DCL check finds a WAW, the Phyregisters/ROB entry is recycled directly into the freelist
+ // 2) when an instruction commits, the Phyregisters/ROB entry needs to be recycled.
+ //Therefore num_wr ports = decode - 1 (-1 because at least one phy reg will be used for the current renaming group) + commit width
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ifreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports =
+ core_params.decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ifreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ifreeL->area.get_area());
+
+ //freelist for FP
+ data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ffreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.fp_decodeW;
+ interface_ip.num_wr_ports =
+ core_params.fp_decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ffreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ffreeL->area.get_area());
+
+ } else if (core_params.scheu_ty == ReservationStation) {
+ if (core_params.rm_ty == RAMbased) {
+ tag = core_params.phy_ireg_width;
+ data = int(ceil(core_params.phy_ireg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RS_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.commitW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->local_result.adjust_area();
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FP
+ tag = core_params.phy_freg_width;
+ data = int(ceil(core_params.phy_freg_width *
+ (1 + core_params.globalCheckpoint) /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
+ size = data * core_params.archi_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = RS_RAT_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = core_params.fp_issueW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->local_result.adjust_area();
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
+
+ } else if ((core_params.rm_ty == CAMbased)) {
+ //FRAT
+ //the address of the CAM needs to be sent out
+ tag = core_params.arch_ireg_width;
+ data = int(ceil (core_params.arch_ireg_width +
+ 1 * core_params.globalCheckpoint /
+ BITS_PER_BYTE));
+ out_w = int(ceil (core_params.arch_ireg_width /
+ BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_IRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.decodeW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ iFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + iFRAT->area.get_area());
+
+ //FRAT
+ tag = core_params.arch_freg_width;
+ //the address of the CAM needs to be sent out
+ data = int(ceil(core_params.arch_freg_width +
+ 1 * core_params.globalCheckpoint /
+ BITS_PER_BYTE));
+ out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
+ size = data * core_params.phy_Regs_FRF_size;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = CAM_ASSOC;
+ interface_ip.nbanks = core_params.front_rat_nbanks;
+ interface_ip.out_w = out_w * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//TODO
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
- ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ifreeL->area.get_area());
+ interface_ip.num_search_ports =
+ NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
+ Core_device, clockRate,
+ core_params.opt_local,
+ core_params.core_ty);
+ fFRAT->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + fFRAT->area.get_area());
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
+ }
+ //No RRAT for RS based OOO
+ //Freelist of renaming unit of RS based OOO is unified for both int and fp renaming unit since the ROB is unified
+ data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
+ size = data * core_params.num_ifreelist_entries;
+
+ interface_ip.cache_sz = size;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = FREELIST_ASSOC;
+ interface_ip.nbanks = core_params.freelist_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
+ interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
+ interface_ip.access_mode = Fast;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = core_params.freelist_rw_ports;
+ interface_ip.num_rd_ports = core_params.decodeW;
+ interface_ip.num_wr_ports =
+ core_params.decodeW - 1 + core_params.commitW;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / clockRate;
+ interface_ip.latency = 1.0 / clockRate;
+ ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List",
+ Core_device, clockRate, core_params.opt_local,
+ core_params.core_ty);
+ ifreeL->output_data.area *= core_params.num_hthreads;
+ area.set_area(area.get_area() + ifreeL->area.get_area());
}
-}
- if (coredynp.core_ty==Inorder&& coredynp.issueW>1)
- {
- /* Dependency check logic will only present when decode(issue) width>1.
- * Multiple issue in order processor can do without renaming, but dcl is a must.
- */
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
}
+ idcl =
+ new dep_resource_conflict_check(xml_data,
+ "Instruction Dependency Check?",
+ &interface_ip, core_params,
+ core_params.phy_ireg_width,
+ clockRate);
+ fdcl =
+ new dep_resource_conflict_check(xml_data,
+ "FP Dependency Check?", &interface_ip,
+ core_params,
+ core_params.phy_freg_width, clockRate);
}
-Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- ifu (0),
- lsu (0),
- mmu (0),
- exu (0),
- rnu (0),
- corepipe (0),
- undiffCore (0),
- l2cache (0)
-{
- /*
- * initialize, compute and optimize individual components.
- */
-
- double pipeline_area_per_unit;
- if (XML->sys.Private_L2)
- {
- l2cache = new SharedCache(XML,ithCore, &interface_ip);
-
- }
-// interface_ip.wire_is_mat_type = 2;
-// interface_ip.wire_os_mat_type = 2;
-// interface_ip.wt =Global_30;
- set_core_param();
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp);
- lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp);
- mmu = new MemManU (XML, ithCore, &interface_ip,coredynp);
- exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp);
- undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp);
- if (coredynp.core_ty==OOO)
- {
- rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp);
- }
- corepipe = new Pipeline(&interface_ip,coredynp);
-
- if (coredynp.core_ty==OOO)
- {
- pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0;
- if (rnu->exist)
- {
- rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
- }
- }
- else {
- pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0;
- }
-
- //area.set_area(area.get_area()+ corepipe->area.get_area());
- if (ifu->exist)
- {
- ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area() + ifu->area.get_area());
- }
- if (lsu->exist)
- {
- lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area() + lsu->area.get_area());
- }
- if (exu->exist)
- {
- exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area()+exu->area.get_area());
- }
- if (mmu->exist)
- {
- mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area()+mmu->area.get_area());
- }
-
- if (coredynp.core_ty==OOO)
- {
- if (rnu->exist)
- {
-
- area.set_area(area.get_area() + rnu->area.get_area());
- }
- }
-
- if (undiffCore->exist)
- {
- area.set_area(area.get_area() + undiffCore->area.get_area());
- }
-
- if (XML->sys.Private_L2)
- {
- area.set_area(area.get_area() + l2cache->area.get_area());
-
- }
-// //clock power
-// clockNetwork.init_wire_external(is_default, &interface_ip);
-// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
-// clockNetwork.end_wiring_level =5;//toplevel metal
-// clockNetwork.start_wiring_level =5;//toplevel metal
-// clockNetwork.num_regs = corepipe.tot_stage_vector;
-// clockNetwork.optimize_wire();
-}
+Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_)
+ : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL),
+ exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL),
+ ithCore(_ithCore), interface_ip(*interface_ip_) {
+
+ ostringstream os;
+ os << ithCore;
+ name = "Core " + os.str();
+
+ int i = 0;
+ XMLNode* childXML;
+ for (i = 0; i < xml_data->nChildNode("component"); i++) {
+ childXML = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = childXML->getAttribute("type");
+ if (!type)
+ warnMissingComponentType(childXML->getAttribute("id"));
+
+ STRCMP(type, "CacheUnit") {
+ XMLCSTR comp_name = childXML->getAttribute("id");
+ if (!comp_name)
+ continue;
+
+ STRCMP(comp_name, "system.L20") {
+ l2cache = new CacheUnit(childXML, &interface_ip);
+ children.push_back(l2cache);
+ }
+ }
+ }
+ set_core_param();
+ clockRate = core_params.clockRate;
+
+ ifu = new InstFetchU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(ifu);
+ lsu = new LoadStoreU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(lsu);
+ mmu = new MemManU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(mmu);
+ exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height,
+ core_params, core_stats);
+ children.push_back(exu);
+ undiffCore = new UndiffCore(xml_data, &interface_ip, core_params);
+ children.push_back(undiffCore);
+ if (core_params.core_ty == OOO) {
+ rnu = new RENAMINGU(xml_data, &interface_ip, core_params,
+ core_stats);
+ children.push_back(rnu);
+ }
+ corepipe = new Pipeline(xml_data, &interface_ip, core_params);
+ children.push_back(corepipe);
+
+ double pipeline_area_per_unit;
+ if (core_params.core_ty == OOO) {
+ pipeline_area_per_unit = (corepipe->area.get_area() *
+ core_params.num_pipelines) / 5.0;
+ if (rnu->exist) {
+ rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
+ }
+ } else {
+ pipeline_area_per_unit = (corepipe->area.get_area() *
+ core_params.num_pipelines) / 4.0;
+ }
-void BranchPredictor::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double r_access;
- double w_access;
- if (is_tdp)
- {
- r_access = coredynp.predictionW*coredynp.BR_duty_cycle;
- w_access = 0*coredynp.BR_duty_cycle;
- globalBPT->stats_t.readAc.access = r_access;
- globalBPT->stats_t.writeAc.access = w_access;
- globalBPT->tdp_stats = globalBPT->stats_t;
-
- L1_localBPT->stats_t.readAc.access = r_access;
- L1_localBPT->stats_t.writeAc.access = w_access;
- L1_localBPT->tdp_stats = L1_localBPT->stats_t;
-
- L2_localBPT->stats_t.readAc.access = r_access;
- L2_localBPT->stats_t.writeAc.access = w_access;
- L2_localBPT->tdp_stats = L2_localBPT->stats_t;
-
- chooser->stats_t.readAc.access = r_access;
- chooser->stats_t.writeAc.access = w_access;
- chooser->tdp_stats = chooser->stats_t;
-
- RAS->stats_t.readAc.access = r_access;
- RAS->stats_t.writeAc.access = w_access;
- RAS->tdp_stats = RAS->stats_t;
- }
- else
- {
- //The resolution of BPT accesses is coarse, but this is
- //because most simulators cannot track finer grained details
- r_access = XML->sys.core[ithCore].branch_instructions;
- w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0
- globalBPT->stats_t.readAc.access = r_access;
- globalBPT->stats_t.writeAc.access = w_access;
- globalBPT->rtp_stats = globalBPT->stats_t;
-
- L1_localBPT->stats_t.readAc.access = r_access;
- L1_localBPT->stats_t.writeAc.access = w_access;
- L1_localBPT->rtp_stats = L1_localBPT->stats_t;
-
- L2_localBPT->stats_t.readAc.access = r_access;
- L2_localBPT->stats_t.writeAc.access = w_access;
- L2_localBPT->rtp_stats = L2_localBPT->stats_t;
-
- chooser->stats_t.readAc.access = r_access;
- chooser->stats_t.writeAc.access = w_access;
- chooser->rtp_stats = chooser->stats_t;
-
- RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls;
- RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls;
- RAS->rtp_stats = RAS->stats_t;
- }
-
- globalBPT->power_t.reset();
- L1_localBPT->power_t.reset();
- L2_localBPT->power_t.reset();
- chooser->power_t.reset();
- RAS->power_t.reset();
-
- globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access +
- globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic;
- L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access +
- L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic;
-
- L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access +
- L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic;
-
- chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access +
- chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic;
- RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access +
- RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic;
-
- if (is_tdp)
- {
- globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
- L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
- L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
- chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg;
- RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
-
- power = power + globalBPT->power + L1_localBPT->power + chooser->power + RAS->power;
- }
- else
- {
- globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
- L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
- L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
- chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg;
- RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
- rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + chooser->rt_power + RAS->rt_power;
+ // Move all of this to computeArea
+ //area.set_area(area.get_area()+ corepipe->area.get_area());
+ if (ifu->exist) {
+ ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + ifu->area.get_area());
+ }
+ if (lsu->exist) {
+ lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + lsu->area.get_area());
+ }
+ if (exu->exist) {
+ exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + exu->area.get_area());
+ }
+ if (mmu->exist) {
+ mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
+ area.set_area(area.get_area() + mmu->area.get_area());
}
-}
-void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
- if (is_tdp)
- {
- cout << indent_str<< "Global Predictor:" << endl;
- cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "Local Predictor:" << endl;
- cout << indent_str << "L1_Local Predictor:" << endl;
- cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? L1_localBPT->power.readOp.longer_channel_leakage:L1_localBPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << L1_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "L2_Local Predictor:" << endl;
- cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? L2_localBPT->power.readOp.longer_channel_leakage:L2_localBPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << L2_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
-
- cout << indent_str << "Chooser:" << endl;
- cout << indent_str_next << "Area = " << chooser->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << chooser->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? chooser->power.readOp.longer_channel_leakage:chooser->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << chooser->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "RAS:" << endl;
- cout << indent_str_next << "Area = " << RAS->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << RAS->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? RAS->power.readOp.longer_channel_leakage:RAS->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
-// cout << indent_str_next << "Global Predictor Peak Dynamic = " << globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Global Predictor Subthreshold Leakage = " << globalBPT->rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Global Predictor Gate Leakage = " << globalBPT->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Local Predictor Peak Dynamic = " << L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Local Predictor Subthreshold Leakage = " << L1_localBPT->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Local Predictor Gate Leakage = " << L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Chooser Peak Dynamic = " << chooser->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Chooser Subthreshold Leakage = " << chooser->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "RAS Peak Dynamic = " << RAS->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "RAS Subthreshold Leakage = " << RAS->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" << endl;
- }
+ if (core_params.core_ty == OOO) {
+ if (rnu->exist) {
-}
-
-void InstFetchU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- icache.caches->stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle;
- icache.caches->stats_t.readAc.miss = 0;
- icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
- icache.caches->tdp_stats = icache.caches->stats_t;
-
- icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports;
- icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports;
- icache.missb->tdp_stats = icache.missb->stats_t;
-
- icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.ifb->tdp_stats = icache.ifb->stats_t;
-
- icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports;
- icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
- icache.prefetchb->tdp_stats = icache.prefetchb->stats_t;
-
- IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width;
- IB->tdp_stats = IB->stats_t;
-
- if (coredynp.predictionW>0)
- {
- BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses;
- BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses;
+ area.set_area(area.get_area() + rnu->area.get_area());
}
+ }
- ID_inst->stats_t.readAc.access = coredynp.decodeW;
- ID_operand->stats_t.readAc.access = coredynp.decodeW;
- ID_misc->stats_t.readAc.access = coredynp.decodeW;
- ID_inst->tdp_stats = ID_inst->stats_t;
- ID_operand->tdp_stats = ID_operand->stats_t;
- ID_misc->tdp_stats = ID_misc->stats_t;
-
+ if (undiffCore->exist) {
+ area.set_area(area.get_area() + undiffCore->area.get_area());
+ }
+ if (l2cache) {
+ area.set_area(area.get_area() + l2cache->area.get_area());
}
- else
- {
- //init stats for Runtime Dynamic (RTP)
- icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses;
- icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses;
- icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
- icache.caches->rtp_stats = icache.caches->stats_t;
+}
- icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.missb->rtp_stats = icache.missb->stats_t;
- icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.ifb->rtp_stats = icache.ifb->stats_t;
+/**
+ * Compute peak (TDP) and runtime dynamic energy for each branch predictor
+ * structure (global table, L1/L2 local tables, chooser, RAS) and sum the
+ * per-structure results into this unit's output_data.
+ *
+ * Each structure is processed entirely inside its own null check, so no
+ * pointer is dereferenced before it has been tested.
+ */
+void BranchPredictor::computeEnergy() {
+    if (!exist) return;
+
+    // ASSUMPTION: All instructions access the branch predictors at Fetch and
+    //             only branch instructions update the predictors regardless
+    //             of the correctness of the prediction.
+    double tdp_read_accesses =
+        core_params.predictionW * core_stats.BR_duty_cycle;
+
+    // Cleared up front; every structure below adds its own contribution.
+    output_data.reset();
+
+    if (globalBPT) {
+        // Peak activity: tdp_read_accesses reads, no writes.
+        globalBPT->tdp_stats.reset();
+        globalBPT->tdp_stats.readAc.access = tdp_read_accesses;
+        globalBPT->tdp_stats.writeAc.access = 0;
+        // Runtime activity: one read per instruction, one write per branch.
+        globalBPT->rtp_stats.reset();
+        globalBPT->rtp_stats.readAc.access = core_stats.total_instructions;
+        globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
+        globalBPT->power_t.reset();
+        globalBPT->power_t.readOp.dynamic +=
+            globalBPT->local_result.power.readOp.dynamic *
+            globalBPT->tdp_stats.readAc.access +
+            globalBPT->local_result.power.writeOp.dynamic *
+            globalBPT->tdp_stats.writeAc.access;
+        globalBPT->power_t = globalBPT->power_t +
+            globalBPT->local_result.power * pppm_lkg;
+        globalBPT->rt_power.reset();
+        globalBPT->rt_power.readOp.dynamic +=
+            globalBPT->local_result.power.readOp.dynamic *
+            globalBPT->rtp_stats.readAc.access +
+            globalBPT->local_result.power.writeOp.dynamic *
+            globalBPT->rtp_stats.writeAc.access;
+
+        globalBPT->output_data.peak_dynamic_power =
+            globalBPT->power_t.readOp.dynamic * clockRate;
+        globalBPT->output_data.runtime_dynamic_energy =
+            globalBPT->rt_power.readOp.dynamic;
+        output_data += globalBPT->output_data;
+    }
+
+    if (L1_localBPT) {
+        L1_localBPT->tdp_stats.reset();
+        L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses;
+        L1_localBPT->tdp_stats.writeAc.access = 0;
+        L1_localBPT->rtp_stats.reset();
+        L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions;
+        L1_localBPT->rtp_stats.writeAc.access =
+            core_stats.branch_instructions;
+        L1_localBPT->power_t.reset();
+        L1_localBPT->power_t.readOp.dynamic +=
+            L1_localBPT->local_result.power.readOp.dynamic *
+            L1_localBPT->tdp_stats.readAc.access +
+            L1_localBPT->local_result.power.writeOp.dynamic *
+            L1_localBPT->tdp_stats.writeAc.access;
+        L1_localBPT->power_t = L1_localBPT->power_t +
+            L1_localBPT->local_result.power * pppm_lkg;
+        L1_localBPT->rt_power.reset();
+        L1_localBPT->rt_power.readOp.dynamic +=
+            L1_localBPT->local_result.power.readOp.dynamic *
+            L1_localBPT->rtp_stats.readAc.access +
+            L1_localBPT->local_result.power.writeOp.dynamic *
+            L1_localBPT->rtp_stats.writeAc.access;
+
+        L1_localBPT->output_data.peak_dynamic_power =
+            L1_localBPT->power_t.readOp.dynamic * clockRate;
+        L1_localBPT->output_data.runtime_dynamic_energy =
+            L1_localBPT->rt_power.readOp.dynamic;
+        output_data += L1_localBPT->output_data;
+    }
+
+    if (L2_localBPT) {
+        L2_localBPT->tdp_stats.reset();
+        L2_localBPT->tdp_stats.readAc.access = tdp_read_accesses;
+        L2_localBPT->tdp_stats.writeAc.access = 0;
+        // NOTE(review): unlike the other tables, runtime reads here are
+        // counted per branch rather than per instruction - confirm intent.
+        L2_localBPT->rtp_stats.reset();
+        L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions;
+        L2_localBPT->rtp_stats.writeAc.access =
+            core_stats.branch_instructions;
+        L2_localBPT->power_t.reset();
+        L2_localBPT->power_t.readOp.dynamic +=
+            L2_localBPT->local_result.power.readOp.dynamic *
+            L2_localBPT->tdp_stats.readAc.access +
+            L2_localBPT->local_result.power.writeOp.dynamic *
+            L2_localBPT->tdp_stats.writeAc.access;
+        L2_localBPT->power_t = L2_localBPT->power_t +
+            L2_localBPT->local_result.power * pppm_lkg;
+        L2_localBPT->rt_power.reset();
+        L2_localBPT->rt_power.readOp.dynamic +=
+            L2_localBPT->local_result.power.readOp.dynamic *
+            L2_localBPT->rtp_stats.readAc.access +
+            L2_localBPT->local_result.power.writeOp.dynamic *
+            L2_localBPT->rtp_stats.writeAc.access;
+
+        L2_localBPT->output_data.peak_dynamic_power =
+            L2_localBPT->power_t.readOp.dynamic * clockRate;
+        L2_localBPT->output_data.runtime_dynamic_energy =
+            L2_localBPT->rt_power.readOp.dynamic;
+        output_data += L2_localBPT->output_data;
+    }
+
+    if (chooser) {
+        chooser->tdp_stats.reset();
+        chooser->tdp_stats.readAc.access = tdp_read_accesses;
+        chooser->tdp_stats.writeAc.access = 0;
+        chooser->rtp_stats.reset();
+        chooser->rtp_stats.readAc.access = core_stats.total_instructions;
+        chooser->rtp_stats.writeAc.access = core_stats.branch_instructions;
+        chooser->power_t.reset();
+        chooser->power_t.readOp.dynamic +=
+            chooser->local_result.power.readOp.dynamic *
+            chooser->tdp_stats.readAc.access +
+            chooser->local_result.power.writeOp.dynamic *
+            chooser->tdp_stats.writeAc.access;
+        chooser->power_t =
+            chooser->power_t + chooser->local_result.power * pppm_lkg;
+        chooser->rt_power.reset();
+        chooser->rt_power.readOp.dynamic +=
+            chooser->local_result.power.readOp.dynamic *
+            chooser->rtp_stats.readAc.access +
+            chooser->local_result.power.writeOp.dynamic *
+            chooser->rtp_stats.writeAc.access;
+
+        chooser->output_data.peak_dynamic_power =
+            chooser->power_t.readOp.dynamic * clockRate;
+        chooser->output_data.runtime_dynamic_energy =
+            chooser->rt_power.readOp.dynamic;
+        output_data += chooser->output_data;
+    }
+
+    if (RAS) {
+        RAS->tdp_stats.reset();
+        RAS->tdp_stats.readAc.access = tdp_read_accesses;
+        RAS->tdp_stats.writeAc.access = 0;
+        // Runtime activity for the RAS is driven by function calls.
+        RAS->rtp_stats.reset();
+        RAS->rtp_stats.readAc.access = core_stats.function_calls;
+        RAS->rtp_stats.writeAc.access = core_stats.function_calls;
+        RAS->power_t.reset();
+        RAS->power_t.readOp.dynamic +=
+            RAS->local_result.power.readOp.dynamic *
+            RAS->tdp_stats.readAc.access +
+            RAS->local_result.power.writeOp.dynamic *
+            RAS->tdp_stats.writeAc.access;
+        // The RAS uses the multithread leakage scaling factor, not pppm_lkg.
+        RAS->power_t = RAS->power_t + RAS->local_result.power *
+            core_params.pppm_lkg_multhread;
+        RAS->rt_power.reset();
+        RAS->rt_power.readOp.dynamic +=
+            RAS->local_result.power.readOp.dynamic *
+            RAS->rtp_stats.readAc.access +
+            RAS->local_result.power.writeOp.dynamic *
+            RAS->rtp_stats.writeAc.access;
+
+        RAS->output_data.peak_dynamic_power =
+            RAS->power_t.readOp.dynamic * clockRate;
+        RAS->output_data.subthreshold_leakage_power =
+            RAS->power_t.readOp.leakage * core_params.num_hthreads;
+        RAS->output_data.gate_leakage_power =
+            RAS->power_t.readOp.gate_leakage * core_params.num_hthreads;
+        RAS->output_data.runtime_dynamic_energy =
+            RAS->rt_power.readOp.dynamic;
+        output_data += RAS->output_data;
+    }
+}
- icache.prefetchb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.prefetchb->rtp_stats = icache.prefetchb->stats_t;
+/**
+ * Call the base-class displayData for this unit, then displayData on each
+ * of the five predictor structures with the indent increased by four.
+ */
+void BranchPredictor::displayData(uint32_t indent, int plevel) {
+ if (!exist) {
+ return;
+ }
- IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions;
- IB->rtp_stats = IB->stats_t;
+ // Aggregate entry for the whole predictor comes first.
+ McPATComponent::displayData(indent, plevel);
- if (coredynp.predictionW>0)
- {
- BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions;
- BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions;
- BTB->rtp_stats = BTB->stats_t;
- }
+ // Sub-structures are shown four columns deeper than this unit.
+ const uint32_t child_indent = indent + 4;
+ globalBPT->displayData(child_indent, plevel);
+ L1_localBPT->displayData(child_indent, plevel);
+ L2_localBPT->displayData(child_indent, plevel);
+ chooser->displayData(child_indent, plevel);
+ RAS->displayData(child_indent, plevel);
+}
- ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_inst->rtp_stats = ID_inst->stats_t;
- ID_operand->rtp_stats = ID_operand->stats_t;
- ID_misc->rtp_stats = ID_misc->stats_t;
+/**
+ * Compute peak (TDP) and runtime dynamic energy for the instruction buffer,
+ * the BTB (only when predictionW > 0) and the three decoder blocks, run the
+ * base-class computeEnergy, and then sum the per-component output_data into
+ * this unit's output_data.
+ */
+void InstFetchU::computeEnergy() {
+ if (!exist) return;
+ if (BPT) {
+ BPT->computeEnergy();
 }
- icache.power_t.reset();
+ IB->tdp_stats.reset();
+ IB->tdp_stats.readAc.access = core_params.peak_issueW;
+ IB->tdp_stats.writeAc.access = core_params.peak_issueW;
+ IB->rtp_stats.reset();
+ IB->rtp_stats.readAc.access = core_stats.total_instructions;
+ IB->rtp_stats.writeAc.access = core_stats.total_instructions;
 IB->power_t.reset();
-// ID_inst->power_t.reset();
-// ID_operand->power_t.reset();
-// ID_misc->power_t.reset();
- if (coredynp.predictionW>0)
- {
+ IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic *
+ IB->tdp_stats.readAc.access +
+ IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access;
+ IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg;
+ IB->rt_power.reset();
+ IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic *
+ IB->rtp_stats.readAc.access +
+ IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access;
+
+ if (core_params.predictionW > 0) {
+ BTB->tdp_stats.reset();
+ BTB->tdp_stats.readAc.access = core_params.predictionW;
+ BTB->tdp_stats.writeAc.access = 0;
+ BTB->rtp_stats.reset();
+ BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses;
+ BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses;
 BTB->power_t.reset();
+ BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic *
+ BTB->tdp_stats.readAc.access +
+ BTB->local_result.power.writeOp.dynamic *
+ BTB->tdp_stats.writeAc.access;
+ BTB->rt_power.reset();
+ BTB->rt_power.readOp.dynamic +=
+ BTB->local_result.power.readOp.dynamic *
+ BTB->rtp_stats.readAc.access +
+ BTB->local_result.power.writeOp.dynamic *
+ BTB->rtp_stats.writeAc.access;
 }
- icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+
- //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+
- icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel
- icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache
- icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic +
- icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
- icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic +
- icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic;
- icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic +
- icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic;
+ // Each decoder block starts from its own power figures; only the dynamic
+ // term is then rescaled by the access count.
+ ID_inst->tdp_stats.reset();
+ ID_inst->tdp_stats.readAc.access = core_params.decodeW;
+ ID_inst->power_t.reset();
+ ID_inst->power_t = ID_inst->power;
+ ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic *
+ ID_inst->tdp_stats.readAc.access;
+ ID_inst->rtp_stats.reset();
+ ID_inst->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_inst->rt_power.reset();
+ ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic *
+ ID_inst->rtp_stats.readAc.access;
+
+ ID_operand->tdp_stats.reset();
+ ID_operand->tdp_stats.readAc.access = core_params.decodeW;
+ ID_operand->power_t.reset();
+ ID_operand->power_t = ID_operand->power;
+ ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic *
+ ID_operand->tdp_stats.readAc.access;
+ ID_operand->rtp_stats.reset();
+ ID_operand->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_operand->rt_power.reset();
+ ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic *
+ ID_operand->rtp_stats.readAc.access;
+
+ ID_misc->tdp_stats.reset();
+ ID_misc->tdp_stats.readAc.access = core_params.decodeW;
+ ID_misc->power_t.reset();
+ ID_misc->power_t = ID_misc->power;
+ ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic *
+ ID_misc->tdp_stats.readAc.access;
+ ID_misc->rtp_stats.reset();
+ ID_misc->rtp_stats.readAc.access = core_stats.total_instructions;
+ ID_misc->rt_power.reset();
+ ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic *
+ ID_misc->rtp_stats.readAc.access;
+
+ power.reset();
+ rt_power.reset();
+ McPATComponent::computeEnergy();
+
+ // NOTE(review): IB, BTB and the ID_* blocks were already dereferenced
+ // above, so the null checks below cannot protect those earlier accesses.
+ output_data.reset();
+ if (icache) {
+ output_data += icache->output_data;
+ }
+ if (IB) {
+ IB->output_data.peak_dynamic_power =
+ IB->power_t.readOp.dynamic * clockRate;
+ IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic;
+ output_data += IB->output_data;
+ }
+ if (BTB) {
+ BTB->output_data.peak_dynamic_power =
+ BTB->power_t.readOp.dynamic * clockRate;
+ BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic;
+ output_data += BTB->output_data;
+ }
+ if (BPT) {
+ output_data += BPT->output_data;
+ }
+ if (ID_inst) {
+ ID_inst->output_data.peak_dynamic_power =
+ ID_inst->power_t.readOp.dynamic * clockRate;
+ ID_inst->output_data.runtime_dynamic_energy =
+ ID_inst->rt_power.readOp.dynamic;
+ output_data += ID_inst->output_data;
+ }
+ if (ID_operand) {
+ ID_operand->output_data.peak_dynamic_power =
+ ID_operand->power_t.readOp.dynamic * clockRate;
+ ID_operand->output_data.runtime_dynamic_energy =
+ ID_operand->rt_power.readOp.dynamic;
+ output_data += ID_operand->output_data;
+ }
+ if (ID_misc) {
+ ID_misc->output_data.peak_dynamic_power =
+ ID_misc->power_t.readOp.dynamic * clockRate;
+ ID_misc->output_data.runtime_dynamic_energy =
+ ID_misc->rt_power.readOp.dynamic;
+ output_data += ID_misc->output_data;
+ }
+}
- IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access +
- IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic;
+/**
+ * Call the base-class displayData for this unit, then displayData on its
+ * sub-components with the indent increased by four. The BTB and the branch
+ * predictor are only shown when predictionW > 0.
+ */
+void InstFetchU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
- if (coredynp.predictionW>0)
- {
- BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access +
- BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic;
+ McPATComponent::displayData(indent, plevel);
- BPT->computeEnergy(is_tdp);
+ if (core_params.predictionW > 0) {
+ BTB->displayData(indent + 4, plevel);
+ // computeEnergy() treats BPT as nullable, so test the pointer before
+ // reading its exist flag.
+ if (BPT && BPT->exist) {
+ BPT->displayData(indent + 4, plevel);
 }
+ }
+ IB->displayData(indent + 4, plevel);
+ ID_inst->displayData(indent + 4, plevel);
+ ID_operand->displayData(indent + 4, plevel);
+ ID_misc->displayData(indent + 4, plevel);
+}
- if (is_tdp)
- {
-// icache.power = icache.power_t +
-// (icache.caches->local_result.power)*pppm_lkg +
-// (icache.missb->local_result.power +
-// icache.ifb->local_result.power +
-// icache.prefetchb->local_result.power)*pppm_Isub;
- icache.power = icache.power_t +
- (icache.caches->local_result.power +
- icache.missb->local_result.power +
- icache.ifb->local_result.power +
- icache.prefetchb->local_result.power)*pppm_lkg;
-
- IB->power = IB->power_t + IB->local_result.power*pppm_lkg;
- power = power + icache.power + IB->power;
- if (coredynp.predictionW>0)
- {
- BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg;
- power = power + BTB->power + BPT->power;
- }
+void RENAMINGU::computeEnergy() {
+ if (!exist) return;
+
+ idcl->tdp_stats.reset();
+ idcl->rtp_stats.reset();
+ idcl->power_t.reset();
+ idcl->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ idcl->tdp_stats.readAc.access = core_params.decodeW;
+ idcl->rtp_stats.readAc.access = 3 * core_params.decodeW *
+ core_params.decodeW * core_stats.rename_reads;
+ } else if (core_params.issueW > 1) {
+ idcl->tdp_stats.readAc.access = core_params.decodeW;
+ idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions;
+ }
+ idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access *
+ idcl->power.readOp.dynamic;
+ idcl->power_t.readOp.leakage = idcl->power.readOp.leakage *
+ core_params.num_hthreads;
+ idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage *
+ core_params.num_hthreads;
+ idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access *
+ idcl->power.readOp.dynamic;
+
+ fdcl->tdp_stats.reset();
+ fdcl->rtp_stats.reset();
+ fdcl->power_t.reset();
+ fdcl->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ fdcl->tdp_stats.readAc.access = core_params.decodeW;
+ fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW *
+ core_params.fp_issueW * core_stats.fp_rename_writes;
+ } else if (core_params.issueW > 1) {
+ fdcl->tdp_stats.readAc.access = core_params.decodeW;
+ fdcl->rtp_stats.readAc.access = core_stats.fp_instructions;
+ }
+ fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access *
+ fdcl->power.readOp.dynamic;
+ fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage *
+ core_params.num_hthreads;
+ fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage *
+ core_params.num_hthreads;
+ fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access *
+ fdcl->power.readOp.dynamic;
+
+ if (iRRAT) {
+ iRRAT->tdp_stats.reset();
+ iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports;
+ iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports;
+ iRRAT->rtp_stats.reset();
+ iRRAT->rtp_stats.readAc.access = core_stats.rename_writes;
+ iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
+ iRRAT->power_t.reset();
+ iRRAT->power_t.readOp.dynamic +=
+ iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic +
+ iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
+ iRRAT->rt_power.reset();
+ iRRAT->rt_power.readOp.dynamic +=
+ iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic +
+ iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
+ iRRAT->power_t.readOp.leakage =
+ iRRAT->power.readOp.leakage * core_params.num_hthreads;
+ iRRAT->power_t.readOp.gate_leakage =
+ iRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic;
- ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic;
- ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic;
-
- ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access;
- ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access;
- ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access;
-
- power = power + (ID_inst->power +
- ID_operand->power +
- ID_misc->power);
- }
- else
- {
-// icache.rt_power = icache.power_t +
-// (icache.caches->local_result.power)*pppm_lkg +
-// (icache.missb->local_result.power +
-// icache.ifb->local_result.power +
-// icache.prefetchb->local_result.power)*pppm_Isub;
-
- icache.rt_power = icache.power_t +
- (icache.caches->local_result.power +
- icache.missb->local_result.power +
- icache.ifb->local_result.power +
- icache.prefetchb->local_result.power)*pppm_lkg;
-
- IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg;
- rt_power = rt_power + icache.rt_power + IB->rt_power;
- if (coredynp.predictionW>0)
- {
- BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg;
- rt_power = rt_power + BTB->rt_power + BPT->rt_power;
- }
+ if (ifreeL) {
+ ifreeL->tdp_stats.reset();
+ ifreeL->tdp_stats.readAc.access = core_params.decodeW;
+ ifreeL->tdp_stats.writeAc.access = core_params.decodeW;
+ ifreeL->rtp_stats.reset();
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ ifreeL->rtp_stats.readAc.access = core_stats.rename_reads;
+ ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes;
+ } else if (core_params.scheu_ty == ReservationStation) {
+ ifreeL->rtp_stats.readAc.access =
+ core_stats.rename_reads + core_stats.fp_rename_reads;
+ ifreeL->rtp_stats.writeAc.access =
+ 2 * (core_stats.rename_writes + core_stats.fp_rename_writes);
+ }
+ ifreeL->power_t.reset();
+ ifreeL->power_t.readOp.dynamic +=
+ ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic +
+ ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
+ ifreeL->rt_power.reset();
+ ifreeL->rt_power.readOp.dynamic +=
+ ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic +
+ ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
+ ifreeL->power_t.readOp.leakage =
+ ifreeL->power.readOp.leakage * core_params.num_hthreads;
+ ifreeL->power_t.readOp.gate_leakage =
+ ifreeL->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access;
- ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access;
- ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access;
+ if (fRRAT) {
+ fRRAT->tdp_stats.reset();
+ fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports;
+ fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports;
+ fRRAT->rtp_stats.reset();
+ fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes;
+ fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
+ fRRAT->power_t.reset();
+ fRRAT->power_t.readOp.dynamic +=
+ fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic +
+ fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
+ fRRAT->rt_power.reset();
+ fRRAT->rt_power.readOp.dynamic +=
+ fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic +
+ fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
+ fRRAT->power_t.readOp.leakage =
+ fRRAT->power.readOp.leakage * core_params.num_hthreads;
+ fRRAT->power_t.readOp.gate_leakage =
+ fRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ }
- rt_power = rt_power + (ID_inst->rt_power +
- ID_operand->rt_power +
- ID_misc->rt_power);
+ if (ffreeL) {
+ ffreeL->tdp_stats.reset();
+ ffreeL->tdp_stats.readAc.access = core_params.decodeW;
+ ffreeL->tdp_stats.writeAc.access = core_params.decodeW;
+ ffreeL->rtp_stats.reset();
+ ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads;
+ ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes;
+ ffreeL->power_t.reset();
+ ffreeL->power_t.readOp.dynamic +=
+ ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic +
+ ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
+ ffreeL->rt_power.reset();
+ ffreeL->rt_power.readOp.dynamic +=
+ ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic +
+ ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
+ ffreeL->power_t.readOp.leakage =
+ ffreeL->power.readOp.leakage * core_params.num_hthreads;
+ ffreeL->power_t.readOp.gate_leakage =
+ ffreeL->power.readOp.gate_leakage * core_params.num_hthreads;
}
-}
-void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
-
- cout << indent_str<< "Instruction Cache:" << endl;
- cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.predictionW>0)
- {
- cout << indent_str<< "Branch Target Buffer:" << endl;
- cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (BPT->exist)
- {
- cout << indent_str<< "Branch Predictor:" << endl;
- cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3)
- {
- BPT->displayEnergy(indent+4, plevel, is_tdp);
- }
- }
- }
- cout << indent_str<< "Instruction Buffer:" << endl;
- cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Instruction Decoder:" << endl;
- cout << indent_str_next << "Area = " << (ID_inst->area.get_area() +
- ID_operand->area.get_area() +
- ID_misc->area.get_area())*coredynp.decodeW*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << (ID_inst->power.readOp.dynamic +
- ID_operand->power.readOp.dynamic +
- ID_misc->power.readOp.dynamic)*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? (ID_inst->power.readOp.longer_channel_leakage +
- ID_operand->power.readOp.longer_channel_leakage +
- ID_misc->power.readOp.longer_channel_leakage):
- (ID_inst->power.readOp.leakage +
- ID_operand->power.readOp.leakage +
- ID_misc->power.readOp.leakage)) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << (ID_inst->power.readOp.gate_leakage +
- ID_operand->power.readOp.gate_leakage +
- ID_misc->power.readOp.gate_leakage) << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << (ID_inst->rt_power.readOp.dynamic +
- ID_operand->rt_power.readOp.dynamic +
- ID_misc->rt_power.readOp.dynamic)/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
-// cout << indent_str_next << "Instruction Cache Peak Dynamic = " << icache.rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " << icache.rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Instruction Cache Gate Leakage = " << icache.rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Peak Dynamic = " << IB->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Subthreshold Leakage = " << IB->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Instruction Buffer Gate Leakage = " << IB->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Subthreshold Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Branch Target Buffer Gate Leakage = " << BTB->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Peak Dynamic = " << BPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Subthreshold Leakage = " << BPT->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Branch Predictor Gate Leakage = " << BPT->rt_power.readOp.gate_leakage << " W" << endl;
+ if (iFRAT) {
+ tdp_stats.reset();
+ if (core_params.rm_ty == RAMbased) {
+ iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports;
+ iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
+ iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports;
+ } else if ((core_params.rm_ty == CAMbased)) {
+ iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports;
+ iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
+ }
+ rtp_stats.reset();
+ iFRAT->rtp_stats.readAc.access = core_stats.rename_reads;
+ iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
+ if (core_params.scheu_ty == ReservationStation &&
+ core_params.rm_ty == RAMbased) {
+ iFRAT->rtp_stats.searchAc.access =
+ core_stats.committed_int_instructions;
+ }
+ iFRAT->power_t.reset();
+ iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access
+ * (iFRAT->local_result.power.readOp.dynamic
+ + idcl->power.readOp.dynamic)
+ + iFRAT->tdp_stats.writeAc.access
+ * iFRAT->local_result.power.writeOp.dynamic
+ + iFRAT->tdp_stats.searchAc.access
+ * iFRAT->local_result.power.searchOp.dynamic;
+ iFRAT->power_t.readOp.leakage =
+ iFRAT->power.readOp.leakage * core_params.num_hthreads;
+ iFRAT->power_t.readOp.gate_leakage =
+ iFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ iFRAT->rt_power.reset();
+ iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access
+ * (iFRAT->local_result.power.readOp.dynamic
+ + idcl->power.readOp.dynamic)
+ + iFRAT->rtp_stats.writeAc.access
+ * iFRAT->local_result.power.writeOp.dynamic
+ + iFRAT->rtp_stats.searchAc.access
+ * iFRAT->local_result.power.searchOp.dynamic;
+ }
+
+ if (fFRAT) {
+ tdp_stats.reset();
+ fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports;
+ if ((core_params.rm_ty == CAMbased)) {
+ fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports;
+ } else if (core_params.rm_ty == RAMbased) {
+ fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports;
+ if (core_params.scheu_ty == ReservationStation) {
+ fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports;
+ }
}
+ rtp_stats.reset();
+ fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads;
+ fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
+ if (core_params.scheu_ty == ReservationStation &&
+ core_params.rm_ty == RAMbased) {
+ fFRAT->rtp_stats.searchAc.access =
+ core_stats.committed_fp_instructions;
+ }
+ fFRAT->power_t.reset();
+ fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access
+ * (fFRAT->local_result.power.readOp.dynamic
+ + fdcl->power.readOp.dynamic)
+ + fFRAT->tdp_stats.writeAc.access
+ * fFRAT->local_result.power.writeOp.dynamic
+ + fFRAT->tdp_stats.searchAc.access
+ * fFRAT->local_result.power.searchOp.dynamic;
+ fFRAT->power_t.readOp.leakage =
+ fFRAT->power.readOp.leakage * core_params.num_hthreads;
+ fFRAT->power_t.readOp.gate_leakage =
+ fFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
+ fFRAT->rt_power.reset();
+ fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access
+ * (fFRAT->local_result.power.readOp.dynamic
+ + fdcl->power.readOp.dynamic)
+ + fFRAT->rtp_stats.writeAc.access
+ * fFRAT->local_result.power.writeOp.dynamic
+ + fFRAT->rtp_stats.searchAc.access
+ * fFRAT->local_result.power.searchOp.dynamic;
+ }
+ output_data.reset();
+ if (iFRAT) {
+ iFRAT->output_data.peak_dynamic_power =
+ iFRAT->power_t.readOp.dynamic * clockRate;
+ iFRAT->output_data.subthreshold_leakage_power =
+ iFRAT->power_t.readOp.leakage;
+ iFRAT->output_data.gate_leakage_power =
+ iFRAT->power_t.readOp.gate_leakage;
+ iFRAT->output_data.runtime_dynamic_energy =
+ iFRAT->rt_power.readOp.dynamic;
+ output_data += iFRAT->output_data;
+ }
+ if (fFRAT) {
+ fFRAT->output_data.peak_dynamic_power =
+ fFRAT->power_t.readOp.dynamic * clockRate;
+ fFRAT->output_data.subthreshold_leakage_power =
+ fFRAT->power_t.readOp.leakage;
+ fFRAT->output_data.gate_leakage_power =
+ fFRAT->power_t.readOp.gate_leakage;
+ fFRAT->output_data.runtime_dynamic_energy =
+ fFRAT->rt_power.readOp.dynamic;
+ output_data += fFRAT->output_data;
+ }
+ if (iRRAT) {
+ iRRAT->output_data.peak_dynamic_power =
+ iRRAT->power_t.readOp.dynamic * clockRate;
+ iRRAT->output_data.subthreshold_leakage_power =
+ iRRAT->power_t.readOp.leakage;
+ iRRAT->output_data.gate_leakage_power =
+ iRRAT->power_t.readOp.gate_leakage;
+ iRRAT->output_data.runtime_dynamic_energy =
+ iRRAT->rt_power.readOp.dynamic;
+ output_data += iRRAT->output_data;
+ }
+ if (fRRAT) {
+ fRRAT->output_data.peak_dynamic_power =
+ fRRAT->power_t.readOp.dynamic * clockRate;
+ fRRAT->output_data.subthreshold_leakage_power =
+ fRRAT->power_t.readOp.leakage;
+ fRRAT->output_data.gate_leakage_power =
+ fRRAT->power_t.readOp.gate_leakage;
+ fRRAT->output_data.runtime_dynamic_energy =
+ fRRAT->rt_power.readOp.dynamic;
+ output_data += fRRAT->output_data;
+ }
+ if (ifreeL) {
+ ifreeL->output_data.peak_dynamic_power =
+ ifreeL->power_t.readOp.dynamic * clockRate;
+ ifreeL->output_data.subthreshold_leakage_power =
+ ifreeL->power_t.readOp.leakage;
+ ifreeL->output_data.gate_leakage_power =
+ ifreeL->power_t.readOp.gate_leakage;
+ ifreeL->output_data.runtime_dynamic_energy =
+ ifreeL->rt_power.readOp.dynamic;
+ output_data += ifreeL->output_data;
+ }
+ if (ffreeL) {
+ ffreeL->output_data.peak_dynamic_power =
+ ffreeL->power_t.readOp.dynamic * clockRate;
+ ffreeL->output_data.subthreshold_leakage_power =
+ ffreeL->power_t.readOp.leakage;
+ ffreeL->output_data.gate_leakage_power =
+ ffreeL->power_t.readOp.gate_leakage;
+ ffreeL->output_data.runtime_dynamic_energy =
+ ffreeL->rt_power.readOp.dynamic;
+ output_data += ffreeL->output_data;
+ }
+ if (idcl) {
+ idcl->output_data.peak_dynamic_power =
+ idcl->power_t.readOp.dynamic * clockRate;
+ idcl->output_data.subthreshold_leakage_power =
+ idcl->power_t.readOp.leakage;
+ idcl->output_data.gate_leakage_power =
+ idcl->power_t.readOp.gate_leakage;
+ idcl->output_data.runtime_dynamic_energy =
+ idcl->rt_power.readOp.dynamic;
+ output_data += idcl->output_data;
+ }
+ if (fdcl) {
+ fdcl->output_data.peak_dynamic_power =
+ fdcl->power_t.readOp.dynamic * clockRate;
+ fdcl->output_data.subthreshold_leakage_power =
+ fdcl->power_t.readOp.leakage;
+ fdcl->output_data.gate_leakage_power =
+ fdcl->power_t.readOp.gate_leakage;
+ fdcl->output_data.runtime_dynamic_energy =
+ fdcl->rt_power.readOp.dynamic;
+ output_data += fdcl->output_data;
+ }
+ if (RAHT) {
+ output_data += RAHT->output_data;
+ }
}
-void RENAMINGU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double pppm_t[4] = {1,1,1,1};
- if (is_tdp)
- {//init stats for Peak
- if (coredynp.core_ty==OOO){
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
- }
-
- iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports;
- iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports;
- iRRAT->tdp_stats = iRRAT->stats_t;
-
- fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports;
- fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports;
- fRRAT->tdp_stats = fRRAT->stats_t;
-
- ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;;
- ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
- ifreeL->tdp_stats = ifreeL->stats_t;
-
- ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports;
- ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports;
- ffreeL->tdp_stats = ffreeL->stats_t;
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->stats_t.searchAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->stats_t.searchAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports;
- iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
- iFRAT->tdp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports;
- fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
- fFRAT->tdp_stats = fFRAT->stats_t;
- }
- //Unified free list for both int and fp
- ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;
- ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
- ifreeL->tdp_stats = ifreeL->stats_t;
- }
- idcl->stats_t.readAc.access = coredynp.decodeW;
- fdcl->stats_t.readAc.access = coredynp.decodeW;
- idcl->tdp_stats = idcl->stats_t;
- fdcl->tdp_stats = fdcl->stats_t;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->stats_t.readAc.access = coredynp.decodeW;
- fdcl->stats_t.readAc.access = coredynp.decodeW;
- idcl->tdp_stats = idcl->stats_t;
- fdcl->tdp_stats = fdcl->stats_t;
- }
- }
+void RENAMINGU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
- }
- else
- {//init stats for Runtime Dynamic (RTP)
- if (coredynp.core_ty==OOO){
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
-
- iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16
- iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iRRAT->rtp_stats = iRRAT->stats_t;
-
- fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16
- fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fRRAT->rtp_stats = fRRAT->stats_t;
-
- ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes;
- ifreeL->rtp_stats = ifreeL->stats_t;
-
- ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- ffreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].fp_rename_writes;
- ffreeL->rtp_stats = ffreeL->stats_t;
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs.
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
- iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
- iFRAT->rtp_stats = iFRAT->stats_t;
-
- fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
- fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
- fFRAT->rtp_stats = fFRAT->stats_t;
- }
- //Unified free list for both int and fp since the ROB act as physcial registers
- ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads +
- XML->sys.core[ithCore].fp_rename_reads;
- ifreeL->stats_t.writeAc.access = 2*(XML->sys.core[ithCore].rename_writes +
- XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group
- //are terminated early
- ifreeL->rtp_stats = ifreeL->stats_t;
- }
- idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads;
- fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes;
- idcl->rtp_stats = idcl->stats_t;
- fdcl->rtp_stats = fdcl->stats_t;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions;
- fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions;
- idcl->rtp_stats = idcl->stats_t;
- fdcl->rtp_stats = fdcl->stats_t;
- }
- }
+ McPATComponent::displayData(indent, plevel);
+ if (core_params.core_ty == OOO) {
+ iFRAT->displayData(indent + 4, plevel);
+ fFRAT->displayData(indent + 4, plevel);
+ ifreeL->displayData(indent + 4, plevel);
+
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ iRRAT->displayData(indent + 4, plevel);
+ fRRAT->displayData(indent + 4, plevel);
+ ffreeL->displayData(indent + 4, plevel);
}
- /* Compute engine */
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
-
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
-
- iRRAT->power_t.reset();
- fRRAT->power_t.reset();
- ifreeL->power_t.reset();
- ffreeL->power_t.reset();
-
- iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic
- +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic);
- fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic
- +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic);
- ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
- +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
- ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic
- +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic);
+ }
+ idcl->displayData(indent + 4, plevel);
+ fdcl->displayData(indent + 4, plevel);
+}
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- if (coredynp.rm_ty ==RAMbased)
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
-
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic
- +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic
- +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic);
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- iFRAT->power_t.reset();
- fFRAT->power_t.reset();
- iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
- *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
- +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
- fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
- *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
- +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
- }
- ifreeL->power_t.reset();
- ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
- +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
- }
+void SchedulerU::computeEnergy() {
+ if (!exist) return;
- }
- else
- {
- if (coredynp.issueW>1)
- {
- idcl->power_t.reset();
- fdcl->power_t.reset();
- set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
- idcl->power_t = idcl->power * pppm_t;
- set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
- fdcl->power_t = fdcl->power * pppm_t;
- }
+ double ROB_duty_cycle;
+ ROB_duty_cycle = 1;
- }
+ if (int_instruction_selection) {
+ int_instruction_selection->computeEnergy();
+ }
- //assign value to tpd and rtp
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- iRRAT->power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- fRRAT->power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- ffreeL->power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
- power = power + (iFRAT->power + fFRAT->power)
- + (iRRAT->power + fRRAT->power)
- + (ifreeL->power + ffreeL->power);
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- power = power + (iFRAT->power + fFRAT->power)
- + ifreeL->power;
- }
- }
- else
- {
- power = power + idcl->power_t + fdcl->power_t;
- }
+ if (fp_instruction_selection) {
+ fp_instruction_selection->computeEnergy();
+ }
+ if (int_inst_window) {
+ int_inst_window->tdp_stats.reset();
+ int_inst_window->rtp_stats.reset();
+ int_inst_window->power_t.reset();
+ int_inst_window->rt_power.reset();
+ if (core_params.core_ty == OOO) {
+ int_inst_window->tdp_stats.readAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.writeAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.searchAc.access =
+ core_params.issueW * core_params.num_pipelines;
+
+ int_inst_window->power_t.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->tdp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->tdp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->tdp_stats.writeAc.access;
+
+ int_inst_window->rtp_stats.readAc.access =
+ core_stats.inst_window_reads;
+ int_inst_window->rtp_stats.writeAc.access =
+ core_stats.inst_window_writes;
+ int_inst_window->rtp_stats.searchAc.access =
+ core_stats.inst_window_wakeup_accesses;
+
+ int_inst_window->rt_power.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->rtp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->rtp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->rtp_stats.writeAc.access;
+ } else if (core_params.multithreaded) {
+ int_inst_window->tdp_stats.readAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.writeAc.access =
+ core_params.issueW * core_params.num_pipelines;
+ int_inst_window->tdp_stats.searchAc.access =
+ core_params.issueW * core_params.num_pipelines;
+
+ int_inst_window->power_t.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->tdp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->tdp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->tdp_stats.writeAc.access;
+
+ int_inst_window->rtp_stats.readAc.access =
+ core_stats.int_instructions + core_stats.fp_instructions;
+ int_inst_window->rtp_stats.writeAc.access =
+ core_stats.int_instructions + core_stats.fp_instructions;
+ int_inst_window->rtp_stats.searchAc.access =
+ 2 * (core_stats.int_instructions + core_stats.fp_instructions);
+
+ int_inst_window->rt_power.readOp.dynamic +=
+ int_inst_window->local_result.power.readOp.dynamic *
+ int_inst_window->rtp_stats.readAc.access +
+ int_inst_window->local_result.power.searchOp.dynamic *
+ int_inst_window->rtp_stats.searchAc.access +
+ int_inst_window->local_result.power.writeOp.dynamic *
+ int_inst_window->rtp_stats.writeAc.access;
}
- else
- {
- if (coredynp.core_ty==OOO)
- {
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
- ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power)
- + (iRRAT->rt_power + fRRAT->rt_power)
- + (ifreeL->rt_power + ffreeL->rt_power);
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {
- iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
- fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
- ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power)
- + ifreeL->rt_power;
- }
- }
- else
- {
- rt_power = rt_power + idcl->power_t + fdcl->power_t;
- }
+ }
- }
-}
+ if (fp_inst_window) {
+ fp_inst_window->tdp_stats.reset();
+ fp_inst_window->tdp_stats.readAc.access =
+ fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines;
+ fp_inst_window->tdp_stats.writeAc.access =
+ fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines;
+ fp_inst_window->tdp_stats.searchAc.access =
+ fp_inst_window->l_ip.num_search_ports *
+ core_params.num_fp_pipelines;
+
+ fp_inst_window->rtp_stats.reset();
+ fp_inst_window->rtp_stats.readAc.access =
+ core_stats.fp_inst_window_reads;
+ fp_inst_window->rtp_stats.writeAc.access =
+ core_stats.fp_inst_window_writes;
+ fp_inst_window->rtp_stats.searchAc.access =
+ core_stats.fp_inst_window_wakeup_accesses;
+
+ fp_inst_window->power_t.reset();
+ fp_inst_window->power_t.readOp.dynamic +=
+ fp_inst_window->power.readOp.dynamic *
+ fp_inst_window->tdp_stats.readAc.access +
+ fp_inst_window->power.searchOp.dynamic *
+ fp_inst_window->tdp_stats.searchAc.access +
+ fp_inst_window->power.writeOp.dynamic *
+ fp_inst_window->tdp_stats.writeAc.access;
+
+ fp_inst_window->rt_power.reset();
+ fp_inst_window->rt_power.readOp.dynamic +=
+ fp_inst_window->power.readOp.dynamic *
+ fp_inst_window->rtp_stats.readAc.access +
+ fp_inst_window->power.searchOp.dynamic *
+ fp_inst_window->rtp_stats.searchAc.access +
+ fp_inst_window->power.writeOp.dynamic *
+ fp_inst_window->rtp_stats.writeAc.access;
+ }
-void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
-
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str<< "Int Front End RAT:" << endl;
- cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Front End RAT:" << endl;
- cout << indent_str_next << "Area = " << fFRAT->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fFRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fFRAT->power.readOp.longer_channel_leakage:fFRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<<"Free List:" << endl;
- cout << indent_str_next << "Area = " << ifreeL->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ifreeL->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ifreeL->power.readOp.longer_channel_leakage:ifreeL->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ifreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
-
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- cout << indent_str<< "Int Retire RAT: " << endl;
- cout << indent_str_next << "Area = " << iRRAT->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << iRRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? iRRAT->power.readOp.longer_channel_leakage:iRRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << iRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Retire RAT:" << endl;
- cout << indent_str_next << "Area = " << fRRAT->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fRRAT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fRRAT->power.readOp.longer_channel_leakage:fRRAT->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "FP Free List:" << endl;
- cout << indent_str_next << "Area = " << ffreeL->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ffreeL->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ffreeL->power.readOp.longer_channel_leakage:ffreeL->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ffreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str<< "Int DCL:" << endl;
- cout << indent_str_next << "Peak Dynamic = " << idcl->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? idcl->power.readOp.longer_channel_leakage:idcl->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << idcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout << indent_str<<"FP DCL:" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fdcl->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fdcl->power.readOp.longer_channel_leakage:fdcl->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fdcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
- }
- }
- else
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str_next << "Int Front End RAT Peak Dynamic = " << iFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " << iFRAT->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Int Front End RAT Gate Leakage = " << iFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Peak Dynamic = " << fFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " << fFRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Front End RAT Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Free List Peak Dynamic = " << ifreeL->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Free List Subthreshold Leakage = " << ifreeL->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- cout << indent_str_next << "Int Retire RAT Peak Dynamic = " << iRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " << iRRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Int Retire RAT Gate Leakage = " << iRRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Peak Dynamic = " << fRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " << fRRAT->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Retire RAT Gate Leakage = " << fRRAT->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Free List Peak Dynamic = " << ffreeL->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Free List Subthreshold Leakage = " << ffreeL->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
- else
- {
- cout << indent_str_next << "Int DCL Peak Dynamic = " << idcl->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Int DCL Subthreshold Leakage = " << idcl->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP DCL Peak Dynamic = " << fdcl->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP DCL Subthreshold Leakage = " << fdcl->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+ if (ROB) {
+ ROB->tdp_stats.reset();
+ ROB->tdp_stats.readAc.access = core_params.commitW *
+ core_params.num_pipelines * ROB_duty_cycle;
+ ROB->tdp_stats.writeAc.access = core_params.issueW *
+ core_params.num_pipelines * ROB_duty_cycle;
+ ROB->rtp_stats.reset();
+ ROB->rtp_stats.readAc.access = core_stats.ROB_reads;
+ ROB->rtp_stats.writeAc.access = core_stats.ROB_writes;
+ ROB->power_t.reset();
+ ROB->power_t.readOp.dynamic +=
+ ROB->local_result.power.readOp.dynamic *
+ ROB->tdp_stats.readAc.access +
+ ROB->local_result.power.writeOp.dynamic *
+ ROB->tdp_stats.writeAc.access;
+ ROB->rt_power.reset();
+ ROB->rt_power.readOp.dynamic +=
+ ROB->local_result.power.readOp.dynamic *
+ ROB->rtp_stats.readAc.access +
+ ROB->local_result.power.writeOp.dynamic *
+ ROB->rtp_stats.writeAc.access;
+ }
+
+ output_data.reset();
+ if (int_inst_window) {
+ int_inst_window->output_data.subthreshold_leakage_power =
+ int_inst_window->power_t.readOp.leakage;
+ int_inst_window->output_data.gate_leakage_power =
+ int_inst_window->power_t.readOp.gate_leakage;
+ int_inst_window->output_data.peak_dynamic_power =
+ int_inst_window->power_t.readOp.dynamic * clockRate;
+ int_inst_window->output_data.runtime_dynamic_energy =
+ int_inst_window->rt_power.readOp.dynamic;
+ output_data += int_inst_window->output_data;
+ }
+ if (fp_inst_window) {
+ fp_inst_window->output_data.subthreshold_leakage_power =
+ fp_inst_window->power_t.readOp.leakage;
+ fp_inst_window->output_data.gate_leakage_power =
+ fp_inst_window->power_t.readOp.gate_leakage;
+ fp_inst_window->output_data.peak_dynamic_power =
+ fp_inst_window->power_t.readOp.dynamic * clockRate;
+ fp_inst_window->output_data.runtime_dynamic_energy =
+ fp_inst_window->rt_power.readOp.dynamic;
+ output_data += fp_inst_window->output_data;
+ }
+ if (ROB) {
+ ROB->output_data.peak_dynamic_power =
+ ROB->power_t.readOp.dynamic * clockRate;
+ ROB->output_data.runtime_dynamic_energy =
+ ROB->rt_power.readOp.dynamic;
+ output_data += ROB->output_data;
+ }
+ // Integer and FP instruction selection logic is not included in the
+ // roll-up due to the uninitialized area
+ /*
+ if (int_instruction_selection) {
+ output_data += int_instruction_selection->output_data;
+ }
+ if (fp_instruction_selection) {
+ output_data += fp_instruction_selection->output_data;
+ }
+ */
}
+void SchedulerU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
-void SchedulerU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double ROB_duty_cycle;
-// ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
-// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
-// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1;
- ROB_duty_cycle = 1;
- //init stats
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
- int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
- int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
- int_inst_window->tdp_stats = int_inst_window->stats_t;
- fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines;
- fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines;
- fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines;
- fp_inst_window->tdp_stats = fp_inst_window->stats_t;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle;
- ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle;
- ROB->tdp_stats = ROB->stats_t;
-
- /*
- * When inst commits, ROB must be read.
- * Because for Physcial register based cores, physical register tag in ROB
- * need to be read out and write into RRAT/CAM based RAT.
- * For RS based cores, register content that stored in ROB must be
- * read out and stored in architectural registers.
- *
- * if no-register is involved, the ROB read out operation when instruction commits can be ignored.
- * assuming 20% insts. belong this type.
- * TODO: ROB duty_cycle need to be revisited
- */
- }
+ McPATComponent::displayData(indent, plevel);
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
- int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
- int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
- int_inst_window->tdp_stats = int_inst_window->stats_t;
- }
+ if (core_params.core_ty == OOO) {
+ int_inst_window->displayData(indent + 4, plevel);
+ fp_inst_window->displayData(indent + 4, plevel);
+ if (core_params.ROB_size > 0) {
+ ROB->displayData(indent + 4, plevel);
+ }
+ } else if (core_params.multithreaded) {
+ int_inst_window->displayData(indent + 4, plevel);
+ }
- }
- else
- {//rtp
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads;
- int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes;
- int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses;
- int_inst_window->rtp_stats = int_inst_window->stats_t;
- fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads;
- fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes;
- fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses;
- fp_inst_window->rtp_stats = fp_inst_window->stats_t;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
-
- ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads;
- ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes;
- /* ROB need to be updated in RS based OOO when new values are produced,
- * this update may happen before the commit stage when ROB entry is released
- * 1. ROB write at instruction inserted in
- * 2. ROB write as results produced (for RS based OOO only)
- * 3. ROB read as instruction committed. For RS based OOO, data values are read out and sent to ARF
- * For Physical reg based OOO, no data stored in ROB, but register tags need to be
- * read out and used to set the RRAT and to recycle the register tag to free list buffer
- */
- ROB->rtp_stats = ROB->stats_t;
- }
+ // Integer and FP instruction selection logic is not included in the
+ // roll-up due to the uninitialized area
+ /*
+ if (int_instruction_selection) {
+ int_instruction_selection->displayData(indent + 4, plevel);
+ }
+ if (fp_instruction_selection) {
+ fp_instruction_selection->displayData(indent + 4, plevel);
+ }
+ */
+}
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
- int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
- int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions);
- int_inst_window->rtp_stats = int_inst_window->stats_t;
- }
+void LoadStoreU::computeEnergy() {
+ if (!exist) return;
+
+ LSQ->tdp_stats.reset();
+ LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LSQ->rtp_stats.reset();
+ // Flush overhead considered
+ LSQ->rtp_stats.readAc.access = (core_stats.load_instructions +
+ core_stats.store_instructions) * 2;
+ LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions +
+ core_stats.store_instructions) * 2;
+ LSQ->power_t.reset();
+ //every memory access involves at least two operations on LSQ
+ LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access *
+ (LSQ->local_result.power.searchOp.dynamic +
+ LSQ->local_result.power.readOp.dynamic) +
+ LSQ->tdp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic;
+ LSQ->rt_power.reset();
+ //every memory access involves at least two operations on LSQ
+ LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access *
+ (LSQ->local_result.power.searchOp.dynamic +
+ LSQ->local_result.power.readOp.dynamic) +
+ LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic;
+
+ if (LoadQ) {
+ LoadQ->tdp_stats.reset();
+ LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ LoadQ->rtp_stats.reset();
+ LoadQ->rtp_stats.readAc.access = core_stats.load_instructions +
+ core_stats.store_instructions;
+ LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions +
+ core_stats.store_instructions;
+ LoadQ->power_t.reset();
+ //every memory access involves at least two operations on LoadQ
+ LoadQ->power_t.readOp.dynamic +=
+ LoadQ->tdp_stats.readAc.access *
+ (LoadQ->local_result.power.searchOp.dynamic +
+ LoadQ->local_result.power.readOp.dynamic) +
+ LoadQ->tdp_stats.writeAc.access *
+ LoadQ->local_result.power.writeOp.dynamic;
+ LoadQ->rt_power.reset();
+ //every memory access involves at least two operations on LoadQ
+ LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access *
+ (LoadQ->local_result.power.searchOp.dynamic +
+ LoadQ->local_result.power.readOp.dynamic) +
+ LoadQ->rtp_stats.writeAc.access *
+ LoadQ->local_result.power.writeOp.dynamic;
}
- //computation engine
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->power_t.reset();
- fp_inst_window->power_t.reset();
-
- /* each instruction needs to write to scheduler, read out when all resources and source operands are ready
- * two search ops with one for each source operand
- *
- */
- int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
- + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
- + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access
- + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic;
-
- fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access
- + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access
- + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access
- + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->power_t.reset();
- ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access +
- ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic;
- }
+ McPATComponent::computeEnergy();
+
+ output_data.reset();
+ if (dcache) {
+ output_data += dcache->output_data;
+ }
+ if (LSQ) {
+ LSQ->output_data.peak_dynamic_power =
+ LSQ->power_t.readOp.dynamic * clockRate;
+ LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic;
+ output_data += LSQ->output_data;
+ }
+ if (LoadQ) {
+ LoadQ->output_data.peak_dynamic_power =
+ LoadQ->power_t.readOp.dynamic * clockRate;
+ LoadQ->output_data.runtime_dynamic_energy =
+ LoadQ->rt_power.readOp.dynamic;
+ output_data += LoadQ->output_data;
+ }
+}
+void LoadStoreU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
+ McPATComponent::displayData(indent, plevel);
+ if (LoadQ) {
+ LoadQ->displayData(indent + 4, plevel);
+ }
+ LSQ->displayData(indent + 4, plevel);
- }
- else if (coredynp.multithreaded)
- {
- int_inst_window->power_t.reset();
- int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
- + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
- + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access
- + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
- }
+}
- //assign values
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- power = power + int_inst_window->power + fp_inst_window->power;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg;
- power = power + ROB->power;
- }
+void MemManU::computeEnergy() {
+ if (!exist) return;
- }
- else if (coredynp.multithreaded)
- {
- // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
- int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- power = power + int_inst_window->power;
- }
+ itlb->tdp_stats.reset();
+ itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports;
+ itlb->tdp_stats.readAc.miss = 0;
+ itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access -
+ itlb->tdp_stats.readAc.miss;
+ itlb->rtp_stats.reset();
+ itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses;
+ itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses;
- }
- else
- {//rtp
- if (coredynp.core_ty==OOO)
- {
- int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg;
- rt_power = rt_power + ROB->rt_power;
- }
+ itlb->power_t.reset();
+ //FA spends most of its power in the tag, so use total accesses, not hits
+ itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access *
+ itlb->local_result.power.searchOp.dynamic +
+ itlb->tdp_stats.readAc.miss *
+ itlb->local_result.power.writeOp.dynamic;
+ itlb->rt_power.reset();
+ //FA spends most of its power in the tag, so use total accesses, not hits
+ itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access *
+ itlb->local_result.power.searchOp.dynamic +
+ itlb->rtp_stats.writeAc.access *
+ itlb->local_result.power.writeOp.dynamic;
+
+ dtlb->tdp_stats.reset();
+ dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports *
+ core_stats.LSU_duty_cycle;
+ dtlb->tdp_stats.readAc.miss = 0;
+ dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access -
+ dtlb->tdp_stats.readAc.miss;
+ dtlb->rtp_stats.reset();
+ dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses +
+ mem_man_stats.dtlb_write_misses;
+ dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses +
+ mem_man_stats.dtlb_read_misses;
- }
- else if (coredynp.multithreaded)
- {
- // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
- int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
- rt_power = rt_power + int_inst_window->rt_power;
- }
+ dtlb->power_t.reset();
+ //FA spends most of its power in the tag, so use total accesses, not hits
+ dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access *
+ dtlb->local_result.power.searchOp.dynamic +
+ dtlb->tdp_stats.readAc.miss *
+ dtlb->local_result.power.writeOp.dynamic;
+ dtlb->rt_power.reset();
+ //FA spends most of its power in the tag, so use total accesses, not hits
+ dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access *
+ dtlb->local_result.power.searchOp.dynamic +
+ dtlb->rtp_stats.writeAc.access *
+ dtlb->local_result.power.writeOp.dynamic;
+
+ output_data.reset();
+ if (itlb) {
+ itlb->output_data.peak_dynamic_power = itlb->power_t.readOp.dynamic *
+ clockRate;
+ itlb->output_data.runtime_dynamic_energy =
+ itlb->rt_power.readOp.dynamic;
+ output_data += itlb->output_data;
+ }
+ if (dtlb) {
+ dtlb->output_data.peak_dynamic_power =
+ dtlb->power_t.readOp.dynamic * clockRate;
+ dtlb->output_data.runtime_dynamic_energy =
+ dtlb->rt_power.readOp.dynamic;
+ output_data += dtlb->output_data;
}
-// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
-// cout<<"Scheduler power="<<power.readOp.dynamic<<"leakage="<<power.readOp.leakage<<endl;
-// cout<<"IW="<<int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.readAc.access +
-// + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access<<"leakage="<<int_inst_window->local_result.power.readOp.leakage<<endl;
-// cout<<"selection"<<instruction_selection->power.readOp.dynamic<<"leakage"<<instruction_selection->power.readOp.leakage<<endl;
}
-void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str << "Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "FP Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << fp_inst_window->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << fp_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? fp_inst_window->power.readOp.longer_channel_leakage:fp_inst_window->power.readOp.leakage ) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << fp_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- cout << indent_str<<"ROB:" << endl;
- cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else if (coredynp.multithreaded)
- {
- cout << indent_str << "Instruction Window:" << endl;
- cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- if (coredynp.core_ty==OOO)
- {
- cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "FP Instruction Window Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- cout << indent_str_next << "ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
- else if (coredynp.multithreaded)
- {
- cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+void MemManU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
+
+ McPATComponent::displayData(indent, plevel);
+ itlb->displayData(indent + 4, plevel);
+ dtlb->displayData(indent + 4, plevel);
}
-void LoadStoreU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
- dcache.caches->stats_t.readAc.miss = 0;
- dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
- dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
- dcache.caches->stats_t.writeAc.miss = 0;
- dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss;
- dcache.caches->tdp_stats = dcache.caches->stats_t;
-
- dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports;
- dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports;
- dcache.missb->tdp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.ifb->tdp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports;
- dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
- dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t;
- if (cache_p==Write_back)
- {
- dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports;
- dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports;
- dcache.wbb->tdp_stats = dcache.wbb->stats_t;
- }
+void RegFU::computeEnergy() {
+ /*
+ * The architectural RF and the physical RF cannot be present at the same time.
+ * Therefore, the RF stats can only refer to either the ARF or the PRF,
+ * and the same stats can be used for both.
+ */
+ if (!exist) return;
+
+ IRF->tdp_stats.reset();
+ IRF->tdp_stats.readAc.access =
+ core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS *
+ (core_stats.ALU_duty_cycle * 1.1 +
+ (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) *
+ core_params.num_pipelines;
+ IRF->tdp_stats.writeAc.access =
+ core_params.issueW *
+ (core_stats.ALU_duty_cycle * 1.1 +
+ (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) *
+ core_params.num_pipelines;
+ IRF->rtp_stats.reset();
+ IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads;
+ IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes;
+ if (core_params.regWindowing) {
+ IRF->rtp_stats.readAc.access += core_stats.function_calls *
+ RFWIN_ACCESS_MULTIPLIER;
+ IRF->rtp_stats.writeAc.access += core_stats.function_calls *
+ RFWIN_ACCESS_MULTIPLIER;
+ }
+ IRF->power_t.reset();
+ IRF->power_t.readOp.dynamic += IRF->tdp_stats.readAc.access *
+ IRF->local_result.power.readOp.dynamic +
+ IRF->tdp_stats.writeAc.access *
+ IRF->local_result.power.writeOp.dynamic;
+ IRF->rt_power.reset();
+ IRF->rt_power.readOp.dynamic +=
+ IRF->rtp_stats.readAc.access * IRF->local_result.power.readOp.dynamic +
+ IRF->rtp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic;
+
+ FRF->tdp_stats.reset();
+ FRF->tdp_stats.readAc.access =
+ FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 *
+ core_params.num_fp_pipelines;
+ FRF->tdp_stats.writeAc.access =
+ FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 *
+ core_params.num_fp_pipelines;
+ FRF->rtp_stats.reset();
+ FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads;
+ FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes;
+ if (core_params.regWindowing) {
+ FRF->rtp_stats.readAc.access += core_stats.function_calls *
+ RFWIN_ACCESS_MULTIPLIER;
+ FRF->rtp_stats.writeAc.access += core_stats.function_calls *
+ RFWIN_ACCESS_MULTIPLIER;
+ }
+ FRF->power_t.reset();
+ FRF->power_t.readOp.dynamic +=
+ FRF->tdp_stats.readAc.access * FRF->local_result.power.readOp.dynamic +
+ FRF->tdp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic;
+ FRF->rt_power.reset();
+ FRF->rt_power.readOp.dynamic +=
+ FRF->rtp_stats.readAc.access * FRF->local_result.power.readOp.dynamic +
+ FRF->rtp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic;
+
+ if (core_params.regWindowing) {
+ RFWIN->tdp_stats.reset();
+ RFWIN->tdp_stats.readAc.access = 0;
+ RFWIN->tdp_stats.writeAc.access = 0;
+ RFWIN->rtp_stats.reset();
+ RFWIN->rtp_stats.readAc.access =
+ core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
+ RFWIN->rtp_stats.writeAc.access =
+ core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
+ RFWIN->power_t.reset();
+ RFWIN->power_t.readOp.dynamic +=
+ RFWIN->tdp_stats.readAc.access *
+ RFWIN->local_result.power.readOp.dynamic +
+ RFWIN->tdp_stats.writeAc.access *
+ RFWIN->local_result.power.writeOp.dynamic;
+ RFWIN->rt_power.reset();
+ RFWIN->rt_power.readOp.dynamic +=
+ RFWIN->rtp_stats.readAc.access *
+ RFWIN->local_result.power.readOp.dynamic +
+ RFWIN->rtp_stats.writeAc.access *
+ RFWIN->local_result.power.writeOp.dynamic;
+ }
- LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- LSQ->tdp_stats = LSQ->stats_t;
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- LoadQ->tdp_stats = LoadQ->stats_t;
- }
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses;
- dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses;
- dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
- dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses;
- dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses;
- dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss;
- dcache.caches->rtp_stats = dcache.caches->stats_t;
-
- if (cache_p==Write_back)
- {
- dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.missb->rtp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.ifb->rtp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
-
- dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
- dcache.wbb->rtp_stats = dcache.wbb->stats_t;
- }
- else
- {
- dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.missb->rtp_stats = dcache.missb->stats_t;
-
- dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.ifb->rtp_stats = dcache.ifb->stats_t;
-
- dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
- dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
- }
+ output_data.reset();
+ if (IRF) {
+ IRF->output_data.peak_dynamic_power =
+ IRF->power_t.readOp.dynamic * clockRate;
+ IRF->output_data.subthreshold_leakage_power *=
+ core_params.num_hthreads;
+ IRF->output_data.gate_leakage_power *= core_params.num_hthreads;
+ IRF->output_data.runtime_dynamic_energy = IRF->rt_power.readOp.dynamic;
+ output_data += IRF->output_data;
+ }
+ if (FRF) {
+ FRF->output_data.peak_dynamic_power =
+ FRF->power_t.readOp.dynamic * clockRate;
+ FRF->output_data.subthreshold_leakage_power *=
+ core_params.num_hthreads;
+ FRF->output_data.gate_leakage_power *= core_params.num_hthreads;
+ FRF->output_data.runtime_dynamic_energy = FRF->rt_power.readOp.dynamic;
+ output_data += FRF->output_data;
+ }
+ if (RFWIN) {
+ RFWIN->output_data.peak_dynamic_power =
+ RFWIN->power_t.readOp.dynamic * clockRate;
+ RFWIN->output_data.runtime_dynamic_energy =
+ RFWIN->rt_power.readOp.dynamic;
+ output_data += RFWIN->output_data;
+ }
+}
- LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered
- LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;
- LSQ->rtp_stats = LSQ->stats_t;
+void RegFU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
- LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
- LoadQ->rtp_stats = LoadQ->stats_t;
- }
+ McPATComponent::displayData(indent, plevel);
- }
+ IRF->displayData(indent + 4, plevel);
+ FRF->displayData(indent + 4, plevel);
+ if (core_params.regWindowing) {
+ RFWIN->displayData(indent + 4, plevel);
+ }
+}
- dcache.power_t.reset();
- LSQ->power_t.reset();
- dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+
- dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+
- dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+
- dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic);
+void EXECU::computeEnergy() {
+ if (!exist) return;
- if (cache_p==Write_back)
- {//write miss will generate a write later
- dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic;
- }
+ int_bypass->set_params_stats(core_params.execu_int_bypass_ports,
+ core_stats.ALU_cdb_duty_cycle,
+ core_stats.cdb_alu_accesses);
- dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic +
- dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
- dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic +
- dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic;
- dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic +
- dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic;
- if (cache_p==Write_back)
- {
- dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic
- + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic;
- }
+ intTagBypass->set_params_stats(core_params.execu_int_bypass_ports,
+ core_stats.ALU_cdb_duty_cycle,
+ core_stats.cdb_alu_accesses);
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->power_t.reset();
- LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+
- LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ
-
- LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
- + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
-
- }
- else
- {
- LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
- + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
-
- }
-
- if (is_tdp)
- {
-// dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg +
-// (dcache.missb->local_result.power +
-// dcache.ifb->local_result.power +
-// dcache.prefetchb->local_result.power +
-// dcache.wbb->local_result.power)*pppm_Isub;
- dcache.power = dcache.power_t + (dcache.caches->local_result.power +
- dcache.missb->local_result.power +
- dcache.ifb->local_result.power +
- dcache.prefetchb->local_result.power) *pppm_lkg;
- if (cache_p==Write_back)
- {
- dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg;
- }
+ if (core_params.num_muls > 0) {
+ int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports,
+ core_stats.MUL_cdb_duty_cycle,
+ core_stats.cdb_mul_accesses);
- LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
- power = power + dcache.power + LSQ->power;
+ intTag_mul_Bypass->set_params_stats(core_params.execu_mul_bypass_ports,
+ core_stats.MUL_cdb_duty_cycle,
+ core_stats.cdb_mul_accesses);
+ }
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
- power = power + LoadQ->power;
- }
+ if (core_params.num_fpus > 0) {
+ fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports,
+ core_stats.FPU_cdb_duty_cycle,
+ core_stats.cdb_fpu_accesses);
+
+ fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports,
+ core_stats.FPU_cdb_duty_cycle,
+ core_stats.cdb_fpu_accesses);
}
- else
- {
-// dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
-// dcache.missb->local_result.power +
-// dcache.ifb->local_result.power +
-// dcache.prefetchb->local_result.power +
-// dcache.wbb->local_result.power)*pppm_lkg;
- dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
- dcache.missb->local_result.power +
- dcache.ifb->local_result.power +
- dcache.prefetchb->local_result.power )*pppm_lkg;
-
- if (cache_p==Write_back)
- {
- dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg;
- }
- LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
- rt_power = rt_power + dcache.rt_power + LSQ->rt_power;
+ McPATComponent::computeEnergy();
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
- rt_power = rt_power + LoadQ->rt_power;
- }
+ if (rfu) {  // each child below is optional; a null pointer means it is skipped
+ rfu->computeEnergy();
+ output_data += rfu->output_data;  // accumulate the child's results into this unit's output_data
+ }
+ if (scheu) {
+ scheu->computeEnergy();
+ output_data += scheu->output_data;
+ }
+ if (fp_u) {
+ fp_u->computeEnergy();
+ output_data += fp_u->output_data;
+ }
+ if (exeu) {
+ exeu->computeEnergy();
+ output_data += exeu->output_data;
+ }
+ if (mul) {
+ mul->computeEnergy();
+ output_data += mul->output_data;
 }
 }
+void EXECU::displayData(uint32_t indent, int plevel) {
+ if (!exist) return;
-void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
- if (is_tdp)
- {
- cout << indent_str << "Data Cache:" << endl;
- cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.core_ty==Inorder)
- {
- cout << indent_str << "Load/Store Queue:" << endl;
- cout << indent_str_next << "Area = " << LSQ->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
-
- {
- if (XML->sys.core[ithCore].load_buffer_size >0)
- {
- cout << indent_str << "LoadQ:" << endl;
- cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- cout << indent_str<< "StoreQ:" << endl;
- cout << indent_str_next << "Area = " << LSQ->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime<< " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str_next << "Data Cache Peak Dynamic = " << dcache.rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Data Cache Subthreshold Leakage = " << dcache.rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Data Cache Gate Leakage = " << dcache.rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.core_ty==Inorder)
- {
- cout << indent_str_next << "Load/Store Queue Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Load/Store Queue Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl;
- }
- else
- {
- cout << indent_str_next << "LoadQ Peak Dynamic = " << LoadQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "LoadQ Subthreshold Leakage = " << LoadQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "StoreQ Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+ // Print this unit's own summary first, then every child one level deeper.
+ McPATComponent::displayData(indent, plevel);
+ // EXECU::computeEnergy treats every child pointer as optional (it tests
+ // rfu, scheu, fp_u, exeu and mul for null), so the display path applies
+ // the same guards rather than dereferencing a child that was never built.
+ if (rfu) {
+ rfu->displayData(indent + 4, plevel);
+ }
+ if (scheu) {
+ scheu->displayData(indent + 4, plevel);
+ }
+ if (exeu) {
+ exeu->displayData(indent + 4, plevel);
+ }
+ // FP and multiplier units are shown only when the configuration declares
+ // at least one of them and the corresponding object exists.
+ if (core_params.num_fpus > 0 && fp_u) {
+ fp_u->displayData(indent + 4, plevel);
+ }
+ if (core_params.num_muls > 0 && mul) {
+ mul->displayData(indent + 4, plevel);
+ }
 }
-void MemManU::computeEnergy(bool is_tdp)
-{
-
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports;
- itlb->stats_t.readAc.miss = 0;
- itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
- itlb->tdp_stats = itlb->stats_t;
-
- dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
- dtlb->stats_t.readAc.miss = 0;
- dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
- dtlb->tdp_stats = dtlb->stats_t;
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses;
- itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses;
- itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
- itlb->rtp_stats = itlb->stats_t;
-
- dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses;
- dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses;
- dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
- dtlb->rtp_stats = dtlb->stats_t;
+// Runs the energy computation of each core sub-unit; the results are summed
+// into this core's output_data by the statements that follow.
+void Core::computeEnergy() {
+ // The aggregation later in this function treats every sub-unit pointer as
+ // optional (`if (ifu)`, `if (lsu)`, ...). Guard the compute calls the same
+ // way so an absent sub-unit is skipped instead of dereferenced.
+ if (ifu) {
+ ifu->computeEnergy();
+ }
+ if (lsu) {
+ lsu->computeEnergy();
+ }
+ if (mmu) {
+ mmu->computeEnergy();
+ }
+ if (exu) {
+ exu->computeEnergy();
+ }
+ // The renaming unit is only evaluated for out-of-order cores.
+ if (core_params.core_ty == OOO && rnu) {
+ rnu->computeEnergy();
 }
- itlb->power_t.reset();
- dtlb->power_t.reset();
- itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits
- +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic;
- dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits
- +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic;
-
- if (is_tdp)
- {
- itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg;
- dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
- power = power + itlb->power + dtlb->power;
- }
- else
- {
- itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg;
- dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
- rt_power = rt_power + itlb->rt_power + dtlb->rt_power;
- }
+ output_data.reset();  // clear the core-level totals before summing the sub-units
+ if (ifu) {  // every sub-unit is optional; a null pointer contributes nothing
+ output_data += ifu->output_data;
+ }
+ if (lsu) {
+ output_data += lsu->output_data;
+ }
+ if (mmu) {
+ output_data += mmu->output_data;
+ }
+ if (exu) {
+ output_data += exu->output_data;
+ }
+ if (rnu) {
+ output_data += rnu->output_data;
+ }
+ if (corepipe) {  // NOTE(review): no computeEnergy() call for corepipe, undiffCore or l2cache is visible in this function - confirm their output_data is filled in elsewhere
+ output_data += corepipe->output_data;
+ }
+ if (undiffCore) {
+ output_data += undiffCore->output_data;
+ }
+ if (l2cache) {
+ output_data += l2cache->output_data;
+ }
 }
-void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
-
-
- if (is_tdp)
- {
- cout << indent_str << "Itlb:" << endl;
- cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Dtlb:" << endl;
- cout << indent_str_next << "Area = " << dtlb->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << dtlb->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? dtlb->power.readOp.longer_channel_leakage:dtlb->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << dtlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- cout << indent_str_next << "Itlb Peak Dynamic = " << itlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Dtlb Peak Dynamic = " << dtlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage << " W" << endl;
- }
-
-}
+InstFetchU ::~InstFetchU() {
-void RegFU::computeEnergy(bool is_tdp)
-{
-/*
- * Architecture RF and physical RF cannot be present at the same time.
- * Therefore, the RF stats can only refer to either ARF or PRF;
- * And the same stats can be used for both.
- */
- if (!exist) return;
- if (is_tdp)
- {
- //init stats for Peak
- IRF->stats_t.readAc.access = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+
- (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
- IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+
- (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
- //Rule of Thumb: about 10% RF related instructions do not need to access ALUs
- IRF->tdp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
- FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
- FRF->tdp_stats = FRF->stats_t;
- if (coredynp.regWindowing)
- {
- RFWIN->stats_t.readAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports;
- RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports;
- RFWIN->tdp_stats = RFWIN->stats_t;
- }
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy
- IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes;
- IRF->rtp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads;
- FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes;
- FRF->rtp_stats = FRF->stats_t;
- if (coredynp.regWindowing)
- {
- RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls*16;
- RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16;
- RFWIN->rtp_stats = RFWIN->stats_t;
-
- IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads +
- XML->sys.core[ithCore].function_calls*16;
- IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes +
- XML->sys.core[ithCore].function_calls*16;
- IRF->rtp_stats = IRF->stats_t;
-
- FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads +
- XML->sys.core[ithCore].function_calls*16;;
- FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+
- XML->sys.core[ithCore].function_calls*16;;
- FRF->rtp_stats = FRF->stats_t;
- }
+ if (!exist) return;
+ if (IB) {
+ delete IB;
+ IB = NULL;
}
- IRF->power_t.reset();
- FRF->power_t.reset();
- IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic
- +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic);
- FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic
- +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic);
- if (coredynp.regWindowing)
- {
- RFWIN->power_t.reset();
- RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic +
- RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic);
- }
-
- if (is_tdp)
- {
- IRF->power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
- FRF->power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
- power = power + (IRF->power + FRF->power);
- if (coredynp.regWindowing)
- {
- RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
- power = power + RFWIN->power;
- }
+ if (ID_inst) {  // same release-then-clear pattern for each owned pointer
+ delete ID_inst;
+ ID_inst = NULL;
+ }
+ if (ID_operand) {
+ delete ID_operand;
+ ID_operand = NULL;
+ }
+ if (ID_misc) {
+ delete ID_misc;
+ ID_misc = NULL;
+ }
+ if (core_params.predictionW > 0) {  // BTB and BPT are released only when the prediction width is non-zero
+ if (BTB) {  // NOTE(review): presumably BTB/BPT are only allocated under the same condition - confirm against the constructor
+ delete BTB;
+ BTB = NULL;
 }
- else
- {
- IRF->rt_power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
- FRF->rt_power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
- rt_power = rt_power + (IRF->power_t + FRF->power_t);
- if (coredynp.regWindowing)
- {
- RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
- rt_power = rt_power + RFWIN->rt_power;
- }
+ if (BPT) {
+ delete BPT;
+ BPT = NULL;
}
+ }
+ // Release the instruction cache and clear the pointer afterwards, matching
+ // the release-then-clear pattern used for every other member in this
+ // destructor (icache was the only one left dangling).
+ if (icache) {
+ delete icache;
+ icache = NULL;
+ }
}
+BranchPredictor ::~BranchPredictor() {
-void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- { cout << indent_str << "Integer RF:" << endl;
- cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Floating Point RF:" << endl;
- cout << indent_str_next << "Area = " << FRF->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << FRF->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? FRF->power.readOp.longer_channel_leakage:FRF->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (coredynp.regWindowing)
- {
- cout << indent_str << "Register Windows:" << endl;
- cout << indent_str_next << "Area = " << RFWIN->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << RFWIN->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? RFWIN->power.readOp.longer_channel_leakage:RFWIN->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << RFWIN->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << indent_str_next << "Integer RF Peak Dynamic = " << IRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Integer RF Subthreshold Leakage = " << IRF->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Floating Point RF Peak Dynamic = " << FRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " << FRF->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Floating Point RF Gate Leakage = " << FRF->rt_power.readOp.gate_leakage << " W" << endl;
- if (coredynp.regWindowing)
- {
- cout << indent_str_next << "Register Windows Peak Dynamic = " << RFWIN->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Register Windows Subthreshold Leakage = " << RFWIN->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Register Windows Gate Leakage = " << RFWIN->rt_power.readOp.gate_leakage << " W" << endl;
- }
- }
+ // Skip teardown entirely when the predictor is flagged as non-existent.
+ if (!exist) return;
+ // `delete` on a null pointer is a no-op, so the per-member null tests are
+ // not needed; every pointer is still cleared after release.
+ delete globalBPT;
+ globalBPT = NULL;
+ delete localBPT;
+ localBPT = NULL;
+ delete L1_localBPT;
+ L1_localBPT = NULL;
+ delete L2_localBPT;
+ L2_localBPT = NULL;
+ delete chooser;
+ chooser = NULL;
+ delete RAS;
+ RAS = NULL;
 }
+RENAMINGU ::~RENAMINGU() {
-void EXECU::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double pppm_t[4] = {1,1,1,1};
-// rfu->power.reset();
-// rfu->rt_power.reset();
-// scheu->power.reset();
-// scheu->rt_power.reset();
-// exeu->power.reset();
-// exeu->rt_power.reset();
-
- rfu->computeEnergy(is_tdp);
- scheu->computeEnergy(is_tdp);
- exeu->computeEnergy(is_tdp);
- if (coredynp.num_fpus >0)
- {
- fp_u->computeEnergy(is_tdp);
- }
- if (coredynp.num_muls >0)
- {
- mul->computeEnergy(is_tdp);
- }
-
- if (is_tdp)
- {
- set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction.
- bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t;
- if (coredynp.num_muls >0)
- {
- set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction.
- bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t;
- power = power + mul->power;
- }
- if (coredynp.num_fpus>0)
- {
- set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction.
- bypass.power = bypass.power + fp_bypass->power*pppm_t + fpTagBypass->power*pppm_t ;
- power = power + fp_u->power;
- }
-
- power = power + rfu->power + exeu->power + bypass.power + scheu->power;
- }
- else
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses);
- bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t;
- bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t;
-
- if (coredynp.num_muls >0)
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction.
- bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t;
- rt_power = rt_power + mul->rt_power;
- }
-
- if (coredynp.num_fpus>0)
- {
- set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses);
- bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t;
- bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t;
- rt_power = rt_power + fp_u->rt_power;
- }
- rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power;
- }
+ // Skip teardown entirely when the renaming unit is flagged as non-existent.
+ if (!exist) return;
+ // Release each owned structure and clear its pointer afterwards.
+ if (iFRAT) {
+ delete iFRAT;
+ iFRAT = NULL;
+ }
+ if (fFRAT) {
+ delete fFRAT;
+ fFRAT = NULL;
+ }
+ if (iRRAT) {
+ delete iRRAT;
+ iRRAT = NULL;
+ }
+ // Fix: this slot used to repeat the iFRAT test, which is dead code (iFRAT
+ // is already NULL at this point) and left the fourth rename table
+ // unreleased.
+ // NOTE(review): assumes the class declares fRRAT as the floating-point
+ // counterpart of iRRAT - confirm against the RENAMINGU declaration.
+ if (fRRAT) {
+ delete fRRAT;
+ fRRAT = NULL;
+ }
+ if (ifreeL) {
+ delete ifreeL;
+ ifreeL = NULL;
+ }
+ if (ffreeL) {
+ delete ffreeL;
+ ffreeL = NULL;
+ }
+ if (idcl) {
+ delete idcl;
+ idcl = NULL;
+ }
+ if (fdcl) {
+ delete fdcl;
+ fdcl = NULL;
+ }
+ if (RAHT) {
+ delete RAHT;
+ RAHT = NULL;
+ }
 }
-void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-
-// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl;
- if (is_tdp)
- {
- cout << indent_str << "Register Files:" << endl;
- cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3){
- rfu->displayEnergy(indent+4,is_tdp);
- }
- cout << indent_str << "Instruction Scheduler:" << endl;
- cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel>3){
- scheu->displayEnergy(indent+4,is_tdp);
- }
- exeu->displayEnergy(indent,is_tdp);
- if (coredynp.num_fpus>0)
- {
- fp_u->displayEnergy(indent,is_tdp);
- }
- if (coredynp.num_muls >0)
- {
- mul->displayEnergy(indent,is_tdp);
- }
- cout << indent_str << "Results Broadcast Bus:" << endl;
- cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- cout << indent_str_next << "Register Files Peak Dynamic = " << rfu->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Register Files Subthreshold Leakage = " << rfu->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Register Files Gate Leakage = " << rfu->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " << scheu->rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " << scheu->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Instruction Sheduler Gate Leakage = " << scheu->rt_power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " << bypass.rt_power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Subthreshold Leakage = " << bypass.rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Results Broadcast Bus Gate Leakage = " << bypass.rt_power.readOp.gate_leakage << " W" << endl;
- }
+// Destructor: releases the queue and data-cache objects held by this unit.
+LoadStoreU ::~LoadStoreU() {
+ // Skip teardown entirely when the unit is flagged as non-existent.
+ if (!exist) return;
+ // `delete` on a null pointer is a no-op, so no null test is required;
+ // each pointer is cleared after release.
+ delete LSQ;
+ LSQ = NULL;
+ delete dcache;
+ dcache = NULL;
 }
-void Core::computeEnergy(bool is_tdp)
-{
- //power_point_product_masks
- double pppm_t[4] = {1,1,1,1};
- double rtp_pipeline_coe;
- double num_units = 4.0;
- if (is_tdp)
- {
- ifu->computeEnergy(is_tdp);
- lsu->computeEnergy(is_tdp);
- mmu->computeEnergy(is_tdp);
- exu->computeEnergy(is_tdp);
-
- if (coredynp.core_ty==OOO)
- {
- num_units = 5.0;
- rnu->computeEnergy(is_tdp);
- set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- if (rnu->exist)
- {
- rnu->power = rnu->power + corepipe->power*pppm_t;
- power = power + rnu->power;
- }
- }
-
- if (ifu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
-// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
- ifu->power = ifu->power + corepipe->power*pppm_t;
-// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
-// cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl;
- power = power + ifu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (lsu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- lsu->power = lsu->power + corepipe->power*pppm_t;
-// cout << "LSU = " << lsu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + lsu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (exu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- exu->power = exu->power + corepipe->power*pppm_t;
-// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + exu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
- if (mmu->exist)
- {
- set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- mmu->power = mmu->power + corepipe->power*pppm_t;
-// cout << "MMU = " << mmu->power.readOp.dynamic*clockRate << " W" << endl;
- power = power + mmu->power;
-// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl;
- }
+// Destructor for the memory management unit. When the unit exists, deletes
+// the itlb and dtlb members and resets both pointers to NULL; otherwise it
+// does nothing.
+MemManU ::~MemManU() {
- power = power + undiffCore->power;
+    if (exist) {
+        // Deleting a null pointer has no effect, so the members need no
+        // individual null checks.
+        delete itlb;
+        itlb = NULL;
+        delete dtlb;
+        dtlb = NULL;
+    }
+}
- if (XML->sys.Private_L2)
- {
+// Destructor for the register file unit. When the unit exists, deletes the
+// IRF, FRF and RFWIN members and resets each pointer to NULL; otherwise it
+// does nothing.
+RegFU ::~RegFU() {
- l2cache->computeEnergy(is_tdp);
- set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1);
- //l2cache->power = l2cache->power*pppm_t;
- power = power + l2cache->power*pppm_t;
- }
- }
- else
- {
- ifu->computeEnergy(is_tdp);
- lsu->computeEnergy(is_tdp);
- mmu->computeEnergy(is_tdp);
- exu->computeEnergy(is_tdp);
- if (coredynp.core_ty==OOO)
- {
- num_units = 5.0;
- rnu->computeEnergy(is_tdp);
- set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- if (rnu->exist)
- {
- rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t;
-
- rt_power = rt_power + rnu->rt_power;
- }
- }
- else
- {
- if (XML->sys.homogeneous_cores==1)
- {
- rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores;
- }
- else
- {
- rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles;
- }
- set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
- }
+    if (exist) {
+        // Deleting a null pointer has no effect, so the members need no
+        // individual null checks.
+        delete IRF;
+        IRF = NULL;
+        delete FRF;
+        FRF = NULL;
+        delete RFWIN;
+        RFWIN = NULL;
+    }
+}
- if (ifu->exist)
- {
- ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + ifu->rt_power ;
- }
- if (lsu->exist)
- {
- lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + lsu->rt_power;
- }
- if (exu->exist)
- {
- exu->rt_power = exu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + exu->rt_power;
- }
- if (mmu->exist)
- {
- mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t;
- rt_power = rt_power + mmu->rt_power ;
- }
+// Destructor for the scheduler unit. When the unit exists, deletes the
+// integer and floating-point instruction windows, the ROB and the two
+// instruction-selection members, resetting each pointer to NULL; otherwise
+// it does nothing.
+SchedulerU ::~SchedulerU() {
- rt_power = rt_power + undiffCore->power;
-// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- if (XML->sys.Private_L2)
- {
+    if (!exist) return;
+    if (int_inst_window) {
+        delete int_inst_window;
+        int_inst_window = NULL;
+    }
+    // Fix: this branch used to delete int_inst_window a second time (already
+    // NULL at this point), so fp_inst_window itself was never released.
+    if (fp_inst_window) {
+        delete fp_inst_window;
+        fp_inst_window = NULL;
+    }
+    if (ROB) {
+        delete ROB;
+        ROB = NULL;
+    }
+    if (int_instruction_selection) {
+        delete int_instruction_selection;
+        int_instruction_selection = NULL;
+    }
+    if (fp_instruction_selection) {
+        delete fp_instruction_selection;
+        fp_instruction_selection = NULL;
+    }
+}
- l2cache->computeEnergy(is_tdp);
- //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1);
- //l2cache->rt_power = l2cache->rt_power*pppm_t;
- rt_power = rt_power + l2cache->rt_power;
- }
- }
+// Destructor for the execution unit. When the unit exists, deletes every
+// bypass, functional-unit, register-file and scheduler member listed below
+// and resets each pointer to NULL; otherwise it does nothing.
+EXECU ::~EXECU() {
+    if (exist) {
+        // Deleting a null pointer has no effect, so the members need no
+        // individual null checks. Order matches the original sequence.
+        delete int_bypass;
+        int_bypass = NULL;
+        delete intTagBypass;
+        intTagBypass = NULL;
+        delete int_mul_bypass;
+        int_mul_bypass = NULL;
+        delete intTag_mul_Bypass;
+        intTag_mul_Bypass = NULL;
+        delete fp_bypass;
+        fp_bypass = NULL;
+        delete fpTagBypass;
+        fpTagBypass = NULL;
+        delete fp_u;
+        fp_u = NULL;
+        delete exeu;
+        exeu = NULL;
+        delete mul;
+        mul = NULL;
+        delete rfu;
+        rfu = NULL;
+        delete scheu;
+        scheu = NULL;
+    }
}
-void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
- if (is_tdp)
- {
- cout << "Core:" << endl;
- cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str << "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout<<endl;
- if (ifu->exist)
- {
- cout << indent_str << "Instruction Fetch Unit:" << endl;
- cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- ifu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (coredynp.core_ty==OOO)
- {
- if (rnu->exist)
- {
- cout << indent_str<< "Renaming Unit:" << endl;
- cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- rnu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
+// Destructor for a core. Deletes the ifu, lsu, rnu, mmu, exu, corepipe,
+// undiffCore and l2cache members in that order and resets each pointer to
+// NULL. Unlike the unit destructors, there is no `exist` check here.
+Core::~Core() {
- }
- if (lsu->exist)
- {
- cout << indent_str<< "Load Store Unit:" << endl;
- cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- lsu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (mmu->exist)
- {
- cout << indent_str<< "Memory Management Unit:" << endl;
- cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- mmu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
- if (exu->exist)
- {
- cout << indent_str<< "Execution Unit:" << endl;
- cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- exu->displayEnergy(indent+4,plevel,is_tdp);
- }
- }
-// if (plevel >2)
-// {
-// if (undiffCore->exist)
-// {
-// cout << indent_str << "Undifferentiated Core" << endl;
-// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl;
-// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl;
-//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Subthreshold Leakage = "
-// << (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl;
-// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl;
-// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl;
-// cout <<endl;
-// }
-// }
- if (XML->sys.Private_L2)
- {
-
- l2cache->displayEnergy(4,is_tdp);
- }
-
- }
- else
- {
-// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
- }
-}
-InstFetchU ::~InstFetchU(){
-
- if (!exist) return;
- if(IB) {delete IB; IB = 0;}
- if(ID_inst) {delete ID_inst; ID_inst = 0;}
- if(ID_operand) {delete ID_operand; ID_operand = 0;}
- if(ID_misc) {delete ID_misc; ID_misc = 0;}
- if (coredynp.predictionW>0)
- {
- if(BTB) {delete BTB; BTB = 0;}
- if(BPT) {delete BPT; BPT = 0;}
- }
+    // Deleting a null pointer has no effect, so the members need no
+    // individual null checks.
+    delete ifu;
+    ifu = NULL;
+    delete lsu;
+    lsu = NULL;
+    delete rnu;
+    rnu = NULL;
+    delete mmu;
+    mmu = NULL;
+    delete exu;
+    exu = NULL;
+    delete corepipe;
+    corepipe = NULL;
+    delete undiffCore;
+    undiffCore = NULL;
+    delete l2cache;
+    l2cache = NULL;
}
-BranchPredictor ::~BranchPredictor(){
+// Resets core_params to all-zero bytes, then sets peak_issueW and
+// peak_commitW to -1.
+// NOTE(review): -1 presumably marks "not provided"; set_core_param() rejects
+// a non-positive peak_issue_width - confirm.
+void Core::initialize_params() {
+    memset(&core_params, 0, sizeof(CoreParameters));
+
+    core_params.peak_commitW = -1;
+    core_params.peak_issueW = -1;
+}
- if (!exist) return;
- if(globalBPT) {delete globalBPT; globalBPT = 0;}
- if(localBPT) {delete localBPT; localBPT = 0;}
- if(L1_localBPT) {delete L1_localBPT; L1_localBPT = 0;}
- if(L2_localBPT) {delete L2_localBPT; L2_localBPT = 0;}
- if(chooser) {delete chooser; chooser = 0;}
- if(RAS) {delete RAS; RAS = 0;}
- }
+// Resets core_stats to all-zero bytes, then gives each duty-cycle statistic
+// assigned below a default of 1.0.
+// NOTE(review): the original assigned IFU_duty_cycle twice, while
+// BR_duty_cycle keeps the value left by memset - confirm that is intended.
+void Core::initialize_stats() {
+    const double full_duty = 1.0;
+
+    memset(&core_stats, 0, sizeof(CoreStatistics));
+
+    // ALU / FPU / MUL and their *_cdb counterparts.
+    core_stats.ALU_duty_cycle = full_duty;
+    core_stats.FPU_duty_cycle = full_duty;
+    core_stats.MUL_duty_cycle = full_duty;
+    core_stats.ALU_cdb_duty_cycle = full_duty;
+    core_stats.FPU_cdb_duty_cycle = full_duty;
+    core_stats.MUL_cdb_duty_cycle = full_duty;
+
+    // Pipeline, fetch, load/store and memory-management duty cycles.
+    core_stats.pipeline_duty_cycle = full_duty;
+    core_stats.IFU_duty_cycle = full_duty;
+    core_stats.LSU_duty_cycle = full_duty;
+    core_stats.MemManU_D_duty_cycle = full_duty;
+    core_stats.MemManU_I_duty_cycle = full_duty;
+}
-RENAMINGU ::~RENAMINGU(){
-
- if (!exist) return;
- if(iFRAT ) {delete iFRAT; iFRAT = 0;}
- if(fFRAT ) {delete fFRAT; fFRAT =0;}
- if(iRRAT) {delete iRRAT; iRRAT = 0;}
- if(iFRAT) {delete iFRAT; iFRAT = 0;}
- if(ifreeL) {delete ifreeL;ifreeL= 0;}
- if(ffreeL) {delete ffreeL;ffreeL= 0;}
- if(idcl) {delete idcl; idcl = 0;}
- if(fdcl) {delete fdcl; fdcl = 0;}
- if(RAHT) {delete RAHT; RAHT = 0;}
+// Populates core_params and core_stats for this core: reads every "param"
+// and "stat" child of xml_data, derives the dependent fields, and validates
+// the resulting configuration.
+void Core::set_core_param() {
+ initialize_params();
+ initialize_stats();
+
+ int num_children = xml_data->nChildNode("param");
+ int i;
+ // Pass 1: one iteration per "param" child; each ASSIGN_*_IF line names an
+ // XML parameter and the field it is stored into.
+ // NOTE(review): the ASSIGN_*_IF macros are defined outside this section;
+ // confirm their exact expansion before editing this chain.
+ for (i = 0; i < num_children; i++) {
+ XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ // NOTE(review): nothing visible here skips the chain below after the
+ // warning, so it appears to run with node_name == NULL - confirm.
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_STR_IF("name", name);
+ ASSIGN_INT_IF("opt_local", core_params.opt_local);
+ ASSIGN_FP_IF("clock_rate", core_params.clockRate);
+ ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
+ ASSIGN_INT_IF("opcode_width", core_params.opcode_width);
+ ASSIGN_INT_IF("x86", core_params.x86);
+ ASSIGN_INT_IF("Embedded", core_params.Embedded);
+ ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type);
+ ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length);
+ ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads);
+ ASSIGN_INT_IF("fetch_width", core_params.fetchW);
+ ASSIGN_INT_IF("decode_width", core_params.decodeW);
+ ASSIGN_INT_IF("issue_width", core_params.issueW);
+ ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW);
+ ASSIGN_INT_IF("commit_width", core_params.commitW);
+ ASSIGN_INT_IF("prediction_width", core_params.predictionW);
+ ASSIGN_INT_IF("ALU_per_core", core_params.num_alus);
+ ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus);
+ ASSIGN_INT_IF("MUL_per_core", core_params.num_muls);
+ ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW);
+ ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty,
+ Scheduler_type);
+ ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type);
+ ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size);
+ ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size);
+ ASSIGN_INT_IF("ROB_size", core_params.ROB_size);
+ ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc);
+ ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks);
+ ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width);
+ ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc);
+ ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks);
+ ASSIGN_INT_IF("register_window_size",
+ core_params.register_window_size);
+ ASSIGN_INT_IF("register_window_throughput",
+ core_params.register_window_throughput);
+ ASSIGN_INT_IF("register_window_latency",
+ core_params.register_window_latency);
+ ASSIGN_INT_IF("register_window_assoc",
+ core_params.register_window_assoc);
+ ASSIGN_INT_IF("register_window_nbanks",
+ core_params.register_window_nbanks);
+ ASSIGN_INT_IF("register_window_tag_width",
+ core_params.register_window_tag_width);
+ ASSIGN_INT_IF("register_window_rw_ports",
+ core_params.register_window_rw_ports);
+ ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size);
+ ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc);
+ ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks);
+ ASSIGN_INT_IF("phy_Regs_IRF_tag_width",
+ core_params.phy_Regs_IRF_tag_width);
+ ASSIGN_INT_IF("phy_Regs_IRF_rd_ports",
+ core_params.phy_Regs_IRF_rd_ports);
+ ASSIGN_INT_IF("phy_Regs_IRF_wr_ports",
+ core_params.phy_Regs_IRF_wr_ports);
+ ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size);
+ ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc);
+ ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks);
+ ASSIGN_INT_IF("phy_Regs_FRF_tag_width",
+ core_params.phy_Regs_FRF_tag_width);
+ ASSIGN_INT_IF("phy_Regs_FRF_rd_ports",
+ core_params.phy_Regs_FRF_rd_ports);
+ ASSIGN_INT_IF("phy_Regs_FRF_wr_ports",
+ core_params.phy_Regs_FRF_wr_ports);
+ ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks);
+ ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports);
+ ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks);
+ ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports);
+ ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks);
+ ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports);
+ ASSIGN_INT_IF("memory_ports", core_params.memory_ports);
+ ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size);
+ ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc);
+ ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks);
+ ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size);
+ ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc);
+ ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks);
+ ASSIGN_INT_IF("instruction_window_size",
+ core_params.instruction_window_size);
+ ASSIGN_INT_IF("fp_instruction_window_size",
+ core_params.fp_instruction_window_size);
+ ASSIGN_INT_IF("instruction_buffer_size",
+ core_params.instruction_buffer_size);
+ ASSIGN_INT_IF("instruction_buffer_assoc",
+ core_params.instruction_buffer_assoc);
+ ASSIGN_INT_IF("instruction_buffer_nbanks",
+ core_params.instruction_buffer_nbanks);
+ ASSIGN_INT_IF("instruction_buffer_tag_width",
+ core_params.instruction_buffer_tag_width);
+ ASSIGN_INT_IF("number_instruction_fetch_ports",
+ core_params.number_instruction_fetch_ports);
+ ASSIGN_INT_IF("RAS_size", core_params.RAS_size);
+ ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt,
+ Wire_type);
+ ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type);
+ ASSIGN_INT_IF("execu_int_bypass_ports",
+ core_params.execu_int_bypass_ports);
+ ASSIGN_INT_IF("execu_mul_bypass_ports",
+ core_params.execu_mul_bypass_ports);
+ ASSIGN_INT_IF("execu_fp_bypass_ports",
+ core_params.execu_fp_bypass_ports);
+ ASSIGN_ENUM_IF("execu_bypass_wire_type",
+ core_params.execu_bypass_wire_type, Wire_type);
+ ASSIGN_FP_IF("execu_bypass_base_width",
+ core_params.execu_bypass_base_width);
+ ASSIGN_FP_IF("execu_bypass_base_height",
+ core_params.execu_bypass_base_height);
+ ASSIGN_INT_IF("execu_bypass_start_wiring_level",
+ core_params.execu_bypass_start_wiring_level);
+ ASSIGN_FP_IF("execu_bypass_route_over_perc",
+ core_params.execu_bypass_route_over_perc);
+ ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator);
+ ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages);
+ ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages);
+ ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines);
+ ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines);
+ ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint);
+ ASSIGN_INT_IF("perThreadState", core_params.perThreadState);
+ // NOTE(review): "instruction_length" already appears earlier in this
+ // chain; this second entry looks redundant.
+ ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
+
+ else {
+ warnUnrecognizedParam(node_name);
}
+ }
-LoadStoreU ::~LoadStoreU(){
-
- if (!exist) return;
- if(LSQ) {delete LSQ; LSQ = 0;}
+ // Change from MHz to Hz
+ core_params.clockRate *= 1e6;
+ clockRate = core_params.clockRate;
+
+ // Peak commit width and FP decode width are taken from the peak issue
+ // width and the FP issue width respectively.
+ core_params.peak_commitW = core_params.peak_issueW;
+ core_params.fp_decodeW = core_params.fp_issueW;
+
+
+ // Pass 2: same scheme for the "stat" children, filling core_stats.
+ num_children = xml_data->nChildNode("stat");
+ for (i = 0; i < num_children; i++) {
+ XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle);
+ ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle);
+ ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle);
+ ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle);
+ ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle);
+ ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle);
+ ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle);
+ ASSIGN_FP_IF("total_cycles", core_stats.total_cycles);
+ ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles);
+ ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles);
+ ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle);
+ ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle);
+ ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle);
+ ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle);
+ ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle);
+ ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses);
+ ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses);
+ ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses);
+ ASSIGN_FP_IF("function_calls", core_stats.function_calls);
+ ASSIGN_FP_IF("total_instructions", core_stats.total_instructions);
+ ASSIGN_FP_IF("int_instructions", core_stats.int_instructions);
+ ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions);
+ ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions);
+ ASSIGN_FP_IF("branch_mispredictions",
+ core_stats.branch_mispredictions);
+ ASSIGN_FP_IF("load_instructions", core_stats.load_instructions);
+ ASSIGN_FP_IF("store_instructions", core_stats.store_instructions);
+ ASSIGN_FP_IF("committed_instructions",
+ core_stats.committed_instructions);
+ ASSIGN_FP_IF("committed_int_instructions",
+ core_stats.committed_int_instructions);
+ ASSIGN_FP_IF("committed_fp_instructions",
+ core_stats.committed_fp_instructions);
+ ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads);
+ ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes);
+ ASSIGN_FP_IF("rename_reads", core_stats.rename_reads);
+ ASSIGN_FP_IF("rename_writes", core_stats.rename_writes);
+ ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads);
+ ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes);
+ ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads);
+ ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes);
+ ASSIGN_FP_IF("inst_window_wakeup_accesses",
+ core_stats.inst_window_wakeup_accesses);
+ ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads);
+ ASSIGN_FP_IF("fp_inst_window_writes",
+ core_stats.fp_inst_window_writes);
+ ASSIGN_FP_IF("fp_inst_window_wakeup_accesses",
+ core_stats.fp_inst_window_wakeup_accesses);
+ ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads);
+ ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads);
+ ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes);
+ ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes);
+ ASSIGN_FP_IF("context_switches", core_stats.context_switches);
+ ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses);
+ ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses);
+ ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses);
+
+ else {
+ warnUnrecognizedStat(node_name);
}
+ }
-MemManU ::~MemManU(){
+ // Initialize a few variables
+ core_params.multithreaded = core_params.num_hthreads > 1 ? true : false;
+ core_params.pc_width = virtual_address_width;
+ core_params.v_address_width = virtual_address_width;
+ core_params.p_address_width = physical_address_width;
+ // Integer data width is data_path_width rounded up to a multiple of 32.
+ core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32;
+ core_params.fp_data_width = core_params.int_data_width;
+ // NOTE(review): log2() is applied without first checking that the
+ // architectural register file sizes are positive.
+ core_params.arch_ireg_width =
+ int(ceil(log2(core_params.archi_Regs_IRF_size)));
+ core_params.arch_freg_width
+ = int(ceil(log2(core_params.archi_Regs_FRF_size)));
+ core_params.num_IRF_entry = core_params.archi_Regs_IRF_size;
+ core_params.num_FRF_entry = core_params.archi_Regs_FRF_size;
+
+ // Parameters that must be strictly positive.
+ if (core_params.instruction_length <= 0) {
+ errorNonPositiveParam("instruction_length");
+ }
- if (!exist) return;
- if(itlb) {delete itlb; itlb = 0;}
- if(dtlb) {delete dtlb; dtlb = 0;}
- }
+ if (core_params.num_hthreads <= 0) {
+ errorNonPositiveParam("number_hardware_threads");
+ }
-RegFU ::~RegFU(){
+ if (core_params.opcode_width <= 0) {
+ errorNonPositiveParam("opcode_width");
+ }
- if (!exist) return;
- if(IRF) {delete IRF; IRF = 0;}
- if(FRF) {delete FRF; FRF = 0;}
- if(RFWIN) {delete RFWIN; RFWIN = 0;}
- }
+ if (core_params.instruction_buffer_size <= 0) {
+ errorNonPositiveParam("instruction_buffer_size");
+ }
-SchedulerU ::~SchedulerU(){
+ if (core_params.number_instruction_fetch_ports <= 0) {
+ errorNonPositiveParam("number_instruction_fetch_ports");
+ }
- if (!exist) return;
- if(int_inst_window) {delete int_inst_window; int_inst_window = 0;}
- if(fp_inst_window) {delete int_inst_window; int_inst_window = 0;}
- if(ROB) {delete ROB; ROB = 0;}
- if(instruction_selection) {delete instruction_selection;instruction_selection = 0;}
- }
+ // peak_commitW was copied from peak_issueW above, hence the assert.
+ if (core_params.peak_issueW <= 0) {
+ errorNonPositiveParam("peak_issue_width");
+ } else {
+ assert(core_params.peak_commitW > 0);
+ }
-EXECU ::~EXECU(){
-
- if (!exist) return;
- if(int_bypass) {delete int_bypass; int_bypass = 0;}
- if(intTagBypass) {delete intTagBypass; intTagBypass =0;}
- if(int_mul_bypass) {delete int_mul_bypass; int_mul_bypass = 0;}
- if(intTag_mul_Bypass) {delete intTag_mul_Bypass; intTag_mul_Bypass =0;}
- if(fp_bypass) {delete fp_bypass;fp_bypass = 0;}
- if(fpTagBypass) {delete fpTagBypass;fpTagBypass = 0;}
- if(fp_u) {delete fp_u;fp_u = 0;}
- if(exeu) {delete exeu;exeu = 0;}
- if(mul) {delete mul;mul = 0;}
- if(rfu) {delete rfu;rfu = 0;}
- if(scheu) {delete scheu; scheu = 0;}
+ // Out-of-order cores: physical register tag widths and free-list sizes
+ // come from the physical register file sizes (PhysicalRegFile) or from
+ // ROB_size (ReservationStation).
+ if (core_params.core_ty == OOO) {
+ if (core_params.scheu_ty == PhysicalRegFile) {
+ core_params.phy_ireg_width =
+ int(ceil(log2(core_params.phy_Regs_IRF_size)));
+ core_params.phy_freg_width =
+ int(ceil(log2(core_params.phy_Regs_FRF_size)));
+ core_params.num_ifreelist_entries =
+ core_params.num_IRF_entry = core_params.phy_Regs_IRF_size;
+ core_params.num_ffreelist_entries =
+ core_params.num_FRF_entry = core_params.phy_Regs_FRF_size;
+ } else if (core_params.scheu_ty == ReservationStation) {
+ core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size)));
+ core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size)));
+ core_params.num_ifreelist_entries = core_params.ROB_size;
+ core_params.num_ffreelist_entries = core_params.ROB_size;
}
+ }
-Core ::~Core(){
+ // Register windowing applies only to in-order cores with a positive
+ // register_window_size.
+ core_params.regWindowing =
+ (core_params.register_window_size > 0 &&
+ core_params.core_ty == Inorder) ? true : false;
- if(ifu) {delete ifu; ifu = 0;}
- if(lsu) {delete lsu; lsu = 0;}
- if(rnu) {delete rnu; rnu = 0;}
- if(mmu) {delete mmu; mmu = 0;}
- if(exu) {delete exu; exu = 0;}
- if(corepipe) {delete corepipe; corepipe = 0;}
- if(undiffCore) {delete undiffCore;undiffCore = 0;}
- if(l2cache) {delete l2cache;l2cache = 0;}
+ if (core_params.regWindowing) {
+ if (core_params.register_window_throughput <= 0) {
+ errorNonPositiveParam("register_window_throughput");
+ } else if (core_params.register_window_latency <= 0) {
+ errorNonPositiveParam("register_window_latency");
}
+ }
-void Core::set_core_param()
-{
- coredynp.opt_local = XML->sys.core[ithCore].opt_local;
- coredynp.x86 = XML->sys.core[ithCore].x86;
- coredynp.Embedded = XML->sys.Embedded;
- coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type;
- coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme;
- coredynp.fetchW = XML->sys.core[ithCore].fetch_width;
- coredynp.decodeW = XML->sys.core[ithCore].decode_width;
- coredynp.issueW = XML->sys.core[ithCore].issue_width;
- coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width;
- coredynp.commitW = XML->sys.core[ithCore].commit_width;
- coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width;
- coredynp.predictionW = XML->sys.core[ithCore].prediction_width;
- coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width;
- coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width;
- coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core;
- coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core;
- coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core;
-
-
- coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads;
- coredynp.multithreaded = coredynp.num_hthreads>1? true:false;
- coredynp.instruction_length = XML->sys.core[ithCore].instruction_length;
- coredynp.pc_width = XML->sys.virtual_address_width;
-
- coredynp.opcode_length = XML->sys.core[ithCore].opcode_width;
- coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width;
- coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0];
- coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0];
- coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1];
- coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1];
- coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32;
- coredynp.fp_data_width = coredynp.int_data_width;
- coredynp.v_address_width = XML->sys.virtual_address_width;
- coredynp.p_address_width = XML->sys.physical_address_width;
-
- coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme;
- coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size)));
- coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size)));
- coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size;
- coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size;
- coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle;
- coredynp.total_cycles = XML->sys.core[ithCore].total_cycles;
- coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles;
- coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles;
-
- //Max power duty cycle for peak power estimation
-// if (coredynp.core_ty==OOO)
-// {
-// coredynp.IFU_duty_cycle = 1;
-// coredynp.LSU_duty_cycle = 1;
-// coredynp.MemManU_I_duty_cycle =1;
-// coredynp.MemManU_D_duty_cycle =1;
-// coredynp.ALU_duty_cycle =1;
-// coredynp.MUL_duty_cycle =1;
-// coredynp.FPU_duty_cycle =1;
-// coredynp.ALU_cdb_duty_cycle =1;
-// coredynp.MUL_cdb_duty_cycle =1;
-// coredynp.FPU_cdb_duty_cycle =1;
-// }
-// else
-// {
- coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle;
- coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle;
- coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle;
- coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle;
- coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle;
- coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle;
- coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle;
- coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle;
- coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle;
- coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle;
- coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle;
-// }
-
-
- if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder)))
- {
- cout<<"Invalid Core Type"<<endl;
- exit(0);
- }
-// if (coredynp.core_ty==OOO)
-// {
-// cout<<"OOO processor models are being updated and will be available in next release"<<endl;
-// exit(0);
-// }
- if (!((coredynp.scheu_ty==PhysicalRegFile)||(coredynp.scheu_ty==ReservationStation)))
- {
- cout<<"Invalid OOO Scheduler Type"<<endl;
- exit(0);
- }
+ set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads,
+ core_params.num_hthreads, 0);
- if (!((coredynp.rm_ty ==RAMbased)||(coredynp.rm_ty ==CAMbased)))
- {
- cout<<"Invalid OOO Renaming Type"<<endl;
- exit(0);
- }
+ // Enum sanity checks.
+ // NOTE(review): these run after core_ty and scheu_ty were already used
+ // above, and exit(0) terminates with a success status - confirm intended.
+ if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) {
+ cout << "Invalid Core Type" << endl;
+ exit(0);
+ }
-if (coredynp.core_ty==OOO)
-{
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_IRF_size)));
- coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size)));
- coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size;
- coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = XML->sys.core[ithCore].phy_Regs_FRF_size;
- }
- else if (coredynp.scheu_ty==ReservationStation)
- {//ROB serves as Phy RF in RS based OOO
- coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
- coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
- coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size;
- coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size;
+ if (!((core_params.scheu_ty == PhysicalRegFile) ||
+ (core_params.scheu_ty == ReservationStation))) {
+ cout << "Invalid OOO Scheduler Type" << endl;
+ exit(0);
+ }
- }
+ if (!((core_params.rm_ty == RAMbased) ||
+ (core_params.rm_ty == CAMbased))) {
+ cout << "Invalid OOO Renaming Type" << endl;
+ exit(0);
+ }
}
- coredynp.globalCheckpoint = 32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See TR for reference.
- coredynp.perThreadState = 8;
- coredynp.instruction_length = 32;
- coredynp.clockRate = XML->sys.core[ithCore].clock_rate;
- coredynp.clockRate *= 1e6;
- coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false;
- coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate;
- set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0);
-}
diff --git a/ext/mcpat/core.h b/ext/mcpat/core.h
index 8ef3babdd..206fe6d58 100644
--- a/ext/mcpat/core.h
+++ b/ext/mcpat/core.h
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -33,230 +34,305 @@
#ifndef CORE_H_
#define CORE_H_
-#include "XML_Parse.h"
#include "array.h"
#include "basic_components.h"
+#include "cacheunit.h"
#include "interconnect.h"
#include "logic.h"
#include "parameter.h"
-#include "sharedcache.h"
-
-class BranchPredictor :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- CoreDynParam coredynp;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- ArrayST * globalBPT;
- ArrayST * localBPT;
- ArrayST * L1_localBPT;
- ArrayST * L2_localBPT;
- ArrayST * chooser;
- ArrayST * RAS;
- bool exist;
-
- BranchPredictor(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~BranchPredictor();
+
+// Macros used in the various core-related classes
+#define NUM_SOURCE_OPERANDS 2
+#define NUM_INT_INST_SOURCE_OPERANDS 2
+
+// Plain holder for branch-predictor configuration values. BranchPredictor
+// keeps one instance as its branch_pred_params member. The class declares no
+// constructor, so every field is uninitialized until the owner fills it in.
+// NOTE(review): field meanings below are inferred from the names only;
+// confirm against the code that populates them.
+class BranchPredictorParameters {
+public:
+ int assoc;
+ int nbanks;
+ // Local (per-branch) predictor sizing: two level sizes and an entry count.
+ int local_l1_predictor_size;
+ int local_l2_predictor_size;
+ int local_predictor_entries;
+ // Global predictor sizing.
+ int global_predictor_bits;
+ int global_predictor_entries;
+ // Chooser (selector between predictors) sizing.
+ int chooser_predictor_bits;
+ int chooser_predictor_entries;
+};
+
+// Branch predictor component of a core.
+//
+// Holds pointers to the predictor tables (global, local, two-level local,
+// chooser) and the return address stack, plus private copies of the core
+// parameters/statistics handed to the constructor.
+// NOTE(review): ownership of the ArrayST pointers is not visible in this
+// header; presumably they are released in ~BranchPredictor() - confirm.
+class BranchPredictor : public McPATComponent {
+public:
+    // Predictor storage arrays.
+    ArrayST* globalBPT;
+    ArrayST* localBPT;
+    ArrayST* L1_localBPT;
+    ArrayST* L2_localBPT;
+    ArrayST* chooser;
+    ArrayST* RAS;
+
+    InputParameter interface_ip;
+    CoreParameters core_params;
+    CoreStatistics core_stats;
+    BranchPredictorParameters branch_pred_params;
+    double scktRatio, chip_PR_overhead, macro_PR_overhead;
+    bool exist;
+
+    // The last parameter was spelled "exsit"; it is named exist_ here to
+    // match the sibling component constructors (SchedulerU, MemManU, ...).
+    // Parameter names in a declaration do not affect callers.
+    BranchPredictor(XMLNode* _xml_data, InputParameter* interface_ip_,
+                    const CoreParameters & _core_params,
+                    const CoreStatistics & _core_stats,
+                    bool exist_ = true);
+    void set_params_stats();
+    void computeEnergy();
+    void displayData(uint32_t indent = 0, int plevel = 100);
+    ~BranchPredictor();
+};
+
+// Plain holder for the branch target buffer (BTB) configuration used by
+// InstFetchU (stored there as inst_fetch_params). No constructor is declared,
+// so the fields are uninitialized until the owner assigns them.
+// NOTE(review): units of size/latency/throughput are not visible here.
+class InstFetchParameters {
+public:
+ int btb_size;
+ int btb_block_size;
+ int btb_assoc;
+ int btb_num_banks;
+ int btb_latency;
+ int btb_throughput;
+ int btb_rw_ports;
};
+// Plain holder for BTB access counts used by InstFetchU (stored there as
+// inst_fetch_stats). Fields are uninitialized until the owner assigns them.
+class InstFetchStatistics {
+public:
+ double btb_read_accesses;
+ double btb_write_accesses;
+};
+
+// Instruction fetch unit of a core.
+//
+// Aggregates the instruction cache, instruction buffer, branch target
+// buffer, branch predictor and three instruction decoders, together with
+// private copies of the core parameters/statistics given to the constructor.
+// NOTE(review): ownership of the sub-component pointers is not visible in
+// this header; presumably they are released in ~InstFetchU() - confirm.
+class InstFetchU : public McPATComponent {
+public:
+    // Sub-components.
+    CacheUnit* icache;
+    ArrayST* IB;
+    ArrayST* BTB;
+    BranchPredictor* BPT;
+    InstructionDecoder* ID_inst;
+    InstructionDecoder* ID_operand;
+    InstructionDecoder* ID_misc;
+
+    InputParameter interface_ip;
+    CoreParameters core_params;
+    CoreStatistics core_stats;
+    InstFetchParameters inst_fetch_params;
+    InstFetchStatistics inst_fetch_stats;
+    double scktRatio, chip_PR_overhead, macro_PR_overhead;
+    enum Cache_policy cache_p;
+    bool exist;
+
+    // The last parameter was spelled "exsit"; it is named exist_ here to
+    // match the sibling component constructors (SchedulerU, MemManU, ...).
+    // Parameter names in a declaration do not affect callers.
+    InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,
+               const CoreParameters & _core_params,
+               const CoreStatistics & _core_stats,
+               bool exist_ = true);
+    void set_params_stats();
+    void computeEnergy();
+    void displayData(uint32_t indent = 0, int plevel = 100);
+    ~InstFetchU();
+};
+
+
+class SchedulerU : public McPATComponent {
+public:
+ static int ROB_STATUS_BITS;
-class InstFetchU :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- CoreDynParam coredynp;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- enum Cache_policy cache_p;
- InstCache icache;
- ArrayST * IB;
- ArrayST * BTB;
- BranchPredictor * BPT;
- inst_decoder * ID_inst;
- inst_decoder * ID_operand;
- inst_decoder * ID_misc;
- bool exist;
-
- InstFetchU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~InstFetchU();
+ ArrayST* int_inst_window;
+ ArrayST* fp_inst_window;
+ ArrayST* ROB;
+ selection_logic* int_instruction_selection;
+ selection_logic* fp_instruction_selection;
+
+ InputParameter interface_ip;
+ CoreParameters core_params;
+ CoreStatistics core_stats;
+ double scktRatio, chip_PR_overhead, macro_PR_overhead;
+ double Iw_height, fp_Iw_height, ROB_height;
+ bool exist;
+
+ SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats,
+ bool exist_ = true);
+ void computeEnergy();
+ void displayData(uint32_t indent = 0, int plevel = 100);
+ ~SchedulerU();
};
+class RENAMINGU : public McPATComponent {
+public:
+ ArrayST* iFRAT;
+ ArrayST* fFRAT;
+ ArrayST* iRRAT;
+ ArrayST* fRRAT;
+ ArrayST* ifreeL;
+ ArrayST* ffreeL;
+ dep_resource_conflict_check* idcl;
+ dep_resource_conflict_check* fdcl;
+ ArrayST* RAHT;
+
+ InputParameter interface_ip;
+ CoreParameters core_params;
+ CoreStatistics core_stats;
+ bool exist;
-class SchedulerU :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- CoreDynParam coredynp;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- double Iw_height, fp_Iw_height,ROB_height;
- ArrayST * int_inst_window;
- ArrayST * fp_inst_window;
- ArrayST * ROB;
- selection_logic * instruction_selection;
+ RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats,
+ bool exist_ = true);
+ void computeEnergy();
+ void displayData(uint32_t indent = 0, int plevel = 100);
+ ~RENAMINGU();
+};
+
+class LoadStoreU : public McPATComponent {
+public:
+ CacheUnit* dcache;
+ ArrayST* LSQ;
+ ArrayST* LoadQ;
+
+ InputParameter interface_ip;
+ CoreParameters core_params;
+ CoreStatistics core_stats;
+ enum Cache_policy cache_p;
+ double scktRatio, chip_PR_overhead, macro_PR_overhead;
+ double lsq_height;
bool exist;
- SchedulerU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~SchedulerU();
+ LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats,
+ bool exist_ = true);
+ void computeEnergy();
+ void displayData(uint32_t indent = 0, int plevel = 100);
+ ~LoadStoreU();
};
-class RENAMINGU :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- double clockRate,executionTime;
- CoreDynParam coredynp;
- ArrayST * iFRAT;
- ArrayST * fFRAT;
- ArrayST * iRRAT;
- ArrayST * fRRAT;
- ArrayST * ifreeL;
- ArrayST * ffreeL;
- dep_resource_conflict_check * idcl;
- dep_resource_conflict_check * fdcl;
- ArrayST * RAHT;//register alias history table Used to store GC
- bool exist;
-
-
- RENAMINGU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~RENAMINGU();
+// Plain holder for instruction- and data-TLB configuration used by MemManU
+// (stored there as mem_man_params). The two groups of fields are parallel:
+// one set prefixed itlb_, one prefixed dtlb_. No constructor is declared, so
+// the fields are uninitialized until the owner assigns them.
+// NOTE(review): units of latency/throughput are not visible here.
+class MemoryManagementParams {
+public:
+ // Instruction TLB.
+ int itlb_number_entries;
+ double itlb_latency;
+ double itlb_throughput;
+ int itlb_assoc;
+ int itlb_nbanks;
+ // Data TLB.
+ int dtlb_number_entries;
+ double dtlb_latency;
+ double dtlb_throughput;
+ int dtlb_assoc;
+ int dtlb_nbanks;
};
-class LoadStoreU :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- CoreDynParam coredynp;
- enum Cache_policy cache_p;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- double lsq_height;
- DataCache dcache;
- ArrayST * LSQ;//it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
- ArrayST * LoadQ;
- bool exist;
-
- LoadStoreU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~LoadStoreU();
+// Plain holder for TLB activity counts used by MemManU (stored there as
+// mem_man_stats). The instruction TLB has combined access/miss totals, while
+// the data TLB tracks reads and writes separately. Fields are uninitialized
+// until the owner assigns them.
+class MemoryManagementStats {
+public:
+ // Instruction TLB.
+ double itlb_total_accesses;
+ double itlb_total_misses;
+ double itlb_conflicts;
+ // Data TLB.
+ double dtlb_read_accesses;
+ double dtlb_read_misses;
+ double dtlb_write_accesses;
+ double dtlb_write_misses;
+ double dtlb_conflicts;
};
-class MemManU :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- CoreDynParam coredynp;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- ArrayST * itlb;
- ArrayST * dtlb;
- bool exist;
-
- MemManU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~MemManU();
+// Memory management unit of a core: holds the instruction and data TLB
+// arrays together with their parameters/statistics and private copies of the
+// core parameters/statistics given to the constructor.
+// NOTE(review): ownership of itlb/dtlb is not visible in this header;
+// presumably they are released in ~MemManU() - confirm.
+class MemManU : public McPATComponent {
+public:
+ // TLB storage arrays.
+ ArrayST* itlb;
+ ArrayST* dtlb;
+
+ InputParameter interface_ip;
+ CoreParameters core_params;
+ CoreStatistics core_stats;
+ MemoryManagementParams mem_man_params;
+ MemoryManagementStats mem_man_stats;
+ double scktRatio, chip_PR_overhead, macro_PR_overhead;
+ bool exist;
+
+ // exist_ defaults to true; it is presumably stored in the exist member.
+ MemManU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_ = true);
+ void set_params_stats();
+ void computeEnergy();
+ void displayData(uint32_t indent = 0, int plevel = 100);
+ ~MemManU();
};
-class RegFU :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- CoreDynParam coredynp;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- double int_regfile_height, fp_regfile_height;
- ArrayST * IRF;
- ArrayST * FRF;
- ArrayST * RFWIN;
- bool exist;
-
- RegFU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~RegFU();
+// Register file unit of a core: integer register file (IRF), floating-point
+// register file (FRF) and a third array named RFWIN, plus private copies of
+// the core parameters/statistics given to the constructor.
+// NOTE(review): RFWIN is presumably the register-window backing store, and
+// ownership of the three arrays is presumably handled in ~RegFU() - confirm.
+class RegFU : public McPATComponent {
+public:
+ // Declared here only; the definition (and value) lives in a source file.
+ static int RFWIN_ACCESS_MULTIPLIER;
+
+ ArrayST* IRF;
+ ArrayST* FRF;
+ ArrayST* RFWIN;
+
+ InputParameter interface_ip;
+ CoreParameters core_params;
+ CoreStatistics core_stats;
+ double scktRatio, chip_PR_overhead, macro_PR_overhead;
+ double int_regfile_height, fp_regfile_height;
+ bool exist;
+
+ // exist_ defaults to true; it is presumably stored in the exist member.
+ RegFU(XMLNode* _xml_data,
+ InputParameter* interface_ip_, const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats,
+ bool exist_ = true);
+ void computeEnergy();
+ void displayData(uint32_t indent = 0, int plevel = 100);
+ ~RegFU();
};
-class EXECU :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- double lsq_height;
- CoreDynParam coredynp;
- RegFU * rfu;
- SchedulerU * scheu;
- FunctionalUnit * fp_u;
- FunctionalUnit * exeu;
- FunctionalUnit * mul;
- interconnect * int_bypass;
- interconnect * intTagBypass;
- interconnect * int_mul_bypass;
- interconnect * intTag_mul_Bypass;
- interconnect * fp_bypass;
- interconnect * fpTagBypass;
-
- Component bypass;
- bool exist;
-
- EXECU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_,const CoreDynParam & dyn_p_, bool exist_=true);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~EXECU();
+// Execution unit of a core: register files, scheduler, three functional
+// units and six bypass interconnects, plus private copies of the core
+// parameters/statistics given to the constructor.
+// NOTE(review): ownership of the sub-component pointers is not visible in
+// this header; presumably they are released in ~EXECU() - confirm.
+class EXECU : public McPATComponent {
+public:
+ RegFU* rfu;
+ SchedulerU* scheu;
+ // Functional units (names suggest FP, integer ALU and multiplier).
+ FunctionalUnit* fp_u;
+ FunctionalUnit* exeu;
+ FunctionalUnit* mul;
+ // Bypass networks: a data and a tag interconnect per functional unit.
+ Interconnect* int_bypass;
+ Interconnect* intTagBypass;
+ Interconnect* int_mul_bypass;
+ Interconnect* intTag_mul_Bypass;
+ Interconnect* fp_bypass;
+ Interconnect* fpTagBypass;
+
+ InputParameter interface_ip;
+ double scktRatio, chip_PR_overhead, macro_PR_overhead;
+ double lsq_height;
+ CoreParameters core_params;
+ CoreStatistics core_stats;
+ bool exist;
+
+ // lsq_height_ is supplied by the caller (presumably stored in lsq_height);
+ // exist_ defaults to true.
+ EXECU(XMLNode* _xml_data, InputParameter* interface_ip_,
+ double lsq_height_, const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, bool exist_ = true);
+ void computeEnergy();
+ void displayData(uint32_t indent = 0, int plevel = 100);
+ ~EXECU();
};
-class Core :public Component {
- public:
-
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- InstFetchU * ifu;
- LoadStoreU * lsu;
- MemManU * mmu;
- EXECU * exu;
- RENAMINGU * rnu;
- Pipeline * corepipe;
- UndiffCore * undiffCore;
- SharedCache * l2cache;
- CoreDynParam coredynp;
- //full_decoder inst_decoder;
- //clock_network clockNetwork;
- Core(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_);
- void set_core_param();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~Core();
+// Top-level model of one processor core. Aggregates the fetch, load/store,
+// memory-management, execution and renaming units, the pipeline and
+// undifferentiated-core models, and an L2 cache pointer, along with the
+// core-wide parameters and statistics.
+// NOTE(review): ownership of the sub-component pointers is not visible in
+// this header; presumably they are released in ~Core() - confirm.
+class Core : public McPATComponent {
+public:
+ // Sub-components.
+ InstFetchU* ifu;
+ LoadStoreU* lsu;
+ MemManU* mmu;
+ EXECU* exu;
+ RENAMINGU* rnu;
+ Pipeline* corepipe;
+ UndiffCore* undiffCore;
+ CacheUnit* l2cache;
+
+ // Index of this core, passed to the constructor as _ithCore.
+ int ithCore;
+ InputParameter interface_ip;
+ double scktRatio, chip_PR_overhead, macro_PR_overhead;
+ CoreParameters core_params;
+ CoreStatistics core_stats;
+
+ // TODO: Migrate component ID handling into the XML data to remove this
+ // ithCore variable
+ Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_);
+ // Setup helpers; their bodies are defined in the source file.
+ void initialize_params();
+ void initialize_stats();
+ void set_core_param();
+ void computeEnergy();
+ ~Core();
};
#endif /* CORE_H_ */
diff --git a/ext/mcpat/interconnect.cc b/ext/mcpat/interconnect.cc
index ba502b6a8..98fbc3e54 100644
--- a/ext/mcpat/interconnect.cc
+++ b/ext/mcpat/interconnect.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -33,173 +34,178 @@
#include <cassert>
#include <iostream>
-#include "globalvar.h"
+#include "basic_components.h"
#include "interconnect.h"
#include "wire.h"
-interconnect::interconnect(
- string name_,
- enum Device_ty device_ty_,
- double base_w, double base_h,
- int data_w, double len,const InputParameter *configure_interface,
- int start_wiring_level_,
- bool pipelinable_ ,
- double route_over_perc_ ,
- bool opt_local_,
- enum Core_type core_ty_,
- enum Wire_type wire_model,
- double width_s, double space_s,
- TechnologyParameter::DeviceType *dt
-)
- :name(name_),
- device_ty(device_ty_),
- in_rise_time(0),
- out_rise_time(0),
- base_width(base_w),
- base_height(base_h),
- data_width(data_w),
- wt(wire_model),
- width_scaling(width_s),
- space_scaling(space_s),
- start_wiring_level(start_wiring_level_),
- length(len),
- //interconnect_latency(1e-12),
- //interconnect_throughput(1e-12),
- opt_local(opt_local_),
- core_ty(core_ty_),
- pipelinable(pipelinable_),
- route_over_perc(route_over_perc_),
- deviceType(dt)
-{
-
- wt = Global;
- l_ip=*configure_interface;
- local_result = init_interface(&l_ip);
-
-
- max_unpipelined_link_delay = 0; //TODO
- min_w_nmos = g_tp.min_w_nmos_;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
-
-
-
- latency = l_ip.latency;
- throughput = l_ip.throughput;
- latency_overflow=false;
- throughput_overflow=false;
-
- /*
- * TODO: Add wiring option from semi-global to global automatically
- * And directly jump to global if semi-global cannot satisfy timing
- * Fat wires only available for global wires, thus
- * if signal wiring layer starts from semi-global,
- * the next layer up will be global, i.e., semi-global does
- * not have fat wires.
- */
- if (pipelinable == false)
- //Non-pipelinable wires, such as bypass logic, care latency
- {
- compute();
- if (opt_for_clk && opt_local)
- {
- while (delay > latency && width_scaling<3.0)
- {
- width_scaling *= 2;
- space_scaling *= 2;
- Wire winit(width_scaling, space_scaling);
- compute();
- }
- if (delay > latency)
- {
- latency_overflow=true;
- }
- }
- }
- else //Pipelinable wires, such as bus, does not care latency but throughput
- {
- /*
- * TODO: Add pipe regs power, area, and timing;
- * Pipelinable wires optimize latency first.
- */
- compute();
- if (opt_for_clk && opt_local)
- {
- while (delay > throughput && width_scaling<3.0)
- {
- width_scaling *= 2;
- space_scaling *= 2;
- Wire winit(width_scaling, space_scaling);
- compute();
- }
- if (delay > throughput)
- // insert pipeline stages
- {
- num_pipe_stages = (int)ceil(delay/throughput);
- assert(num_pipe_stages>0);
- delay = delay/num_pipe_stages + num_pipe_stages*0.05*delay;
- }
- }
- }
+double Interconnect::width_scaling_threshold = 3.0;
+
+/**
+ * Build a wire-bundle model and size it against its timing target.
+ *
+ * Per-bit delay, power and area come from calcWireData(). When both
+ * opt_for_clk and opt_local are set, wire width/spacing are doubled until the
+ * delay meets the target (latency for non-pipelinable wires, throughput for
+ * pipelinable ones) or width_scaling reaches width_scaling_threshold.
+ * Pipelinable wires that still miss the target get pipeline stages instead.
+ * Per-bit results are then scaled by data_w.
+ *
+ * @param _xml_data            XML node forwarded to McPATComponent.
+ * @param name_                Component name (used in the warning message).
+ * @param device_ty_, core_ty_ Used for the long-channel leakage reduction.
+ * @param base_w, base_h       Stored in base_width / base_height.
+ * @param data_w               Number of bits; multiplies power and area.
+ * @param len                  Wire length passed to the Wire model.
+ * @param configure_interface  Copied into l_ip; supplies latency/throughput.
+ * @param start_wiring_level_  Stored in start_wiring_level.
+ * @param _clockRate           Stored in clockRate.
+ * @param pipelinable_         Selects the throughput-driven path and the
+ *                             route-over area adjustment.
+ * @param route_over_perc_     Weight used for the route-over area blend.
+ * @param opt_local_           Enables the width/spacing scaling loop.
+ * @param wire_model           Wire type passed to the Wire model.
+ * @param width_s, space_s     Initial width/spacing scaling factors.
+ * @param dt                   Device type; supplies the n-to-p drive ratio.
+ *
+ * NOTE(review): num_pipe_stages is only assigned when pipeline stages are
+ * inserted and throughput_overflow is never set to true here; readers of
+ * either member should confirm they are initialized elsewhere.
+ */
+Interconnect::Interconnect(XMLNode* _xml_data, string name_,
+                           enum Device_ty device_ty_, double base_w,
+                           double base_h, int data_w,
+                           double len,
+                           const InputParameter *configure_interface,
+                           int start_wiring_level_, double _clockRate,
+                           bool pipelinable_, double route_over_perc_,
+                           bool opt_local_, enum Core_type core_ty_,
+                           enum Wire_type wire_model,
+                           double width_s, double space_s,
+                           TechnologyParameter::DeviceType *dt)
+    : McPATComponent(_xml_data), device_ty(device_ty_), in_rise_time(0),
+      out_rise_time(0), base_width(base_w), base_height(base_h),
+      data_width(data_w), wt(wire_model), width_scaling(width_s),
+      space_scaling(space_s), start_wiring_level(start_wiring_level_),
+      length(len), opt_local(opt_local_), core_ty(core_ty_),
+      pipelinable(pipelinable_), route_over_perc(route_over_perc_),
+      deviceType(dt) {
+    name = name_;
+    clockRate = _clockRate;
+    l_ip = *configure_interface;
+    local_result = init_interface(&l_ip, name);
+
+    max_unpipelined_link_delay = 0;
+    min_w_nmos = g_tp.min_w_nmos_;
+    min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
+
+    latency = l_ip.latency;
+    throughput = l_ip.throughput;
+    latency_overflow = false;
+    throughput_overflow = false;
+
+    if (!pipelinable) {
+        // Non-pipelinable wires, such as bypass logic, care about latency.
+        calcWireData();
+        if (opt_for_clk && opt_local) {
+            while (delay > latency &&
+                   width_scaling < width_scaling_threshold) {
+                width_scaling *= 2;
+                space_scaling *= 2;
+                // Constructed only for its side effect: presumably
+                // re-initializes the shared wire parameters for the new
+                // scaling before the wire is recomputed.
+                Wire winit(width_scaling, space_scaling);
+                calcWireData();
+            }
+            if (delay > latency) {
+                latency_overflow = true;
+            }
+        }
+    } else {
+        // Pipelinable wires, such as buses, care about throughput rather
+        // than latency.
+        calcWireData();
+        if (opt_for_clk && opt_local) {
+            while (delay > throughput &&
+                   width_scaling < width_scaling_threshold) {
+                width_scaling *= 2;
+                space_scaling *= 2;
+                Wire winit(width_scaling, space_scaling);
+                calcWireData();
+            }
+            if (delay > throughput) {
+                // Insert pipeline stages; each stage adds 5% of the
+                // unpipelined delay as overhead.
+                num_pipe_stages = (int)ceil(delay / throughput);
+                assert(num_pipe_stages > 0);
+                delay = delay / num_pipe_stages +
+                    num_pipe_stages * 0.05 * delay;
+            }
+        }
+    }
+
+    // Keep the per-bit numbers, then scale the totals by the bus width.
+    power_bit = power;
+    power.readOp.dynamic *= data_width;
+    power.readOp.leakage *= data_width;
+    power.readOp.gate_leakage *= data_width;
+    area.set_area(area.get_area() * data_width);
+    no_device_under_wire_area.h *= data_width;
+
+    if (latency_overflow) {
+        cout << "Warning: " << name
+             << " wire structure cannot satisfy latency constraint." << endl;
+    }
+
+    assert(power.readOp.dynamic > 0);
+    assert(power.readOp.leakage > 0);
+    assert(power.readOp.gate_leakage > 0);
+
+    double long_channel_device_reduction =
+        longer_channel_device_reduction(device_ty, core_ty);
+
+    double sckRation = g_tp.sckt_co_eff;
+    power.readOp.dynamic *= sckRation;
+    power.writeOp.dynamic *= sckRation;
+    power.searchOp.dynamic *= sckRation;
+
+    power.readOp.longer_channel_leakage =
+        power.readOp.leakage * long_channel_device_reduction;
+
+    // Only global wires have the option of routing over other logic.
+    if (pipelinable)
+        area.set_area(area.get_area() * route_over_perc +
+                      no_device_under_wire_area.get_area() *
+                      (1 - route_over_perc));
+
+    // Was "Wire wreset();", which declares a function (most vexing parse)
+    // and constructs nothing. Default-construct a Wire so the default
+    // scaling is presumably restored after the winit objects above.
+    Wire wreset;
+}
- power_bit = power;
- power.readOp.dynamic *= data_width;
- power.readOp.leakage *= data_width;
- power.readOp.gate_leakage *= data_width;
- area.set_area(area.get_area()*data_width);
- no_device_under_wire_area.h *= data_width;
- if (latency_overflow==true)
- cout<< "Warning: "<< name <<" wire structure cannot satisfy latency constraint." << endl;
+void
+Interconnect::calcWireData() {
- assert(power.readOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
- assert(power.readOp.gate_leakage > 0);
+ Wire *wtemp1 = 0;
+ wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling);
+ delay = wtemp1->delay;
+ power.readOp.dynamic = wtemp1->power.readOp.dynamic;
+ power.readOp.leakage = wtemp1->power.readOp.leakage;
+ power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage;
- double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
+ area.set_area(wtemp1->area.get_area());
+ no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing);
+ no_device_under_wire_area.w = length;
- double sckRation = g_tp.sckt_co_eff;
- power.readOp.dynamic *= sckRation;
- power.writeOp.dynamic *= sckRation;
- power.searchOp.dynamic *= sckRation;
+ if (wtemp1)
+ delete wtemp1;
- power.readOp.longer_channel_leakage =
- power.readOp.leakage*long_channel_device_reduction;
-
- if (pipelinable)//Only global wires has the option to choose whether routing over or not
- area.set_area(area.get_area()*route_over_perc + no_device_under_wire_area.get_area()*(1-route_over_perc));
-
- Wire wreset();
}
-
-
void
-interconnect::compute()
-{
-
- Wire *wtemp1 = 0;
- wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling);
- delay = wtemp1->delay;
- power.readOp.dynamic = wtemp1->power.readOp.dynamic;
- power.readOp.leakage = wtemp1->power.readOp.leakage;
- power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage;
-
- area.set_area(wtemp1->area.get_area());
- no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing);
- no_device_under_wire_area.w = length;
+Interconnect::computeEnergy() {
+ // Derives the peak and runtime power numbers from the per-wire `power`
+ // computed at construction, using the values stored by set_params_stats(),
+ // and publishes them in output_data.
+ // NOTE(review): the four set_pppm factors are presumably (dynamic,
+ // leakage, gate leakage, short-circuit) multipliers - confirm.
+ double pppm_t[4] = {1, 1, 1, 1};
+
+ // Peak (TDP) power: first/last factors use active_ports * duty_cycle, the
+ // middle two use active_ports alone.
+ power_t.reset();
+ set_pppm(pppm_t, int_params.active_ports * int_stats.duty_cycle,
+ int_params.active_ports, int_params.active_ports,
+ int_params.active_ports * int_stats.duty_cycle);
+ power_t = power * pppm_t;
+
+ // Runtime power: same as above but the first/last factors use the access
+ // count instead of active_ports * duty_cycle.
+ rt_power.reset();
+ set_pppm(pppm_t, int_stats.accesses, int_params.active_ports,
+ int_params.active_ports, int_stats.accesses);
+ rt_power = power * pppm_t;
+
+ // Only the peak dynamic figure is multiplied by clockRate; the runtime
+ // dynamic figure is reported unscaled.
+ output_data.peak_dynamic_power = power_t.readOp.dynamic * clockRate;
+ output_data.subthreshold_leakage_power = power_t.readOp.leakage;
+ output_data.gate_leakage_power = power_t.readOp.gate_leakage;
+ output_data.runtime_dynamic_energy = rt_power.readOp.dynamic;
+}
- if (wtemp1)
- delete wtemp1;
+// Publish the wire area in output_data, divided by 1e6.
+// NOTE(review): the divisor is presumably a um^2 -> mm^2 conversion - confirm.
+void
+Interconnect::computeArea() {
+    const double raw_area = area.get_area();
+    output_data.area = raw_area / 1e6;
+}
+// Record the activity figures that computeEnergy() reads later:
+// active_ports goes into int_params, duty_cycle and accesses into int_stats.
+void
+Interconnect::set_params_stats(double active_ports,
+                               double duty_cycle, double accesses) {
+    int_stats.accesses = accesses;
+    int_stats.duty_cycle = duty_cycle;
+    int_params.active_ports = active_ports;
}
-void interconnect::leakage_feedback(double temperature)
-{
+void Interconnect::leakage_feedback(double temperature) {
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
- uca_org_t init_result = init_interface(&l_ip); // init_result is dummy
+ uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
- compute();
+ calcWireData();
power_bit = power;
power.readOp.dynamic *= data_width;
@@ -210,13 +216,15 @@ void interconnect::leakage_feedback(double temperature)
assert(power.readOp.leakage > 0);
assert(power.readOp.gate_leakage > 0);
- double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(device_ty,core_ty);
double sckRation = g_tp.sckt_co_eff;
power.readOp.dynamic *= sckRation;
power.writeOp.dynamic *= sckRation;
power.searchOp.dynamic *= sckRation;
- power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage*long_channel_device_reduction;
}
diff --git a/ext/mcpat/interconnect.h b/ext/mcpat/interconnect.h
index 4cf42dafd..2ae39c5a2 100644
--- a/ext/mcpat/interconnect.h
+++ b/ext/mcpat/interconnect.h
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -42,46 +43,31 @@
#include "subarray.h"
#include "wire.h"
-// leakge power includes entire htree in a bank (when uca_tree == false)
-// leakge power includes only part to one bank when uca_tree == true
+// Plain holder for interconnect configuration. The single field is left
+// uninitialized by this class.
+// NOTE(review): presumably filled by Interconnect::set_params_stats() -
+// confirm against interconnect.cc.
+class InterconnectParameters {
+public:
+ double active_ports;
+};
-class interconnect : public Component
-{
- public:
- interconnect(
- string name_,
- enum Device_ty device_ty_,
- double base_w, double base_h, int data_w, double len,
- const InputParameter *configure_interface, int start_wiring_level_,
- bool pipelinable_ = false,
- double route_over_perc_ =0.5,
- bool opt_local_=true,
- enum Core_type core_ty_=Inorder,
- enum Wire_type wire_model=Global,
- double width_s=1.0, double space_s=1.0,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
- );
+// Plain holder for interconnect activity figures. Both fields are left
+// uninitialized by this class.
+// NOTE(review): presumably filled by Interconnect::set_params_stats() -
+// confirm against interconnect.cc.
+class InterconnectStatistics {
+public:
+ double duty_cycle;
+ double accesses;
+};
- ~interconnect() {};
+class Interconnect : public McPATComponent {
+public:
+ static double width_scaling_threshold;
- void compute();
- string name;
- enum Device_ty device_ty;
+ enum Device_ty device_ty;
double in_rise_time, out_rise_time;
- InputParameter l_ip;
- uca_org_t local_result;
+ InputParameter l_ip;
+ uca_org_t local_result;
Area no_device_under_wire_area;
- void set_in_rise_time(double rt)
- {
- in_rise_time = rt;
- }
-
- void leakage_feedback(double temperature);
double max_unpipelined_link_delay;
powerDef power_bit;
double wire_bw;
- double init_wire_bw; // bus width at root
+ double init_wire_bw;
double base_width;
double base_height;
int data_width;
@@ -92,19 +78,39 @@ class interconnect : public Component
double min_w_nmos;
double min_w_pmos;
double latency, throughput;
- bool latency_overflow;
- bool throughput_overflow;
- double interconnect_latency;
- double interconnect_throughput;
+ bool latency_overflow;
+ bool throughput_overflow;
+ double interconnect_latency;
+ double interconnect_throughput;
bool opt_local;
enum Core_type core_ty;
bool pipelinable;
double route_over_perc;
- int num_pipe_stages;
-
- private:
- TechnologyParameter::DeviceType *deviceType;
+ int num_pipe_stages;
+ TechnologyParameter::DeviceType* deviceType;
+ InterconnectParameters int_params;
+ InterconnectStatistics int_stats;
+ Interconnect(XMLNode* _xml_data, string name_,
+ enum Device_ty device_ty_, double base_w,
+ double base_h, int data_w, double len,
+ const InputParameter *configure_interface,
+ int start_wiring_level_,
+ double _clockRate = 0.0f,
+ bool pipelinable_ = false, double route_over_perc_ = 0.5,
+ bool opt_local_ = true, enum Core_type core_ty_ = Inorder,
+ enum Wire_type wire_model = Global, double width_s = 1.0,
+ double space_s = 1.0,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+private:
+ void calcWireData();
+public:
+ void computeArea();
+ void computeEnergy();
+ void set_params_stats(double active_ports,
+ double duty_cycle, double accesses);
+ void leakage_feedback(double temperature);
+ ~Interconnect() {};
};
#endif
diff --git a/ext/mcpat/iocontrollers.cc b/ext/mcpat/iocontrollers.cc
index 70b0f2dcb..4a175d841 100644
--- a/ext/mcpat/iocontrollers.cc
+++ b/ext/mcpat/iocontrollers.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#include <algorithm>
@@ -34,14 +35,12 @@
#include <iostream>
#include <string>
-#include "XML_Parse.h"
#include "basic_circuit.h"
-#include "basic_components.h"
+#include "common.h"
#include "const.h"
#include "io.h"
#include "iocontrollers.h"
#include "logic.h"
-#include "parameter.h"
/*
SUN Niagara 2 I/O power analysis:
@@ -69,378 +68,473 @@ Further, if assuming I/O logic power is about 50% of I/Os then Total energy of F
*
*/
-NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_)
-:XML(XML_interface),
- interface_ip(*interface_ip_)
- {
- local_result = init_interface(&interface_ip);
-
- double frontend_area, phy_area, mac_area, SerDer_area;
- double frontend_dyn, mac_dyn, SerDer_dyn;
- double frontend_gates, mac_gates, SerDer_gates;
- double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- double NMOS_sizing, PMOS_sizing;
-
- set_niu_param();
-
- if (niup.type == 0) //high performance NIU
- {
- //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm.
- mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
- //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS
- frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
- //SerDer is very hard to scale
- SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
- phy_area = frontend_area + SerDer_area;
- //total area
- area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
- //Power
- //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
- mac_dyn = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
- //Cadence ChipEstimate using 65nm soft IP;
- frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
- //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
- //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
- SerDer_dyn = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
- SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU
-
- //Cadence ChipEstimate using 65nm
- mac_gates = 111700;
- frontend_gates = 320000;
- SerDer_gates = 200000;
- NMOS_sizing = 5*g_tp.min_w_nmos_;
- PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-
-
- }
- else
- {//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect
- // ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not
- // a simple summation of all IPs. Ignore this effect
- mac_area = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- frontend_area = 0.1 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer
- SerDer_area = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique"
- //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology
- //total area
- area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
- //Power
- //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
- mac_dyn = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
- //Cadence ChipEstimate using 65nm soft IP;
- frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
- //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
- SerDer_dyn = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
- SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU
-
- mac_gates = 111700;
- frontend_gates = 52000;
- SerDer_gates = 199260;
-
- NMOS_sizing = g_tp.min_w_nmos_;
- PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-
- }
-
- power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
- power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
- power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
- power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- }
-
-void NIUController::computeEnergy(bool is_tdp)
-{
- if (is_tdp)
- {
+NIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_)
+ : McPATComponent(_xml_data, interface_ip_) {
+ name = "NIU";
+ set_niu_param();
+}
+void NIUController::computeArea() {
+ double mac_area;
+ double frontend_area;
+ double SerDer_area;
+
+ if (niup.type == 0) { //high performance NIU
+ //Area estimation based on average of die photo from Niagara 2 and
+ //Cadence ChipEstimate using 65nm.
+ mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um / 0.065);
+ //Area estimation based on average of die photo from Niagara 2, ISSCC
+ //"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
+ //and "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface
+ //With Robust VCO Tuning Technique" Frontend is PCS
+ frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 *
+ (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065);
+ //Area estimation based on average of die photo from Niagara 2 and
+ //Cadence ChipEstimate hard IP @65nm.
+ //SerDer is very hard to scale
+ SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um /
+ 0.065);//* (interface_ip.F_sz_um/0.065);
+ } else {
+ //Low power implementations are mostly from Cadence ChipEstimator;
+ //Ignore the multiple IP effect
+ // ---When there are multiple IP (same kind or not) selected, Cadence
+ //ChipEstimator results are not a simple summation of all IPs.
+ //Ignore this effect
+ mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um / 0.065);
+ frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer
+ SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um/0.065);
+ //Compare 130nm implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet
+ //Transceiver and XAUI Interface With Robust VCO Tuning Technique"
+ //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can
+ //scale perfectly with the technology
+ }
- power = power_t;
- power.readOp.dynamic *= niup.duty_cycle;
+ //total area
+ output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6;
+ }
+void NIUController::computeEnergy() {
+ double mac_dyn;
+ double frontend_dyn;
+ double SerDer_dyn;
+ double frontend_gates;
+ double mac_gates;
+ double SerDer_gates;
+ double NMOS_sizing;
+ double PMOS_sizing;
+ double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+
+ if (niup.type == 0) { //high performance NIU
+ //Power
+ //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
+ //E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
+ //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
+ mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
+ 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
+ //Cadence ChipEstimate using 65nm soft IP;
+ frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 *
+ g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+ //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
+ //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
+ SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) *
+ g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
+
+ //Cadence ChipEstimate using 65nm
+ mac_gates = 111700;
+ frontend_gates = 320000;
+ SerDer_gates = 200000;
+ NMOS_sizing = 5 * g_tp.min_w_nmos_;
+ PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+ } else {
+ //Power
+ //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
+ //E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
+ //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
+ mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd
+ / 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
+ //Cadence ChipEstimate using 65nm soft IP;
+ frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 *
+ g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+ //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
+ SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) *
+ g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
+
+ mac_gates = 111700;
+ frontend_gates = 52000;
+ SerDer_gates = 199260;
+ NMOS_sizing = g_tp.min_w_nmos_;
+ PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
}
- else
- {
- rt_power = power_t;
- rt_power.readOp.dynamic *= niup.perc_load;
- }
+
+ //convert to energy per clock cycle of whole NIU
+ SerDer_dyn /= niup.clockRate;
+
+ power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
+ power.readOp.leakage = (mac_gates + frontend_gates + frontend_gates) *
+ cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(Uncore_device);
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage * long_channel_device_reduction;
+ power.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates) *
+ cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+
+ // Output power
+ output_data.subthreshold_leakage_power =
+ longer_channel_device ? power.readOp.longer_channel_leakage :
+ power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
+ output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle;
+ output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load;
}
-void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- {
- cout << "NIU:" << endl;
- cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate << " W" << endl;
- cout << indent_str<< "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl;
- cout<<endl;
- }
- else
- {
+void NIUController::set_niu_param() {
+ int num_children = xml_data->nChildNode("param");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
- }
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
-}
+ ASSIGN_FP_IF("niu_clockRate", niup.clockRate);
+ ASSIGN_INT_IF("num_units", niup.num_units);
+ ASSIGN_INT_IF("type", niup.type);
-void NIUController::set_niu_param()
-{
- niup.clockRate = XML->sys.niu.clockrate;
- niup.clockRate *= 1e6;
- niup.num_units = XML->sys.niu.number_units;
- niup.duty_cycle = XML->sys.niu.duty_cycle;
- niup.perc_load = XML->sys.niu.total_load_perc;
- niup.type = XML->sys.niu.type;
-// niup.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
-}
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
-PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_)
-:XML(XML_interface),
- interface_ip(*interface_ip_)
- {
- local_result = init_interface(&interface_ip);
- double frontend_area, phy_area, ctrl_area, SerDer_area;
- double ctrl_dyn, frontend_dyn, SerDer_dyn;
- double ctrl_gates,frontend_gates, SerDer_gates;
- double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- double NMOS_sizing, PMOS_sizing;
-
- /* Assuming PCIe is bit-slice based architecture
- * This is the reason for /8 in both area and power calculation
- * to get per lane numbers
- */
-
- set_pcie_param();
- if (pciep.type == 0) //high performance NIU
- {
- //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm.
- ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
- frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
- //SerDer is very hard to scale
- SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
- phy_area = frontend_area + SerDer_area;
- //total area
- //Power
- //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
- ctrl_dyn = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
- // //Cadence ChipEstimate using 65nm soft IP;
- // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
- //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
- SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
- SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle
-
- //power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels;
- //Cadence ChipEstimate using 65nm
- ctrl_gates = 900000/8*pciep.num_channels;
- // frontend_gates = 120000/8;
- // SerDer_gates = 200000/8;
- NMOS_sizing = 5*g_tp.min_w_nmos_;
- PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
- }
- else
- {
- ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
- SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //total area
- //Power
- //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
- ctrl_dyn = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
- // //Cadence ChipEstimate using 65nm soft IP;
- // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
- //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
- SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
- SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle
-
- //Cadence ChipEstimate using 65nm
- ctrl_gates = 200000/8*pciep.num_channels;
- // frontend_gates = 120000/8;
- SerDer_gates = 200000/8*pciep.num_channels;
- NMOS_sizing = g_tp.min_w_nmos_;
- PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-
- }
- area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6);
- power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels;
- power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
- power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
- power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- }
+ // Change from MHz to Hz
+ niup.clockRate *= 1e6;
-void PCIeController::computeEnergy(bool is_tdp)
-{
- if (is_tdp)
- {
+ num_children = xml_data->nChildNode("stat");
+ for (i = 0; i < num_children; i++) {
+ XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
- power = power_t;
- power.readOp.dynamic *= pciep.duty_cycle;
+ ASSIGN_FP_IF("duty_cycle", nius.duty_cycle);
+ ASSIGN_FP_IF("perc_load", nius.perc_load);
- }
- else
- {
- rt_power = power_t;
- rt_power.readOp.dynamic *= pciep.perc_load;
+ else {
+ warnUnrecognizedStat(node_name);
+ }
}
}
-void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- {
- cout << "PCIe:" << endl;
- cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate << " W" << endl;
- cout << indent_str<< "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl;
- cout<<endl;
- }
- else
- {
+PCIeController::PCIeController(XMLNode* _xml_data,
+ InputParameter* interface_ip_)
+ : McPATComponent(_xml_data, interface_ip_) {
+ name = "PCIe";
+ set_pcie_param();
+}
- }
+void PCIeController::computeArea() {
+ double ctrl_area;
+ double SerDer_area;
+
+ /* Assuming PCIe is bit-slice based architecture
+ * This is the reason for /8 in both area and power calculation
+ * to get per lane numbers
+ */
+
+ if (pciep.type == 0) { //high performance PCIe
+ //Area estimation based on average of die photo from Niagara 2 and
+ //Cadence ChipEstimate @ 65nm.
+ ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um / 0.065);
+ //Area estimation based on average of die photo from Niagara 2 and
+ //Cadence ChipEstimate hard IP @65nm.
+ //SerDer is very hard to scale
+ SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um /
+ 0.065);//* (interface_ip.F_sz_um/0.065);
+ } else {
+ ctrl_area = 0.412 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um / 0.065);
+ //Area estimation based on average of die photo from Niagara 2, and
+ //Cadence ChipEstimate @ 65nm.
+ SerDer_area = 0.36 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um / 0.065);
+ }
+ // Total area
+ output_data.area = ((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 *
+ pciep.num_channels) * 1e6;
}
-void PCIeController::set_pcie_param()
-{
- pciep.clockRate = XML->sys.pcie.clockrate;
- pciep.clockRate *= 1e6;
- pciep.num_units = XML->sys.pcie.number_units;
- pciep.num_channels = XML->sys.pcie.num_channels;
- pciep.duty_cycle = XML->sys.pcie.duty_cycle;
- pciep.perc_load = XML->sys.pcie.total_load_perc;
- pciep.type = XML->sys.pcie.type;
- pciep.withPHY = XML->sys.pcie.withPHY;
-// pciep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
+void PCIeController::computeEnergy() {
+ double ctrl_dyn;
+ double SerDer_dyn;
+ double ctrl_gates;
+ double SerDer_gates = 0;
+ double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+ double NMOS_sizing;
+ double PMOS_sizing;
+
+ /* Assuming PCIe is bit-slice based architecture
+ * This is the reason for /8 in both area and power calculation
+ * to get per lane numbers
+ */
+
+ if (pciep.type == 0) { //high performance PCIe
+ //Power
+ //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
+ ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
+ g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+ // //Cadence ChipEstimate using 65nm soft IP;
+ // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
+ //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
+ //PCIe 2.0 max per lane speed is 4Gb/s
+ SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) *
+ g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
+
+ //Cadence ChipEstimate using 65nm
+ ctrl_gates = 900000 / 8 * pciep.num_channels;
+ // frontend_gates = 120000/8;
+ // SerDer_gates = 200000/8;
+ NMOS_sizing = 5 * g_tp.min_w_nmos_;
+ PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+ } else {
+ //Power
+ //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
+ ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
+ g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
+ // //Cadence ChipEstimate using 65nm soft IP;
+ // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
+ //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
+ //PCIe 2.0 max per lane speed is 4Gb/s
+ SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) *
+ g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
+
+ //Cadence ChipEstimate using 65nm
+ ctrl_gates = 200000 / 8 * pciep.num_channels;
+ // frontend_gates = 120000/8;
+ SerDer_gates = 200000 / 8 * pciep.num_channels;
+ NMOS_sizing = g_tp.min_w_nmos_;
+ PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+ }
+
+ //convert to energy per clock cycle
+ SerDer_dyn /= pciep.clockRate;
+
+ power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) *
+ pciep.num_channels;
+ power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) *
+ cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(Uncore_device);
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage * long_channel_device_reduction;
+ power.readOp.gate_leakage = (ctrl_gates +
+ (pciep.withPHY ? SerDer_gates : 0)) *
+ cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+
+ // Output power
+ output_data.subthreshold_leakage_power =
+ longer_channel_device ? power.readOp.longer_channel_leakage :
+ power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
+ output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle;
+ output_data.runtime_dynamic_energy =
+ power.readOp.dynamic * pcies.perc_load;
}
-FlashController::FlashController(ParseXML *XML_interface,InputParameter* interface_ip_)
-:XML(XML_interface),
- interface_ip(*interface_ip_)
- {
- local_result = init_interface(&interface_ip);
- double frontend_area, phy_area, ctrl_area, SerDer_area;
- double ctrl_dyn, frontend_dyn, SerDer_dyn;
- double ctrl_gates,frontend_gates, SerDer_gates;
- double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- double NMOS_sizing, PMOS_sizing;
-
- /* Assuming PCIe is bit-slice based architecture
- * This is the reason for /8 in both area and power calculation
- * to get per lane numbers
- */
-
- set_fc_param();
- if (fcp.type == 0) //high performance NIU
- {
- cout<<"Current McPAT does not support high performance flash contorller since even low power designs are enough for maintain throughput"<<endl;
- exit(0);
- NMOS_sizing = 5*g_tp.min_w_nmos_;
- PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
- }
- else
- {
- ctrl_area = 0.243 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL from CAST
- SerDer_area = 0.36/8 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
- //based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it support 8x lanes with each lane
- //speed up to 250MB/s (PCIe1.1x) This is already saturate the 200MB/s of the flash controller core above.
- ctrl_gates = 129267;
- SerDer_gates = 200000/8;
- NMOS_sizing = g_tp.min_w_nmos_;
- PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-
- //Power
- //Cadence ChipEstimate using 65nm the controller 125mW for every 200MB/s This is power not energy!
- ctrl_dyn = 0.125*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
- //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
- SerDer_dyn = 0.01*1.6*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
- //max Per controller speed is 1.6Gb/s (200MB/s)
- }
- double number_channel = 1+(fcp.num_channels-1)*0.2;
- area.set_area((ctrl_area + (fcp.withPHY? SerDer_area:0))*1e6*number_channel);
- power_t.readOp.dynamic = (ctrl_dyn + (fcp.withPHY? SerDer_dyn:0))*number_channel;
- power_t.readOp.leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
- power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
- power_t.readOp.gate_leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- }
+void PCIeController::set_pcie_param() {
+ int num_children = xml_data->nChildNode("param");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate);
+ ASSIGN_INT_IF("num_units", pciep.num_units);
+ ASSIGN_INT_IF("num_channels", pciep.num_channels);
+ ASSIGN_INT_IF("type", pciep.type);
+ ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool);
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
-void FlashController::computeEnergy(bool is_tdp)
-{
- if (is_tdp)
- {
+ // Change from MHz to Hz
+ pciep.clockRate *= 1e6;
+ num_children = xml_data->nChildNode("stat");
+ for (i = 0; i < num_children; i++) {
+ XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
- power = power_t;
- power.readOp.dynamic *= fcp.duty_cycle;
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
- }
- else
- {
- rt_power = power_t;
- rt_power.readOp.dynamic *= fcp.perc_load;
+ ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle);
+ ASSIGN_FP_IF("perc_load", pcies.perc_load);
+
+ else {
+ warnUnrecognizedStat(node_name);
+ }
}
}
-void FlashController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- {
- cout << "Flash Controller:" << endl;
- cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;//no multiply of clock since this is power already
- cout << indent_str<< "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
- cout<<endl;
- }
- else
- {
+FlashController::FlashController(XMLNode* _xml_data,
+ InputParameter* interface_ip_)
+ : McPATComponent(_xml_data, interface_ip_) {
+ name = "Flash Controller";
+ set_fc_param();
+}
- }
+void FlashController::computeArea() {
+ double ctrl_area;
+ double SerDer_area;
+
+ /* Assuming Flash is bit-slice based architecture
+ * This is the reason for /8 in both area and power calculation
+ * to get per lane numbers
+ */
+
+ if (fcp.type == 0) { //high performance flash controller
+ cout << "Current McPAT does not support high performance flash "
+ << "controller since even low power designs are enough for "
+ << "maintain throughput" <<endl;
+ exit(0);
+ } else {
+ ctrl_area = 0.243 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um / 0.065);
+ //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL
+ //from CAST
+ SerDer_area = 0.36 / 8 * (interface_ip.F_sz_um / 0.065) *
+ (interface_ip.F_sz_um / 0.065);
+ }
+
+ double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
+ output_data.area = (ctrl_area + (fcp.withPHY ? SerDer_area : 0)) *
+ 1e6 * number_channel;
+}
+void FlashController::computeEnergy() {
+ double ctrl_dyn;
+ double SerDer_dyn;
+ double ctrl_gates;
+ double SerDer_gates;
+ double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+ double NMOS_sizing;
+ double PMOS_sizing;
+
+ /* Assuming Flash is bit-slice based architecture
+ * This is the reason for /8 in both area and power calculation
+ * to get per lane numbers
+ */
+
+ if (fcp.type == 0) { //high performance flash controller
+ cout << "Current McPAT does not support high performance flash "
+ << "controller since even low power designs are enough for "
+ << "maintain throughput" <<endl;
+ exit(0);
+ NMOS_sizing = 5 * g_tp.min_w_nmos_;
+ PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+ } else {
+ //Based on PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, which
+ //supports 8x lanes with each lane speed up to 250MB/s (PCIe1.1x).
+ //This already saturates the 200MB/s of the flash controller core
+ //above.
+ ctrl_gates = 129267;
+ SerDer_gates = 200000 / 8;
+ NMOS_sizing = g_tp.min_w_nmos_;
+ PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+
+ //Power
+ //Cadence ChipEstimate using 65nm the controller 125mW for every
+ //200MB/s This is power not energy!
+ ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
+ 1.1 * (interface_ip.F_sz_nm / 65.0);
+ //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
+ SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) *
+ g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
+ //max Per controller speed is 1.6Gb/s (200MB/s)
+ }
+
+ double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
+ power.readOp.dynamic = (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) *
+ number_channel;
+ power.readOp.leakage = ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) *
+ number_channel) *
+ cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(Uncore_device);
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage * long_channel_device_reduction;
+ power.readOp.gate_leakage =
+ ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) *
+ cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+
+ // Output power
+ output_data.subthreshold_leakage_power =
+ longer_channel_device ? power.readOp.longer_channel_leakage :
+ power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
+ output_data.peak_dynamic_power = power.readOp.dynamic * fcs.duty_cycle;
+ output_data.runtime_dynamic_energy = power.readOp.dynamic * fcs.perc_load;
}
void FlashController::set_fc_param()
{
-// fcp.clockRate = XML->sys.flashc.mc_clock;
-// fcp.clockRate *= 1e6;
- fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate;
- fcp.num_channels = ceil(fcp.peakDataTransferRate/200);
- fcp.num_mcs = XML->sys.flashc.number_mcs;
- fcp.duty_cycle = XML->sys.flashc.duty_cycle;
- fcp.perc_load = XML->sys.flashc.total_load_perc;
- fcp.type = XML->sys.flashc.type;
- fcp.withPHY = XML->sys.flashc.withPHY;
-// flashcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
+ int num_children = xml_data->nChildNode("param");
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("num_channels", fcp.num_channels);
+ ASSIGN_INT_IF("type", fcp.type);
+ ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool);
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
+ }
+
+ num_children = xml_data->nChildNode("stat");
+ for (i = 0; i < num_children; i++) {
+ XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+ ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle);
+ ASSIGN_FP_IF("perc_load", fcs.perc_load);
+
+ else {
+ warnUnrecognizedStat(node_name);
+ }
+ }
}
diff --git a/ext/mcpat/iocontrollers.h b/ext/mcpat/iocontrollers.h
index 818580abb..39cfb0eb3 100644
--- a/ext/mcpat/iocontrollers.h
+++ b/ext/mcpat/iocontrollers.h
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,63 +26,52 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef IOCONTROLLERS_H_
#define IOCONTROLLERS_H_
-
-#endif /* IOCONTROLLERS_H_ */
-
-#include "XML_Parse.h"
-#include "parameter.h"
-//#include "io.h"
-#include "array.h"
-//#include "Undifferentiated_Core_Area.h"
#include <vector>
+#include "array.h"
#include "basic_components.h"
+#include "parameter.h"
-class NIUController : public Component {
+class NIUController : public McPATComponent {
public:
- ParseXML *XML;
- InputParameter interface_ip;
- NIUParam niup;
- powerDef power_t;
- uca_org_t local_result;
- NIUController(ParseXML *XML_interface,InputParameter* interface_ip_);
+ NIUParameters niup;
+ NIUStatistics nius;
+
+ NIUController(XMLNode* _xml_data, InputParameter* interface_ip_);
void set_niu_param();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
+ void computeArea();
+ void computeEnergy();
~NIUController(){};
};
-class PCIeController : public Component {
+class PCIeController : public McPATComponent {
public:
- ParseXML *XML;
- InputParameter interface_ip;
- PCIeParam pciep;
- powerDef power_t;
- uca_org_t local_result;
- PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_);
+ PCIeParameters pciep;
+ PCIeStatistics pcies;
+
+ PCIeController(XMLNode* _xml_data, InputParameter* interface_ip_);
void set_pcie_param();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
+ void computeArea();
+ void computeEnergy();
~PCIeController(){};
};
-class FlashController : public Component {
+class FlashController : public McPATComponent {
public:
- ParseXML *XML;
- InputParameter interface_ip;
- MCParam fcp;
- powerDef power_t;
- uca_org_t local_result;
- FlashController(ParseXML *XML_interface,InputParameter* interface_ip_);
+ MCParameters fcp;
+ MCStatistics fcs;
+
+ FlashController(XMLNode* _xml_data, InputParameter* interface_ip_);
void set_fc_param();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
+ void computeArea();
+ void computeEnergy();
~FlashController(){};
};
+#endif /* IOCONTROLLERS_H_ */
diff --git a/ext/mcpat/logic.cc b/ext/mcpat/logic.cc
index 11519d863..43823e77b 100644
--- a/ext/mcpat/logic.cc
+++ b/ext/mcpat/logic.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,416 +26,500 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
+#include "common.h"
#include "logic.h"
-
//selection_logic
-selection_logic::selection_logic(
- bool _is_default,
- int win_entries_,
- int issue_width_,
- const InputParameter *configure_interface,
- enum Device_ty device_ty_,
- enum Core_type core_ty_)
- //const ParseXML *_XML_interface)
- :is_default(_is_default),
- win_entries(win_entries_),
- issue_width(issue_width_),
- device_ty(device_ty_),
- core_ty(core_ty_)
- {
- //uca_org_t result2;
- l_ip=*configure_interface;
- local_result = init_interface(&l_ip);
- //init_tech_params(l_ip.F_sz_um, false);
- //win_entries=numIBEntries;//IQentries;
- //issue_width=issueWidth;
- selection_power();
- double sckRation = g_tp.sckt_co_eff;
- power.readOp.dynamic *= sckRation;
- power.writeOp.dynamic *= sckRation;
- power.searchOp.dynamic *= sckRation;
-
- double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
- power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
- }
-
-void selection_logic::selection_power()
-{//based on cost effective superscalar processor TR pp27-31
- double Ctotal, Cor, Cpencode;
- int num_arbiter;
- double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp;
-
- //TODO: the 0.8um process data is used.
- WSelORn = 12.5 * l_ip.F_sz_um;//this was 10 micron for the 0.8 micron process
- WSelORprequ = 50 * l_ip.F_sz_um;//this was 40 micron for the 0.8 micron process
- WSelPn = 12.5 * l_ip.F_sz_um;//this was 10mcron for the 0.8 micron process
- WSelPp = 18.75 * l_ip.F_sz_um;//this was 15 micron for the 0.8 micron process
- WSelEnn = 6.25 * l_ip.F_sz_um;//this was 5 micron for the 0.8 micron process
- WSelEnp = 12.5 * l_ip.F_sz_um;//this was 10 micron for the 0.8 micron process
-
-
- Ctotal=0;
- num_arbiter=1;
- while(win_entries > 4)
- {
- win_entries = (int)ceil((double)win_entries / 4.0);
- num_arbiter += win_entries;
- }
- //the 4-input OR logic to generate anyreq
- Cor = 4 * drain_C_(WSelORn,NCH,1,1, g_tp.cell_h_def) + drain_C_(WSelORprequ,PCH,1,1, g_tp.cell_h_def);
- power.readOp.gate_leakage = cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor)*g_tp.peri_global.Vdd;
-
- //The total capacity of the 4-bit priority encoder
- Cpencode = drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,1, 1, g_tp.cell_h_def) +
- 2*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,2, 1, g_tp.cell_h_def) +
- 3*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,3, 1, g_tp.cell_h_def) +
- 4*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,4, 1, g_tp.cell_h_def) +//precompute priority logic
- 2*4*gate_C(WSelEnn+WSelEnp,20.0)+
- 4*drain_C_(WSelEnn,NCH,1, 1, g_tp.cell_h_def) + 2*4*drain_C_(WSelEnp,PCH,1, 1, g_tp.cell_h_def)+//enable logic
- (2*4+2*3+2*2+2)*gate_C(WSelPn+WSelPp,10.0);//requests signal
-
- Ctotal += issue_width * num_arbiter*(Cor+Cpencode);
-
- power.readOp.dynamic = Ctotal*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*2;//2 means the abitration signal need to travel round trip
- power.readOp.leakage = issue_width * num_arbiter *
- (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p
- + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p
- + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor)//grant3p
- + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic
- + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals
- )*g_tp.peri_global.Vdd;
- power.readOp.gate_leakage = issue_width * num_arbiter *
- (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p
- + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p
- + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p
- + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic
- + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals
- )*g_tp.peri_global.Vdd;
+selection_logic::selection_logic(XMLNode* _xml_data, bool _is_default,
+ int _win_entries, int issue_width_,
+ const InputParameter *configure_interface,
+ string _name, double _accesses,
+ double clockRate_, enum Device_ty device_ty_,
+ enum Core_type core_ty_)
+ : McPATComponent(_xml_data), is_default(_is_default),
+ win_entries(_win_entries),
+ issue_width(issue_width_),
+ accesses(_accesses),
+ device_ty(device_ty_),
+ core_ty(core_ty_) {
+ clockRate = clockRate_;
+ name = _name;
+ l_ip = *configure_interface;
+ local_result = init_interface(&l_ip, name);
+}
+
+void selection_logic::computeArea() {
+ output_data.area = local_result.area;
}
+void selection_logic::computeEnergy() {
+ //based on cost effective superscalar processor TR pp27-31
+ double Ctotal, Cor, Cpencode;
+ int num_arbiter;
+ double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp;
+
+ //the 0.8um process data is used.
+ //this was 10 micron for the 0.8 micron process
+ WSelORn = 12.5 * l_ip.F_sz_um;
+ //this was 40 micron for the 0.8 micron process
+ WSelORprequ = 50 * l_ip.F_sz_um;
+ //this was 10mcron for the 0.8 micron process
+ WSelPn = 12.5 * l_ip.F_sz_um;
+ //this was 15 micron for the 0.8 micron process
+ WSelPp = 18.75 * l_ip.F_sz_um;
+ //this was 5 micron for the 0.8 micron process
+ WSelEnn = 6.25 * l_ip.F_sz_um;
+ //this was 10 micron for the 0.8 micron process
+ WSelEnp = 12.5 * l_ip.F_sz_um;
+
+ Ctotal = 0;
+ num_arbiter = 1;
+ while (win_entries > 4) {
+ win_entries = (int)ceil((double)win_entries / 4.0);
+ num_arbiter += win_entries;
+ }
+ //the 4-input OR logic to generate anyreq
+ Cor = 4 * drain_C_(WSelORn, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(WSelORprequ, PCH, 1, 1, g_tp.cell_h_def);
+ power.readOp.gate_leakage =
+ cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor) * g_tp.peri_global.Vdd;
+
+ //The total capacity of the 4-bit priority encoder
+ Cpencode = drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(WSelPp, PCH, 1, 1, g_tp.cell_h_def) +
+ 2 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(WSelPp, PCH, 2, 1, g_tp.cell_h_def) +
+ 3 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(WSelPp, PCH, 3, 1, g_tp.cell_h_def) +
+ 4 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(WSelPp, PCH, 4, 1, g_tp.cell_h_def) +//precompute priority logic
+ 2 * 4 * gate_C(WSelEnn + WSelEnp, 20.0) +
+ 4 * drain_C_(WSelEnn, NCH, 1, 1, g_tp.cell_h_def) +
+ 2 * 4 * drain_C_(WSelEnp, PCH, 1, 1, g_tp.cell_h_def) +//enable logic
+ (2 * 4 + 2 * 3 + 2 * 2 + 2) *
+ gate_C(WSelPn + WSelPp, 10.0);//requests signal
+
+ Ctotal += issue_width * num_arbiter * (Cor + Cpencode);
+
+ //2 means the abitration signal need to travel round trip
+ power.readOp.dynamic =
+ Ctotal * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 2;
+ power.readOp.leakage = issue_width * num_arbiter *
+ (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p
+ + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p
+ + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor)//grant3p
+ + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic
+ + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals
+ ) * g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage = issue_width * num_arbiter *
+ (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p
+ + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p
+ + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p
+ + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic
+ + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals
+ ) * g_tp.peri_global.Vdd;
+ double sckRation = g_tp.sckt_co_eff;
+ power.readOp.dynamic *= sckRation;
+ power.writeOp.dynamic *= sckRation;
+ power.searchOp.dynamic *= sckRation;
+
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(device_ty, core_ty);
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage * long_channel_device_reduction;
+
+ output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
+ output_data.subthreshold_leakage_power = power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
+ output_data.runtime_dynamic_energy = power.readOp.dynamic * accesses;
+}
dep_resource_conflict_check::dep_resource_conflict_check(
- const InputParameter *configure_interface,
- const CoreDynParam & dyn_p_,
- int compare_bits_,
- bool _is_default)
- : l_ip(*configure_interface),
- coredynp(dyn_p_),
- compare_bits(compare_bits_),
- is_default(_is_default)
-{
- Wcompn = 25 * l_ip.F_sz_um;//this was 20.0 micron for the 0.8 micron process
- Wevalinvp = 25 * l_ip.F_sz_um;//this was 20.0 micron for the 0.8 micron process
- Wevalinvn = 100 * l_ip.F_sz_um;//this was 80.0 mcron for the 0.8 micron process
- Wcomppreequ = 50 * l_ip.F_sz_um;//this was 40.0 micron for the 0.8 micron process
- WNORn = 6.75 * l_ip.F_sz_um;//this was 5.4 micron for the 0.8 micron process
- WNORp = 38.125 * l_ip.F_sz_um;//this was 30.5 micron for the 0.8 micron process
-
- local_result = init_interface(&l_ip);
-
- if (coredynp.core_ty==Inorder)
- compare_bits += 16 + 8 + 8;//TODO: opcode bits + log(shared resources) + REG TAG BITS-->opcode comparator
- else
- compare_bits += 16 + 8 + 8;
-
- conflict_check_power();
- double sckRation = g_tp.sckt_co_eff;
- power.readOp.dynamic *= sckRation;
- power.writeOp.dynamic *= sckRation;
- power.searchOp.dynamic *= sckRation;
+ XMLNode* _xml_data, const string _name,
+ const InputParameter *configure_interface,
+ const CoreParameters & dyn_p_, int compare_bits_,
+ double clockRate_, bool _is_default)
+ : McPATComponent(_xml_data), l_ip(*configure_interface),
+ coredynp(dyn_p_), compare_bits(compare_bits_), is_default(_is_default) {
+
+ name = _name;
+ clockRate = clockRate_;
+ //this was 20.0 micron for the 0.8 micron process
+ Wcompn = 25 * l_ip.F_sz_um;
+ //this was 20.0 micron for the 0.8 micron process
+ Wevalinvp = 25 * l_ip.F_sz_um;
+ //this was 80.0 mcron for the 0.8 micron process
+ Wevalinvn = 100 * l_ip.F_sz_um;
+ //this was 40.0 micron for the 0.8 micron process
+ Wcomppreequ = 50 * l_ip.F_sz_um;
+ //this was 5.4 micron for the 0.8 micron process
+ WNORn = 6.75 * l_ip.F_sz_um;
+ //this was 30.5 micron for the 0.8 micron process
+ WNORp = 38.125 * l_ip.F_sz_um;
+
+ // To make CACTI happy.
+ l_ip.cache_sz = MIN_BUFFER_SIZE;
+ local_result = init_interface(&l_ip, name);
+
+ if (coredynp.core_ty == Inorder)
+ //TODO: opcode bits + log(shared resources) + REG TAG BITS -->
+ //opcode comparator
+ compare_bits += 16 + 8 + 8;
+ else
+ compare_bits += 16 + 8 + 8;
+
+ conflict_check_power();
+ double sckRation = g_tp.sckt_co_eff;
+ power.readOp.dynamic *= sckRation;
+ power.writeOp.dynamic *= sckRation;
+ power.searchOp.dynamic *= sckRation;
}
-void dep_resource_conflict_check::conflict_check_power()
-{
- double Ctotal;
- int num_comparators;
- num_comparators = 3*((coredynp.decodeW) * (coredynp.decodeW)-coredynp.decodeW);//2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest to dest comparision.
- //When decode-width ==1, no dcl logic
+void dep_resource_conflict_check::conflict_check_power() {
+ double Ctotal;
+ int num_comparators;
+ //2(N*N-N) is used for source to dest comparison, (N*N-N) is used for
+ //dest to dest comparision.
+ num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) -
+ coredynp.decodeW);
- Ctotal = num_comparators * compare_cap();
- //printf("%i,%s\n",XML_interface->sys.core[0].predictor.predictor_entries,XML_interface->sys.core[0].predictor.prediction_scheme);
+ Ctotal = num_comparators * compare_cap();
- power.readOp.dynamic=Ctotal*/*CLOCKRATE*/g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/*AF*/;
- power.readOp.leakage=num_comparators*compare_bits*2*simplified_nmos_leakage(Wcompn, false);
+ power.readOp.dynamic = Ctotal * /*CLOCKRATE*/ g_tp.peri_global.Vdd *
+ g_tp.peri_global.Vdd /*AF*/;
+ power.readOp.leakage = num_comparators * compare_bits * 2 *
+ simplified_nmos_leakage(Wcompn, false);
- double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty);
- power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
- power.readOp.gate_leakage=num_comparators*compare_bits*2*cmos_Ig_leakage(Wcompn, 0, 2, nmos);
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(Core_device, coredynp.core_ty);
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage * long_channel_device_reduction;
+ power.readOp.gate_leakage = num_comparators * compare_bits * 2 *
+ cmos_Ig_leakage(Wcompn, 0, 2, nmos);
}
/* estimate comparator power consumption (this comparator is similar
to the tag-match structure in a CAM */
-double dep_resource_conflict_check::compare_cap()
-{
- double c1, c2;
-
- WNORp = WNORp * compare_bits/2.0;//resize the big NOR gate at the DCL according to fan in.
- /* bottom part of comparator */
- c2 = (compare_bits)*(drain_C_(Wcompn,NCH,1,1, g_tp.cell_h_def)+drain_C_(Wcompn,NCH,2,1, g_tp.cell_h_def))+
- drain_C_(Wevalinvp,PCH,1,1, g_tp.cell_h_def) + drain_C_(Wevalinvn,NCH,1,1, g_tp.cell_h_def);
-
- /* top part of comparator */
- c1 = (compare_bits)*(drain_C_(Wcompn,NCH,1,1, g_tp.cell_h_def)+drain_C_(Wcompn,NCH,2,1, g_tp.cell_h_def)+
- drain_C_(Wcomppreequ,NCH,1,1, g_tp.cell_h_def)) + gate_C(WNORn + WNORp,10.0) +
- drain_C_(WNORp,NCH,2,1, g_tp.cell_h_def) + compare_bits*drain_C_(WNORn,NCH,2,1, g_tp.cell_h_def);
- return(c1 + c2);
+double dep_resource_conflict_check::compare_cap() {
+ double c1, c2;
+
+ //resize the big NOR gate at the DCL according to fan in.
+ WNORp = WNORp * compare_bits / 2.0;
+ /* bottom part of comparator */
+ c2 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def)) +
+ drain_C_(Wevalinvp, PCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(Wevalinvn, NCH, 1, 1, g_tp.cell_h_def);
+
+ /* top part of comparator */
+ c1 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) +
+ drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def) +
+ drain_C_(Wcomppreequ, NCH, 1, 1, g_tp.cell_h_def)) +
+ gate_C(WNORn + WNORp, 10.0) +
+ drain_C_(WNORp, NCH, 2, 1, g_tp.cell_h_def) + compare_bits *
+ drain_C_(WNORn, NCH, 2, 1, g_tp.cell_h_def);
+ return(c1 + c2);
}
void dep_resource_conflict_check::leakage_feedback(double temperature)
{
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
- uca_org_t init_result = init_interface(&l_ip); // init_result is dummy
+ uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
// This is part of conflict_check_power()
- int num_comparators = 3*((coredynp.decodeW) * (coredynp.decodeW)-coredynp.decodeW);//2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest to dest comparision.
- power.readOp.leakage=num_comparators*compare_bits*2*simplified_nmos_leakage(Wcompn, false);
-
- double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty);
- power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
- power.readOp.gate_leakage=num_comparators*compare_bits*2*cmos_Ig_leakage(Wcompn, 0, 2, nmos);
+ // 2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest
+ // to dest comparison.
+ int num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) -
+ coredynp.decodeW);
+ power.readOp.leakage = num_comparators * compare_bits * 2 *
+ simplified_nmos_leakage(Wcompn, false);
+
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(Core_device, coredynp.core_ty);
+ power.readOp.longer_channel_leakage = power.readOp.leakage *
+ long_channel_device_reduction;
+ power.readOp.gate_leakage = num_comparators * compare_bits * 2 *
+ cmos_Ig_leakage(Wcompn, 0, 2, nmos);
}
-//TODO: add inverter and transmission gate base DFF.
DFFCell::DFFCell(
- bool _is_dram,
- double _WdecNANDn,
- double _WdecNANDp,
- double _cell_load,
- const InputParameter *configure_interface)
-:is_dram(_is_dram),
-cell_load(_cell_load),
-WdecNANDn(_WdecNANDn),
-WdecNANDp(_WdecNANDp)
-{//this model is based on the NAND2 based DFF.
- l_ip=*configure_interface;
-// area.set_area(730*l_ip.F_sz_um*l_ip.F_sz_um);
- area.set_area(5*compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp, g_tp.cell_h_def)
- + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn, g_tp.cell_h_def));
+ bool _is_dram,
+ double _WdecNANDn,
+ double _WdecNANDp,
+ double _cell_load,
+ const InputParameter *configure_interface)
+ : is_dram(_is_dram),
+ cell_load(_cell_load),
+ WdecNANDn(_WdecNANDn),
+ WdecNANDp(_WdecNANDp) { //this model is based on the NAND2 based DFF.
+ l_ip = *configure_interface;
+ area.set_area(5 * compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp,
+ g_tp.cell_h_def)
+ + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn,
+ g_tp.cell_h_def));
}
-double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out)
-{
- double Ctotal = 0;
- //printf("WdecNANDn = %E\n", WdecNANDn);
+double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) {
+ double Ctotal = 0;
- /* part 1: drain cap of NAND gate */
- Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram);
+ /* part 1: drain cap of NAND gate */
+ Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram);
- /* part 2: gate cap of NAND gates */
- Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram);
+ /* part 2: gate cap of NAND gates */
+ Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram);
- return Ctotal;
+ return Ctotal;
}
-void DFFCell::compute_DFF_cell()
-{
- double c1, c2, c3, c4, c5, c6;
- /* node 5 and node 6 are identical to node 1 in capacitance */
- c1 = c5 = c6 = fpfp_node_cap(2, 1);
- c2 = fpfp_node_cap(2, 3);
- c3 = fpfp_node_cap(3, 2);
- c4 = fpfp_node_cap(2, 2);
-
- //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2
- clock_cap= 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram);
- e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2*cell_load)*0.5*g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;;
-
- /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */
- e_keep_1.readOp.dynamic += c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ;
- e_keep_0.readOp.dynamic += c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ;
- e_clock.readOp.dynamic += clock_cap* g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;;
-
- /* static power */
- e_switch.readOp.leakage += (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand)*5//5 NAND2 and 1 NAND3 in a DFF
- + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand))*g_tp.peri_global.Vdd;
- e_switch.readOp.gate_leakage += (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand)*5//5 NAND2 and 1 NAND3 in a DFF
- + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand))*g_tp.peri_global.Vdd;
- //printf("leakage =%E\n",cmos_Ileak(1, is_dram) );
+void DFFCell::compute_DFF_cell() {
+ double c1, c2, c3, c4, c5, c6;
+ /* node 5 and node 6 are identical to node 1 in capacitance */
+ c1 = c5 = c6 = fpfp_node_cap(2, 1);
+ c2 = fpfp_node_cap(2, 3);
+ c3 = fpfp_node_cap(3, 2);
+ c4 = fpfp_node_cap(2, 2);
+
+ //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2
+ clock_cap = 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram);
+ e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2 * cell_load) *
+ 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;;
+
+ /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */
+ e_keep_1.readOp.dynamic +=
+ c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ;
+ e_keep_0.readOp.dynamic +=
+ c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ;
+ e_clock.readOp.dynamic +=
+ clock_cap * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;;
+
+ /* static power */
+ e_switch.readOp.leakage +=
+ (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand) *
+ 5//5 NAND2 and 1 NAND3 in a DFF
+ + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand)) *
+ g_tp.peri_global.Vdd;
+ e_switch.readOp.gate_leakage +=
+ (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand) *
+ 5//5 NAND2 and 1 NAND3 in a DFF
+ + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand)) *
+ g_tp.peri_global.Vdd;
}
-Pipeline::Pipeline(
- const InputParameter *configure_interface,
- const CoreDynParam & dyn_p_,
- enum Device_ty device_ty_,
- bool _is_core_pipeline,
- bool _is_default)
-: l_ip(*configure_interface),
- coredynp(dyn_p_),
- device_ty(device_ty_),
- is_core_pipeline(_is_core_pipeline),
- is_default(_is_default),
- num_piperegs(0.0)
-
- {
- local_result = init_interface(&l_ip);
- if (!coredynp.Embedded)
- process_ind = true;
- else
- process_ind = false;
- WNANDn = (process_ind)? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ;//this was 20 micron for the 0.8 micron process
- WNANDp = (process_ind)? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_*pmos_to_nmos_sz_ratio();//this was 30 micron for the 0.8 micron process
- load_per_pipeline_stage = 2*gate_C(WNANDn + WNANDp, 0, false);
- compute();
+Pipeline::Pipeline(XMLNode* _xml_data,
+ const InputParameter *configure_interface,
+ const CoreParameters & dyn_p_,
+ enum Device_ty device_ty_,
+ bool _is_core_pipeline,
+ bool _is_default)
+ : McPATComponent(_xml_data), l_ip(*configure_interface),
+ coredynp(dyn_p_), device_ty(device_ty_),
+ is_core_pipeline(_is_core_pipeline), is_default(_is_default),
+ num_piperegs(0.0) {
+ name = "Pipeline?";
+
+ local_result = init_interface(&l_ip, name);
+ if (!coredynp.Embedded) {
+ process_ind = true;
+ } else {
+ process_ind = false;
+ }
+ //this was 20 micron for the 0.8 micron process
+ WNANDn = (process_ind) ? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ;
+ //this was 30 micron for the 0.8 micron process
+ WNANDp = (process_ind) ? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_ *
+ pmos_to_nmos_sz_ratio();
+ load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false);
+ compute();
}
-void Pipeline::compute()
-{
- compute_stage_vector();
- DFFCell pipe_reg(false, WNANDn,WNANDp, load_per_pipeline_stage, &l_ip);
- pipe_reg.compute_DFF_cell();
-
- double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic;
- //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider
- //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power.
- double pipe_reg_power = num_piperegs * (pipe_reg.e_switch.readOp.dynamic+pipe_reg.e_keep_0.readOp.dynamic+pipe_reg.e_keep_1.readOp.dynamic)/3+clock_power_pipereg;
- double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage;
- double pipe_reg_gate_leakage = num_piperegs * pipe_reg.e_switch.readOp.gate_leakage;
- power.readOp.dynamic +=pipe_reg_power;
- power.readOp.leakage +=pipe_reg_leakage;
- power.readOp.gate_leakage +=pipe_reg_gate_leakage;
- area.set_area(num_piperegs * pipe_reg.area.get_area());
-
- double long_channel_device_reduction = longer_channel_device_reduction(device_ty, coredynp.core_ty);
- power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
-
-
- double sckRation = g_tp.sckt_co_eff;
- power.readOp.dynamic *= sckRation;
- power.writeOp.dynamic *= sckRation;
- power.searchOp.dynamic *= sckRation;
- double macro_layout_overhead = g_tp.macro_layout_overhead;
+void Pipeline::compute() {
+ compute_stage_vector();
+ DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip);
+ pipe_reg.compute_DFF_cell();
+
+ double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic;
+ //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider
+ //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power.
+ double pipe_reg_power = num_piperegs *
+ (pipe_reg.e_switch.readOp.dynamic + pipe_reg.e_keep_0.readOp.dynamic +
+ pipe_reg.e_keep_1.readOp.dynamic) / 3 + clock_power_pipereg;
+ double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage;
+ double pipe_reg_gate_leakage = num_piperegs *
+ pipe_reg.e_switch.readOp.gate_leakage;
+ power.readOp.dynamic += pipe_reg_power;
+ power.readOp.leakage += pipe_reg_leakage;
+ power.readOp.gate_leakage += pipe_reg_gate_leakage;
+ area.set_area(num_piperegs * pipe_reg.area.get_area());
+
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(device_ty, coredynp.core_ty);
+ power.readOp.longer_channel_leakage = power.readOp.leakage *
+ long_channel_device_reduction;
+
+
+ double sckRation = g_tp.sckt_co_eff;
+ power.readOp.dynamic *= sckRation;
+ power.writeOp.dynamic *= sckRation;
+ power.searchOp.dynamic *= sckRation;
+ double macro_layout_overhead = g_tp.macro_layout_overhead;
if (!coredynp.Embedded)
- area.set_area(area.get_area()*macro_layout_overhead);
-}
-
-void Pipeline::compute_stage_vector()
-{
- double num_stages, tot_stage_vector, per_stage_vector;
- int opcode_length = coredynp.x86? coredynp.micro_opcode_length:coredynp.opcode_length;
- //Hthread = thread_clock_gated? 1:num_thread;
+ area.set_area(area.get_area() * macro_layout_overhead);
- if (!is_core_pipeline)
- {
- num_piperegs=l_ip.pipeline_stages*l_ip.per_stage_vector;//The number of pipeline stages are calculated based on the achievable throughput and required throughput
- }
- else
- {
- if (coredynp.core_ty==Inorder)
- {
- /* assume 6 pipe stages and try to estimate bits per pipe stage */
- /* pipe stage 0/IF */
- num_piperegs += coredynp.pc_width*2*coredynp.num_hthreads;
- /* pipe stage IF/ID */
- num_piperegs += coredynp.fetchW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;
- /* pipe stage IF/ThreadSEL */
- if (coredynp.multithreaded) num_piperegs += coredynp.num_hthreads*coredynp.perThreadState; //8 bit thread states
- /* pipe stage ID/EXE */
- num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width + pow(2.0,opcode_length)+ 2*coredynp.int_data_width)*coredynp.num_hthreads;
- /* pipe stage EXE/MEM */
- num_piperegs += coredynp.issueW*(3 * coredynp.arch_ireg_width + pow(2.0,opcode_length) + 8*2*coredynp.int_data_width/*+2*powers (2,reg_length)*/);
- /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal for the opcode*/
- num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length) + 8*2*coredynp.int_data_width/*+2*powers (2,reg_length)*/);
-// /* pipe stage 5/6 */
-// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/*+2*powers (2,reg_length)*/);
-// /* pipe stage 6/7 */
-// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/*+2*powers (2,reg_length)*/);
-// /* pipe stage 7/8 */
-// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/**2*powers (2,reg_length)*/);
-// /* assume 50% extra in control signals (rule of thumb) */
- num_stages=6;
+ output_data.area = area.get_area() / 1e6;
+ output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
+ output_data.subthreshold_leakage_power = power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
+ output_data.runtime_dynamic_energy = power.readOp.dynamic * total_cycles;
+}
- }
- else
- {
- /* assume 12 stage pipe stages and try to estimate bits per pipe stage */
- /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */
-
- /* pipe stage 0/1F*/
- num_piperegs += coredynp.pc_width*2*coredynp.num_hthreads ;//PC and Next PC
- /* pipe stage IF/ID */
- num_piperegs += coredynp.fetchW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;//PC is used to feed branch predictor in ID
- /* pipe stage 1D/Renaming*/
- num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;//PC is for branch exe in later stage.
- /* pipe stage Renaming/wire_drive */
- num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width);
- /* pipe stage Renaming/IssueQ */
- num_piperegs += coredynp.issueW*(coredynp.instruction_length + coredynp.pc_width + 3*coredynp.phy_ireg_width)*coredynp.num_hthreads;//3*coredynp.phy_ireg_width means 2 sources and 1 dest
- /* pipe stage IssueQ/Dispatch */
- num_piperegs += coredynp.issueW*(coredynp.instruction_length + 3 * coredynp.phy_ireg_width);
- /* pipe stage Dispatch/EXE */
-
- num_piperegs += coredynp.issueW*(3 * coredynp.phy_ireg_width + coredynp.pc_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/);
- /* 2^opcode_length means the total decoded signal for the opcode*/
- num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/);
- /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/
- num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/);
- /* pipe stage EXE/MEM, data need to be read/write, address*/
- num_piperegs += coredynp.issueW*(coredynp.int_data_width + coredynp.v_address_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/);//memory Opcode still need to be passed
- /* pipe stage MEM/WB; result data, writeback regs */
- num_piperegs += coredynp.issueW*(coredynp.int_data_width + coredynp.phy_ireg_width /* powers (2,opcode_length) + (2,opcode_length)+2*powers (2,reg_length)*/);
- /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/
- num_piperegs += coredynp.commitW*(coredynp.int_data_width + coredynp.v_address_width + coredynp.phy_ireg_width/*+ powers (2,opcode_length)*2*powers (2,reg_length)*/)*coredynp.num_hthreads;
-// if (multithreaded)
-// {
-//
-// }
- num_stages=12;
+void Pipeline::compute_stage_vector() {
+ double num_stages, tot_stage_vector, per_stage_vector;
+ int opcode_length = coredynp.x86 ?
+ coredynp.micro_opcode_length : coredynp.opcode_width;
+
+ if (!is_core_pipeline) {
+ //The number of pipeline stages is calculated based on the achievable
+ //throughput and the required throughput
+ num_piperegs = l_ip.pipeline_stages * l_ip.per_stage_vector;
+ } else {
+ if (coredynp.core_ty == Inorder) {
+ /* assume 6 pipe stages and try to estimate bits per pipe stage */
+ /* pipe stage 0/IF */
+ num_piperegs += coredynp.pc_width * 2 * coredynp.num_hthreads;
+ /* pipe stage IF/ID */
+ num_piperegs += coredynp.fetchW *
+ (coredynp.instruction_length + coredynp.pc_width) *
+ coredynp.num_hthreads;
+ /* pipe stage IF/ThreadSEL */
+ if (coredynp.multithreaded) {
+ num_piperegs += coredynp.num_hthreads *
+ coredynp.perThreadState; //8 bit thread states
+ }
+ /* pipe stage ID/EXE */
+ num_piperegs += coredynp.decodeW *
+ (coredynp.instruction_length + coredynp.pc_width +
+ pow(2.0, opcode_length) + 2 * coredynp.int_data_width) *
+ coredynp.num_hthreads;
+ /* pipe stage EXE/MEM */
+ num_piperegs += coredynp.issueW *
+ (3 * coredynp.arch_ireg_width + pow(2.0, opcode_length) + 8 *
+ 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/);
+ /* pipe stage MEM/WB; 2^opcode_length means the total decoded signals for the opcode */
+ num_piperegs += coredynp.issueW *
+ (2 * coredynp.int_data_width + pow(2.0, opcode_length) + 8 *
+ 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/);
+ num_stages = 6;
+ } else {
+ /* assume 12 pipe stages and try to estimate bits per pipe stage */
+ /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */
+
+ /* pipe stage 0/IF */
+ num_piperegs +=
+ coredynp.pc_width * 2 * coredynp.num_hthreads ;//PC and Next PC
+ /* pipe stage IF/ID */
+ num_piperegs += coredynp.fetchW *
+ (coredynp.instruction_length + coredynp.pc_width) *
+ coredynp.num_hthreads;//PC is used to feed branch predictor in ID
+ /* pipe stage ID/Renaming */
+ num_piperegs += coredynp.decodeW *
+ (coredynp.instruction_length + coredynp.pc_width) *
+ coredynp.num_hthreads;//PC is for branch exe in later stage.
+ /* pipe stage Renaming/wire_drive */
+ num_piperegs += coredynp.decodeW *
+ (coredynp.instruction_length + coredynp.pc_width);
+ /* pipe stage Renaming/IssueQ */
+ //3*coredynp.phy_ireg_width means 2 sources and 1 dest
+ num_piperegs += coredynp.issueW *
+ (coredynp.instruction_length + coredynp.pc_width + 3 *
+ coredynp.phy_ireg_width) * coredynp.num_hthreads;
+ /* pipe stage IssueQ/Dispatch */
+ num_piperegs += coredynp.issueW *
+ (coredynp.instruction_length + 3 * coredynp.phy_ireg_width);
+ /* pipe stage Dispatch/EXE */
+
+ num_piperegs += coredynp.issueW *
+ (3 * coredynp.phy_ireg_width + coredynp.pc_width +
+ pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/);
+ /* 2^opcode_length means the total decoded signal for the opcode*/
+ num_piperegs += coredynp.issueW *
+ (2 * coredynp.int_data_width + pow(2.0, opcode_length)
+ /*+2*powers (2,reg_length)*/);
+ /* 2 source operands in EXE; assume 2 EXE stages since we do not really distinguish OP */
+ num_piperegs += coredynp.issueW *
+ (2 * coredynp.int_data_width + pow(2.0, opcode_length)
+ /*+2*powers (2,reg_length)*/);
+ /* pipe stage EXE/MEM: data to be read/written, address */
+ //the memory opcode still needs to be passed
+ num_piperegs += coredynp.issueW *
+ (coredynp.int_data_width + coredynp.v_address_width +
+ pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/);
+ /* pipe stage MEM/WB; result data, writeback regs */
+ num_piperegs += coredynp.issueW *
+ (coredynp.int_data_width + coredynp.phy_ireg_width
+ /* powers (2,opcode_length) +
+ (2,opcode_length)+2*powers (2,reg_length)*/);
+ /* pipe stage WB/CM; result data, regs that need to be updated, address for resolving memory ops at the top of the ROB */
+ num_piperegs += coredynp.commitW *
+ (coredynp.int_data_width + coredynp.v_address_width +
+ coredynp.phy_ireg_width
+ /*+ powers (2,opcode_length)*2*powers (2,reg_length)*/) *
+ coredynp.num_hthreads;
+ num_stages = 12;
}
/* assume 50% extra in control registers and interrupt registers (rule of thumb) */
num_piperegs = num_piperegs * 1.5;
- tot_stage_vector=num_piperegs;
- per_stage_vector=tot_stage_vector/num_stages;
-
- if (coredynp.core_ty==Inorder)
- {
- if (coredynp.pipeline_stages>6)
- num_piperegs= per_stage_vector*coredynp.pipeline_stages;
+ tot_stage_vector = num_piperegs;
+ per_stage_vector = tot_stage_vector / num_stages;
+
+ if (coredynp.core_ty == Inorder) {
+ if (coredynp.pipeline_stages > 6)
+ num_piperegs = per_stage_vector * coredynp.pipeline_stages;
+ } else { //OOO
+ if (coredynp.pipeline_stages > 12)
+ num_piperegs = per_stage_vector * coredynp.pipeline_stages;
}
- else//OOO
- {
- if (coredynp.pipeline_stages>12)
- num_piperegs= per_stage_vector*coredynp.pipeline_stages;
- }
- }
+ }
}
-FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- fu_type(fu_type_)
-{
- double area_t;//, leakage, gate_leakage;
+FunctionalUnit::FunctionalUnit(XMLNode* _xml_data,
+ InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats,
+ enum FU_type fu_type_)
+ : McPATComponent(_xml_data),
+ interface_ip(*interface_ip_), core_params(_core_params),
+ core_stats(_core_stats), fu_type(fu_type_) {
+ double area_t;
+ double leakage;
+ double gate_leakage;
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
-
- //XML_interface=_XML_interface;
- uca_org_t result2;
- result2 = init_interface(&interface_ip);
- if (XML->sys.Embedded)
- {
- if (fu_type == FPU)
- {
- num_fu=coredynp.num_fpus;
+ clockRate = core_params.clockRate;
+
+ uca_org_t result2;
+ // Temp name for the following function call
+ name = "Functional Unit";
+
+ result2 = init_interface(&interface_ip, name);
+
+ if (core_params.Embedded) {
+ if (fu_type == FPU) {
+ num_fu=core_params.num_fpus;
//area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number
//4.47 contains both VFP and NEON processing unit, VFP is about 40% and NEON is about 60%
@@ -449,10 +534,8 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam
per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per Hz energy(nJ)
//FPU power from Sandia's processor sizing tech report
FU_height=(18667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data
- }
- else if (fu_type == ALU)
- {
- num_fu=coredynp.num_alus;
+ } else if (fu_type == ALU) {
+ num_fu=core_params.num_alus;
area_t = 280*260*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
@@ -462,10 +545,8 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam
per_access_energy = 1.15/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ)
FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU
- }
- else if (fu_type == MUL)
- {
- num_fu=coredynp.num_muls;
+ } else if (fu_type == MUL) {
+ num_fu=core_params.num_muls;
area_t = 280*260*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
@@ -474,197 +555,117 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam
base_energy = 0;
per_access_energy = 1.15*2/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch
FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data
- }
- else
- {
+ } else {
cout<<"Unknown Functional Unit Type"<<endl;
exit(0);
}
per_access_energy *=0.5;//According to ARM data embedded processor has much lower per acc energy
+ } else {
+ if (fu_type == FPU) {
+ name = "Floating Point Unit(s)";
+ num_fu = core_params.num_fpus;
+ area_t = 8.47 * 1e6 * (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 /
+ 90.0);//this is um^2
+ if (g_ip->F_sz_nm > 90)
+ area_t = 8.47 * 1e6 *
+ g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
+ leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
+ gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
+ //W The base energy of ALU average numbers from Intel 4G and
+ //773Mhz (Wattch)
+ base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 3;
+ base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
+ 1.2);
+ per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ)
+ FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data
+ } else if (fu_type == ALU) {
+ name = "Integer ALU(s)";
+ num_fu = core_params.num_alus;
+ //this is um^2 ALU + MUl
+ area_t = 280 * 260 * 2 * g_tp.scaling_factor.logic_scaling_co_eff;
+ leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
+ gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
+ //W The base energy of ALU average numbers from Intel 4G and 773Mhz
+ //(Wattch)
+ base_energy = core_params.core_ty == Inorder ? 0 : 89e-3;
+ base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
+ 1.2);
+ per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ)
+ FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU
+ } else if (fu_type == MUL) {
+ name = "Multiply/Divide Unit(s)";
+ num_fu = core_params.num_muls;
+ //this is um^2 ALU + MUl
+ area_t = 280 * 260 * 2 * 3 *
+ g_tp.scaling_factor.logic_scaling_co_eff;
+ leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
+ gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
+ //W The base energy of ALU average numbers from Intel 4G and 773Mhz
+ //(Wattch)
+ base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 2;
+ base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
+ 1.2);
+ per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch
+ FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data
+ } else {
+ cout << "Unknown Functional Unit Type" << endl;
+ exit(0);
}
- else
- {
- if (fu_type == FPU)
- {
- num_fu=coredynp.num_fpus;
- //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
- area_t = 8.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2
- if (g_ip->F_sz_nm>90)
- area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
- leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
- gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
- //energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction in FPU usually it can have up to 20 cycles.
- base_energy = coredynp.core_ty==Inorder? 0: 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
- base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
- per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ)
- FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data
- }
- else if (fu_type == ALU)
- {
- num_fu=coredynp.num_alus;
- area_t = 280*260*2*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
- leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
- gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
- base_energy = coredynp.core_ty==Inorder? 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
- base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
- per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ)
- FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU
+ }
- }
- else if (fu_type == MUL)
- {
- num_fu=coredynp.num_muls;
- area_t = 280*260*2*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
- leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
- gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
- base_energy = coredynp.core_ty==Inorder? 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
- base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
- per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch
- FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data
- }
- else
- {
- cout<<"Unknown Functional Unit Type"<<endl;
- exit(0);
- }
- }
- //IEXEU, simple ALU and FPU
- // double C_ALU, C_EXEU, C_FPU; //Lum Equivalent capacitance of IEXEU and FPU. Based on Intel and Sun 90nm process fabracation.
- //
- // C_ALU = 0.025e-9;//F
- // C_EXEU = 0.05e-9; //F
- // C_FPU = 0.35e-9;//F
area.set_area(area_t*num_fu);
- leakage *= num_fu;
- gate_leakage *=num_fu;
- double macro_layout_overhead = g_tp.macro_layout_overhead;
-// if (!XML->sys.Embedded)
- area.set_area(area.get_area()*macro_layout_overhead);
-}
-
-void FunctionalUnit::computeEnergy(bool is_tdp)
-{
- double pppm_t[4] = {1,1,1,1};
- double FU_duty_cycle;
- if (is_tdp)
- {
-
-
- set_pppm(pppm_t, 2, 2, 2, 2);//2 means two source operands needs to be passed for each int instruction.
- if (fu_type == FPU)
- {
- stats_t.readAc.access = num_fu;
- tdp_stats = stats_t;
- FU_duty_cycle = coredynp.FPU_duty_cycle;
- }
- else if (fu_type == ALU)
- {
- stats_t.readAc.access = 1*num_fu;
- tdp_stats = stats_t;
- FU_duty_cycle = coredynp.ALU_duty_cycle;
- }
- else if (fu_type == MUL)
- {
- stats_t.readAc.access = num_fu;
- tdp_stats = stats_t;
- FU_duty_cycle = coredynp.MUL_duty_cycle;
- }
-
- //power.readOp.dynamic = base_energy/clockRate + energy*stats_t.readAc.access;
- power.readOp.dynamic = per_access_energy*stats_t.readAc.access + base_energy/clockRate;
- double sckRation = g_tp.sckt_co_eff;
- power.readOp.dynamic *= sckRation*FU_duty_cycle;
- power.writeOp.dynamic *= sckRation;
- power.searchOp.dynamic *= sckRation;
-
- power.readOp.leakage = leakage;
- power.readOp.gate_leakage = gate_leakage;
- double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty);
- power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
-
- }
- else
- {
- if (fu_type == FPU)
- {
- stats_t.readAc.access = XML->sys.core[ithCore].fpu_accesses;
- rtp_stats = stats_t;
- }
- else if (fu_type == ALU)
- {
- stats_t.readAc.access = XML->sys.core[ithCore].ialu_accesses;
- rtp_stats = stats_t;
- }
- else if (fu_type == MUL)
- {
- stats_t.readAc.access = XML->sys.core[ithCore].mul_accesses;
- rtp_stats = stats_t;
- }
-
- //rt_power.readOp.dynamic = base_energy*executionTime + energy*stats_t.readAc.access;
- rt_power.readOp.dynamic = per_access_energy*stats_t.readAc.access + base_energy*executionTime;
- double sckRation = g_tp.sckt_co_eff;
- rt_power.readOp.dynamic *= sckRation;
- rt_power.writeOp.dynamic *= sckRation;
- rt_power.searchOp.dynamic *= sckRation;
-
- }
-
-
+ power.readOp.leakage = leakage * num_fu;
+ power.readOp.gate_leakage = gate_leakage * num_fu;
+
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(Core_device, core_params.core_ty);
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage * long_channel_device_reduction;
+ double macro_layout_overhead = g_tp.macro_layout_overhead;
+ area.set_area(area.get_area()*macro_layout_overhead);
}
-void FunctionalUnit::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
-// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl;
- if (is_tdp)
- {
- if (fu_type == FPU)
- {
- cout << indent_str << "Floating Point Units (FPUs) (Count: "<< coredynp.num_fpus <<" ):" << endl;
- cout << indent_str_next << "Area = " << area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage << " W" << endl;
- cout << indent_str_next<< "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else if (fu_type == ALU)
- {
- cout << indent_str << "Integer ALUs (Count: "<< coredynp.num_alus <<" ):" << endl;
- cout << indent_str_next << "Area = " << area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage << " W" << endl;
- cout << indent_str_next<< "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else if (fu_type == MUL)
- {
- cout << indent_str << "Complex ALUs (Mul/Div) (Count: "<< coredynp.num_muls <<" ):" << endl;
- cout << indent_str_next << "Area = " << area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage << " W" << endl;
- cout << indent_str_next<< "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
-
- }
+void FunctionalUnit::computeEnergy() {
+ double pppm_t[4] = {1, 1, 1, 1};
+ double FU_duty_cycle;
+ double sckRation = g_tp.sckt_co_eff;
+
+ // TDP power calculation
+ //2 means two source operands need to be passed for each int instruction.
+ set_pppm(pppm_t, 2, 2, 2, 2);
+ tdp_stats.readAc.access = num_fu;
+ if (fu_type == FPU) {
+ FU_duty_cycle = core_stats.FPU_duty_cycle;
+ } else if (fu_type == ALU) {
+ FU_duty_cycle = core_stats.ALU_duty_cycle;
+ } else if (fu_type == MUL) {
+ FU_duty_cycle = core_stats.MUL_duty_cycle;
+ }
- }
- else
- {
- }
+ power.readOp.dynamic =
+ per_access_energy * tdp_stats.readAc.access + base_energy / clockRate;
+ power.readOp.dynamic *= sckRation * FU_duty_cycle;
+
+ // Runtime power calculation
+ if (fu_type == FPU) {
+ rtp_stats.readAc.access = core_stats.fpu_accesses;
+ } else if (fu_type == ALU) {
+ rtp_stats.readAc.access = core_stats.ialu_accesses;
+ } else if (fu_type == MUL) {
+ rtp_stats.readAc.access = core_stats.mul_accesses;
+ }
+ rt_power.readOp.dynamic = per_access_energy * rtp_stats.readAc.access +
+ base_energy * execution_time;
+ rt_power.readOp.dynamic *= sckRation;
+
+ output_data.area = area.get_area() / 1e6;
+ output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
+ output_data.subthreshold_leakage_power =
+ (longer_channel_device) ? power.readOp.longer_channel_leakage :
+ power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
+ output_data.runtime_dynamic_energy = rt_power.readOp.dynamic;
}
void FunctionalUnit::leakage_feedback(double temperature)
@@ -672,7 +673,8 @@ void FunctionalUnit::leakage_feedback(double temperature)
// Update the temperature and initialize the global interfaces.
interface_ip.temp = (unsigned int)round(temperature/10.0)*10;
- uca_org_t init_result = init_interface(&interface_ip); // init_result is dummy
+ // init_result is dummy
+ uca_org_t init_result = init_interface(&interface_ip, name);
// This is part of FunctionalUnit()
double area_t, leakage, gate_leakage;
@@ -706,277 +708,220 @@ void FunctionalUnit::leakage_feedback(double temperature)
power.readOp.leakage = leakage*num_fu;
power.readOp.gate_leakage = gate_leakage*num_fu;
- power.readOp.longer_channel_leakage = longer_channel_device_reduction(Core_device, coredynp.core_ty);
+ power.readOp.longer_channel_leakage =
+ longer_channel_device_reduction(Core_device, core_params.core_ty);
}
-UndiffCore::UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_, bool embedded_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- core_ty(coredynp.core_ty),
- embedded(XML->sys.Embedded),
- pipeline_stage(coredynp.pipeline_stages),
- num_hthreads(coredynp.num_hthreads),
- issue_width(coredynp.issueW),
- exist(exist_)
-// is_default(_is_default)
-{
- if (!exist) return;
- double undifferentiated_core=0;
- double core_tx_density=0;
- double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+UndiffCore::UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & dyn_p_,
+ bool exist_)
+ : McPATComponent(_xml_data),
+ interface_ip(*interface_ip_), coredynp(dyn_p_),
+ core_ty(coredynp.core_ty), embedded(coredynp.Embedded),
+ pipeline_stage(coredynp.pipeline_stages),
+ num_hthreads(coredynp.num_hthreads), issue_width(coredynp.issueW),
+ exist(exist_) {
+ if (!exist) return;
+
+ name = "Undifferentiated Core";
+ clockRate = coredynp.clockRate;
+
+ double undifferentiated_core = 0;
+ double core_tx_density = 0;
+ double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
double undifferentiated_core_coe;
- //XML_interface=_XML_interface;
- uca_org_t result2;
- result2 = init_interface(&interface_ip);
-
- //Compute undifferentiated core area at 90nm.
- if (embedded==false)
- {
- //Based on the results of polynomial/log curve fitting based on undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, Opteron die measurements
- if (core_ty==OOO)
- {
- //undifferentiated_core = (0.0764*pipeline_stage*pipeline_stage -2.3685*pipeline_stage + 10.405);//OOO
- undifferentiated_core = (3.57*log(pipeline_stage)-1.2643)>0?(3.57*log(pipeline_stage)-1.2643):0;
- }
- else if (core_ty==Inorder)
- {
- //undifferentiated_core = (0.1238*pipeline_stage + 7.2572)*0.9;//inorder
- undifferentiated_core = (-2.19*log(pipeline_stage)+6.55)>0?(-2.19*log(pipeline_stage)+6.55):0;
- }
- else
- {
- cout<<"invalid core type"<<endl;
- exit(0);
- }
- undifferentiated_core *= (1+ logtwo(num_hthreads)* 0.0716);
+ uca_org_t result2;
+ result2 = init_interface(&interface_ip, name);
+
+ //Compute undifferentiated core area at 90nm.
+ if (embedded == false) {
+ //Based on polynomial/log curve fitting of undifferentiated core die measurements from Niagara, Niagara2, Merom, Penryn, Prescott, and Opteron
+ if (core_ty == OOO) {
+ undifferentiated_core = (3.57 * log(pipeline_stage) - 1.2643) > 0 ?
+ (3.57 * log(pipeline_stage) - 1.2643) : 0;
+ } else if (core_ty == Inorder) {
+ undifferentiated_core = (-2.19 * log(pipeline_stage) + 6.55) > 0 ?
+ (-2.19 * log(pipeline_stage) + 6.55) : 0;
+ } else {
+ cout << "invalid core type" << endl;
+ exit(0);
}
- else
- {
- //Based on the results in paper "parametrized processor models" Sandia Labs
- if (XML->sys.opt_clockrate)
+ undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0716);
+ } else {
+ //Based on the results in the paper "parametrized processor models" from Sandia Labs
+ if (opt_for_clk)
undifferentiated_core_coe = 0.05;
else
undifferentiated_core_coe = 0;
- undifferentiated_core = (0.4109* pipeline_stage - 0.776)*undifferentiated_core_coe;
- undifferentiated_core *= (1+ logtwo(num_hthreads)* 0.0426);
- }
-
- undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff*1e6;//change from mm^2 to um^2
- core_tx_density = g_tp.scaling_factor.core_tx_density;
- //undifferentiated_core = 3*1e6;
- //undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff;//(g_ip->F_sz_um*g_ip->F_sz_um/0.09/0.09)*;
- power.readOp.leakage = undifferentiated_core*(core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
- power.readOp.gate_leakage = undifferentiated_core*(core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;
-
- double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty);
- power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
- area.set_area(undifferentiated_core);
-
- scktRatio = g_tp.sckt_co_eff;
- power.readOp.dynamic *= scktRatio;
- power.writeOp.dynamic *= scktRatio;
- power.searchOp.dynamic *= scktRatio;
- macro_PR_overhead = g_tp.macro_layout_overhead;
- area.set_area(area.get_area()*macro_PR_overhead);
-
-
-
-// double vt=g_tp.peri_global.Vth;
-// double velocity_index=1.1;
-// double c_in=gate_C(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r , 0.0, false);
-// double c_out= drain_C_(g_tp.min_w_nmos_, NCH, 2, 1, g_tp.cell_h_def, false) + drain_C_(g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, PCH, 1, 1, g_tp.cell_h_def, false) + c_in;
-// double w_nmos=g_tp.min_w_nmos_;
-// double w_pmos=g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
-// double i_on_n=1.0;
-// double i_on_p=1.0;
-// double i_on_n_in=1.0;
-// double i_on_p_in=1;
-// double vdd=g_tp.peri_global.Vdd;
-
-// power.readOp.sc=shortcircuit_simple(vt, velocity_index, c_in, c_out, w_nmos,w_pmos, i_on_n, i_on_p,i_on_n_in, i_on_p_in, vdd);
-// power.readOp.dynamic=c_out*vdd*vdd/2;
-
-// cout<<power.readOp.dynamic << "dynamic" <<endl;
-// cout<<power.readOp.sc << "sc" << endl;
-
-// power.readOp.sc=shortcircuit(vt, velocity_index, c_in, c_out, w_nmos,w_pmos, i_on_n, i_on_p,i_on_n_in, i_on_p_in, vdd);
-// power.readOp.dynamic=c_out*vdd*vdd/2;
-//
-// cout<<power.readOp.dynamic << "dynamic" <<endl;
-// cout<<power.readOp.sc << "sc" << endl;
-
-
-
-}
-
-
-void UndiffCore::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- {
- cout << indent_str << "UndiffCore:" << endl;
- cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl;
- cout << indent_str_next<< "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- //cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- cout << indent_str << "UndiffCore:" << endl;
- cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- //cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <<endl;
- }
+ undifferentiated_core = (0.4109 * pipeline_stage - 0.776) *
+ undifferentiated_core_coe;
+ undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0426);
+ }
+ undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff *
+ 1e6;//change from mm^2 to um^2
+ core_tx_density = g_tp.scaling_factor.core_tx_density;
+ power.readOp.leakage = undifferentiated_core*(core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
+ power.readOp.gate_leakage = undifferentiated_core*(core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;
+
+ double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty);
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage * long_channel_device_reduction;
+ area.set_area(undifferentiated_core);
+
+ scktRatio = g_tp.sckt_co_eff;
+ power.readOp.dynamic *= scktRatio;
+ power.writeOp.dynamic *= scktRatio;
+ power.searchOp.dynamic *= scktRatio;
+ macro_PR_overhead = g_tp.macro_layout_overhead;
+ area.set_area(area.get_area()*macro_PR_overhead);
+
+ output_data.area = area.get_area() / 1e6;
+ output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
+ output_data.subthreshold_leakage_power =
+ longer_channel_device ? power.readOp.longer_channel_leakage :
+ power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
}
-inst_decoder::inst_decoder(
- bool _is_default,
- const InputParameter *configure_interface,
- int opcode_length_,
- int num_decoders_,
- bool x86_,
- enum Device_ty device_ty_,
- enum Core_type core_ty_)
-:is_default(_is_default),
- opcode_length(opcode_length_),
- num_decoders(num_decoders_),
- x86(x86_),
- device_ty(device_ty_),
- core_ty(core_ty_)
- {
- /*
- * Instruction decoder is different from n to 2^n decoders
- * that are commonly used in row decoders in memory arrays.
- * The RISC instruction decoder is typically a very simple device.
- * We can decode an instruction by simply
- * separating the machine word into small parts using wire slices
- * The RISC instruction decoder can be approximate by the n to 2^n decoders,
- * although this approximation usually underestimate power since each decoded
- * instruction normally has more than 1 active signal.
- *
- * However, decoding a CISC instruction word is much more difficult
- * than the RISC case. A CISC decoder is typically set up as a state machine.
- * The machine reads the opcode field to determine
- * what type of instruction it is,
- * and where the other data values are.
- * The instruction word is read in piece by piece,
- * and decisions are made at each stage as to
- * how the remainder of the instruction word will be read.
- * (sequencer and ROM are usually needed)
- * An x86 decoder can be even more complex since
- * it involve both decoding instructions into u-ops and
- * merge u-ops when doing micro-ops fusion.
- */
- bool is_dram=false;
- double pmos_to_nmos_sizing_r;
- double load_nmos_width, load_pmos_width;
- double C_driver_load, R_wire_load;
- Area cell;
-
- l_ip=*configure_interface;
- local_result = init_interface(&l_ip);
- cell.h =g_tp.cell_h_def;
- cell.w =g_tp.cell_h_def;
-
- num_decoder_segments = (int)ceil(opcode_length/18.0);
- if (opcode_length > 18) opcode_length = 18;
- num_decoded_signals= (int)pow(2.0,opcode_length);
- pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- load_nmos_width=g_tp.max_w_nmos_ /2;
- load_pmos_width= g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r;
- C_driver_load = 1024*gate_C(load_nmos_width + load_pmos_width, 0, is_dram); //TODO: this number 1024 needs to be revisited
- R_wire_load = 3000*l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um;
-
- final_dec = new Decoder(
- num_decoded_signals,
- false,
- C_driver_load,
- R_wire_load,
- false/*is_fa*/,
- false/*is_dram*/,
- false/*wl_tr*/, //to use peri device
- cell);
-
- PredecBlk * predec_blk1 = new PredecBlk(
- num_decoded_signals,
- final_dec,
- 0,//Assuming predec and dec are back to back
- 0,
- 1,//Each Predec only drives one final dec
- false/*is_dram*/,
- true);
- PredecBlk * predec_blk2 = new PredecBlk(
- num_decoded_signals,
- final_dec,
- 0,//Assuming predec and dec are back to back
- 0,
- 1,//Each Predec only drives one final dec
- false/*is_dram*/,
- false);
-
- PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false);
- PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false);
-
- pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2);
-
- double area_decoder = final_dec->area.get_area() * num_decoded_signals * num_decoder_segments*num_decoders;
- //double w_decoder = area_decoder / area.get_h();
- double area_pre_dec = (predec_blk_drv1->area.get_area() +
- predec_blk_drv2->area.get_area() +
- predec_blk1->area.get_area() +
- predec_blk2->area.get_area())*
- num_decoder_segments*num_decoders;
- area.set_area(area.get_area()+ area_decoder + area_pre_dec);
- double macro_layout_overhead = g_tp.macro_layout_overhead;
- double chip_PR_overhead = g_tp.chip_layout_overhead;
- area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead);
-
- inst_decoder_delay_power();
-
- double sckRation = g_tp.sckt_co_eff;
- power.readOp.dynamic *= sckRation;
- power.writeOp.dynamic *= sckRation;
- power.searchOp.dynamic *= sckRation;
-
- double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
- power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
-
+InstructionDecoder::InstructionDecoder(XMLNode* _xml_data, const string _name,
+ bool _is_default,
+ const InputParameter *configure_interface,
+ int opcode_length_, int num_decoders_,
+ bool x86_,
+ double clockRate_,
+ enum Device_ty device_ty_,
+ enum Core_type core_ty_)
+ : McPATComponent(_xml_data), is_default(_is_default),
+ opcode_length(opcode_length_), num_decoders(num_decoders_), x86(x86_),
+ device_ty(device_ty_), core_ty(core_ty_) {
+ /*
+ * Instruction decoder is different from n to 2^n decoders
+ * that are commonly used in row decoders in memory arrays.
+ * The RISC instruction decoder is typically a very simple device.
+ * We can decode an instruction by simply
+ * separating the machine word into small parts using wire slices
+ * The RISC instruction decoder can be approximated by the n to 2^n decoders,
+ * although this approximation usually underestimates power since each decoded
+ * instruction normally has more than 1 active signal.
+ *
+ * However, decoding a CISC instruction word is much more difficult
+ * than the RISC case. A CISC decoder is typically set up as a state machine.
+ * The machine reads the opcode field to determine
+ * what type of instruction it is,
+ * and where the other data values are.
+ * The instruction word is read in piece by piece,
+ * and decisions are made at each stage as to
+ * how the remainder of the instruction word will be read.
+ * (sequencer and ROM are usually needed)
+ * An x86 decoder can be even more complex since
+ * it involves both decoding instructions into u-ops and
+ * merging u-ops when doing micro-op fusion.
+ */
+ name = _name;
+ clockRate = clockRate_;
+ bool is_dram = false;
+ double pmos_to_nmos_sizing_r;
+ double load_nmos_width, load_pmos_width;
+ double C_driver_load, R_wire_load;
+ Area cell;
+
+ l_ip = *configure_interface;
+ local_result = init_interface(&l_ip, name);
+ cell.h = g_tp.cell_h_def;
+ cell.w = g_tp.cell_h_def;
+
+ num_decoder_segments = (int)ceil(opcode_length / 18.0);
+ if (opcode_length > 18) opcode_length = 18;
+ num_decoded_signals = (int)pow(2.0, opcode_length);
+ pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+ load_nmos_width = g_tp.max_w_nmos_ / 2;
+ load_pmos_width = g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r;
+ C_driver_load = 1024 * gate_C(load_nmos_width + load_pmos_width, 0, is_dram);
+ R_wire_load = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um;
+
+ final_dec = new Decoder(
+ num_decoded_signals,
+ false,
+ C_driver_load,
+ R_wire_load,
+ false/*is_fa*/,
+ false/*is_dram*/,
+ false/*wl_tr*/, //to use peri device
+ cell);
+
+ PredecBlk * predec_blk1 = new PredecBlk(
+ num_decoded_signals,
+ final_dec,
+ 0,//Assuming predec and dec are back to back
+ 0,
+ 1,//Each Predec only drives one final dec
+ false/*is_dram*/,
+ true);
+ PredecBlk * predec_blk2 = new PredecBlk(
+ num_decoded_signals,
+ final_dec,
+ 0,//Assuming predec and dec are back to back
+ 0,
+ 1,//Each Predec only drives one final dec
+ false/*is_dram*/,
+ false);
+
+ PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false);
+ PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false);
+
+ pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2);
+
+ double area_decoder = final_dec->area.get_area() * num_decoded_signals *
+ num_decoder_segments * num_decoders;
+ //double w_decoder = area_decoder / area.get_h();
+ double area_pre_dec = (predec_blk_drv1->area.get_area() +
+ predec_blk_drv2->area.get_area() +
+ predec_blk1->area.get_area() +
+ predec_blk2->area.get_area()) *
+ num_decoder_segments * num_decoders;
+ area.set_area(area.get_area() + area_decoder + area_pre_dec);
+ double macro_layout_overhead = g_tp.macro_layout_overhead;
+ double chip_PR_overhead = g_tp.chip_layout_overhead;
+ area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead);
+
+ inst_decoder_delay_power();
+
+ double sckRation = g_tp.sckt_co_eff;
+ power.readOp.dynamic *= sckRation;
+ power.writeOp.dynamic *= sckRation;
+ power.searchOp.dynamic *= sckRation;
+
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(device_ty, core_ty);
+ power.readOp.longer_channel_leakage = power.readOp.leakage *
+ long_channel_device_reduction;
+
+ output_data.area = area.get_area() / 1e6;
+ output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
+ output_data.subthreshold_leakage_power = power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
}
-void inst_decoder::inst_decoder_delay_power()
-{
+void InstructionDecoder::inst_decoder_delay_power() {
- double dec_outrisetime;
- double inrisetime=0, outrisetime;
- double pppm_t[4] = {1,1,1,1};
- double squencer_passes = x86?2:1;
+ double dec_outrisetime;
+ double inrisetime = 0, outrisetime;
+ double pppm_t[4] = {1, 1, 1, 1};
+ double squencer_passes = x86 ? 2 : 1;
- outrisetime = pre_dec->compute_delays(inrisetime);
- dec_outrisetime = final_dec->compute_delays(outrisetime);
- set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments);
- power = power + pre_dec->power*pppm_t;
+ outrisetime = pre_dec->compute_delays(inrisetime);
+ dec_outrisetime = final_dec->compute_delays(outrisetime);
+ set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments);
+ power = power + pre_dec->power * pppm_t;
set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals,
- num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments);
- power = power + final_dec->power*pppm_t;
+ num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments);
+ power = power + final_dec->power * pppm_t;
}
-void inst_decoder::leakage_feedback(double temperature)
-{
+
+void InstructionDecoder::leakage_feedback(double temperature) {
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
- uca_org_t init_result = init_interface(&l_ip); // init_result is dummy
+ uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
final_dec->leakage_feedback(temperature);
pre_dec->leakage_feedback(temperature);
@@ -1000,15 +945,14 @@ void inst_decoder::leakage_feedback(double temperature)
power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
}
-inst_decoder::~inst_decoder()
-{
- local_result.cleanup();
+InstructionDecoder::~InstructionDecoder() {
+ local_result.cleanup();
- delete final_dec;
+ delete final_dec;
- delete pre_dec->blk1;
- delete pre_dec->blk2;
- delete pre_dec->drv1;
- delete pre_dec->drv2;
- delete pre_dec;
+ delete pre_dec->blk1;
+ delete pre_dec->blk2;
+ delete pre_dec->drv1;
+ delete pre_dec->drv2;
+ delete pre_dec;
}
diff --git a/ext/mcpat/logic.h b/ext/mcpat/logic.h
index e2a35e845..19c774ef9 100644
--- a/ext/mcpat/logic.h
+++ b/ext/mcpat/logic.h
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,18 +26,16 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef LOGIC_H_
#define LOGIC_H_
-#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>
-#include "XML_Parse.h"
#include "arch_const.h"
#include "basic_circuit.h"
#include "basic_components.h"
@@ -49,185 +48,190 @@
using namespace std;
-class selection_logic : public Component{
+class selection_logic : public McPATComponent {
public:
- selection_logic(bool _is_default, int win_entries_,
- int issue_width_, const InputParameter *configure_interface,
- enum Device_ty device_ty_=Core_device,
- enum Core_type core_ty_=Inorder);//, const ParseXML *_XML_interface);
- bool is_default;
- InputParameter l_ip;
- uca_org_t local_result;
- const ParseXML *XML_interface;
- int win_entries;
- int issue_width;
- int num_threads;
- enum Device_ty device_ty;
- enum Core_type core_ty;
-
- void selection_power();
+ bool is_default;
+ InputParameter l_ip;
+ uca_org_t local_result;
+ int win_entries;
+ int issue_width;
+ double accesses;
+ int num_threads;
+ enum Device_ty device_ty;
+ enum Core_type core_ty;
+
+ selection_logic(XMLNode* _xml_data, bool _is_default, int _win_entries,
+ int issue_width_, const InputParameter* configure_interface,
+ string _name, double _accesses,
+ double clockRate_ = 0.0f,
+ enum Device_ty device_ty_ = Core_device,
+ enum Core_type core_ty_ = Inorder);
+ void computeArea();
+ void computeEnergy();
void leakage_feedback(double temperature); // TODO
+ // TODO: Add a destructor
};
-class dep_resource_conflict_check : public Component{
+class dep_resource_conflict_check : public McPATComponent {
public:
- dep_resource_conflict_check(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, int compare_bits_, bool _is_default=true);
- InputParameter l_ip;
- uca_org_t local_result;
- double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ;
- CoreDynParam coredynp;
- int compare_bits;
- bool is_default;
- statsDef tdp_stats;
- statsDef rtp_stats;
- statsDef stats_t;
- powerDef power_t;
-
- void conflict_check_power();
- double compare_cap();
- ~dep_resource_conflict_check(){
- local_result.cleanup();
- }
+ InputParameter l_ip;
+ uca_org_t local_result;
+ double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ;
+ CoreParameters coredynp;
+ int compare_bits;
+ bool is_default;
+ statsDef stats_t;
+
+ dep_resource_conflict_check(XMLNode* _xml_data, const string _name,
+ const InputParameter *configure_interface,
+ const CoreParameters & dyn_p_, int compare_bits_,
+ double clockRate_ = 0.0f,
+ bool _is_default = true);
+ void conflict_check_power();
+ double compare_cap();
+ void computeEnergy() {};
+ ~dep_resource_conflict_check() {
+ local_result.cleanup();
+ }
void leakage_feedback(double temperature);
};
-class inst_decoder: public Component{
+class InstructionDecoder: public McPATComponent {
public:
- inst_decoder(bool _is_default, const InputParameter *configure_interface,
- int opcode_length_,
- int num_decoders_,
- bool x86_,
- enum Device_ty device_ty_=Core_device,
- enum Core_type core_ty_=Inorder);
- inst_decoder();
- bool is_default;
- int opcode_length;
- int num_decoders;
- bool x86;
- int num_decoder_segments;
- int num_decoded_signals;
- InputParameter l_ip;
- uca_org_t local_result;
- enum Device_ty device_ty;
- enum Core_type core_ty;
-
- Decoder * final_dec;
- Predec * pre_dec;
-
- statsDef tdp_stats;
- statsDef rtp_stats;
- statsDef stats_t;
- powerDef power_t;
- void inst_decoder_delay_power();
- ~inst_decoder();
+ Decoder* final_dec;
+ Predec* pre_dec;
+
+ bool is_default;
+ int opcode_length;
+ int num_decoders;
+ bool x86;
+ int num_decoder_segments;
+ int num_decoded_signals;
+ InputParameter l_ip;
+ uca_org_t local_result;
+ enum Device_ty device_ty;
+ enum Core_type core_ty;
+ statsDef stats_t;
+
+ InstructionDecoder(XMLNode* _xml_data, const string _name, bool _is_default,
+ const InputParameter *configure_interface,
+ int opcode_length_, int num_decoders_, bool x86_,
+ double clockRate_ = 0.0f,
+ enum Device_ty device_ty_ = Core_device,
+ enum Core_type core_ty_ = Inorder);
+ InstructionDecoder();
+ void computeEnergy() {};
+ void inst_decoder_delay_power();
+ ~InstructionDecoder();
void leakage_feedback(double temperature);
};
+// TODO: This should be defined elsewhere? This isn't a true McPATComponent
class DFFCell : public Component {
public:
- DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp,double _cell_load,
- const InputParameter *configure_interface);
- InputParameter l_ip;
- bool is_dram;
- double cell_load;
- double WdecNANDn;
- double WdecNANDp;
- double clock_cap;
- int model;
- int n_switch;
- int n_keep_1;
- int n_keep_0;
- int n_clock;
- powerDef e_switch;
- powerDef e_keep_1;
- powerDef e_keep_0;
- powerDef e_clock;
-
- double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out);
- void compute_DFF_cell(void);
- };
-
-class Pipeline : public Component{
+ InputParameter l_ip;
+ bool is_dram;
+ double cell_load;
+ double WdecNANDn;
+ double WdecNANDp;
+ double clock_cap;
+ int model;
+ int n_switch;
+ int n_keep_1;
+ int n_keep_0;
+ int n_clock;
+ powerDef e_switch;
+ powerDef e_keep_1;
+ powerDef e_keep_0;
+ powerDef e_clock;
+
+ DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, double _cell_load,
+ const InputParameter *configure_interface);
+ double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out);
+ void compute_DFF_cell(void);
+ ~DFFCell() {};
+};
+
+// TODO: This is a very ambiguous component. Try to refactor it.
+class Pipeline : public McPATComponent {
public:
- Pipeline(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, enum Device_ty device_ty_=Core_device, bool _is_core_pipeline=true, bool _is_default=true);
- InputParameter l_ip;
- uca_org_t local_result;
- CoreDynParam coredynp;
- enum Device_ty device_ty;
- bool is_core_pipeline, is_default;
- double num_piperegs;
-// int pipeline_stages;
-// int tot_stage_vector, per_stage_vector;
- bool process_ind;
- double WNANDn ;
- double WNANDp;
- double load_per_pipeline_stage;
-// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length;
-// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width;
-// bool thread_clock_gated;
-// bool in_order, multithreaded;
- void compute_stage_vector();
- void compute();
- ~Pipeline(){
- local_result.cleanup();
- };
+ InputParameter l_ip;
+ uca_org_t local_result;
+ CoreParameters coredynp;
+ enum Device_ty device_ty;
+ bool is_core_pipeline, is_default;
+ double num_piperegs;
+ bool process_ind;
+ double WNANDn;
+ double WNANDp;
+ double load_per_pipeline_stage;
+
+ Pipeline(XMLNode* _xml_data, const InputParameter *configure_interface,
+ const CoreParameters & dyn_p_,
+ enum Device_ty device_ty_ = Core_device,
+ bool _is_core_pipeline = true, bool _is_default = true);
+ void compute_stage_vector();
+ /**
+ * TODO: compute() completes work that should be completed in computeArea()
+ * and computeEnergy() recursively. Consider shifting these calculations
+ * around to be consistent with rest of hierarchy
+ */
+ void compute();
+ void computeArea() {};
+ // TODO: Move energy computation to this function to unify hierarchy
+ void computeEnergy() {};
+ ~Pipeline() {
+ local_result.cleanup();
+ };
};
-//class core_pipeline :public pipeline{
-//public:
-// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length;
-// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width;
-// bool thread_clock_gated;
-// bool in_order, multithreaded;
-// core_pipeline(bool _is_default, const InputParameter *configure_interface);
-// virtual void compute_stage_vector();
-//
-//};
-
-class FunctionalUnit :public Component{
+class FunctionalUnit : public McPATComponent {
public:
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- CoreDynParam coredynp;
- double FU_height;
- double clockRate,executionTime;
- double num_fu;
- double energy, base_energy,per_access_energy, leakage, gate_leakage;
- bool is_default;
- enum FU_type fu_type;
- statsDef tdp_stats;
- statsDef rtp_stats;
- statsDef stats_t;
- powerDef power_t;
-
- FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
+ InputParameter interface_ip;
+ CoreParameters core_params;
+ CoreStatistics core_stats;
+ double FU_height;
+ double num_fu;
+ double energy;
+ double base_energy;
+ double per_access_energy;
+ bool is_default;
+ enum FU_type fu_type;
+ statsDef stats_t;
+
+ FunctionalUnit(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & _core_params,
+ const CoreStatistics & _core_stats, enum FU_type fu_type);
+ void computeEnergy();
void leakage_feedback(double temperature);
-
+ ~FunctionalUnit() {};
};
-class UndiffCore :public Component{
+// TODO: This is a very ambiguous component. Try to refactor it.
+class UndiffCore : public McPATComponent {
public:
- UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true, bool embedded_=false);
- ParseXML *XML;
- int ithCore;
- InputParameter interface_ip;
- CoreDynParam coredynp;
- double clockRate,executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- enum Core_type core_ty;
- bool opt_performance, embedded;
- double pipeline_stage,num_hthreads,issue_width;
- bool is_default;
-
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~UndiffCore(){};
- bool exist;
-
-
+ InputParameter interface_ip;
+ CoreParameters coredynp;
+ double scktRatio;
+ double chip_PR_overhead;
+ double macro_PR_overhead;
+ enum Core_type core_ty;
+ bool opt_performance;
+ bool embedded;
+ double pipeline_stage;
+ double num_hthreads;
+ double issue_width;
+ bool is_default;
+ bool exist;
+
+ UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const CoreParameters & dyn_p_,
+ bool exist_ = true);
+ void computeArea() {};
+ // TODO: Move energy computation to this function to unify hierarchy
+ void computeEnergy() {};
+ ~UndiffCore() {};
};
#endif /* LOGIC_H_ */
diff --git a/ext/mcpat/main.cc b/ext/mcpat/main.cc
index 8acce8d23..ec266f386 100644
--- a/ext/mcpat/main.cc
+++ b/ext/mcpat/main.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,15 +26,17 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
+#include <sys/stat.h>
+
+#include <cassert>
#include <iostream>
-#include "XML_Parse.h"
-#include "globalvar.h"
+#include "basic_components.h"
#include "io.h"
-#include "processor.h"
+#include "system.h"
#include "version.h"
#include "xmlParser.h"
@@ -41,61 +44,68 @@ using namespace std;
void print_usage(char * argv0);
-int main(int argc,char *argv[])
-{
- char * fb ;
- bool infile_specified = false;
- int plevel = 2;
- opt_for_clk =true;
- //cout.precision(10);
- if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help"))
- {
- print_usage(argv[0]);
- }
+int main(int argc, char *argv[]) {
+ char* xml_file = NULL;
+ int plevel = 2;
- for (int32_t i = 0; i < argc; i++)
- {
- if (argv[i] == string("-infile"))
- {
- infile_specified = true;
- i++;
- fb = argv[ i];
- }
-
- if (argv[i] == string("-print_level"))
- {
- i++;
- plevel = atoi(argv[i]);
- }
-
- if (argv[i] == string("-opt_for_clk"))
- {
- i++;
- opt_for_clk = (bool)atoi(argv[i]);
- }
- }
- if (infile_specified == false)
- {
- print_usage(argv[0]);
+ for (int32_t i = 0; i < argc; i++) {
+ if (argv[i] == string("-infile")) {
+ xml_file = argv[++i];
+
+ } else if (argv[i] == string("-print_level")) {
+ plevel = atoi(argv[++i]);
+
+ } else if (argv[i] == string("-opt_for_clk")) {
+ McPATComponent::opt_for_clk = (bool)atoi(argv[++i]);
}
+ }
+
+ // Ensure that the XML file was specified
+ if (xml_file == NULL) {
+ cerr << "ERROR: Please specify infile\n\n";
+ print_usage(argv[0]);
+ }
+
+ // Ensure that the XML file exists
+ struct stat file_info;
+ if (stat(xml_file, &file_info)) {
+ cerr << "ERROR: File not found: " << xml_file << endl << endl;
+ print_usage(argv[0]);
+ }
+
+ cout << "McPAT (version " << VER_MAJOR << "." << VER_MINOR
+ << " of " << VER_UPDATE << ") is computing the target processor...\n "
+ << endl;
+
+ // Parse the XML input file
+ XMLNode xml_data = XMLNode::openFileHelper(xml_file, "component");
+ unsigned int num_children = xml_data.nChildNode("component");
+ assert(num_children == 1);
+ XMLNode system_xml = xml_data.getChildNode("component");
+ assert(strcmp(system_xml.getAttribute("type"), "System") == 0);
+
+ // Recursively instantiate the system hierarchy
+ System* system = new System(&system_xml);
+
+ // Recursively compute chip area
+ system->computeArea();
+
+ // Recursively compute the power consumed
+ system->computeEnergy();
+ // Recursively output the computed values
+ system->displayData(2, plevel);
- cout<<"McPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
- << " of " << VER_UPDATE << ") is computing the target processor...\n "<<endl;
+ // Clean up
+ delete system;
+ return 0;
- //parse XML-based interface
- ParseXML *p1= new ParseXML();
- p1->parse(fb);
- Processor proc(p1);
- proc.displayEnergy(2, plevel);
- delete p1;
- return 0;
}
-void print_usage(char * argv0)
-{
+void print_usage(char * argv0) {
cerr << "How to use McPAT:" << endl;
- cerr << " mcpat -infile <input file name> -print_level < level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for target clock rate)>"<< endl;
- //cerr << " Note:default print level is at processor level, please increase it to see the details" << endl;
+ cerr << " mcpat -infile <input file name> -print_level < "
+ << "level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P "
+ << "only)/1 (optimzed for target clock rate)>" << endl;
exit(1);
}
diff --git a/ext/mcpat/mcpat.mk b/ext/mcpat/mcpat.mk
index f89f499a9..acb73211e 100644
--- a/ext/mcpat/mcpat.mk
+++ b/ext/mcpat/mcpat.mk
@@ -29,13 +29,16 @@ VPATH = cacti
SRCS = \
Ucache.cc \
- XML_Parse.cc \
arbiter.cc \
area.cc \
array.cc \
bank.cc \
basic_circuit.cc \
basic_components.cc \
+ bus_interconnect.cc \
+ cachearray.cc \
+ cachecontroller.cc \
+ cacheunit.cc \
cacti_interface.cc \
component.cc \
core.cc \
@@ -52,14 +55,13 @@ SRCS = \
noc.cc \
nuca.cc \
parameter.cc \
- processor.cc \
router.cc \
- sharedcache.cc \
subarray.cc \
+ system.cc \
technology.cc \
uca.cc \
wire.cc \
- xmlParser.cc
+ xmlParser.cc
OBJS = $(patsubst %.cc,$(ODIR)/obj_$(TAG)/%.o,$(SRCS))
diff --git a/ext/mcpat/mcpatXeonCore.mk b/ext/mcpat/mcpatXeonCore.mk
deleted file mode 100644
index 20cf0ddc8..000000000
--- a/ext/mcpat/mcpatXeonCore.mk
+++ /dev/null
@@ -1,81 +0,0 @@
-TARGET = mcpatXeonCore
-SHELL = /bin/sh
-.PHONY: all depend clean
-.SUFFIXES: .cc .o
-
-ifndef NTHREADS
- NTHREADS = 4
-endif
-
-
-LIBS =
-INCS = -lm
-
-ifeq ($(TAG),dbg)
- DBG = -Wall
- OPT = -ggdb -g -O0 -DNTHREADS=1 -Icacti
-else
- DBG =
- OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti
- #OPT = -O0 -DNTHREADS=$(NTHREADS)
-endif
-
-#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT)
-CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT)
-CXX = g++ -m32
-CC = gcc -m32
-
-VPATH = cacti
-
-SRCS = \
- Ucache.cc \
- XML_Parse.cc \
- arbiter.cc \
- area.cc \
- array.cc \
- bank.cc \
- basic_circuit.cc \
- basic_components.cc \
- cacti_interface.cc \
- component.cc \
- core.cc \
- crossbar.cc \
- decoder.cc \
- htree2.cc \
- interconnect.cc \
- io.cc \
- iocontrollers.cc \
- logic.cc \
- main.cc \
- mat.cc \
- memoryctrl.cc \
- noc.cc \
- nuca.cc \
- parameter.cc \
- processor.cc \
- router.cc \
- sharedcache.cc \
- subarray.cc \
- technology_xeon_core.cc \
- uca.cc \
- wire.cc \
- xmlParser.cc
-
-OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS))
-
-all: obj_$(TAG)/$(TARGET)
- cp -f obj_$(TAG)/$(TARGET) $(TARGET)
-
-obj_$(TAG)/$(TARGET) : $(OBJS)
- $(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread
-
-#obj_$(TAG)/%.o : %.cc
-# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $<
-
-obj_$(TAG)/%.o : %.cc
- $(CXX) $(CXXFLAGS) -c $< -o $@
-
-clean:
- -rm -f *.o $(TARGET)
-
-
diff --git a/ext/mcpat/memoryctrl.cc b/ext/mcpat/memoryctrl.cc
index ae3bc75ec..dec24512e 100644
--- a/ext/mcpat/memoryctrl.cc
+++ b/ext/mcpat/memoryctrl.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,18 +26,19 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
+
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <string>
-#include "XML_Parse.h"
#include "basic_circuit.h"
#include "basic_components.h"
+#include "common.h"
#include "const.h"
#include "io.h"
#include "logic.h"
@@ -69,668 +71,543 @@
*
*/
-MCBackend::MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
-:l_ip(*interface_ip_),
- mc_type(mc_type_),
- mcp(mcp_)
-{
-
- local_result = init_interface(&l_ip);
- compute();
-
+MCBackend::MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const MCParameters & mcp_, const MCStatistics & mcs_)
+ : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) { // copies *interface_ip_ into l_ip; mcp/mcs are initialized from the passed parameters and statistics
+ name = "Transaction Engine";
+ local_result = init_interface(&l_ip, name);
+
+ // Set up stats for the power calculations: TDP stats are derived from channel count and clock rate, runtime (RTP) stats from mcs
+ tdp_stats.reset();
+ tdp_stats.readAc.access = 0.5 * mcp.num_channels * mcp.clockRate; // half of the channel-cycles are counted as reads...
+ tdp_stats.writeAc.access = 0.5 * mcp.num_channels * mcp.clockRate; // ...and the other half as writes
+ rtp_stats.reset();
+ rtp_stats.readAc.access = mcs.reads;
+ rtp_stats.writeAc.access = mcs.writes;
}
+void MCBackend::computeArea() { // Stores the back-end area estimate in output_data.area
+ // NOTE(review): this said "nm^2", but computeEnergy() multiplies output_data.area by 1e6 to obtain um^2, so the value is presumably in mm^2 -- confirm
+ if (mcp.mc_type == MC) {
+ if (mcp.type == 0) {
+ output_data.area = (2.7927 * log(mcp.peak_transfer_rate * 2) -
+ 19.862) / 2.0 * mcp.dataBusWidth / 128.0 *
+ (l_ip.F_sz_um / 0.09) * mcp.num_channels;
+ } else {
+ output_data.area = 0.15 * mcp.dataBusWidth / 72.0 *
+ (l_ip.F_sz_um / 0.065) * (l_ip.F_sz_um / 0.065) *
+ mcp.num_channels;
+ }
+ } else {
+ //skip old model: any mc_type other than MC is rejected here
+ cout << "Unknown memory controllers" << endl;
+ exit(0); // terminates the process (with status 0), so the assignment below is never reached
+ //area based on Cadence ChipEstimator for 8bit bus
+ output_data.area = 0.243 * mcp.dataBusWidth / 8;
+ }
+}
-void MCBackend::compute()
-{
- //double max_row_addr_width = 20.0;//Current address 12~18bits
- double C_MCB, mc_power, backend_dyn, backend_gates;//, refresh_period,refresh_freq;//Equivalent per bit Cap for backend,
- double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- double NMOS_sizing, PMOS_sizing;
-
- if (mc_type == MC)
- {
- if (mcp.type == 0)
- {
- //area = (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09);
- area.set_area((2.7927*log(mcp.peakDataTransferRate*2)-19.862)/2.0*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6);//um^2
- //assuming the approximately same scaling factor as seen in processors.
- //C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode processor which has a very basic mc on chip.
- //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process.
- //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et
- mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
- C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
- power_t.readOp.dynamic = C_MCB*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(mcp.dataBusWidth/*+mcp.addressBusWidth*/);//per access energy in memory controller
- power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
- power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
+void MCBackend::computeEnergy() { // Fills power.readOp and the leakage, peak-dynamic and runtime-energy fields of output_data
+ double C_MCB, mc_power;
+ double backend_dyn;
+ double backend_gates;
+ double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+ double NMOS_sizing = g_tp.min_w_nmos_;
+ double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+ double area_um2 = output_data.area * 1e6; // area rescaled by 1e6 for the area-based leakage terms below
+
+ if (mcp.mc_type == MC) {
+ if (mcp.type == 0) {
+ //assuming approximately the same scaling factor as seen in processors.
+ //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers. The base power (W) is divided by device frequency and vdd and scaled to the target process.
+ //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et al.
+ mc_power = 4.32*0.1;//4.32W@1GHz @65nm Cadence ChipEstimator, 10% for backend
+ C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
+ //per access energy in memory controller
+ power.readOp.dynamic = C_MCB * g_tp.peri_global.Vdd *
+ g_tp.peri_global.Vdd *
+ (mcp.dataBusWidth/*+mcp.addressBusWidth*/);
+ power.readOp.leakage = area_um2 / 2 *
+ (g_tp.scaling_factor.core_tx_density) *
+ cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
+ g_tp.peri_global.Vdd;//unit W
+ power.readOp.gate_leakage = area_um2 / 2 *
+ (g_tp.scaling_factor.core_tx_density) *
+ cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
+ g_tp.peri_global.Vdd;//unit W
+ } else {
+ //Average on DDR2/3 protocol controller and DDRC 1600/800A in
+ //Cadence ChipEstimator
+ backend_dyn = 0.9e-9 / 800e6 * mcp.clockRate / 12800 *
+ mcp.peak_transfer_rate* mcp.dataBusWidth / 72.0 *
+ g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 1.1 *
+ (l_ip.F_sz_nm/65.0);
+ //Scaling to technology and DIMM feature. The base IP supports
+ //DDR3-1600(PC3 12800)
+ //NOTE(review): this comment said "5000 is from Cadence ChipEstimator" but the code below uses 50000 -- confirm which is intended
+ backend_gates = 50000 * mcp.dataBusWidth / 64.0;
+
+ power.readOp.dynamic = backend_dyn;
+ power.readOp.leakage = (backend_gates) *
+ cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+ power.readOp.gate_leakage = (backend_gates) *
+ cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
}
- else
- { NMOS_sizing = g_tp.min_w_nmos_;
- PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
- area.set_area(0.15*mcp.dataBusWidth/72.0*(l_ip.F_sz_um/0.065)* (l_ip.F_sz_um/0.065)*mcp.num_channels*1e6);//um^2
- backend_dyn = 0.9e-9/800e6*mcp.clockRate/12800*mcp.peakDataTransferRate*mcp.dataBusWidth/72.0*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(l_ip.F_sz_nm/65.0);//Average on DDR2/3 protocol controller and DDRC 1600/800A in Cadence ChipEstimate
- //Scaling to technology and DIMM feature. The base IP support DDR3-1600(PC3 12800)
- backend_gates = 50000*mcp.dataBusWidth/64.0;//5000 is from Cadence ChipEstimator
-
- power_t.readOp.dynamic = backend_dyn;
- power_t.readOp.leakage = (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- power_t.readOp.gate_leakage = (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
+ } else {
+ //skip old model: the process exits on the next line, so the rest of this branch is unreachable
+ cout<<"Unknown memory controllers"<<endl;exit(0);
+ //mc_power = 4.32*0.1;//4.32W@1GHz @65nm Cadence ChipEstimator, 10% for backend
+ C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; // NOTE(review): mc_power is never assigned on this path (its assignment above is commented out); harmless only because of the exit(0)
+ power.readOp.leakage = area_um2 / 2 *
+ (g_tp.scaling_factor.core_tx_density) *
+ cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
+ g_tp.peri_global.Vdd;//unit W
+ power.readOp.gate_leakage = area_um2 / 2 *
+ (g_tp.scaling_factor.core_tx_density) *
+ cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
+ g_tp.peri_global.Vdd;//unit W
+ power.readOp.dynamic *= 1.2;
+ power.readOp.leakage *= 1.2;
+ power.readOp.gate_leakage *= 1.2;
+ //flash controller has about 20% more backend power since BCH ECC in
+ //flash is complex and power hungry
+ }
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(Uncore_device);
+ power.readOp.longer_channel_leakage = power.readOp.leakage *
+ long_channel_device_reduction;
+
+ // Output leakage power calculations: report the long-channel figure when longer_channel_device is set
+ output_data.subthreshold_leakage_power =
+ longer_channel_device ? power.readOp.longer_channel_leakage :
+ power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
+
+ // Peak dynamic power calculation: per-access value times the TDP read + write access counts
+ output_data.peak_dynamic_power = power.readOp.dynamic *
+ (tdp_stats.readAc.access + tdp_stats.writeAc.access);
+
+ // Runtime dynamic energy calculation: each access is scaled by llcBlockSize * BITS_PER_BYTE / dataBusWidth
+ output_data.runtime_dynamic_energy =
+ power.readOp.dynamic *
+ (rtp_stats.readAc.access + rtp_stats.writeAc.access) *
+ mcp.llcBlockSize * BITS_PER_BYTE / mcp.dataBusWidth +
+ // Original McPAT code: Assume 10% of peak power is consumed by routine
+ // jobs including memory refreshing and scrubbing
+ power.readOp.dynamic * 0.1 * execution_time;
+}
- }
- }
- else
- {//skip old model
- cout<<"Unknown memory controllers"<<endl;exit(0);
- area.set_area(0.243*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus
- //mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
- C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
- power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
- power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
- power_t.readOp.dynamic *= 1.2;
- power_t.readOp.leakage *= 1.2;
- power_t.readOp.gate_leakage *= 1.2;
- //flash controller has about 20% more backend power since BCH ECC in flash is complex and power hungry
- }
- double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
- power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
+MCPHY::MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const MCParameters & mcp_, const MCStatistics & mcs_)
+ : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) { // copies *interface_ip_ into l_ip; mcp/mcs are initialized from the passed parameters and statistics
+ name = "Physical Interface (PHY)";
+ local_result = init_interface(&l_ip, name);
+
+ // Set up stats for the power calculations
+ // TODO: Figure out why TDP stats aren't used (MCPHY::computeEnergy derives peak power from peak_transfer_rate rather than from tdp_stats)
+ tdp_stats.reset();
+ tdp_stats.readAc.access = 0.5 * mcp.num_channels; // channels split evenly between reads...
+ tdp_stats.writeAc.access = 0.5 * mcp.num_channels; // ...and writes
+ rtp_stats.reset();
+ rtp_stats.readAc.access = mcs.reads;
+ rtp_stats.writeAc.access = mcs.writes;
}
-void MCBackend::computeEnergy(bool is_tdp)
-{
- //backend uses internal data buswidth
- if (is_tdp)
- {
- //init stats for Peak
- stats_t.readAc.access = 0.5*mcp.num_channels;
- stats_t.writeAc.access = 0.5*mcp.num_channels;
- tdp_stats = stats_t;
+void MCPHY::computeArea() { // Stores the PHY area estimate in output_data.area
+ if (mcp.mc_type == MC) {
+ if (mcp.type == 0) {
+ //Based on die photos from Niagara 1 and 2.
+ //TODO merge this into undifferentiated core. PHY only achieves
+ //square root of the ideal scaling.
+ output_data.area = (6.4323 * log(mcp.peak_transfer_rate * 2) -
+ 48.134) * mcp.dataBusWidth / 128.0 *
+ (l_ip.F_sz_um / 0.09) * mcp.num_channels / 2;//TODO:/2
+ } else {
+ //DesignWare/Synopsys 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm
+ //for up to DDR3 2133 (PC3 17066)
+ double non_IO_percentage = 0.2;
+ output_data.area = 1.3 * non_IO_percentage / 2133.0e6 *
+ mcp.clockRate / 17066 * mcp.peak_transfer_rate *
+ mcp.dataBusWidth / 16.0 * (l_ip.F_sz_um / 0.040)*
+ (l_ip.F_sz_um / 0.040) * mcp.num_channels;//NOTE(review): the old "um^2" note looks stale -- the former *1e6 factor is gone and computeEnergy() scales this by 1e6, so presumably mm^2; confirm
}
- else
- {
- //init stats for runtime power (RTP)
- stats_t.readAc.access = mcp.reads;
- stats_t.writeAc.access = mcp.writes;
- tdp_stats = stats_t;
- }
- if (is_tdp)
- {
- power = power_t;
- power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access)*power_t.readOp.dynamic;
-
- }
- else
- {
- rt_power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access)*mcp.llcBlockSize*8.0/mcp.dataBusWidth*power_t.readOp.dynamic;
- rt_power = rt_power + power_t*pppm_lkg;
- rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
- //Assume 10% of peak power is consumed by routine job including memory refreshing and scrubbing
+ } else {
+ //area based on Cadence ChipEstimator for 8bit bus; the trailing /1e6 rescales the 0.4e6-based figure
+ output_data.area = 0.4e6 / 2 * mcp.dataBusWidth / 8 / 1e6;
}
}
-
-MCPHY::MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
-:l_ip(*interface_ip_),
- mc_type(mc_type_),
- mcp(mcp_)
-{
-
- local_result = init_interface(&l_ip);
- compute();
-}
-
-void MCPHY::compute()
-{
- //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc
- double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio() ;
- /*
- * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006;
- * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s
- */
- double power_per_gb_per_s, phy_dyn,phy_gates, NMOS_sizing, PMOS_sizing;
-
- if (mc_type == MC)
- {
- if (mcp.type == 0)
- {
- power_per_gb_per_s = mcp.LVDS? 0.01:0.04;
- //Based on die photos from Niagara 1 and 2.
- //TODO merge this into undifferentiated core.PHY only achieves square root of the ideal scaling.
- //area = (6.4323*log(peakDataTransferRate)-34.76)*memDataWidth/128.0*(l_ip.F_sz_um/0.09);
- area.set_area((6.4323*log(mcp.peakDataTransferRate*2)-48.134)*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6/2);//TODO:/2
- //This is from curve fitting based on Niagara 1 and 2's PHY die photo.
- //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
- //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits.
- power_t.readOp.dynamic = power_per_gb_per_s*sqrt(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
- power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
- power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
-
- }
- else
- {
- NMOS_sizing = g_tp.min_w_nmos_;
- PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
- //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm for upto DDR3 2133 (PC3 17066)
- double non_IO_percentage = 0.2;
- area.set_area(1.3*non_IO_percentage/2133.0e6*mcp.clockRate/17066*mcp.peakDataTransferRate*mcp.dataBusWidth/16.0*(l_ip.F_sz_um/0.040)* (l_ip.F_sz_um/0.040)*mcp.num_channels*1e6);//um^2
- phy_gates = 200000*mcp.dataBusWidth/64.0;
- power_per_gb_per_s = 0.01;
- //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
- power_t.readOp.dynamic = power_per_gb_per_s*(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
- power_t.readOp.leakage = (mcp.withPHY? phy_gates:0)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- power_t.readOp.gate_leakage = (mcp.withPHY? phy_gates:0)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
- }
-
- }
- else
- {
- area.set_area(0.4e6/2*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus
- }
+void MCPHY::computeEnergy() {
+ //PHY uses the internal data bus width, but the actual off-chip data width is 64 bits + ECC
+ double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
+ /*
+ * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006;
+ * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s
+ */
+ double power_per_gb_per_s, phy_dyn,phy_gates; // phy_dyn is not referenced anywhere in the lines shown
+ double NMOS_sizing = g_tp.min_w_nmos_;
+ double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
+ double area_um2 = output_data.area * 1e6; // area rescaled by 1e6 for the area-based leakage terms below
+
+ if (mcp.mc_type == MC) { // NOTE(review): for any other mc_type no power.readOp term is assigned in this function -- confirm they are initialized elsewhere
+ if (mcp.type == 0) {
+ power_per_gb_per_s = mcp.LVDS ? 0.01 : 0.04;
+ //This is from curve fitting based on Niagara 1 and 2's PHY die photo.
+ //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
+ //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits.
+ power.readOp.dynamic = power_per_gb_per_s *
+ sqrt(l_ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / 1.2 *
+ g_tp.peri_global.Vdd / 1.2;
+ power.readOp.leakage = area_um2 / 2 *
+ (g_tp.scaling_factor.core_tx_density) *
+ cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
+ g_tp.peri_global.Vdd;//unit W
+ power.readOp.gate_leakage = area_um2 / 2 *
+ (g_tp.scaling_factor.core_tx_density) *
+ cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
+ g_tp.peri_global.Vdd;//unit W
+ } else {
+ phy_gates = 200000 * mcp.dataBusWidth / 64.0;
+ power_per_gb_per_s = 0.01;
+ //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
+ power.readOp.dynamic = power_per_gb_per_s * (l_ip.F_sz_um / 0.09) *
+ g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
+ power.readOp.leakage = (mcp.withPHY ? phy_gates : 0) *
+ cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+ power.readOp.gate_leakage = (mcp.withPHY ? phy_gates : 0) *
+ cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
+ g_tp.peri_global.Vdd;//unit W
+ }
+ }
// double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface
// power_t.readOp.dynamic *= phy_factor;
// power_t.readOp.leakage *= phy_factor;
// power_t.readOp.gate_leakage *= phy_factor;
- double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
- power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(Uncore_device);
+ power.readOp.longer_channel_leakage =
+ power.readOp.leakage * long_channel_device_reduction;
+
+ // Leakage power calculations: report the long-channel figure when longer_channel_device is set
+ output_data.subthreshold_leakage_power =
+ longer_channel_device ? power.readOp.longer_channel_leakage :
+ power.readOp.leakage;
+ output_data.gate_leakage_power = power.readOp.gate_leakage;
+
+ // Peak dynamic power calculation: derived from peak_transfer_rate, bus width and channel count, divided by clockRate (tdp_stats is not used here)
+ double data_transfer_unit = (mcp.mc_type == MC)? 72:16;/*DIMM data width*/
+ output_data.peak_dynamic_power = power.readOp.dynamic *
+ (mcp.peak_transfer_rate * BITS_PER_BYTE / 1e3) * mcp.dataBusWidth /
+ data_transfer_unit * mcp.num_channels / mcp.clockRate;
+
+ // Runtime dynamic energy calculation: accesses * llcBlockSize * BITS_PER_BYTE, divided by 1e9
+ output_data.runtime_dynamic_energy =
+ power.readOp.dynamic *
+ (rtp_stats.readAc.access + rtp_stats.writeAc.access) *
+ mcp.llcBlockSize * BITS_PER_BYTE / 1e9 +
+ // Original McPAT code: Assume 10% of peak power is consumed by routine
+ // jobs including memory refreshing and scrubbing
+ power.readOp.dynamic * 0.1 * execution_time;
}
+MCFrontEnd::MCFrontEnd(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const MCParameters & mcp_, const MCStatistics & mcs_)
+ : McPATComponent(_xml_data), frontendBuffer(NULL), readBuffer(NULL),
+ writeBuffer(NULL), MC_arb(NULL), interface_ip(*interface_ip_),
+ mcp(mcp_), mcs(mcs_) { // Builds the reorder, read and write buffer arrays (added to children) plus the arbitration logic (not added)
+ int tag, data;
+ bool is_default = true;//indication for default setup
+
+ /* MC frontend engine channels share the same engines but logically partitioned
+ * For all hardware inside MC. different channels do not share resources.
+ * TODO: add decoding/mux stage to steer memory requests to different channels.
+ */
+
+ name = "Front End";
+
+ // Memory Request Reorder Buffer
+ tag = mcp.addressbus_width + EXTRA_TAG_BITS + mcp.opcodeW;
+ data = int(ceil((physical_address_width + mcp.opcodeW) / BITS_PER_BYTE)); // NOTE(review): ceil() only rounds up if BITS_PER_BYTE is floating-point (the replaced code divided by 8.0) -- confirm
+
+ interface_ip.cache_sz = data * mcp.req_window_size_per_channel;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = mcp.reorder_buffer_assoc;
+ interface_ip.nbanks = mcp.reorder_buffer_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = tag > 0;
+ interface_ip.tag_w = tag;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = mcp.num_channels;
+ interface_ip.num_wr_ports = interface_ip.num_rd_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = mcp.num_channels;
+ interface_ip.is_cache = true;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = false;
+ interface_ip.throughput = 1.0 / mcp.clockRate;
+ interface_ip.latency = 1.0 / mcp.clockRate;
+ frontendBuffer = new CacheArray(xml_data, &interface_ip, "Reorder Buffer",
+ Uncore_device, mcp.clockRate);
+ children.push_back(frontendBuffer);
+
+ frontendBuffer->tdp_stats.reset();
+ frontendBuffer->tdp_stats.readAc.access =
+ frontendBuffer->l_ip.num_search_ports +
+ frontendBuffer->l_ip.num_wr_ports;
+ frontendBuffer->tdp_stats.writeAc.access =
+ frontendBuffer->l_ip.num_search_ports;
+ frontendBuffer->tdp_stats.searchAc.access =
+ frontendBuffer->l_ip.num_wr_ports;
+ frontendBuffer->rtp_stats.reset();
+ // TODO: These stats assume that access power is calculated per buffer
+ // bit, which requires the stats to take into account the number of
+ // bits for each buffer slot. This should be revised...
+ //For each channel, each memory word needs to check the address data to
+ //achieve the best scheduling results,
+ //and this needs to be done on all physical DIMMs in each logical memory
+ //DIMM *mcp.dataBusWidth/72
+ frontendBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize *
+ BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72;
+ frontendBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize *
+ BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72;
+ frontendBuffer->rtp_stats.searchAc.access =
+ frontendBuffer->rtp_stats.readAc.access +
+ frontendBuffer->rtp_stats.writeAc.access;
+
+ // Read Buffers
+ //Support key words first operation
+ data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); // bus width converted from bits to bytes
+
+ interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = mcp.read_buffer_assoc;
+ interface_ip.nbanks = mcp.read_buffer_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = mcp.read_buffer_tag_width > 0;
+ interface_ip.tag_w = mcp.read_buffer_tag_width;
+ interface_ip.access_mode = Sequential;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = mcp.num_channels;
+ interface_ip.num_wr_ports = interface_ip.num_rd_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / mcp.clockRate;
+ interface_ip.latency = 1.0 / mcp.clockRate;
+ readBuffer = new CacheArray(xml_data, &interface_ip, "Read Buffer",
+ Uncore_device, mcp.clockRate);
+ children.push_back(readBuffer);
+
+ readBuffer->tdp_stats.reset();
+ readBuffer->tdp_stats.readAc.access = readBuffer->l_ip.num_rd_ports *
+ mcs.duty_cycle;
+ readBuffer->tdp_stats.writeAc.access = readBuffer->l_ip.num_wr_ports *
+ mcs.duty_cycle;
+ readBuffer->rtp_stats.reset();
+ readBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize *
+ BITS_PER_BYTE / mcp.dataBusWidth;
+ readBuffer->rtp_stats.writeAc.access = mcs.reads * mcp.llcBlockSize * // both read-buffer counts are driven by mcs.reads
+ BITS_PER_BYTE / mcp.dataBusWidth;
+
+ // Write Buffer
+ //Support key words first operation
+ data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); // bus width converted from bits to bytes
+
+ interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel;
+ interface_ip.line_sz = data;
+ interface_ip.assoc = mcp.write_buffer_assoc;
+ interface_ip.nbanks = mcp.write_buffer_nbanks;
+ interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
+ interface_ip.specific_tag = mcp.write_buffer_tag_width > 0;
+ interface_ip.tag_w = mcp.write_buffer_tag_width;
+ interface_ip.access_mode = Normal;
+ interface_ip.obj_func_dyn_energy = 0;
+ interface_ip.obj_func_dyn_power = 0;
+ interface_ip.obj_func_leak_power = 0;
+ interface_ip.obj_func_cycle_t = 1;
+ interface_ip.num_rw_ports = 0;
+ interface_ip.num_rd_ports = mcp.num_channels;
+ interface_ip.num_wr_ports = interface_ip.num_rd_ports;
+ interface_ip.num_se_rd_ports = 0;
+ interface_ip.num_search_ports = 0;
+ interface_ip.is_cache = false;
+ interface_ip.pure_cam = false;
+ interface_ip.pure_ram = true;
+ interface_ip.throughput = 1.0 / mcp.clockRate;
+ interface_ip.latency = 1.0 / mcp.clockRate;
+ writeBuffer = new CacheArray(xml_data, &interface_ip, "Write Buffer",
+ Uncore_device, mcp.clockRate);
+ children.push_back(writeBuffer);
+
+ writeBuffer->tdp_stats.reset();
+ writeBuffer->tdp_stats.readAc.access = writeBuffer->l_ip.num_rd_ports *
+ mcs.duty_cycle;
+ writeBuffer->tdp_stats.writeAc.access = writeBuffer->l_ip.num_wr_ports *
+ mcs.duty_cycle;
+ writeBuffer->rtp_stats.reset();
+ writeBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * // NOTE(review): the write buffer's read count is driven by mcs.reads -- verify that mcs.writes was not intended
+ BITS_PER_BYTE / mcp.dataBusWidth;
+ writeBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize *
+ BITS_PER_BYTE / mcp.dataBusWidth;
+
+ // TODO: Set up selection logic as a leaf node in tree
+ //selection and arbitration logic
+ MC_arb =
+ new selection_logic(xml_data, is_default,
+ mcp.req_window_size_per_channel, 1, &interface_ip,
+ "Arbitration Logic", (mcs.reads + mcs.writes),
+ mcp.clockRate, Uncore_device);
+ // MC_arb is not included in the roll-up due to the uninitialized area
+ //children.push_back(MC_arb);
+}
-void MCPHY::computeEnergy(bool is_tdp)
-{
- if (is_tdp)
- {
- //init stats for Peak
- stats_t.readAc.access = 0.5*mcp.num_channels; //time share on buses
- stats_t.writeAc.access = 0.5*mcp.num_channels;
- tdp_stats = stats_t;
- }
- else
- {
- //init stats for runtime power (RTP)
- stats_t.readAc.access = mcp.reads;
- stats_t.writeAc.access = mcp.writes;
- tdp_stats = stats_t;
- }
-
- if (is_tdp)
- {
- double data_transfer_unit = (mc_type == MC)? 72:16;/*DIMM data width*/
- power = power_t;
- power.readOp.dynamic = power.readOp.dynamic * (mcp.peakDataTransferRate*8*1e6/1e9/*change to Gbs*/)*mcp.dataBusWidth/data_transfer_unit*mcp.num_channels/mcp.clockRate;
- // divide by clock rate is for match the final computation where *clock is used
- //(stats_t.readAc.access*power_t.readOp.dynamic+
-// stats_t.writeAc.access*power_t.readOp.dynamic);
-
- }
- else
- {
- rt_power = power_t;
-// rt_power.readOp.dynamic = (stats_t.readAc.access*power_t.readOp.dynamic+
-// stats_t.writeAc.access*power_t.readOp.dynamic);
-
- rt_power.readOp.dynamic=power_t.readOp.dynamic*(stats_t.readAc.access + stats_t.writeAc.access)*(mcp.llcBlockSize)*8/1e9/mcp.executionTime*(mcp.executionTime);
- rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
+MemoryController::MemoryController(XMLNode* _xml_data,
+ InputParameter* interface_ip_)
+ : McPATComponent(_xml_data), interface_ip(*interface_ip_) { // keeps its own copy of *interface_ip_
+ name = "Memory Controller";
+ set_mc_param(); // presumably populates mcp/mcs before the children are built -- TODO confirm
+ // TODO: Pass params and stats as pointers
+ children.push_back(new MCFrontEnd(xml_data, &interface_ip, mcp, mcs));
+ children.push_back(new MCBackend(xml_data, &interface_ip, mcp, mcs));
+
+ if (mcp.type==0 || (mcp.type == 1 && mcp.withPHY)) { // a PHY child is always added for type 0, and for type 1 only when withPHY is set
+ children.push_back(new MCPHY(xml_data, &interface_ip, mcp, mcs));
}
}
-MCFrontEnd::MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
-:XML(XML_interface),
- interface_ip(*interface_ip_),
- mc_type(mc_type_),
- mcp(mcp_),
- MC_arb(0),
- frontendBuffer(0),
- readBuffer(0),
- writeBuffer(0)
-{
- /* All computations are for a single MC
- *
- */
-
- int tag, data;
- bool is_default =true;//indication for default setup
-
- /* MC frontend engine channels share the same engines but logically partitioned
- * For all hardware inside MC. different channels do not share resources.
- * TODO: add docodeing/mux stage to steer memory requests to different channels.
- */
-
- //memory request reorder buffer
- tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW;
- data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW)/8.0));
- interface_ip.cache_sz = data*XML->sys.mc.req_window_size_per_channel;
- interface_ip.line_sz = data;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/mcp.clockRate;
- interface_ip.latency = 1.0/mcp.clockRate;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc;
- interface_ip.num_wr_ports = interface_ip.num_rd_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc;
- frontendBuffer = new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device);
- frontendBuffer->area.set_area(frontendBuffer->area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
- area.set_area(area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
-
- //selection and arbitration logic
- MC_arb = new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel,1,&interface_ip, Uncore_device);
-
- //read buffers.
- data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte
- interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize;
- interface_ip.line_sz = data;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/mcp.clockRate;
- interface_ip.latency = 1.0/mcp.clockRate;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;//XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2;
- interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc;
- interface_ip.num_wr_ports = interface_ip.num_rd_ports;
- interface_ip.num_se_rd_ports = 0;
- readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device);
- readBuffer->area.set_area(readBuffer->area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
- area.set_area(area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
-
- //write buffer
- data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte
- interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize;
- interface_ip.line_sz = data;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/mcp.clockRate;
- interface_ip.latency = 1.0/mcp.clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc;
- interface_ip.num_wr_ports = interface_ip.num_rd_ports;
- interface_ip.num_se_rd_ports = 0;
- writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device);
- writeBuffer->area.set_area(writeBuffer->area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
- area.set_area(area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
+void MemoryController::initialize_params() {
+ memset(&mcp, 0, sizeof(MCParameters));
}
-void MCFrontEnd::computeEnergy(bool is_tdp)
-{
- if (is_tdp)
- {
- //init stats for Peak
- frontendBuffer->stats_t.readAc.access = frontendBuffer->l_ip.num_search_ports;
- frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports;
- frontendBuffer->tdp_stats = frontendBuffer->stats_t;
-
- readBuffer->stats_t.readAc.access = readBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle;
- readBuffer->stats_t.writeAc.access = readBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle;
- readBuffer->tdp_stats = readBuffer->stats_t;
-
- writeBuffer->stats_t.readAc.access = writeBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle;
- writeBuffer->stats_t.writeAc.access = writeBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle;
- writeBuffer->tdp_stats = writeBuffer->stats_t;
-
- }
- else
- {
- //init stats for runtime power (RTP)
- frontendBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads *mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72;
- //For each channel, each memory word need to check the address data to achieve best scheduling results.
- //and this need to be done on all physical DIMMs in each logical memory DIMM *mcp.dataBusWidth/72
- frontendBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72;
- frontendBuffer->rtp_stats = frontendBuffer->stats_t;
-
- readBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first
- readBuffer->stats_t.writeAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first
- readBuffer->rtp_stats = readBuffer->stats_t;
-
- writeBuffer->stats_t.readAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth;
- writeBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth;
- writeBuffer->rtp_stats = writeBuffer->stats_t;
- }
-
- frontendBuffer->power_t.reset();
- readBuffer->power_t.reset();
- writeBuffer->power_t.reset();
-
-// frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access*
-// (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+
-// frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic);
-
- frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access +
- frontendBuffer->stats_t.writeAc.access)*frontendBuffer->local_result.power.searchOp.dynamic
- + frontendBuffer->stats_t.readAc.access * frontendBuffer->local_result.power.readOp.dynamic
- + frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic;
-
- readBuffer->power_t.readOp.dynamic += (readBuffer->stats_t.readAc.access*
- readBuffer->local_result.power.readOp.dynamic+
- readBuffer->stats_t.writeAc.access*readBuffer->local_result.power.writeOp.dynamic);
- writeBuffer->power_t.readOp.dynamic += (writeBuffer->stats_t.readAc.access*
- writeBuffer->local_result.power.readOp.dynamic+
- writeBuffer->stats_t.writeAc.access*writeBuffer->local_result.power.writeOp.dynamic);
-
- if (is_tdp)
- {
- power = power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t +
- (frontendBuffer->local_result.power +
- readBuffer->local_result.power +
- writeBuffer->local_result.power)*pppm_lkg;
-
+void MemoryController::set_mc_param() {
+ initialize_params();
+
+ int num_children = xml_data->nChildNode("param");
+ int tech_type;
+ int mat_type;
+ int i;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_FP_IF("mc_clock", mcp.clockRate);
+ ASSIGN_INT_IF("tech_type", tech_type);
+ ASSIGN_ENUM_IF("mc_type", mcp.mc_type, MemoryCtrl_type);
+ ASSIGN_FP_IF("num_mcs", mcp.num_mcs);
+ ASSIGN_INT_IF("llc_line_length", mcp.llc_line_length);
+ ASSIGN_INT_IF("databus_width", mcp.databus_width);
+ ASSIGN_INT_IF("memory_channels_per_mc", mcp.num_channels);
+ ASSIGN_INT_IF("req_window_size_per_channel",
+ mcp.req_window_size_per_channel);
+ ASSIGN_INT_IF("IO_buffer_size_per_channel",
+ mcp.IO_buffer_size_per_channel);
+ ASSIGN_INT_IF("addressbus_width", mcp.addressbus_width);
+ ASSIGN_INT_IF("opcode_width", mcp.opcodeW);
+ ASSIGN_INT_IF("type", mcp.type);
+ ASSIGN_ENUM_IF("LVDS", mcp.LVDS, bool);
+ ASSIGN_ENUM_IF("withPHY", mcp.withPHY, bool);
+ ASSIGN_INT_IF("peak_transfer_rate", mcp.peak_transfer_rate);
+ ASSIGN_INT_IF("number_ranks", mcp.number_ranks);
+ ASSIGN_INT_IF("reorder_buffer_assoc", mcp.reorder_buffer_assoc);
+ ASSIGN_INT_IF("reorder_buffer_nbanks", mcp.reorder_buffer_nbanks);
+ ASSIGN_INT_IF("read_buffer_assoc", mcp.read_buffer_assoc);
+ ASSIGN_INT_IF("read_buffer_nbanks", mcp.read_buffer_nbanks);
+ ASSIGN_INT_IF("read_buffer_tag_width", mcp.read_buffer_tag_width);
+ ASSIGN_INT_IF("write_buffer_assoc", mcp.write_buffer_assoc);
+ ASSIGN_INT_IF("write_buffer_nbanks", mcp.write_buffer_nbanks);
+ ASSIGN_INT_IF("write_buffer_tag_width", mcp.write_buffer_tag_width);
+ ASSIGN_INT_IF("wire_mat_type", mat_type);
+ ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
+
+ else {
+ warnUnrecognizedParam(node_name);
+ }
}
- else
- {
- rt_power = rt_power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t +
- (frontendBuffer->local_result.power +
- readBuffer->local_result.power +
- writeBuffer->local_result.power)*pppm_lkg;
- rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
+
+ if (mcp.mc_type != MC) {
+ cout << "Unknown memory controller type: Only DRAM controller is "
+ << "supported for now" << endl;
+ exit(0);
}
-}
-void MCFrontEnd::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
-
- if (is_tdp)
- {
- cout << indent_str << "Front End ROB:" << endl;
- cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << frontendBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
-
- cout <<endl;
- cout << indent_str<< "Read Buffer:" << endl;
- cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << readBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << readBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str << "Write Buffer:" << endl;
- cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << writeBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << writeBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- cout << indent_str << "Front End ROB:" << endl;
- cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage << " W" << endl;
- cout <<endl;
- cout << indent_str<< "Read Buffer:" << endl;
- cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << readBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage << " W" << endl;
- cout <<endl;
- cout << indent_str << "Write Buffer:" << endl;
- cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << writeBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage << " W" << endl;
- }
+ // Change from MHz to Hz
+ mcp.clockRate *= 1e6;
-}
+ interface_ip.data_arr_ram_cell_tech_type = tech_type;
+ interface_ip.data_arr_peri_global_tech_type = tech_type;
+ interface_ip.tag_arr_ram_cell_tech_type = tech_type;
+ interface_ip.tag_arr_peri_global_tech_type = tech_type;
+ interface_ip.wire_is_mat_type = mat_type;
+ interface_ip.wire_os_mat_type = mat_type;
+ num_children = xml_data->nChildNode("stat");
+ for (i = 0; i < num_children; i++) {
+ XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
-MemoryController::MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_)
-:XML(XML_interface),
- interface_ip(*interface_ip_),
- mc_type(mc_type_),
- frontend(0),
- transecEngine(0),
- PHY(0),
- pipeLogic(0)
-{
- /* All computations are for a single MC
- *
- */
- interface_ip.wire_is_mat_type = 2;
- interface_ip.wire_os_mat_type = 2;
- interface_ip.wt =Global;
- set_mc_param();
- frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type);
- area.set_area(area.get_area()+ frontend->area.get_area());
- transecEngine = new MCBackend(&interface_ip, mcp, mc_type);
- area.set_area(area.get_area()+ transecEngine->area.get_area());
- if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
- {
- PHY = new MCPHY(&interface_ip, mcp, mc_type);
- area.set_area(area.get_area()+ PHY->area.get_area());
- }
- //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
-// transecEngine.initialize(&interface_ip);
-// transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate;
-// transecEngine.memDataWidth = dataBusWidth;
-// transecEngine.memRank = XML->sys.mem.number_ranks;
-// //transecEngine.memAccesses=XML->sys.mc.memory_accesses;
-// //transecEngine.llcBlocksize=llcBlockSize;
-// transecEngine.compute();
-// transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) ;
-// area.set_area(area.get_area()+ transecEngine.area.get_area());
-// ///cout<<"area="<<area<<endl;
-////
-// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
-// PHY.initialize(&interface_ip);
-// PHY.peakDataTransferRate = XML->sys.mem.peak_transfer_rate;
-// PHY.memDataWidth = dataBusWidth;
-// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
-// //PHY.llcBlocksize=llcBlockSize;
-// PHY.compute();
-// PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) ;
-// area.set_area(area.get_area()+ PHY.area.get_area());
- ///cout<<"area="<<area<<endl;
-//
-// interface_ip.pipeline_stages = 5;//normal memory controller has five stages in the pipeline.
-// interface_ip.per_stage_vector = addressBusWidth + XML->sys.core[0].opcode_width + dataBusWidth;
-// pipeLogic = new pipeline(is_default, &interface_ip);
-// //pipeLogic.init_pipeline(is_default, &interface_ip);
-// pipeLogic->compute_pipeline();
-// area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6);
-// area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing overhead
-//
-//
-//// //clock
-//// clockNetwork.init_wire_external(is_default, &interface_ip);
-//// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
-//// clockNetwork.end_wiring_level =5;//toplevel metal
-//// clockNetwork.start_wiring_level =5;//toplevel metal
-//// clockNetwork.num_regs = pipeLogic.tot_stage_vector;
-//// clockNetwork.optimize_wire();
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
+ ASSIGN_FP_IF("duty_cycle", mcs.duty_cycle);
+ ASSIGN_FP_IF("perc_load", mcs.perc_load);
+ ASSIGN_FP_IF("memory_reads", mcs.reads);
+ ASSIGN_INT_IF("memory_writes", mcs.writes);
-}
-void MemoryController::computeEnergy(bool is_tdp)
-{
-
- frontend->computeEnergy(is_tdp);
- transecEngine->computeEnergy(is_tdp);
- if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
- {
- PHY->computeEnergy(is_tdp);
- }
- if (is_tdp)
- {
- power = power + frontend->power + transecEngine->power;
- if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
- {
- power = power + PHY->power;
- }
- }
- else
- {
- rt_power = rt_power + frontend->rt_power + transecEngine->rt_power;
- if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
- {
- rt_power = rt_power + PHY->rt_power;
- }
- }
-}
-
-void MemoryController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- {
- cout << "Memory Controller:" << endl;
- cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str<< "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
- cout<<endl;
- cout << indent_str << "Front End Engine:" << endl;
- cout << indent_str_next << "Area = " << frontend->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << frontend->power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? frontend->power.readOp.longer_channel_leakage:frontend->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << frontend->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
- cout <<endl;
- if (plevel >2){
- frontend->displayEnergy(indent+4,is_tdp);
- }
- cout << indent_str << "Transaction Engine:" << endl;
- cout << indent_str_next << "Area = " << transecEngine->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << transecEngine->power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? transecEngine->power.readOp.longer_channel_leakage:transecEngine->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << transecEngine->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
- cout <<endl;
- if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
- {
- cout << indent_str << "PHY:" << endl;
- cout << indent_str_next << "Area = " << PHY->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << PHY->power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? PHY->power.readOp.longer_channel_leakage:PHY->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << PHY->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << PHY->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
- cout <<endl;
- }
- }
- else
- {
- cout << "Memory Controller:" << endl;
- cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout<<endl;
+ else {
+ warnUnrecognizedStat(node_name);
}
+ }
+ // Add ECC overhead
+ mcp.llcBlockSize = int(ceil(mcp.llc_line_length / BITS_PER_BYTE)) +
+ mcp.llc_line_length;
+ mcp.dataBusWidth = int(ceil(mcp.databus_width / BITS_PER_BYTE)) +
+ mcp.databus_width;
}
-void MemoryController::set_mc_param()
-{
-
- if (mc_type==MC)
- {
- mcp.clockRate =XML->sys.mc.mc_clock*2;//DDR double pumped
- mcp.clockRate *= 1e6;
- mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
-
- mcp.llcBlockSize =int(ceil(XML->sys.mc.llc_line_length/8.0))+XML->sys.mc.llc_line_length;//ecc overhead
- mcp.dataBusWidth =int(ceil(XML->sys.mc.databus_width/8.0)) + XML->sys.mc.databus_width;
- mcp.addressBusWidth =int(ceil(XML->sys.mc.addressbus_width));//XML->sys.physical_address_width;
- mcp.opcodeW =16;
- mcp.num_mcs = XML->sys.mc.number_mcs;
- mcp.num_channels = XML->sys.mc.memory_channels_per_mc;
- mcp.reads = XML->sys.mc.memory_reads;
- mcp.writes = XML->sys.mc.memory_writes;
- //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
- mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate;
- mcp.memRank = XML->sys.mc.number_ranks;
- //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
- //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
- //PHY.llcBlocksize=llcBlockSize;
- mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared
- mcp.LVDS = XML->sys.mc.LVDS;
- mcp.type = XML->sys.mc.type;
- mcp.withPHY = XML->sys.mc.withPHY;
- }
-// else if (mc_type==FLASHC)
-// {
-// mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double pumped
-// mcp.clockRate *= 1e6;
-// mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
-//
-// mcp.llcBlockSize =int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc overhead
-// mcp.dataBusWidth =int(ceil(XML->sys.flashc.databus_width/8.0)) + XML->sys.flashc.databus_width;
-// mcp.addressBusWidth =int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width;
-// mcp.opcodeW =16;
-// mcp.num_mcs = XML->sys.flashc.number_mcs;
-// mcp.num_channels = XML->sys.flashc.memory_channels_per_mc;
-// mcp.reads = XML->sys.flashc.memory_reads;
-// mcp.writes = XML->sys.flashc.memory_writes;
-// //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
-// mcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate;
-// mcp.memRank = XML->sys.flashc.number_ranks;
-// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
-// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
-// //PHY.llcBlocksize=llcBlockSize;
-// mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared
-// mcp.LVDS = XML->sys.flashc.LVDS;
-// mcp.type = XML->sys.flashc.type;
-// }
- else
- {
- cout<<"Unknown memory controller type: neither DRAM controller nor Flash controller" <<endl;
- exit(0);
- }
-}
+MCFrontEnd ::~MCFrontEnd() {
-MCFrontEnd ::~MCFrontEnd(){
-
- if(MC_arb) {delete MC_arb; MC_arb = 0;}
- if(frontendBuffer) {delete frontendBuffer; frontendBuffer = 0;}
- if(readBuffer) {delete readBuffer; readBuffer = 0;}
- if(writeBuffer) {delete writeBuffer; writeBuffer = 0;}
+ if (MC_arb) {
+ delete MC_arb;
+ MC_arb = NULL;
+ }
+ if (frontendBuffer) {
+ delete frontendBuffer;
+ frontendBuffer = NULL;
+ }
+ if (readBuffer) {
+ delete readBuffer;
+ readBuffer = NULL;
+ }
+ if (writeBuffer) {
+ delete writeBuffer;
+ writeBuffer = NULL;
+ }
}
-MemoryController ::~MemoryController(){
-
- if(frontend) {delete frontend; frontend = 0;}
- if(transecEngine) {delete transecEngine; transecEngine = 0;}
- if(PHY) {delete PHY; PHY = 0;}
- if(pipeLogic) {delete pipeLogic; pipeLogic = 0;}
+MemoryController::~MemoryController() {
+ // TODO: use default constructor to delete children
}
diff --git a/ext/mcpat/memoryctrl.h b/ext/mcpat/memoryctrl.h
index 65be20a8f..72e18dd5c 100644
--- a/ext/mcpat/memoryctrl.h
+++ b/ext/mcpat/memoryctrl.h
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,89 +26,75 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef MEMORYCTRL_H_
#define MEMORYCTRL_H_
-#include "XML_Parse.h"
-#include "parameter.h"
-//#include "io.h"
#include "array.h"
-//#include "Undifferentiated_Core_Area.h"
-#include <vector>
-
#include "basic_components.h"
+#include "cachearray.h"
+#include "parameter.h"
-class MCBackend : public Component {
- public:
+class MCBackend : public McPATComponent {
+public:
InputParameter l_ip;
uca_org_t local_result;
- enum MemoryCtrl_type mc_type;
- MCParam mcp;
- statsDef tdp_stats;
- statsDef rtp_stats;
+ MCParameters mcp;
+ MCStatistics mcs;
statsDef stats_t;
- powerDef power_t;
- MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
- void compute();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~MCBackend(){};
+
+ MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const MCParameters & mcp_, const MCStatistics & mcs_);
+ void computeArea();
+ void computeEnergy();
+ ~MCBackend() {};
};
-class MCPHY : public Component {
- public:
+class MCPHY : public McPATComponent {
+public:
InputParameter l_ip;
uca_org_t local_result;
- enum MemoryCtrl_type mc_type;
- MCParam mcp;
- statsDef tdp_stats;
- statsDef rtp_stats;
- statsDef stats_t;
- powerDef power_t;
- MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
- void compute();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~MCPHY(){};
+ MCParameters mcp;
+ MCStatistics mcs;
+ statsDef stats_t;
+
+ MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_,
+ const MCParameters & mcp_, const MCStatistics & mcs_);
+ void computeArea();
+ void computeEnergy();
+ ~MCPHY() {};
};
-class MCFrontEnd : public Component {
- public:
- ParseXML *XML;
- InputParameter interface_ip;
- enum MemoryCtrl_type mc_type;
- MCParam mcp;
- selection_logic * MC_arb;
- ArrayST * frontendBuffer;
- ArrayST * readBuffer;
- ArrayST * writeBuffer;
+class MCFrontEnd : public McPATComponent {
+public:
+ CacheArray* frontendBuffer;
+ CacheArray* readBuffer;
+ CacheArray* writeBuffer;
+ selection_logic* MC_arb;
- MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
+ InputParameter interface_ip;
+ MCParameters mcp;
+ MCStatistics mcs;
+
+ MCFrontEnd(XMLNode* _xml_data,
+ InputParameter* interface_ip_, const MCParameters & mcp_,
+ const MCStatistics & mcs_);
~MCFrontEnd();
};
-class MemoryController : public Component {
- public:
- ParseXML *XML;
- InputParameter interface_ip;
- enum MemoryCtrl_type mc_type;
- MCParam mcp;
- MCFrontEnd * frontend;
- MCBackend * transecEngine;
- MCPHY * PHY;
- Pipeline * pipeLogic;
+class MemoryController : public McPATComponent {
+public:
+ InputParameter interface_ip;
+ MCParameters mcp;
+ MCStatistics mcs;
- //clock_network clockNetwork;
- MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_);
+ MemoryController(XMLNode* _xml_data, InputParameter* interface_ip_);
+ void initialize_params();
void set_mc_param();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~MemoryController();
};
+
#endif /* MEMORYCTRL_H_ */
diff --git a/ext/mcpat/noc.cc b/ext/mcpat/noc.cc
index d5dfbb137..d6e309054 100644
--- a/ext/mcpat/noc.cc
+++ b/ext/mcpat/noc.cc
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -35,321 +36,236 @@
#include <iostream>
#include <string>
-#include "XML_Parse.h"
#include "basic_circuit.h"
+#include "common.h"
#include "const.h"
#include "io.h"
#include "noc.h"
#include "parameter.h"
-NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_, double link_len_)
-:XML(XML_interface),
-ithNoC(ithNoC_),
-interface_ip(*interface_ip_),
-router(0),
-link_bus(0),
-link_bus_exist(false),
-router_exist(false),
-M_traffic_pattern(M_traffic_pattern_)
-{
- /*
- * initialize, compute and optimize individual components.
- */
-
- if (XML->sys.Embedded)
- {
- interface_ip.wt =Global_30;
- interface_ip.wire_is_mat_type = 0;
- interface_ip.wire_os_mat_type = 1;
- }
- else
- {
- interface_ip.wt =Global;
- interface_ip.wire_is_mat_type = 2;
- interface_ip.wire_os_mat_type = 2;
- }
- set_noc_param();
- local_result=init_interface(&interface_ip);
- scktRatio = g_tp.sckt_co_eff;
-
- if (nocdynp.type)
- {/*
- * if NOC compute router, router links must be computed separately
- * and called from external
- * since total chip area must be known first
- */
- init_router();
- }
- else
- {
- init_link_bus(link_len_); //if bus compute bus
- }
-
- // //clock power
- // clockNetwork.init_wire_external(is_default, &interface_ip);
- // clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
- // clockNetwork.end_wiring_level =5;//toplevel metal
- // clockNetwork.start_wiring_level =5;//toplevel metal
- // clockNetwork.num_regs = corepipe.tot_stage_vector;
- // clockNetwork.optimize_wire();
+OnChipNetwork::OnChipNetwork(XMLNode* _xml_data, int ithNoC_,
+ InputParameter* interface_ip_)
+ : McPATComponent(_xml_data), router(NULL), link_bus(NULL), ithNoC(ithNoC_),
+ interface_ip(*interface_ip_), link_bus_exist(false),
+ router_exist(false) {
+ name = "On-Chip Network";
+ set_param_stats();
+ local_result = init_interface(&interface_ip, name);
+ scktRatio = g_tp.sckt_co_eff;
+
+ // TODO: Routers and links should be children of the NOC component
+ if (noc_params.type) {
+ init_router();
+ } else {
+ init_link_bus();
+ }
}
-void NoC::init_router()
-{
- router = new Router(nocdynp.flit_size,
- nocdynp.virtual_channel_per_port*nocdynp.input_buffer_entries_per_vc,
- nocdynp.virtual_channel_per_port, &(g_tp.peri_global),
- nocdynp.input_ports,nocdynp.output_ports, M_traffic_pattern);
- //router->print_router();
- area.set_area(area.get_area()+ router->area.get_area()*nocdynp.total_nodes);
-
- double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
- router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction;
- router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction;
- router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction;
- router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction;
- router_exist = true;
+void OnChipNetwork::init_router() {
+ router = new Router(noc_params.flit_size,
+ noc_params.virtual_channel_per_port *
+ noc_params.input_buffer_entries_per_vc,
+ noc_params.virtual_channel_per_port,
+ &(g_tp.peri_global),
+ noc_params.input_ports, noc_params.output_ports,
+ noc_params.M_traffic_pattern);
+ // TODO: Make a router class within McPAT that descends from McPATComponent
+ // children.push_back(router);
+ area.set_area(area.get_area() + router->area.get_area() *
+ noc_params.total_nodes);
+
+ double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
+ router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction;
+ router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction;
+ router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction;
+ router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction;
+ router_exist = true;
}
-void NoC ::init_link_bus(double link_len_)
-{
-
-
-// if (nocdynp.min_ports==1 )
- if (nocdynp.type)
- link_name = "Links";
- else
- link_name = "Bus";
-
- link_len=link_len_;
- assert(link_len>0);
-
- interface_ip.throughput = nocdynp.link_throughput/nocdynp.clockRate;
- interface_ip.latency = nocdynp.link_latency/nocdynp.clockRate;
-
- link_len /= (nocdynp.horizontal_nodes + nocdynp.vertical_nodes)/2;
-
- if (nocdynp.total_nodes >1) link_len /=2; //All links are shared by neighbors
- link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size,
- link_len, &interface_ip, 3, true/*pipelinable*/, nocdynp.route_over_perc);
-
- link_bus_tot_per_Router.area.set_area(link_bus_tot_per_Router.area.get_area()+ link_bus->area.get_area()
- * nocdynp.global_linked_ports);
-
- area.set_area(area.get_area()+ link_bus_tot_per_Router.area.get_area()* nocdynp.total_nodes);
- link_bus_exist = true;
-}
-void NoC::computeEnergy(bool is_tdp)
-{
- //power_point_product_masks
- double pppm_t[4] = {1,1,1,1};
- double M=nocdynp.duty_cycle;
- if (is_tdp)
- {
- //init stats for TDP
- stats_t.readAc.access = M;
- tdp_stats = stats_t;
- if (router_exist)
- {
- set_pppm(pppm_t, 1*M, 1, 1, 1);//reset traffic pattern
- router->power = router->power*pppm_t;
- set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes);
- power = power + router->power*pppm_t;
- }
- if (link_bus_exist)
- {
- if (nocdynp.type)
- set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports -1), nocdynp.global_linked_ports,
- nocdynp.global_linked_ports, nocdynp.global_linked_ports);
- //reset traffic pattern; local port do not have router links
- else
- set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports), nocdynp.global_linked_ports,
- nocdynp.global_linked_ports, nocdynp.global_linked_ports);//reset traffic pattern
-
- link_bus_tot_per_Router.power = link_bus->power*pppm_t;
-
- set_pppm(pppm_t, nocdynp.total_nodes,
- nocdynp.total_nodes,
- nocdynp.total_nodes,
- nocdynp.total_nodes);
- power = power + link_bus_tot_per_Router.power*pppm_t;
-
- }
- }
- else
- {
- //init stats for runtime power (RTP)
- stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses;
- rtp_stats = stats_t;
- set_pppm(pppm_t, 1, 0 , 0, 0);
- if (router_exist)
- {
- router->buffer.rt_power.readOp.dynamic = (router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)*rtp_stats.readAc.access ;
- router->crossbar.rt_power.readOp.dynamic = router->crossbar.power.readOp.dynamic*rtp_stats.readAc.access ;
- router->arbiter.rt_power.readOp.dynamic = router->arbiter.power.readOp.dynamic*rtp_stats.readAc.access ;
-
- router->rt_power = router->rt_power + (router->buffer.rt_power + router->crossbar.rt_power + router->arbiter.rt_power)*pppm_t +
- router->power*pppm_lkg;//TDP power must be calculated first!
- rt_power = rt_power + router->rt_power;
- }
- if (link_bus_exist)
- {
- set_pppm(pppm_t, rtp_stats.readAc.access, 1 , 1, rtp_stats.readAc.access);
- link_bus->rt_power = link_bus->power * pppm_t;
- rt_power = rt_power + link_bus->rt_power;
- }
-
- }
+void OnChipNetwork::init_link_bus() {
+ if (noc_params.type) {
+ link_name = "Links";
+ } else {
+ link_name = "Bus";
+ }
+
+ interface_ip.throughput = noc_params.link_throughput /
+ noc_params.clockRate;
+ interface_ip.latency = noc_params.link_latency / noc_params.clockRate;
+
+ link_len /= (noc_params.horizontal_nodes + noc_params.vertical_nodes) / 2;
+
+ if (noc_params.total_nodes > 1) {
+ //All links are shared by neighbors
+ link_len /= 2;
+ }
+ link_bus = new Interconnect(xml_data, "Link", Uncore_device,
+ noc_params.link_base_width,
+ noc_params.link_base_height,
+ noc_params.flit_size, link_len, &interface_ip,
+ noc_params.link_start_wiring_level,
+ noc_params.clockRate, true/*pipelinable*/,
+ noc_params.route_over_perc);
+ children.push_back(link_bus);
+
+ link_bus_exist = true;
}
-
-void NoC::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- double M =M_traffic_pattern*nocdynp.duty_cycle;
- /*only router as a whole has been applied the M_traffic_pattern(0.6 by default) factor in router.cc;
- * When power of crossbars, arbiters, etc need to be displayed, the M_traffic_pattern factor need to
- * be applied together with McPAT's extra traffic pattern.
- * */
- if (is_tdp)
- {
- cout << name << endl;
- cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str<< "Peak Dynamic = " << power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
- cout << indent_str << "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str<< "Runtime Dynamic = " << rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
- cout<<endl;
-
- if (router_exist)
- {
- cout << indent_str << "Router: " << endl;
- cout << indent_str_next << "Area = " << router->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next<< "Peak Dynamic = " << router->power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? router->power.readOp.longer_channel_leakage:router->power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << router->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next<< "Runtime Dynamic = " << router->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
- cout<<endl;
- if (plevel >2){
- cout << indent_str<< indent_str << "Virtual Channel Buffer:" << endl;
- cout << indent_str<< indent_str_next << "Area = " << router->buffer.area.get_area()*1e-6*nocdynp.input_ports<< " mm^2" << endl;
- cout << indent_str<< indent_str_next << "Peak Dynamic = " <<(router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)
- *nocdynp.min_ports*M*nocdynp.clockRate << " W" << endl;
- cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
- << (long_channel? router->buffer.power.readOp.longer_channel_leakage*nocdynp.input_ports:router->buffer.power.readOp.leakage*nocdynp.input_ports) <<" W" << endl;
- cout << indent_str<< indent_str_next << "Gate Leakage = " << router->buffer.power.readOp.gate_leakage*nocdynp.input_ports << " W" << endl;
- cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->buffer.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< indent_str<< "Crossbar:" << endl;
- cout << indent_str<< indent_str_next << "Area = " << router->crossbar.area.get_area()*1e-6 << " mm^2" << endl;
- cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->crossbar.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
- cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
- << (long_channel? router->crossbar.power.readOp.longer_channel_leakage:router->crossbar.power.readOp.leakage) << " W" << endl;
- cout << indent_str<< indent_str_next << "Gate Leakage = " << router->crossbar.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->crossbar.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
- cout <<endl;
- cout << indent_str<< indent_str<< "Arbiter:" << endl;
- cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->arbiter.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
- cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
- << (long_channel? router->arbiter.power.readOp.longer_channel_leakage:router->arbiter.power.readOp.leakage) << " W" << endl;
- cout << indent_str<< indent_str_next << "Gate Leakage = " << router->arbiter.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->arbiter.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
- cout <<endl;
- }
- }
- if (link_bus_exist)
- {
- cout << indent_str << (nocdynp.type? "Per Router ":"") << link_name<<": " << endl;
- cout << indent_str_next << "Area = " << link_bus_tot_per_Router.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next<< "Peak Dynamic = " << link_bus_tot_per_Router.power.readOp.dynamic*
- nocdynp.clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? link_bus_tot_per_Router.power.readOp.longer_channel_leakage:link_bus_tot_per_Router.power.readOp.leakage)
- <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << link_bus_tot_per_Router.power.readOp.gate_leakage
- << " W" << endl;
- cout << indent_str_next<< "Runtime Dynamic = " << link_bus->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
- cout<<endl;
-
- }
- }
- else
- {
-// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
-// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
-// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
-// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
-// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
+// TODO: This should use the McPATComponent::computeEnergy function to
+// recursively calculate energy of routers and links and then sum the results
+void OnChipNetwork::computeEnergy() {
+ double pppm_t[4] = {1, 1, 1, 1};
+
+ // Initialize stats for TDP
+ tdp_stats.reset();
+ tdp_stats.readAc.access = noc_stats.duty_cycle;
+ if (router_exist) {
+ // TODO: Define a regression to exercise routers
+ // TODO: Clean this up: it is too invasive and breaks abstraction
+ set_pppm(pppm_t, 1 * tdp_stats.readAc.access, 1, 1, 1);
+ router->power = router->power * pppm_t;
+ set_pppm(pppm_t, noc_params.total_nodes,
+ noc_params.total_nodes,
+ noc_params.total_nodes,
+ noc_params.total_nodes);
+ }
+ if (link_bus_exist) {
+ if (noc_params.type) {
+ link_bus->int_params.active_ports = noc_params.min_ports - 1;
+ } else {
+ link_bus->int_params.active_ports = noc_params.min_ports;
}
+ link_bus->int_stats.duty_cycle =
+ noc_params.M_traffic_pattern * noc_stats.duty_cycle;
+
+ // TODO: Decide how to roll multiple routers into a single top-level
+ // NOC module. I would prefer not to, but it might be a nice feature
+ set_pppm(pppm_t, noc_params.total_nodes,
+ noc_params.total_nodes,
+ noc_params.total_nodes,
+ noc_params.total_nodes);
+ }
+
+ // Initialize stats for runtime energy and power
+ rtp_stats.reset();
+ rtp_stats.readAc.access = noc_stats.total_access;
+ set_pppm(pppm_t, 1, 0 , 0, 0);
+ if (router_exist) {
+ // TODO: Move this to a McPATComponent parent class of Router
+ router->buffer.rt_power.readOp.dynamic =
+ (router->buffer.power.readOp.dynamic +
+ router->buffer.power.writeOp.dynamic) * rtp_stats.readAc.access;
+ router->crossbar.rt_power.readOp.dynamic =
+ router->crossbar.power.readOp.dynamic * rtp_stats.readAc.access;
+ router->arbiter.rt_power.readOp.dynamic =
+ router->arbiter.power.readOp.dynamic * rtp_stats.readAc.access;
+
+ router->rt_power = router->rt_power +
+ (router->buffer.rt_power + router->crossbar.rt_power +
+ router->arbiter.rt_power) * pppm_t +
+ router->power * pppm_lkg;//TDP power must be calculated first!
+ }
+ if (link_bus_exist) {
+ link_bus->int_stats.accesses = noc_stats.total_access;
+ }
+
+ // Recursively compute energy
+ McPATComponent::computeEnergy();
}
-void NoC::set_noc_param()
-{
-
- nocdynp.type = XML->sys.NoC[ithNoC].type;
- nocdynp.clockRate =XML->sys.NoC[ithNoC].clockrate;
- nocdynp.clockRate *= 1e6;
- nocdynp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
-
- nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits;
- if (nocdynp.type)
- {
- nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports;
- nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports;//later minus 1
- nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports);
- nocdynp.global_linked_ports = (nocdynp.input_ports-1) + (nocdynp.output_ports-1);
- /*
- * Except local i/o ports, all ports needs links( global_linked_ports);
- * However only min_ports can be fully active simultaneously
- * since the fewer number of ports (input or output ) is the bottleneck.
- */
- }
- else
- {
- nocdynp.input_ports = 1;
- nocdynp.output_ports = 1;
- nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports);
- nocdynp.global_linked_ports = 1;
+void OnChipNetwork::set_param_stats() {
+ // TODO: Remove this or move initialization elsewhere
+ memset(&noc_params, 0, sizeof(OnChipNetworkParameters));
+
+ int num_children = xml_data->nChildNode("param");
+ int i;
+ int mat_type;
+ for (i = 0; i < num_children; i++) {
+ XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+ XMLCSTR node_name = paramNode->getAttribute("name");
+ XMLCSTR value = paramNode->getAttribute("value");
+
+ if (!node_name)
+ warnMissingParamName(paramNode->getAttribute("id"));
+
+ ASSIGN_INT_IF("type", noc_params.type);
+ ASSIGN_FP_IF("clockrate", noc_params.clockRate);
+ ASSIGN_INT_IF("flit_bits", noc_params.flit_size);
+ ASSIGN_FP_IF("link_len", link_len);
+ ASSIGN_FP_IF("link_throughput", noc_params.link_throughput);
+ ASSIGN_FP_IF("link_latency", noc_params.link_latency);
+ ASSIGN_INT_IF("input_ports", noc_params.input_ports);
+ ASSIGN_INT_IF("output_ports", noc_params.output_ports);
+ ASSIGN_INT_IF("global_linked_ports", noc_params.global_linked_ports);
+ ASSIGN_INT_IF("horizontal_nodes", noc_params.horizontal_nodes);
+ ASSIGN_INT_IF("vertical_nodes", noc_params.vertical_nodes);
+ ASSIGN_FP_IF("chip_coverage", noc_params.chip_coverage);
+ ASSIGN_FP_IF("link_routing_over_percentage",
+ noc_params.route_over_perc);
+ ASSIGN_INT_IF("has_global_link", noc_params.has_global_link);
+ ASSIGN_INT_IF("virtual_channel_per_port",
+ noc_params.virtual_channel_per_port);
+ ASSIGN_INT_IF("input_buffer_entries_per_vc",
+ noc_params.input_buffer_entries_per_vc);
+ ASSIGN_FP_IF("M_traffic_pattern", noc_params.M_traffic_pattern);
+ ASSIGN_FP_IF("link_base_width", noc_params.link_base_width);
+ ASSIGN_FP_IF("link_base_height", noc_params.link_base_height);
+ ASSIGN_INT_IF("link_start_wiring_level",
+ noc_params.link_start_wiring_level);
+ ASSIGN_INT_IF("wire_mat_type", mat_type);
+ ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
+
+ else {
+ warnUnrecognizedParam(node_name);
}
+ }
- nocdynp.virtual_channel_per_port = XML->sys.NoC[ithNoC].virtual_channel_per_port;
- nocdynp.input_buffer_entries_per_vc = XML->sys.NoC[ithNoC].input_buffer_entries_per_vc;
+ // Change from MHz to Hz
+ noc_params.clockRate *= 1e6;
- nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes;
- nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes;
- nocdynp.total_nodes = nocdynp.horizontal_nodes*nocdynp.vertical_nodes;
- nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle;
- nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link;
- nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput;
- nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency;
- nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage;
- nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc;
+ interface_ip.wire_is_mat_type = mat_type;
+ interface_ip.wire_os_mat_type = mat_type;
- assert (nocdynp.chip_coverage <=1);
- assert (nocdynp.route_over_perc <=1);
+ num_children = xml_data->nChildNode("stat");
+ for (i = 0; i < num_children; i++) {
+ XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+ XMLCSTR node_name = statNode->getAttribute("name");
+ XMLCSTR value = statNode->getAttribute("value");
- if (nocdynp.type)
- name = "NOC";
- else
- name = "BUSES";
+ if (!node_name)
+ warnMissingStatName(statNode->getAttribute("id"));
-}
+ ASSIGN_FP_IF("duty_cycle", noc_stats.duty_cycle);
+ ASSIGN_FP_IF("total_accesses", noc_stats.total_access);
+ else {
+ warnUnrecognizedStat(node_name);
+ }
+ }
+
+ clockRate = noc_params.clockRate;
+ noc_params.min_ports =
+ min(noc_params.input_ports, noc_params.output_ports);
+ if (noc_params.type) {
+ noc_params.global_linked_ports = (noc_params.input_ports - 1) +
+ (noc_params.output_ports - 1);
+ }
+ noc_params.total_nodes =
+ noc_params.horizontal_nodes * noc_params.vertical_nodes;
+
+ assert(noc_params.chip_coverage <= 1);
+ assert(noc_params.route_over_perc <= 1);
+ assert(link_len > 0);
+}
-NoC ::~NoC(){
+OnChipNetwork ::~OnChipNetwork() {
- if(router) {delete router; router = 0;}
- if(link_bus) {delete link_bus; link_bus = 0;}
+ if (router) {
+ delete router;
+ router = 0;
+ }
+ if (link_bus) {
+ delete link_bus;
+ link_bus = 0;
+ }
}
diff --git a/ext/mcpat/noc.h b/ext/mcpat/noc.h
index 31b5b3b2e..291712b9d 100644
--- a/ext/mcpat/noc.h
+++ b/ext/mcpat/noc.h
@@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,13 +26,13 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef NOC_H_
#define NOC_H_
-#include "XML_Parse.h"
+
#include "array.h"
#include "basic_components.h"
#include "interconnect.h"
@@ -39,37 +40,62 @@
#include "parameter.h"
#include "router.h"
-class NoC :public Component {
- public:
+class OnChipNetworkParameters {
+public:
+ double clockRate;
+ int flit_size;
+ int input_ports;
+ int output_ports;
+ int min_ports;
+ int global_linked_ports;
+ int virtual_channel_per_port;
+ int input_buffer_entries_per_vc;
+ int horizontal_nodes;
+ int vertical_nodes;
+ int total_nodes;
+ double link_throughput;
+ double link_latency;
+ double chip_coverage;
+ double route_over_perc;
+ bool has_global_link;
+ bool type;
+ double M_traffic_pattern;
+ double link_base_width;
+ double link_base_height;
+ int link_start_wiring_level;
+};
+
+class OnChipNetworkStatistics {
+public:
+ double duty_cycle;
+ double total_access;
+};
+
+class OnChipNetwork : public McPATComponent {
+public:
+ Router* router;
+ Interconnect* link_bus;
+ Component link_bus_tot_per_Router;
+
+ int ithNoC;
+ InputParameter interface_ip;
+ double link_len;
+ double scktRatio, chip_PR_overhead, macro_PR_overhead;
+ OnChipNetworkParameters noc_params;
+ OnChipNetworkStatistics noc_stats;
+ uca_org_t local_result;
+ statsDef stats_t;
+ bool link_bus_exist;
+ bool router_exist;
+ string link_name;
- ParseXML *XML;
- int ithNoC;
- InputParameter interface_ip;
- double link_len;
- double executionTime;
- double scktRatio, chip_PR_overhead, macro_PR_overhead;
- Router * router;
- interconnect * link_bus;
- NoCParam nocdynp;
- uca_org_t local_result;
- statsDef tdp_stats;
- statsDef rtp_stats;
- statsDef stats_t;
- powerDef power_t;
- Component link_bus_tot_per_Router;
- bool link_bus_exist;
- bool router_exist;
- string name, link_name;
- double M_traffic_pattern;
- NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_ = 0.6,double link_len_=0);
- void set_noc_param();
- void computeEnergy(bool is_tdp=true);
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- void init_link_bus(double link_len_);
- void init_router();
- void computeEnergy_link_bus(bool is_tdp=true);
- void displayEnergy_link_bus(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
- ~NoC();
+ OnChipNetwork(XMLNode* _xml_data, int ithNoC_,
+ InputParameter* interface_ip_);
+ void set_param_stats();
+ void computeEnergy();
+ void init_link_bus();
+ void init_router();
+ ~OnChipNetwork();
};
#endif /* NOC_H_ */
diff --git a/ext/mcpat/processor.cc b/ext/mcpat/processor.cc
deleted file mode 100644
index 8520c9633..000000000
--- a/ext/mcpat/processor.cc
+++ /dev/null
@@ -1,839 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <cstdio>
-#include <cstring>
-#include <fstream>
-#include <iostream>
-
-#include "XML_Parse.h"
-#include "array.h"
-#include "basic_circuit.h"
-#include "const.h"
-#include "parameter.h"
-#include "processor.h"
-#include "version.h"
-
-Processor::Processor(ParseXML *XML_interface)
-:XML(XML_interface),//TODO: using one global copy may have problems.
- mc(0),
- niu(0),
- pcie(0),
- flashcontroller(0)
-{
- /*
- * placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm
- * There is no point to have heterogeneous memory controller on chip,
- * thus McPAT only support homogeneous memory controllers.
- */
- int i;
- double pppm_t[4] = {1,1,1,1};
- set_proc_param();
- if (procdynp.homoCore)
- numCore = procdynp.numCore==0? 0:1;
- else
- numCore = procdynp.numCore;
-
- if (procdynp.homoL2)
- numL2 = procdynp.numL2==0? 0:1;
- else
- numL2 = procdynp.numL2;
-
- if (XML->sys.Private_L2 && numCore != numL2)
- {
- cout<<"Number of private L2 does not match number of cores"<<endl;
- exit(0);
- }
-
- if (procdynp.homoL3)
- numL3 = procdynp.numL3==0? 0:1;
- else
- numL3 = procdynp.numL3;
-
- if (procdynp.homoNOC)
- numNOC = procdynp.numNOC==0? 0:1;
- else
- numNOC = procdynp.numNOC;
-
-// if (!procdynp.homoNOC)
-// {
-// cout<<"Current McPAT does not support heterogeneous NOC"<<endl;
-// exit(0);
-// }
-
- if (procdynp.homoL1Dir)
- numL1Dir = procdynp.numL1Dir==0? 0:1;
- else
- numL1Dir = procdynp.numL1Dir;
-
- if (procdynp.homoL2Dir)
- numL2Dir = procdynp.numL2Dir==0? 0:1;
- else
- numL2Dir = procdynp.numL2Dir;
-
- for (i = 0;i < numCore; i++)
- {
- cores.push_back(new Core(XML,i, &interface_ip));
- cores[i]->computeEnergy();
- cores[i]->computeEnergy(false);
- if (procdynp.homoCore){
- core.area.set_area(core.area.get_area() + cores[i]->area.get_area()*procdynp.numCore);
- set_pppm(pppm_t,cores[i]->clockRate*procdynp.numCore, procdynp.numCore,procdynp.numCore,procdynp.numCore);
- core.power = core.power + cores[i]->power*pppm_t;
- set_pppm(pppm_t,1/cores[i]->executionTime, procdynp.numCore,procdynp.numCore,procdynp.numCore);
- core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t;
- area.set_area(area.get_area() + core.area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm
- power = power + core.power;
- rt_power = rt_power + core.rt_power;
- }
- else{
- core.area.set_area(core.area.get_area() + cores[i]->area.get_area());
- area.set_area(area.get_area() + cores[i]->area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm
-
- set_pppm(pppm_t,cores[i]->clockRate, 1, 1, 1);
- core.power = core.power + cores[i]->power*pppm_t;
- power = power + cores[i]->power*pppm_t;
-
- set_pppm(pppm_t,1/cores[i]->executionTime, 1, 1, 1);
- core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t;
- rt_power = rt_power + cores[i]->rt_power*pppm_t;
- }
- }
-
- if (!XML->sys.Private_L2)
- {
- if (numL2 >0)
- for (i = 0;i < numL2; i++)
- {
- l2array.push_back(new SharedCache(XML,i, &interface_ip));
- l2array[i]->computeEnergy();
- l2array[i]->computeEnergy(false);
- if (procdynp.homoL2){
- l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()*procdynp.numL2);
- set_pppm(pppm_t,l2array[i]->cachep.clockRate*procdynp.numL2, procdynp.numL2,procdynp.numL2,procdynp.numL2);
- l2.power = l2.power + l2array[i]->power*pppm_t;
- set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, procdynp.numL2,procdynp.numL2,procdynp.numL2);
- l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t;
- area.set_area(area.get_area() + l2.area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm
- power = power + l2.power;
- rt_power = rt_power + l2.rt_power;
- }
- else{
- l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area());
- area.set_area(area.get_area() + l2array[i]->area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm
-
- set_pppm(pppm_t,l2array[i]->cachep.clockRate, 1, 1, 1);
- l2.power = l2.power + l2array[i]->power*pppm_t;
- power = power + l2array[i]->power*pppm_t;;
- set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, 1, 1, 1);
- l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t;
- rt_power = rt_power + l2array[i]->rt_power*pppm_t;
- }
- }
- }
-
- if (numL3 >0)
- for (i = 0;i < numL3; i++)
- {
- l3array.push_back(new SharedCache(XML,i, &interface_ip, L3));
- l3array[i]->computeEnergy();
- l3array[i]->computeEnergy(false);
- if (procdynp.homoL3){
- l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()*procdynp.numL3);
- set_pppm(pppm_t,l3array[i]->cachep.clockRate*procdynp.numL3, procdynp.numL3,procdynp.numL3,procdynp.numL3);
- l3.power = l3.power + l3array[i]->power*pppm_t;
- set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, procdynp.numL3,procdynp.numL3,procdynp.numL3);
- l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t;
- area.set_area(area.get_area() + l3.area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm
- power = power + l3.power;
- rt_power = rt_power + l3.rt_power;
-
- }
- else{
- l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area());
- area.set_area(area.get_area() + l3array[i]->area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm
- set_pppm(pppm_t,l3array[i]->cachep.clockRate, 1, 1, 1);
- l3.power = l3.power + l3array[i]->power*pppm_t;
- power = power + l3array[i]->power*pppm_t;
- set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, 1, 1, 1);
- l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t;
- rt_power = rt_power + l3array[i]->rt_power*pppm_t;
-
- }
- }
- if (numL1Dir >0)
- for (i = 0;i < numL1Dir; i++)
- {
- l1dirarray.push_back(new SharedCache(XML,i, &interface_ip, L1Directory));
- l1dirarray[i]->computeEnergy();
- l1dirarray[i]->computeEnergy(false);
- if (procdynp.homoL1Dir){
- l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()*procdynp.numL1Dir);
- set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate*procdynp.numL1Dir, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir);
- l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t;
- set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir);
- l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t;
- area.set_area(area.get_area() + l1dir.area.get_area());//placement and routing overhead is 10%, l1dir scales worse than cache 40% is accumulated from 90 to 22nm
- power = power + l1dir.power;
- rt_power = rt_power + l1dir.rt_power;
-
- }
- else{
- l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area());
- area.set_area(area.get_area() + l1dirarray[i]->area.get_area());
- set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate, 1, 1, 1);
- l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t;
- power = power + l1dirarray[i]->power;
- set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, 1, 1, 1);
- l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t;
- rt_power = rt_power + l1dirarray[i]->rt_power;
- }
- }
-
- if (numL2Dir >0)
- for (i = 0;i < numL2Dir; i++)
- {
- l2dirarray.push_back(new SharedCache(XML,i, &interface_ip, L2Directory));
- l2dirarray[i]->computeEnergy();
- l2dirarray[i]->computeEnergy(false);
- if (procdynp.homoL2Dir){
- l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()*procdynp.numL2Dir);
- set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate*procdynp.numL2Dir, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir);
- l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t;
- set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir);
- l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t;
- area.set_area(area.get_area() + l2dir.area.get_area());//placement and routing overhead is 10%, l2dir scales worse than cache 40% is accumulated from 90 to 22nm
- power = power + l2dir.power;
- rt_power = rt_power + l2dir.rt_power;
-
- }
- else{
- l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area());
- area.set_area(area.get_area() + l2dirarray[i]->area.get_area());
- set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate, 1, 1, 1);
- l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t;
- power = power + l2dirarray[i]->power*pppm_t;
- set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, 1, 1, 1);
- l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t;
- rt_power = rt_power + l2dirarray[i]->rt_power*pppm_t;
- }
- }
-
- if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
- {
- mc = new MemoryController(XML, &interface_ip, MC);
- mc->computeEnergy();
- mc->computeEnergy(false);
- mcs.area.set_area(mcs.area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs);
- area.set_area(area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs);
- set_pppm(pppm_t,XML->sys.mc.number_mcs*mc->mcp.clockRate, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs);
- mcs.power = mc->power*pppm_t;
- power = power + mcs.power;
- set_pppm(pppm_t,1/mc->mcp.executionTime, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs);
- mcs.rt_power = mc->rt_power*pppm_t;
- rt_power = rt_power + mcs.rt_power;
-
- }
-
- if (XML->sys.flashc.number_mcs >0 )//flash controller
- {
- flashcontroller = new FlashController(XML, &interface_ip);
- flashcontroller->computeEnergy();
- flashcontroller->computeEnergy(false);
- double number_fcs = flashcontroller->fcp.num_mcs;
- flashcontrollers.area.set_area(flashcontrollers.area.get_area()+flashcontroller->area.get_area()*number_fcs);
- area.set_area(area.get_area()+flashcontrollers.area.get_area());
- set_pppm(pppm_t,number_fcs, number_fcs ,number_fcs, number_fcs );
- flashcontrollers.power = flashcontroller->power*pppm_t;
- power = power + flashcontrollers.power;
- set_pppm(pppm_t,number_fcs , number_fcs ,number_fcs ,number_fcs );
- flashcontrollers.rt_power = flashcontroller->rt_power*pppm_t;
- rt_power = rt_power + flashcontrollers.rt_power;
-
- }
-
- if (XML->sys.niu.number_units >0)
- {
- niu = new NIUController(XML, &interface_ip);
- niu->computeEnergy();
- niu->computeEnergy(false);
- nius.area.set_area(nius.area.get_area()+niu->area.get_area()*XML->sys.niu.number_units);
- area.set_area(area.get_area()+niu->area.get_area()*XML->sys.niu.number_units);
- set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units);
- nius.power = niu->power*pppm_t;
- power = power + nius.power;
- set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units);
- nius.rt_power = niu->rt_power*pppm_t;
- rt_power = rt_power + nius.rt_power;
-
- }
-
- if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels >0)
- {
- pcie = new PCIeController(XML, &interface_ip);
- pcie->computeEnergy();
- pcie->computeEnergy(false);
- pcies.area.set_area(pcies.area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units);
- area.set_area(area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units);
- set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units);
- pcies.power = pcie->power*pppm_t;
- power = power + pcies.power;
- set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units);
- pcies.rt_power = pcie->rt_power*pppm_t;
- rt_power = rt_power + pcies.rt_power;
-
- }
-
- if (numNOC >0)
- {
- for (i = 0;i < numNOC; i++)
- {
- if (XML->sys.NoC[i].type)
- {//First add up area of routers if NoC is used
- nocs.push_back(new NoC(XML,i, &interface_ip, 1));
- if (procdynp.homoNOC)
- {
- noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC);
- area.set_area(area.get_area() + noc.area.get_area());
- }
- else
- {
- noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area());
- area.set_area(area.get_area() + nocs[i]->area.get_area());
- }
- }
- else
- {//Bus based interconnect
- nocs.push_back(new NoC(XML,i, &interface_ip, 1, sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage)));
- if (procdynp.homoNOC){
- noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC);
- area.set_area(area.get_area() + noc.area.get_area());
- }
- else
- {
- noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area());
- area.set_area(area.get_area() + nocs[i]->area.get_area());
- }
- }
- }
-
- /*
- * Compute global links associated with each NOC, if any. This must be done at the end (even after the NOC router part) since the total chip
- * area must be obtain to decide the link routing
- */
- for (i = 0;i < numNOC; i++)
- {
- if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type)
- {
- nocs[i]->init_link_bus(sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage));//compute global links
- if (procdynp.homoNOC)
- {
- noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
- * nocs[i]->nocdynp.total_nodes
- * procdynp.numNOC);
- area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
- * nocs[i]->nocdynp.total_nodes
- * procdynp.numNOC);
- }
- else
- {
- noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
- * nocs[i]->nocdynp.total_nodes);
- area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
- * nocs[i]->nocdynp.total_nodes);
- }
- }
- }
- //Compute energy of NoC (w or w/o links) or buses
- for (i = 0;i < numNOC; i++)
- {
- nocs[i]->computeEnergy();
- nocs[i]->computeEnergy(false);
- if (procdynp.homoNOC){
- set_pppm(pppm_t,procdynp.numNOC*nocs[i]->nocdynp.clockRate, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC);
- noc.power = noc.power + nocs[i]->power*pppm_t;
- set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC);
- noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t;
- power = power + noc.power;
- rt_power = rt_power + noc.rt_power;
- }
- else
- {
- set_pppm(pppm_t,nocs[i]->nocdynp.clockRate, 1, 1, 1);
- noc.power = noc.power + nocs[i]->power*pppm_t;
- power = power + nocs[i]->power*pppm_t;
- set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, 1, 1, 1);
- noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t;
- rt_power = rt_power + nocs[i]->rt_power*pppm_t;
-
-
- }
- }
- }
-
-// //clock power
-// globalClock.init_wire_external(is_default, &interface_ip);
-// globalClock.clk_area =area*1e6; //change it from mm^2 to um^2
-// globalClock.end_wiring_level =5;//toplevel metal
-// globalClock.start_wiring_level =5;//toplevel metal
-// globalClock.l_ip.with_clock_grid=false;//global clock does not drive local final nodes
-// globalClock.optimize_wire();
-
-}
-
-void Processor::displayDeviceType(int device_type_, uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- switch ( device_type_ ) {
-
- case 0 :
- cout <<indent_str<<"Device Type= "<<"ITRS high performance device type"<<endl;
- break;
- case 1 :
- cout <<indent_str<<"Device Type= "<<"ITRS low standby power device type"<<endl;
- break;
- case 2 :
- cout <<indent_str<<"Device Type= "<<"ITRS low operating power device type"<<endl;
- break;
- case 3 :
- cout <<indent_str<<"Device Type= "<<"LP-DRAM device type"<<endl;
- break;
- case 4 :
- cout <<indent_str<<"Device Type= "<<"COMM-DRAM device type"<<endl;
- break;
- default :
- {
- cout <<indent_str<<"Unknown Device Type"<<endl;
- exit(0);
- }
- }
-}
-
-void Processor::displayInterconnectType(int interconnect_type_, uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- switch ( interconnect_type_ ) {
-
- case 0 :
- cout <<indent_str<<"Interconnect metal projection= "<<"aggressive interconnect technology projection"<<endl;
- break;
- case 1 :
- cout <<indent_str<<"Interconnect metal projection= "<<"conservative interconnect technology projection"<<endl;
- break;
- default :
- {
- cout <<indent_str<<"Unknown Interconnect Projection Type"<<endl;
- exit(0);
- }
- }
-}
-
-void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp)
-{
- int i;
- bool long_channel = XML->sys.longer_channel_device;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- if (is_tdp)
- {
-
- if (plevel<5)
- {
- cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
- << " of " << VER_UPDATE << ") results (current print level is "<< plevel
- <<", please increase print level to see the details in components): "<<endl;
- }
- else
- {
- cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
- << " of " << VER_UPDATE << ") results (current print level is 5)"<< endl;
- }
- cout <<"*****************************************************************************************"<<endl;
- cout <<indent_str<<"Technology "<<XML->sys.core_tech_node<<" nm"<<endl;
- //cout <<indent_str<<"Device Type= "<<XML->sys.device_type<<endl;
- if (long_channel)
- cout <<indent_str<<"Using Long Channel Devices When Appropriate"<<endl;
- //cout <<indent_str<<"Interconnect metal projection= "<<XML->sys.interconnect_projection_type<<endl;
- displayInterconnectType(XML->sys.interconnect_projection_type, indent);
- cout <<indent_str<<"Core clock Rate(MHz) "<<XML->sys.core[0].clock_rate<<endl;
- cout <<endl;
- cout <<"*****************************************************************************************"<<endl;
- cout <<"Processor: "<<endl;
- cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str << "Peak Power = " << power.readOp.dynamic +
- (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) + power.readOp.gate_leakage <<" W" << endl;
- cout << indent_str << "Total Leakage = " <<
- (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) + power.readOp.gate_leakage <<" W" << endl;
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;
- cout << indent_str << "Subthreshold Leakage = " << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- if (numCore >0){
- cout <<indent_str<<"Total Cores: "<<XML->sys.number_of_cores << " cores "<<endl;
- displayDeviceType(XML->sys.device_type,indent);
- cout << indent_str_next << "Area = " << core.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? core.power.readOp.longer_channel_leakage:core.power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << core.power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- if (!XML->sys.Private_L2)
- {
- if (numL2 >0){
- cout <<indent_str<<"Total L2s: "<<endl;
- displayDeviceType(XML->sys.L2[0].device_type,indent);
- cout << indent_str_next << "Area = " << l2.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? l2.power.readOp.longer_channel_leakage:l2.power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << l2.power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- }
- if (numL3 >0){
- cout <<indent_str<<"Total L3s: "<<endl;
- displayDeviceType(XML->sys.L3[0].device_type, indent);
- cout << indent_str_next << "Area = " << l3.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? l3.power.readOp.longer_channel_leakage:l3.power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << l3.power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- if (numL1Dir >0){
- cout <<indent_str<<"Total First Level Directory: "<<endl;
- displayDeviceType(XML->sys.L1Directory[0].device_type, indent);
- cout << indent_str_next << "Area = " << l1dir.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? l1dir.power.readOp.longer_channel_leakage:l1dir.power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << l1dir.power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- if (numL2Dir >0){
- cout <<indent_str<<"Total First Level Directory: "<<endl;
- displayDeviceType(XML->sys.L1Directory[0].device_type, indent);
- cout << indent_str_next << "Area = " << l2dir.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? l2dir.power.readOp.longer_channel_leakage:l2dir.power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << l2dir.power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- if (numNOC >0){
- cout <<indent_str<<"Total NoCs (Network/Bus): "<<endl;
- displayDeviceType(XML->sys.device_type, indent);
- cout << indent_str_next << "Area = " << noc.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? noc.power.readOp.longer_channel_leakage:noc.power.readOp.leakage) <<" W" << endl;
- //cout << indent_str_next << "Subthreshold Leakage = " << noc.power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
- {
- cout <<indent_str<<"Total MCs: "<<XML->sys.mc.number_mcs << " Memory Controllers "<<endl;
- displayDeviceType(XML->sys.device_type, indent);
- cout << indent_str_next << "Area = " << mcs.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? mcs.power.readOp.longer_channel_leakage:mcs.power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- if (XML->sys.flashc.number_mcs >0)
- {
- cout <<indent_str<<"Total Flash/SSD Controllers: "<<flashcontroller->fcp.num_mcs << " Flash/SSD Controllers "<<endl;
- displayDeviceType(XML->sys.device_type, indent);
- cout << indent_str_next << "Area = " << flashcontrollers.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? flashcontrollers.power.readOp.longer_channel_leakage:flashcontrollers.power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- if (XML->sys.niu.number_units >0 )
- {
- cout <<indent_str<<"Total NIUs: "<<niu->niup.num_units << " Network Interface Units "<<endl;
- displayDeviceType(XML->sys.device_type, indent);
- cout << indent_str_next << "Area = " << nius.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? nius.power.readOp.longer_channel_leakage:nius.power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0)
- {
- cout <<indent_str<<"Total PCIes: "<<pcie->pciep.num_units << " PCIe Controllers "<<endl;
- displayDeviceType(XML->sys.device_type, indent);
- cout << indent_str_next << "Area = " << pcies.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? pcies.power.readOp.longer_channel_leakage:pcies.power.readOp.leakage) <<" W" << endl;
- cout << indent_str_next << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" << endl;
- cout <<endl;
- }
- cout <<"*****************************************************************************************"<<endl;
- if (plevel >1)
- {
- for (i = 0;i < numCore; i++)
- {
- cores[i]->displayEnergy(indent+4,plevel,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- if (!XML->sys.Private_L2)
- {
- for (i = 0;i < numL2; i++)
- {
- l2array[i]->displayEnergy(indent+4,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- }
- for (i = 0;i < numL3; i++)
- {
- l3array[i]->displayEnergy(indent+4,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- for (i = 0;i < numL1Dir; i++)
- {
- l1dirarray[i]->displayEnergy(indent+4,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- for (i = 0;i < numL2Dir; i++)
- {
- l2dirarray[i]->displayEnergy(indent+4,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
- {
- mc->displayEnergy(indent+4,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- if (XML->sys.flashc.number_mcs >0 && XML->sys.flashc.memory_channels_per_mc>0)
- {
- flashcontroller->displayEnergy(indent+4,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- if (XML->sys.niu.number_units >0 )
- {
- niu->displayEnergy(indent+4,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0)
- {
- pcie->displayEnergy(indent+4,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
-
- for (i = 0;i < numNOC; i++)
- {
- nocs[i]->displayEnergy(indent+4,plevel,is_tdp);
- cout <<"*****************************************************************************************"<<endl;
- }
- }
- }
- else
- {
-
- }
-
-}
-
-void Processor::set_proc_param()
-{
- bool debug = false;
-
- procdynp.homoCore = bool(debug?1:XML->sys.homogeneous_cores);
- procdynp.homoL2 = bool(debug?1:XML->sys.homogeneous_L2s);
- procdynp.homoL3 = bool(debug?1:XML->sys.homogeneous_L3s);
- procdynp.homoNOC = bool(debug?1:XML->sys.homogeneous_NoCs);
- procdynp.homoL1Dir = bool(debug?1:XML->sys.homogeneous_L1Directories);
- procdynp.homoL2Dir = bool(debug?1:XML->sys.homogeneous_L2Directories);
-
- procdynp.numCore = XML->sys.number_of_cores;
- procdynp.numL2 = XML->sys.number_of_L2s;
- procdynp.numL3 = XML->sys.number_of_L3s;
- procdynp.numNOC = XML->sys.number_of_NoCs;
- procdynp.numL1Dir = XML->sys.number_of_L1Directories;
- procdynp.numL2Dir = XML->sys.number_of_L2Directories;
- procdynp.numMC = XML->sys.mc.number_mcs;
- procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc;
-
-// if (procdynp.numCore<1)
-// {
-// cout<<" The target processor should at least have one core on chip." <<endl;
-// exit(0);
-// }
-
- // if (numNOCs<0 || numNOCs>2)
- // {
- // cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global and local NOCs)"<<endl;
- // exit(0);
- // }
-
- /* Basic parameters*/
- interface_ip.data_arr_ram_cell_tech_type = debug?0:XML->sys.device_type;
- interface_ip.data_arr_peri_global_tech_type = debug?0:XML->sys.device_type;
- interface_ip.tag_arr_ram_cell_tech_type = debug?0:XML->sys.device_type;
- interface_ip.tag_arr_peri_global_tech_type = debug?0:XML->sys.device_type;
-
- interface_ip.ic_proj_type = debug?0:XML->sys.interconnect_projection_type;
- interface_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied.
- interface_ip.area_wt = 0;//Fixed number, This is used to exhaustive search for individual components.
- interface_ip.dynamic_power_wt = 100;//Fixed number, This is used to exhaustive search for individual components.
- interface_ip.leakage_power_wt = 0;
- interface_ip.cycle_time_wt = 0;
-
- interface_ip.delay_dev = 10000;//Fixed number, make sure timing can be satisfied.
- interface_ip.area_dev = 10000;//Fixed number, This is used to exhaustive search for individual components.
- interface_ip.dynamic_power_dev = 10000;//Fixed number, This is used to exhaustive search for individual components.
- interface_ip.leakage_power_dev = 10000;
- interface_ip.cycle_time_dev = 10000;
-
- interface_ip.ed = 2;
- interface_ip.burst_len = 1;//parameters are fixed for processor section, since memory is processed separately
- interface_ip.int_prefetch_w = 1;
- interface_ip.page_sz_bits = 0;
- interface_ip.temp = debug?360: XML->sys.temperature;
- interface_ip.F_sz_nm = debug?90:XML->sys.core_tech_node;//XML->sys.core_tech_node;
- interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000;
-
- //***********This section of code does not have real meaning, they are just to ensure all data will have initial value to prevent errors.
- //They will be overridden during each components initialization
- interface_ip.cache_sz =64;
- interface_ip.line_sz = 1;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = 64;
- interface_ip.access_mode = 2;
-
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
-
- interface_ip.is_main_mem = false;
- interface_ip.rpters_in_htree = true ;
- interface_ip.ver_htree_wires_over_array = 0;
- interface_ip.broadcast_addr_din_over_ver_htrees = 0;
-
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = 1;
- interface_ip.nuca = 0;
- interface_ip.nuca_bank_count = 0;
- interface_ip.is_cache =true;
- interface_ip.pure_ram =false;
- interface_ip.pure_cam =false;
- interface_ip.force_cache_config =false;
- if (XML->sys.Embedded)
- {
- interface_ip.wt =Global_30;
- interface_ip.wire_is_mat_type = 0;
- interface_ip.wire_os_mat_type = 0;
- }
- else
- {
- interface_ip.wt =Global;
- interface_ip.wire_is_mat_type = 2;
- interface_ip.wire_os_mat_type = 2;
- }
- interface_ip.force_wiretype = false;
- interface_ip.print_detail = 1;
- interface_ip.add_ecc_b_ =true;
-}
-
-Processor::~Processor(){
- while (!cores.empty())
- {
- delete cores.back();
- cores.pop_back();
- }
- while (!l2array.empty())
- {
- delete l2array.back();
- l2array.pop_back();
- }
- while (!l3array.empty())
- {
- delete l3array.back();
- l3array.pop_back();
- }
- while (!nocs.empty())
- {
- delete nocs.back();
- nocs.pop_back();
- }
- if (!mc)
- {
- delete mc;
- }
- if (!niu)
- {
- delete niu;
- }
- if (!pcie)
- {
- delete pcie;
- }
- if (!flashcontroller)
- {
- delete flashcontroller;
- }
-};
diff --git a/ext/mcpat/sharedcache.cc b/ext/mcpat/sharedcache.cc
deleted file mode 100644
index 3a61e1b6d..000000000
--- a/ext/mcpat/sharedcache.cc
+++ /dev/null
@@ -1,1162 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <cstring>
-#include <iostream>
-
-#include "XML_Parse.h"
-#include "arbiter.h"
-#include "array.h"
-#include "basic_circuit.h"
-#include "const.h"
-#include "io.h"
-#include "logic.h"
-#include "parameter.h"
-#include "sharedcache.h"
-
-SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* interface_ip_, enum cache_level cacheL_)
-:XML(XML_interface),
- ithCache(ithCache_),
- interface_ip(*interface_ip_),
- cacheL(cacheL_),
- dir_overhead(0)
-{
- int idx;
- int tag, data;
- bool is_default, debug;
- enum Device_ty device_t;
- enum Core_type core_t;
- double size, line, assoc, banks;
- if (cacheL==L2 && XML->sys.Private_L2)
- {
- device_t=Core_device;
- core_t = (enum Core_type)XML->sys.core[ithCache].machine_type;
- }
- else
- {
- device_t=LLC_device;
- core_t = Inorder;
- }
-
- debug = false;
- is_default=true;//indication for default setup
- if (XML->sys.Embedded)
- {
- interface_ip.wt =Global_30;
- interface_ip.wire_is_mat_type = 0;
- interface_ip.wire_os_mat_type = 1;
- }
- else
- {
- interface_ip.wt =Global;
- interface_ip.wire_is_mat_type = 2;
- interface_ip.wire_os_mat_type = 2;
- }
- set_cache_param();
-
- //All lower level cache are physically indexed and tagged.
- size = cachep.capacity;
- line = cachep.blockW;
- assoc = cachep.assoc;
- banks = cachep.nbanks;
- if ((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))
- {
- assoc = 0;
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- interface_ip.num_search_ports = 1;
- }
- else
- {
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.num_search_ports = 0;
- if (cachep.dir_ty==SBT)
- {
- dir_overhead = ceil(XML->sys.number_of_cores/8.0)*8/(cachep.blockW*8);
- line = cachep.blockW*(1+ dir_overhead) ;
- size = cachep.capacity*(1+ dir_overhead);
-
- }
- }
-// if (XML->sys.first_level_dir==2)
-// tag += int(XML->sys.domain_size + 5);
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = (int)size;
- interface_ip.line_sz = (int)line;
- interface_ip.assoc = (int)assoc;
- interface_ip.nbanks = (int)banks;
- interface_ip.out_w = interface_ip.line_sz*8/2;
- interface_ip.access_mode = 1;
- interface_ip.throughput = cachep.throughput;
- interface_ip.latency = cachep.latency;
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- interface_ip.pure_cam = false;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//lower level cache usually has one port.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
-// interface_ip.force_cache_config =true;
-// interface_ip.ndwl = 4;
-// interface_ip.ndbl = 8;
-// interface_ip.nspd = 1;
-// interface_ip.ndcm =1 ;
-// interface_ip.ndsam1 =1;
-// interface_ip.ndsam2 =1;
- unicache.caches = new ArrayST(&interface_ip, cachep.name + "cache", device_t, true, core_t);
- unicache.area.set_area(unicache.area.get_area()+ unicache.caches->local_result.area);
- area.set_area(area.get_area()+ unicache.caches->local_result.area);
- interface_ip.force_cache_config =false;
-
- if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)))
- {
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + unicache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = cachep.missb_size*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- interface_ip.pure_cam = false;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8/2;
- interface_ip.access_mode = 0;
- interface_ip.throughput = cachep.throughput;//means cycle time
- interface_ip.latency = cachep.latency;//means access time
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = 1;
- unicache.missb = new ArrayST(&interface_ip, cachep.name + "MissB", device_t, true, core_t);
- unicache.area.set_area(unicache.area.get_area()+ unicache.missb->local_result.area);
- area.set_area(area.get_area()+ unicache.missb->local_result.area);
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = unicache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*cachep.fu_size ;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8/2;
- interface_ip.access_mode = 0;
- interface_ip.throughput = cachep.throughput;
- interface_ip.latency = cachep.latency;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- unicache.ifb = new ArrayST(&interface_ip, cachep.name + "FillB", device_t, true, core_t);
- unicache.area.set_area(unicache.area.get_area()+ unicache.ifb->local_result.area);
- area.set_area(area.get_area()+ unicache.ifb->local_result.area);
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = unicache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = cachep.prefetchb_size*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8/2;
- interface_ip.access_mode = 0;
- interface_ip.throughput = cachep.throughput;
- interface_ip.latency = cachep.latency;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- unicache.prefetchb = new ArrayST(&interface_ip, cachep.name + "PrefetchB", device_t, true, core_t);
- unicache.area.set_area(unicache.area.get_area()+ unicache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ unicache.prefetchb->local_result.area);
- //WBB
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = unicache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = cachep.wbb_size*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8/2;
- interface_ip.access_mode = 0;
- interface_ip.throughput = cachep.throughput;
- interface_ip.latency = cachep.latency;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- unicache.wbb = new ArrayST(&interface_ip, cachep.name + "WBB", device_t, true, core_t);
- unicache.area.set_area(unicache.area.get_area()+ unicache.wbb->local_result.area);
- area.set_area(area.get_area()+ unicache.wbb->local_result.area);
- }
- // //pipeline
-// interface_ip.pipeline_stages = int(ceil(llCache.caches.local_result.access_time/llCache.caches.local_result.cycle_time));
-// interface_ip.per_stage_vector = llCache.caches.l_ip.out_w + llCache.caches.l_ip.tag_w ;
-// pipeLogicCache.init_pipeline(is_default, &interface_ip);
-// pipeLogicCache.compute_pipeline();
-
- /*
- if (!((XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==1)
- ||(XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==2)))//not single level IC and DIC
- {
- //directory Now assuming one directory per bank, TODO:should change it later
- size = XML->sys.L2directory.L2Dir_config[0];
- line = XML->sys.L2directory.L2Dir_config[1];
- assoc = XML->sys.L2directory.L2Dir_config[2];
- banks = XML->sys.L2directory.L2Dir_config[3];
- tag = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit over estimate
- interface_ip.specific_tag = 0;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0];
- interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1];
- interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2];
- interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
- interface_ip.throughput = XML->sys.L2directory.L2Dir_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2directory.L2Dir_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//lower level cache usually has one port.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
-
- strcpy(directory.caches.name,"L2 Directory");
- directory.caches.init_cache(&interface_ip);
- directory.caches.optimize_array();
- directory.area += directory.caches.local_result.area;
- //output_data_csv(directory.caches.local_result);
- ///cout<<"area="<<area<<endl;
-
- //miss buffer Each MSHR contains enough state to handle one or more accesses of any type to a single memory line.
- //Due to the generality of the MSHR mechanism, the amount of state involved is non-trivial,
- //including the address, pointers to the cache entry and destination register, written data, and various other pieces of state.
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + directory.caches.l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(directory.missb.name,"directoryMissB");
- directory.missb.init_cache(&interface_ip);
- directory.missb.optimize_array();
- directory.area += directory.missb.local_result.area;
- //output_data_csv(directory.missb.local_result);
- ///cout<<"area="<<area<<endl;
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = directory.caches.l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(directory.ifb.name,"directoryFillB");
- directory.ifb.init_cache(&interface_ip);
- directory.ifb.optimize_array();
- directory.area += directory.ifb.local_result.area;
- //output_data_csv(directory.ifb.local_result);
- ///cout<<"area="<<area<<endl;
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = directory.caches.l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(directory.prefetchb.name,"directoryPrefetchB");
- directory.prefetchb.init_cache(&interface_ip);
- directory.prefetchb.optimize_array();
- directory.area += directory.prefetchb.local_result.area;
- //output_data_csv(directory.prefetchb.local_result);
- ///cout<<"area="<<area<<endl;
-
- //WBB
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = directory.caches.l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[4]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(directory.wbb.name,"directoryWBB");
- directory.wbb.init_cache(&interface_ip);
- directory.wbb.optimize_array();
- directory.area += directory.wbb.local_result.area;
- }
-
- if (XML->sys.number_of_dir_levels ==2 && XML->sys.first_level_dir==0)
- {
- //first level directory
- size = XML->sys.L2directory.L2Dir_config[0]*XML->sys.domain_size/128;
- line = int(ceil(XML->sys.domain_size/8.0));
- assoc = XML->sys.L2directory.L2Dir_config[2];
- banks = XML->sys.L2directory.L2Dir_config[3];
- tag = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit over estimate
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0];
- interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1];
- interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2];
- interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
- interface_ip.throughput = XML->sys.L2directory.L2Dir_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2directory.L2Dir_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//lower level cache usually has one port.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
-
- strcpy(directory1.caches.name,"first level Directory");
- directory1.caches.init_cache(&interface_ip);
- directory1.caches.optimize_array();
- directory1.area += directory1.caches.local_result.area;
- //output_data_csv(directory.caches.local_result);
- ///cout<<"area="<<area<<endl;
-
- //miss buffer Each MSHR contains enough state to handle one or more accesses of any type to a single memory line.
- //Due to the generality of the MSHR mechanism, the amount of state involved is non-trivial,
- //including the address, pointers to the cache entry and destination register, written data, and various other pieces of state.
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + directory1.caches.l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(directory1.missb.name,"directory1MissB");
- directory1.missb.init_cache(&interface_ip);
- directory1.missb.optimize_array();
- directory1.area += directory1.missb.local_result.area;
- //output_data_csv(directory.missb.local_result);
- ///cout<<"area="<<area<<endl;
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = directory1.caches.l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(directory1.ifb.name,"directory1FillB");
- directory1.ifb.init_cache(&interface_ip);
- directory1.ifb.optimize_array();
- directory1.area += directory1.ifb.local_result.area;
- //output_data_csv(directory.ifb.local_result);
- ///cout<<"area="<<area<<endl;
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = directory1.caches.l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(directory1.prefetchb.name,"directory1PrefetchB");
- directory1.prefetchb.init_cache(&interface_ip);
- directory1.prefetchb.optimize_array();
- directory1.area += directory1.prefetchb.local_result.area;
- //output_data_csv(directory.prefetchb.local_result);
- ///cout<<"area="<<area<<endl;
-
- //WBB
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = directory1.caches.l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(directory1.wbb.name,"directoryWBB");
- directory1.wbb.init_cache(&interface_ip);
- directory1.wbb.optimize_array();
- directory1.area += directory1.wbb.local_result.area;
- }
-
- if (XML->sys.first_level_dir==1)//IC
- {
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = int(ceil(XML->sys.domain_size/8.0));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = XML->sys.domain_size*data*XML->sys.L2[ithCache].L2_config[0]/XML->sys.L2[ithCache].L2_config[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1024;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;
- interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- strcpy(inv_dir.caches.name,"inv_dir");
- inv_dir.caches.init_cache(&interface_ip);
- inv_dir.caches.optimize_array();
- inv_dir.area = inv_dir.caches.local_result.area;
-
- }
-*/
-// //pipeline
-// interface_ip.pipeline_stages = int(ceil(directory.caches.local_result.access_time/directory.caches.local_result.cycle_time));
-// interface_ip.per_stage_vector = directory.caches.l_ip.out_w + directory.caches.l_ip.tag_w ;
-// pipeLogicDirectory.init_pipeline(is_default, &interface_ip);
-// pipeLogicDirectory.compute_pipeline();
-//
-// //clock power
-// clockNetwork.init_wire_external(is_default, &interface_ip);
-// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
-// clockNetwork.end_wiring_level =5;//toplevel metal
-// clockNetwork.start_wiring_level =5;//toplevel metal
-// clockNetwork.num_regs = pipeLogicCache.tot_stage_vector + pipeLogicDirectory.tot_stage_vector;
-// clockNetwork.optimize_wire();
-
-}
-
-
-void SharedCache::computeEnergy(bool is_tdp)
-{
- double homenode_data_access = (cachep.dir_ty==SBT)? 0.9:1.0;
- if (is_tdp)
- {
- if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)))
- {
- //init stats for Peak
- unicache.caches->stats_t.readAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.duty_cycle*homenode_data_access;
- unicache.caches->stats_t.readAc.miss = 0;
- unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss;
- unicache.caches->stats_t.writeAc.access = .33*unicache.caches->l_ip.num_rw_ports*cachep.duty_cycle*homenode_data_access;
- unicache.caches->stats_t.writeAc.miss = 0;
- unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss;
- unicache.caches->tdp_stats = unicache.caches->stats_t;
-
- if (cachep.dir_ty==SBT)
- {
- homenode_stats_t.readAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.dir_duty_cycle*(1-homenode_data_access);
- homenode_stats_t.readAc.miss = 0;
- homenode_stats_t.readAc.hit = homenode_stats_t.readAc.access - homenode_stats_t.readAc.miss;
- homenode_stats_t.writeAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.dir_duty_cycle*(1-homenode_data_access);
- homenode_stats_t.writeAc.miss = 0;
- homenode_stats_t.writeAc.hit = homenode_stats_t.writeAc.access - homenode_stats_t.writeAc.miss;
- homenode_tdp_stats = homenode_stats_t;
- }
-
- unicache.missb->stats_t.readAc.access = unicache.missb->l_ip.num_search_ports;
- unicache.missb->stats_t.writeAc.access = unicache.missb->l_ip.num_search_ports;
- unicache.missb->tdp_stats = unicache.missb->stats_t;
-
- unicache.ifb->stats_t.readAc.access = unicache.ifb->l_ip.num_search_ports;
- unicache.ifb->stats_t.writeAc.access = unicache.ifb->l_ip.num_search_ports;
- unicache.ifb->tdp_stats = unicache.ifb->stats_t;
-
- unicache.prefetchb->stats_t.readAc.access = unicache.prefetchb->l_ip.num_search_ports;
- unicache.prefetchb->stats_t.writeAc.access = unicache.ifb->l_ip.num_search_ports;
- unicache.prefetchb->tdp_stats = unicache.prefetchb->stats_t;
-
- unicache.wbb->stats_t.readAc.access = unicache.wbb->l_ip.num_search_ports;
- unicache.wbb->stats_t.writeAc.access = unicache.wbb->l_ip.num_search_ports;
- unicache.wbb->tdp_stats = unicache.wbb->stats_t;
- }
- else
- {
- unicache.caches->stats_t.readAc.access = unicache.caches->l_ip.num_search_ports*cachep.duty_cycle;
- unicache.caches->stats_t.readAc.miss = 0;
- unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss;
- unicache.caches->stats_t.writeAc.access = 0;
- unicache.caches->stats_t.writeAc.miss = 0;
- unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss;
- unicache.caches->tdp_stats = unicache.caches->stats_t;
-
- }
-
- }
- else
- {
- //init stats for runtime power (RTP)
- if (cacheL==L2)
- {
- unicache.caches->stats_t.readAc.access = XML->sys.L2[ithCache].read_accesses;
- unicache.caches->stats_t.readAc.miss = XML->sys.L2[ithCache].read_misses;
- unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss;
- unicache.caches->stats_t.writeAc.access = XML->sys.L2[ithCache].write_accesses;
- unicache.caches->stats_t.writeAc.miss = XML->sys.L2[ithCache].write_misses;
- unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss;
- unicache.caches->rtp_stats = unicache.caches->stats_t;
-
- if (cachep.dir_ty==SBT)
- {
- homenode_rtp_stats.readAc.access = XML->sys.L2[ithCache].homenode_read_accesses;
- homenode_rtp_stats.readAc.miss = XML->sys.L2[ithCache].homenode_read_misses;
- homenode_rtp_stats.readAc.hit = homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss;
- homenode_rtp_stats.writeAc.access = XML->sys.L2[ithCache].homenode_write_accesses;
- homenode_rtp_stats.writeAc.miss = XML->sys.L2[ithCache].homenode_write_misses;
- homenode_rtp_stats.writeAc.hit = homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss;
- }
- }
- else if (cacheL==L3)
- {
- unicache.caches->stats_t.readAc.access = XML->sys.L3[ithCache].read_accesses;
- unicache.caches->stats_t.readAc.miss = XML->sys.L3[ithCache].read_misses;
- unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss;
- unicache.caches->stats_t.writeAc.access = XML->sys.L3[ithCache].write_accesses;
- unicache.caches->stats_t.writeAc.miss = XML->sys.L3[ithCache].write_misses;
- unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss;
- unicache.caches->rtp_stats = unicache.caches->stats_t;
-
- if (cachep.dir_ty==SBT)
- {
- homenode_rtp_stats.readAc.access = XML->sys.L3[ithCache].homenode_read_accesses;
- homenode_rtp_stats.readAc.miss = XML->sys.L3[ithCache].homenode_read_misses;
- homenode_rtp_stats.readAc.hit = homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss;
- homenode_rtp_stats.writeAc.access = XML->sys.L3[ithCache].homenode_write_accesses;
- homenode_rtp_stats.writeAc.miss = XML->sys.L3[ithCache].homenode_write_misses;
- homenode_rtp_stats.writeAc.hit = homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss;
- }
- }
- else if (cacheL==L1Directory)
- {
- unicache.caches->stats_t.readAc.access = XML->sys.L1Directory[ithCache].read_accesses;
- unicache.caches->stats_t.readAc.miss = XML->sys.L1Directory[ithCache].read_misses;
- unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss;
- unicache.caches->stats_t.writeAc.access = XML->sys.L1Directory[ithCache].write_accesses;
- unicache.caches->stats_t.writeAc.miss = XML->sys.L1Directory[ithCache].write_misses;
- unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss;
- unicache.caches->rtp_stats = unicache.caches->stats_t;
- }
- else if (cacheL==L2Directory)
- {
- unicache.caches->stats_t.readAc.access = XML->sys.L2Directory[ithCache].read_accesses;
- unicache.caches->stats_t.readAc.miss = XML->sys.L2Directory[ithCache].read_misses;
- unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss;
- unicache.caches->stats_t.writeAc.access = XML->sys.L2Directory[ithCache].write_accesses;
- unicache.caches->stats_t.writeAc.miss = XML->sys.L2Directory[ithCache].write_misses;
- unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss;
- unicache.caches->rtp_stats = unicache.caches->stats_t;
- }
- if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)))
- { //Assuming write back and write-allocate cache
-
- unicache.missb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss ;
- unicache.missb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss;
- unicache.missb->rtp_stats = unicache.missb->stats_t;
-
- unicache.ifb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss;
- unicache.ifb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss;
- unicache.ifb->rtp_stats = unicache.ifb->stats_t;
-
- unicache.prefetchb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss;
- unicache.prefetchb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss;
- unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t;
-
- unicache.wbb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss;
- unicache.wbb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss;
- if (cachep.dir_ty==SBT)
- {
- unicache.missb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.missb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.missb->rtp_stats = unicache.missb->stats_t;
-
- unicache.missb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.missb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.missb->rtp_stats = unicache.missb->stats_t;
-
- unicache.ifb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.ifb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.ifb->rtp_stats = unicache.ifb->stats_t;
-
- unicache.prefetchb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.prefetchb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t;
-
- unicache.wbb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss;
- unicache.wbb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss;
- }
- unicache.wbb->rtp_stats = unicache.wbb->stats_t;
-
- }
-
- }
-
- unicache.power_t.reset();
- if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)))
- {
- unicache.power_t.readOp.dynamic += (unicache.caches->stats_t.readAc.hit*unicache.caches->local_result.power.readOp.dynamic+
- unicache.caches->stats_t.readAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic+
- unicache.caches->stats_t.writeAc.miss*unicache.caches->local_result.tag_array2->power.writeOp.dynamic+
- unicache.caches->stats_t.writeAc.access*unicache.caches->local_result.power.writeOp.dynamic);//write miss will also generate a write later
-
- if (cachep.dir_ty==SBT)
- {
- unicache.power_t.readOp.dynamic += homenode_stats_t.readAc.hit * (unicache.caches->local_result.data_array2->power.readOp.dynamic*dir_overhead +
- unicache.caches->local_result.tag_array2->power.readOp.dynamic) +
- homenode_stats_t.readAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic +
- homenode_stats_t.writeAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic +
- homenode_stats_t.writeAc.hit*(unicache.caches->local_result.data_array2->power.writeOp.dynamic*dir_overhead +
- unicache.caches->local_result.tag_array2->power.readOp.dynamic+
- homenode_stats_t.writeAc.miss*unicache.caches->local_result.power.writeOp.dynamic);//write miss on dynamic home node will generate a replacement write on whole cache block
-
-
- }
-
- unicache.power_t.readOp.dynamic += unicache.missb->stats_t.readAc.access*unicache.missb->local_result.power.searchOp.dynamic +
- unicache.missb->stats_t.writeAc.access*unicache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
- unicache.power_t.readOp.dynamic += unicache.ifb->stats_t.readAc.access*unicache.ifb->local_result.power.searchOp.dynamic +
- unicache.ifb->stats_t.writeAc.access*unicache.ifb->local_result.power.writeOp.dynamic;
- unicache.power_t.readOp.dynamic += unicache.prefetchb->stats_t.readAc.access*unicache.prefetchb->local_result.power.searchOp.dynamic +
- unicache.prefetchb->stats_t.writeAc.access*unicache.prefetchb->local_result.power.writeOp.dynamic;
- unicache.power_t.readOp.dynamic += unicache.wbb->stats_t.readAc.access*unicache.wbb->local_result.power.searchOp.dynamic +
- unicache.wbb->stats_t.writeAc.access*unicache.wbb->local_result.power.writeOp.dynamic;
- }
- else
- {
- unicache.power_t.readOp.dynamic += (unicache.caches->stats_t.readAc.access*unicache.caches->local_result.power.searchOp.dynamic+
- unicache.caches->stats_t.writeAc.access*unicache.caches->local_result.power.writeOp.dynamic);
- }
-
- if (is_tdp)
- {
- unicache.power = unicache.power_t + (unicache.caches->local_result.power)*pppm_lkg;
- if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)))
- {
- unicache.power = unicache.power+
- (unicache.missb->local_result.power +
- unicache.ifb->local_result.power +
- unicache.prefetchb->local_result.power +
- unicache.wbb->local_result.power)*pppm_lkg;
- }
- power = power + unicache.power;
-// cout<<"unicache.caches->local_result.power.readOp.dynamic"<<unicache.caches->local_result.power.readOp.dynamic<<endl;
-// cout<<"unicache.caches->local_result.power.writeOp.dynamic"<<unicache.caches->local_result.power.writeOp.dynamic<<endl;
- }
- else
- {
- unicache.rt_power = unicache.power_t + (unicache.caches->local_result.power)*pppm_lkg;
- if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)))
- {
- (unicache.rt_power = unicache.rt_power +
- unicache.missb->local_result.power +
- unicache.ifb->local_result.power +
- unicache.prefetchb->local_result.power +
- unicache.wbb->local_result.power)*pppm_lkg;
- }
- rt_power = rt_power + unicache.rt_power;
- }
-}
-
-// Print this cache's summary to stdout.
-//
-// indent: number of spaces placed in front of each printed line.
-// is_tdp: when true the summary below is printed; when false nothing is
-//         printed (the else branch is empty).
-void SharedCache::displayEnergy(uint32_t indent,bool is_tdp)
-{
- string indent_str(indent, ' ');
- // NOTE(review): indent_str_next is built but never used in this function.
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
-
- if (is_tdp)
- {
- // The name line is indented only when XML->sys.Private_L2 is set.
- cout << (XML->sys.Private_L2? indent_str:"")<< cachep.name << endl;
- // Area is scaled by 1e-6 before being labelled mm^2.
- cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
- // Peak dynamic is power.readOp.dynamic multiplied by the cache clock rate.
- cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*cachep.clockRate << " W" << endl;
- // Report the longer-channel leakage figure when long-channel devices are selected.
- cout << indent_str << "Subthreshold Leakage = "
- << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
- //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
- cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
- // Runtime dynamic is rt_power.readOp.dynamic divided by the execution time.
- cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/cachep.executionTime << " W" << endl;
- cout <<endl;
- }
- else
- {
- }
-}
-
-//void SharedCache::computeMaxPower()
-//{
-// //Compute maximum power and runtime power.
-// //When computing runtime power, McPAT gets or reasons out the statistics based on XML input.
-// maxPower = 0.0;
-// //llCache,itlb
-// llCache.maxPower = 0.0;
-// llCache.maxPower += (llCache.caches.l_ip.num_rw_ports*(0.67*llCache.caches.local_result.power.readOp.dynamic+0.33*llCache.caches.local_result.power.writeOp.dynamic)
-// +llCache.caches.l_ip.num_rd_ports*llCache.caches.local_result.power.readOp.dynamic+llCache.caches.l_ip.num_wr_ports*llCache.caches.local_result.power.writeOp.dynamic
-// +llCache.caches.l_ip.num_se_rd_ports*llCache.caches.local_result.power.readOp.dynamic)*clockRate;
-// ///cout<<"llCache.maxPower=" <<llCache.maxPower<<endl;
-//
-// llCache.maxPower += llCache.missb.l_ip.num_search_ports*llCache.missb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"llCache.maxPower=" <<llCache.maxPower<<endl;
-//
-// llCache.maxPower += llCache.ifb.l_ip.num_search_ports*llCache.ifb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"llCache.maxPower=" <<llCache.maxPower<<endl;
-//
-// llCache.maxPower += llCache.prefetchb.l_ip.num_search_ports*llCache.prefetchb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"llCache.maxPower=" <<llCache.maxPower<<endl;
-//
-// llCache.maxPower += llCache.wbb.l_ip.num_search_ports*llCache.wbb.local_result.power.searchOp.dynamic*clockRate;
-// //llCache.maxPower *= scktRatio; //TODO: this calculation should be self-contained
-// ///cout<<"llCache.maxPower=" <<llCache.maxPower<<endl;
-//
-//// directory_power = (directory.caches.l_ip.num_rw_ports*(0.67*directory.caches.local_result.power.readOp.dynamic+0.33*directory.caches.local_result.power.writeOp.dynamic)
-//// +directory.caches.l_ip.num_rd_ports*directory.caches.local_result.power.readOp.dynamic+directory.caches.l_ip.num_wr_ports*directory.caches.local_result.power.writeOp.dynamic
-//// +directory.caches.l_ip.num_se_rd_ports*directory.caches.local_result.power.readOp.dynamic)*clockRate;
-//
-// L2Tot.power.readOp.dynamic = llCache.maxPower;
-// L2Tot.power.readOp.leakage = llCache.caches.local_result.power.readOp.leakage +
-// llCache.missb.local_result.power.readOp.leakage +
-// llCache.ifb.local_result.power.readOp.leakage +
-// llCache.prefetchb.local_result.power.readOp.leakage +
-// llCache.wbb.local_result.power.readOp.leakage;
-//
-// L2Tot.area.set_area(llCache.area*1.1*1e-6);//placement and routing overhead
-//
-// if (XML->sys.number_of_dir_levels==1)
-// {
-// if (XML->sys.first_level_dir==0)
-// {
-// directory.maxPower = 0.0;
-// directory.maxPower += (directory.caches.l_ip.num_rw_ports*(0.67*directory.caches.local_result.power.readOp.dynamic+0.33*directory.caches.local_result.power.writeOp.dynamic)
-// +directory.caches.l_ip.num_rd_ports*directory.caches.local_result.power.readOp.dynamic+directory.caches.l_ip.num_wr_ports*directory.caches.local_result.power.writeOp.dynamic
-// +directory.caches.l_ip.num_se_rd_ports*directory.caches.local_result.power.readOp.dynamic)*clockRate;
-// ///cout<<"directory.maxPower=" <<directory.maxPower<<endl;
-//
-// directory.maxPower += directory.missb.l_ip.num_search_ports*directory.missb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory.maxPower=" <<directory.maxPower<<endl;
-//
-// directory.maxPower += directory.ifb.l_ip.num_search_ports*directory.ifb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory.maxPower=" <<directory.maxPower<<endl;
-//
-// directory.maxPower += directory.prefetchb.l_ip.num_search_ports*directory.prefetchb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory.maxPower=" <<directory.maxPower<<endl;
-//
-// directory.maxPower += directory.wbb.l_ip.num_search_ports*directory.wbb.local_result.power.searchOp.dynamic*clockRate;
-//
-// cc.power.readOp.dynamic = directory.maxPower*scktRatio*8;//8 is the memory controller counts
-// cc.power.readOp.leakage = directory.caches.local_result.power.readOp.leakage +
-// directory.missb.local_result.power.readOp.leakage +
-// directory.ifb.local_result.power.readOp.leakage +
-// directory.prefetchb.local_result.power.readOp.leakage +
-// directory.wbb.local_result.power.readOp.leakage;
-//
-// cc.power.readOp.leakage *=8;
-//
-// cc.area.set_area(directory.area*8);
-// cout<<"CC area="<<cc.area.get_area()*1e-6<<endl;
-// cout<<"CC Power="<<cc.power.readOp.dynamic<<endl;
-// ccTot.area.set_area(cc.area.get_area()*1e-6);
-// ccTot.power = cc.power;
-// cout<<"DC energy per access" << cc.power.readOp.dynamic/clockRate/8;
-// }
-// else if (XML->sys.first_level_dir==1)
-// {
-// inv_dir.maxPower = inv_dir.caches.local_result.power.searchOp.dynamic*clockRate*XML->sys.domain_size;
-// cc.power.readOp.dynamic = inv_dir.maxPower*scktRatio*64/XML->sys.domain_size;
-// cc.power.readOp.leakage = inv_dir.caches.local_result.power.readOp.leakage*inv_dir.caches.l_ip.nbanks*64/XML->sys.domain_size;
-//
-// cc.area.set_area(inv_dir.area*64/XML->sys.domain_size);
-// cout<<"CC area="<<cc.area.get_area()*1e-6<<endl;
-// cout<<"CC Power="<<cc.power.readOp.dynamic<<endl;
-// ccTot.area.set_area(cc.area.get_area()*1e-6);
-// cout<<"DC energy per access" << cc.power.readOp.dynamic/clockRate/8;
-// ccTot.power = cc.power;
-// }
-// }
-//
-// else if (XML->sys.number_of_dir_levels==2)
-// {
-//
-// directory.maxPower = 0.0;
-// directory.maxPower += (directory.caches.l_ip.num_rw_ports*(0.67*directory.caches.local_result.power.readOp.dynamic+0.33*directory.caches.local_result.power.writeOp.dynamic)
-// +directory.caches.l_ip.num_rd_ports*directory.caches.local_result.power.readOp.dynamic+directory.caches.l_ip.num_wr_ports*directory.caches.local_result.power.writeOp.dynamic
-// +directory.caches.l_ip.num_se_rd_ports*directory.caches.local_result.power.readOp.dynamic)*clockRate;
-// ///cout<<"directory.maxPower=" <<directory.maxPower<<endl;
-//
-// directory.maxPower += directory.missb.l_ip.num_search_ports*directory.missb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory.maxPower=" <<directory.maxPower<<endl;
-//
-// directory.maxPower += directory.ifb.l_ip.num_search_ports*directory.ifb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory.maxPower=" <<directory.maxPower<<endl;
-//
-// directory.maxPower += directory.prefetchb.l_ip.num_search_ports*directory.prefetchb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory.maxPower=" <<directory.maxPower<<endl;
-//
-// directory.maxPower += directory.wbb.l_ip.num_search_ports*directory.wbb.local_result.power.searchOp.dynamic*clockRate;
-//
-// cc.power.readOp.dynamic = directory.maxPower*scktRatio*8;//8 is the memory controller counts
-// cc.power.readOp.leakage = directory.caches.local_result.power.readOp.leakage +
-// directory.missb.local_result.power.readOp.leakage +
-// directory.ifb.local_result.power.readOp.leakage +
-// directory.prefetchb.local_result.power.readOp.leakage +
-// directory.wbb.local_result.power.readOp.leakage;
-// cc.power.readOp.leakage *=8;
-// cc.area.set_area(directory.area*8);
-//
-// if (XML->sys.first_level_dir==0)
-// {
-// directory1.maxPower = 0.0;
-// directory1.maxPower += (directory1.caches.l_ip.num_rw_ports*(0.67*directory1.caches.local_result.power.readOp.dynamic+0.33*directory1.caches.local_result.power.writeOp.dynamic)
-// +directory1.caches.l_ip.num_rd_ports*directory1.caches.local_result.power.readOp.dynamic+directory1.caches.l_ip.num_wr_ports*directory1.caches.local_result.power.writeOp.dynamic
-// +directory1.caches.l_ip.num_se_rd_ports*directory1.caches.local_result.power.readOp.dynamic)*clockRate;
-// ///cout<<"directory1.maxPower=" <<directory1.maxPower<<endl;
-//
-// directory1.maxPower += directory1.missb.l_ip.num_search_ports*directory1.missb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory1.maxPower=" <<directory1.maxPower<<endl;
-//
-// directory1.maxPower += directory1.ifb.l_ip.num_search_ports*directory1.ifb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory1.maxPower=" <<directory1.maxPower<<endl;
-//
-// directory1.maxPower += directory1.prefetchb.l_ip.num_search_ports*directory1.prefetchb.local_result.power.searchOp.dynamic*clockRate;
-// ///cout<<"directory1.maxPower=" <<directory1.maxPower<<endl;
-//
-// directory1.maxPower += directory1.wbb.l_ip.num_search_ports*directory1.wbb.local_result.power.searchOp.dynamic*clockRate;
-//
-// cc1.power.readOp.dynamic = directory1.maxPower*scktRatio*64/XML->sys.domain_size;
-// cc1.power.readOp.leakage = directory1.caches.local_result.power.readOp.leakage +
-// directory1.missb.local_result.power.readOp.leakage +
-// directory1.ifb.local_result.power.readOp.leakage +
-// directory1.prefetchb.local_result.power.readOp.leakage +
-// directory1.wbb.local_result.power.readOp.leakage;
-// cc1.power.readOp.leakage *= 64/XML->sys.domain_size;
-// cc1.area.set_area(directory1.area*64/XML->sys.domain_size);
-//
-// cout<<"CC area="<<(cc.area.get_area()+cc1.area.get_area())*1e-6<<endl;
-// cout<<"CC Power="<<cc.power.readOp.dynamic + cc1.power.readOp.dynamic <<endl;
-// ccTot.area.set_area((cc.area.get_area()+cc1.area.get_area())*1e-6);
-// ccTot.power = cc.power + cc1.power;
-// }
-// else if (XML->sys.first_level_dir==1)
-// {
-// inv_dir.maxPower = inv_dir.caches.local_result.power.searchOp.dynamic*clockRate*XML->sys.domain_size;
-// cc1.power.readOp.dynamic = inv_dir.maxPower*scktRatio*(64/XML->sys.domain_size);
-// cc1.power.readOp.leakage = inv_dir.caches.local_result.power.readOp.leakage*inv_dir.caches.l_ip.nbanks*XML->sys.domain_size;
-//
-// cc1.area.set_area(inv_dir.area*64/XML->sys.domain_size);
-// cout<<"CC area="<<(cc.area.get_area()+cc1.area.get_area())*1e-6<<endl;
-// cout<<"CC Power="<<cc.power.readOp.dynamic + cc1.power.readOp.dynamic <<endl;
-// ccTot.area.set_area((cc.area.get_area()+cc1.area.get_area())*1e-6);
-// ccTot.power = cc.power + cc1.power;
-//
-// }
-// else if (XML->sys.first_level_dir==2)
-// {
-// cout<<"CC area="<<cc.area.get_area()*1e-6<<endl;
-// cout<<"CC Power="<<cc.power.readOp.dynamic<<endl;
-// ccTot.area.set_area(cc.area.get_area()*1e-6);
-// ccTot.power = cc.power;
-// }
-// }
-//
-//cout<<"L2cache size="<<L2Tot.area.get_area()*1e-6<<endl;
-//cout<<"L2cache dynamic power="<<L2Tot.power.readOp.dynamic<<endl;
-//cout<<"L2cache laeakge power="<<L2Tot.power.readOp.leakage<<endl;
-//
-// ///cout<<"llCache.maxPower=" <<llCache.maxPower<<endl;
-//
-//
-// maxPower += llCache.maxPower;
-// ///cout<<"maxpower=" <<maxPower<<endl;
-//
-//// maxPower += pipeLogicCache.power.readOp.dynamic*clockRate;
-//// ///cout<<"pipeLogic.power="<<pipeLogicCache.power.readOp.dynamic*clockRate<<endl;
-//// ///cout<<"maxpower=" <<maxPower<<endl;
-////
-//// maxPower += pipeLogicDirectory.power.readOp.dynamic*clockRate;
-//// ///cout<<"pipeLogic.power="<<pipeLogicDirectory.power.readOp.dynamic*clockRate<<endl;
-//// ///cout<<"maxpower=" <<maxPower<<endl;
-////
-//// //clock power
-//// maxPower += clockNetwork.total_power.readOp.dynamic*clockRate;
-//// ///cout<<"clockNetwork.total_power="<<clockNetwork.total_power.readOp.dynamic*clockRate<<endl;
-//// ///cout<<"maxpower=" <<maxPower<<endl;
-//
-//}
-
-// Fill cachep and the interface_ip technology-type fields from the XML entry
-// selected by cacheL (L2, L3, L1Directory or L2Directory) and ithCache.
-//
-// In every branch:
-//  - clockRate is read from the XML entry and then multiplied by 1e6
-//    (presumably the XML value is in MHz -- TODO confirm);
-//  - executionTime is total_cycles / (target_core_clockrate * 1e6);
-//  - throughput and latency are config entries [4] and [5] divided by clockRate;
-//  - capacity, blockW, assoc and nbanks come from config entries [0]..[3];
-//  - the four buffer sizes come from buffer_sizes[0]..[3].
-// If cacheL matches none of the four values, nothing is written.
-void SharedCache::set_cache_param()
-{
- if (cacheL==L2)
- {
- cachep.name = "L2";
- cachep.clockRate = XML->sys.L2[ithCache].clockrate;
- cachep.clockRate *= 1e6;
- cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
- interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type;//long channel device LSTP
- interface_ip.data_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type;
- interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type;
- interface_ip.tag_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type;
- cachep.capacity = XML->sys.L2[ithCache].L2_config[0];
- cachep.blockW = XML->sys.L2[ithCache].L2_config[1];
- cachep.assoc = XML->sys.L2[ithCache].L2_config[2];
- cachep.nbanks = XML->sys.L2[ithCache].L2_config[3];
- cachep.throughput = XML->sys.L2[ithCache].L2_config[4]/cachep.clockRate;
- cachep.latency = XML->sys.L2[ithCache].L2_config[5]/cachep.clockRate;
- cachep.missb_size = XML->sys.L2[ithCache].buffer_sizes[0];
- cachep.fu_size = XML->sys.L2[ithCache].buffer_sizes[1];
- cachep.prefetchb_size= XML->sys.L2[ithCache].buffer_sizes[2];
- cachep.wbb_size = XML->sys.L2[ithCache].buffer_sizes[3];
- cachep.duty_cycle = XML->sys.L2[ithCache].duty_cycle;
- // A cache without a merged directory is NonDir; otherwise it is SBT and
- // also takes the directory duty cycle from the XML entry.
- if (!XML->sys.L2[ithCache].merged_dir)
- {
- cachep.dir_ty = NonDir;
- }
- else
- {
- cachep.dir_ty = SBT;
- cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle;
- }
- }
- else if (cacheL==L3)
- {
- cachep.name = "L3";
- cachep.clockRate = XML->sys.L3[ithCache].clockrate;
- cachep.clockRate *= 1e6;
- cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
- interface_ip.data_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type;//long channel device LSTP
- interface_ip.data_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type;
- interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type;
- interface_ip.tag_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type;
- cachep.capacity = XML->sys.L3[ithCache].L3_config[0];
- cachep.blockW = XML->sys.L3[ithCache].L3_config[1];
- cachep.assoc = XML->sys.L3[ithCache].L3_config[2];
- cachep.nbanks = XML->sys.L3[ithCache].L3_config[3];
- cachep.throughput = XML->sys.L3[ithCache].L3_config[4]/cachep.clockRate;
- cachep.latency = XML->sys.L3[ithCache].L3_config[5]/cachep.clockRate;
- cachep.missb_size = XML->sys.L3[ithCache].buffer_sizes[0];
- cachep.fu_size = XML->sys.L3[ithCache].buffer_sizes[1];
- cachep.prefetchb_size= XML->sys.L3[ithCache].buffer_sizes[2];
- cachep.wbb_size = XML->sys.L3[ithCache].buffer_sizes[3];
- cachep.duty_cycle = XML->sys.L3[ithCache].duty_cycle;
- // NOTE(review): this L3 branch reads merged_dir and dir_duty_cycle from
- // XML->sys.L2[ithCache], while every other field above comes from
- // XML->sys.L3[ithCache]. Looks like a copy-paste slip -- confirm whether
- // the L3 entry was intended.
- if (!XML->sys.L2[ithCache].merged_dir)
- {
- cachep.dir_ty = NonDir;
- }
- else
- {
- cachep.dir_ty = SBT;
- cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle;
- }
- }
- else if (cacheL==L1Directory)
- {
- cachep.name = "First Level Directory";
- // For directories the directory type is taken directly from the XML
- // Directory_type value, cast to Dir_type.
- cachep.dir_ty = (enum Dir_type) XML->sys.L1Directory[ithCache].Directory_type;
- cachep.clockRate = XML->sys.L1Directory[ithCache].clockrate;
- cachep.clockRate *= 1e6;
- cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
- interface_ip.data_arr_ram_cell_tech_type = XML->sys.L1Directory[ithCache].device_type;//long channel device LSTP
- interface_ip.data_arr_peri_global_tech_type = XML->sys.L1Directory[ithCache].device_type;
- interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L1Directory[ithCache].device_type;
- interface_ip.tag_arr_peri_global_tech_type = XML->sys.L1Directory[ithCache].device_type;
- cachep.capacity = XML->sys.L1Directory[ithCache].Dir_config[0];
- cachep.blockW = XML->sys.L1Directory[ithCache].Dir_config[1];
- cachep.assoc = XML->sys.L1Directory[ithCache].Dir_config[2];
- cachep.nbanks = XML->sys.L1Directory[ithCache].Dir_config[3];
- cachep.throughput = XML->sys.L1Directory[ithCache].Dir_config[4]/cachep.clockRate;
- cachep.latency = XML->sys.L1Directory[ithCache].Dir_config[5]/cachep.clockRate;
- cachep.missb_size = XML->sys.L1Directory[ithCache].buffer_sizes[0];
- cachep.fu_size = XML->sys.L1Directory[ithCache].buffer_sizes[1];
- cachep.prefetchb_size= XML->sys.L1Directory[ithCache].buffer_sizes[2];
- cachep.wbb_size = XML->sys.L1Directory[ithCache].buffer_sizes[3];
- cachep.duty_cycle = XML->sys.L1Directory[ithCache].duty_cycle;
- }
- else if (cacheL==L2Directory)
- {
- cachep.name = "Second Level Directory";
- cachep.dir_ty = (enum Dir_type) XML->sys.L2Directory[ithCache].Directory_type;
- cachep.clockRate = XML->sys.L2Directory[ithCache].clockrate;
- cachep.clockRate *= 1e6;
- cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
- interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2Directory[ithCache].device_type;//long channel device LSTP
- interface_ip.data_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type;
- interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2Directory[ithCache].device_type;
- interface_ip.tag_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type;
- cachep.capacity = XML->sys.L2Directory[ithCache].Dir_config[0];
- cachep.blockW = XML->sys.L2Directory[ithCache].Dir_config[1];
- cachep.assoc = XML->sys.L2Directory[ithCache].Dir_config[2];
- cachep.nbanks = XML->sys.L2Directory[ithCache].Dir_config[3];
- cachep.throughput = XML->sys.L2Directory[ithCache].Dir_config[4]/cachep.clockRate;
- cachep.latency = XML->sys.L2Directory[ithCache].Dir_config[5]/cachep.clockRate;
- cachep.missb_size = XML->sys.L2Directory[ithCache].buffer_sizes[0];
- cachep.fu_size = XML->sys.L2Directory[ithCache].buffer_sizes[1];
- cachep.prefetchb_size= XML->sys.L2Directory[ithCache].buffer_sizes[2];
- cachep.wbb_size = XML->sys.L2Directory[ithCache].buffer_sizes[3];
- cachep.duty_cycle = XML->sys.L2Directory[ithCache].duty_cycle;
- }
- //cachep.cache_duty_cycle=cachep.dir_duty_cycle = 0.35;
-}
-
diff --git a/ext/mcpat/system.cc b/ext/mcpat/system.cc
new file mode 100644
index 000000000..657f7f38d
--- /dev/null
+++ b/ext/mcpat/system.cc
@@ -0,0 +1,350 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Joel Hestness
+ * Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "array.h"
+#include "basic_circuit.h"
+#include "common.h"
+#include "const.h"
+#include "parameter.h"
+#include "system.h"
+#include "version.h"
+
+// TODO: Fix this constructor to default initialize all pointers to NULL
+// Build the top-level System component from its XML node.
+//
+// Sets the component name to "System", reads system-wide parameters via
+// set_proc_param(), then walks the node's "component" children and appends
+// one child object to `children` for each recognized "type" attribute.
+//
+// _xml_data: XML node describing the system; forwarded to McPATComponent.
+//
+// NOTE(review): STRCMP is a macro defined outside this file; the chain below
+// only parses if it expands to something like `else if (<string match>)` --
+// confirm against its definition before restructuring.
+System::System(XMLNode* _xml_data)
+ : McPATComponent(_xml_data) {
+ int i;
+ // Running counts handed to the Core / OnChipNetwork constructors
+ int currCore = 0;
+ int currNOC = 0;
+ name = "System";
+ set_proc_param();
+
+ // TODO: This loop can (and should) be called by every component in
+ // the hierarchy. Consider moving it to McPATComponent
+ int numChildren = xml_data->nChildNode("component");
+ for (i = 0; i < numChildren; i++ ) {
+ // For each child node of the system,
+ // NOTE(review): the loop index is passed by address to
+ // getChildNodePtr; whether the callee modifies it is not visible
+ // here -- verify it does not interfere with the i++ above.
+ XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
+ XMLCSTR type = childXML->getAttribute("type");
+
+ if (!type) {
+ // No "type" attribute: warn, identifying the node by its "id"
+ warnMissingComponentType(childXML->getAttribute("id"));
+
+ } STRCMP(type, "Core") {
+ // TODO: If homogeneous cores, and currCore > 0, just copy core 0
+ children.push_back(new Core(childXML, currCore, &interface_ip));
+ currCore++;
+ } STRCMP(type, "CacheUnit") {
+ children.push_back(new CacheUnit(childXML, &interface_ip));
+ } STRCMP(type, "CacheController") {
+ // TODO: Remove reliance on interface_ip - there should be a better
+ // way to share global variables than passing, copying
+ children.push_back(new CacheController(childXML, &interface_ip));
+ } STRCMP(type, "MemoryController") {
+ children.push_back(new MemoryController(childXML, &interface_ip));
+ } STRCMP(type, "FlashController") {
+ children.push_back(new FlashController(childXML, &interface_ip));
+ } STRCMP(type, "NIUController") {
+ children.push_back(new NIUController(childXML, &interface_ip));
+ } STRCMP(type, "PCIeController") {
+ children.push_back(new PCIeController(childXML, &interface_ip));
+ } STRCMP(type, "Memory") {
+ // TODO:
+ // No child object is created for "Memory"; only a warning is issued
+ warnIncompleteComponentType(type);
+ } STRCMP(type, "OnChipNetwork") {
+ // TODO: Many of the parameters to this constructor should be
+ // handled in another way
+ children.push_back(new OnChipNetwork(childXML, currNOC,
+ &interface_ip));
+ currNOC++;
+ // The child is created, but the type is still reported as incomplete
+ warnIncompleteComponentType(type);
+ } STRCMP(type, "BusInterconnect") {
+ // TODO: Many of the parameters to this constructor should be
+ // handled in another way
+ children.push_back(new BusInterconnect(childXML, &interface_ip));
+ // The child is created, but the type is still reported as incomplete
+ warnIncompleteComponentType(type);
+
+ // TODO: Add a directory data type that can handle the directories
+ // as defined by certain McScript output
+ } else {
+ warnUnrecognizedComponent(type);
+ }
+ }
+}
+
+// Print "Device Type = <description>" for the given device type code.
+//
+// device_type_: code in the range 0..4; each code selects one of the
+//               descriptions in the table below.
+// indent:       number of spaces printed before the label.
+//
+// Any other code prints "Unknown!" and terminates the program via exit(0).
+void System::displayDeviceType(int device_type_, uint32_t indent) {
+    // Descriptions indexed by device type code
+    static const char* const device_type_names[] = {
+        "ITRS high performance device type",
+        "ITRS low standby power device type",
+        "ITRS low operating power device type",
+        "LP-DRAM device type",
+        "COMM-DRAM device type"
+    };
+    const int num_device_types =
+        sizeof(device_type_names) / sizeof(device_type_names[0]);
+
+    const string padding(indent, ' ');
+    cout << padding << "Device Type = ";
+
+    if (device_type_ < 0 || device_type_ >= num_device_types) {
+        // Unrecognized code: the padding is printed a second time here,
+        // after the label, before the program exits
+        cout << padding << "Unknown!" << endl;
+        exit(0);
+    }
+
+    cout << device_type_names[device_type_] << endl;
+}
+
+// Print "Interconnect metal projection = <description>" for the given code.
+//
+// interconnect_type_: 0 selects the aggressive projection text, 1 the
+//                     conservative one.
+// indent:             number of spaces printed before the label.
+//
+// Any other code prints "Unknown!" and terminates the program via exit(0).
+void System::displayInterconnectType(int interconnect_type_, uint32_t indent) {
+    const string padding(indent, ' ');
+    cout << padding << "Interconnect metal projection = ";
+
+    if (interconnect_type_ == 0) {
+        cout << "aggressive interconnect technology projection" << endl;
+    } else if (interconnect_type_ == 1) {
+        cout << "conservative interconnect technology projection" << endl;
+    } else {
+        // Unrecognized code: the padding is printed a second time here,
+        // after the label, before the program exits
+        cout << padding << "Unknown!" << endl;
+        exit(0);
+    }
+}
+
+// TODO: Migrate this down to the McPATComponent::displayData function
+// Print the report header for the whole system, then delegate to
+// McPATComponent::displayData for the component data.
+//
+// indent: number of spaces printed before each system-level line.
+// plevel: print level. Below 5 the banner shows the level and a hint to
+//         raise it; otherwise the banner always reports level 5.
+void System::displayData(uint32_t indent, int plevel) {
+    const string indent_str(indent, ' ');
+    // Separator row; the two adjacent literals are joined by the compiler
+    const char* const separator =
+        "*****************************************************************"
+        "************************";
+
+    // The banner prefix is the same for every print level
+    cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR
+         << " of " << VER_UPDATE << ") results (current print level is ";
+    if (plevel < 5) {
+        cout << plevel
+             << ", please increase print level to see the details in "
+             << "components) ";
+    } else {
+        // Levels of 5 and above are all reported as 5
+        cout << "5)";
+    }
+    cout << endl;
+
+    cout << separator << endl;
+    cout << indent_str << "Technology " << core_tech_node << " nm" << endl;
+    if (longer_channel_device)
+        cout << indent_str << "Using Long Channel Devices When Appropriate" << endl;
+    displayInterconnectType(interconnect_projection_type, indent);
+    cout << indent_str << "Target Clock Rate (MHz) " << target_core_clockrate / 1e6 << endl;
+    cout << endl;
+
+    cout << separator << endl;
+
+    McPATComponent::displayData(indent, plevel);
+}
+
+// Reads the system-wide <param> and <stat> children of xml_data, checks the
+// mandatory ones, derives clockRate / execution_time, and seeds interface_ip
+// with the values shared by every component.
+void System::set_proc_param() {
+    // TODO: Consider creating a SystemParams class that tracks system-wide
+    // parameters like these
+    // Negative values act as "not specified" sentinels for the checks below.
+    longer_channel_device = false;
+    core_tech_node = -1;
+    temperature = -1;
+    interconnect_projection_type = -1;
+    device_type = -1;
+    physical_address_width = -1;
+
+    int num_children = xml_data->nChildNode("param");
+    int i;
+    for (i = 0; i < num_children; i++) {
+        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+        XMLCSTR node_name = paramNode->getAttribute("name");
+        XMLCSTR value = paramNode->getAttribute("value");
+
+        // A node without a name cannot be matched; skip it so the
+        // ASSIGN_*_IF macros (which presumably compare against node_name --
+        // verify against their definition) never see a null name.
+        if (!node_name) {
+            warnMissingParamName(paramNode->getAttribute("id"));
+            continue;
+        }
+
+        ASSIGN_FP_IF("core_tech_node", core_tech_node);
+        ASSIGN_INT_IF("target_core_clockrate", target_core_clockrate);
+        ASSIGN_INT_IF("temperature", temperature);
+        ASSIGN_INT_IF("device_type", device_type);
+        ASSIGN_INT_IF("longer_channel_device", longer_channel_device);
+        ASSIGN_INT_IF("interconnect_projection_type",
+                      interconnect_projection_type);
+        ASSIGN_INT_IF("machine_bits", data_path_width);
+        ASSIGN_INT_IF("virtual_address_width", virtual_address_width);
+        ASSIGN_INT_IF("physical_address_width", physical_address_width);
+        ASSIGN_INT_IF("virtual_memory_page_size", virtual_memory_page_size);
+        ASSIGN_INT_IF("wire_is_mat_type", interface_ip.wire_is_mat_type);
+        ASSIGN_INT_IF("wire_os_mat_type", interface_ip.wire_os_mat_type);
+        ASSIGN_INT_IF("delay_wt", interface_ip.delay_wt);
+        ASSIGN_INT_IF("area_wt", interface_ip.area_wt);
+        ASSIGN_INT_IF("dynamic_power_wt", interface_ip.dynamic_power_wt);
+        ASSIGN_INT_IF("leakage_power_wt", interface_ip.leakage_power_wt);
+        ASSIGN_INT_IF("cycle_time_wt", interface_ip.cycle_time_wt);
+        ASSIGN_INT_IF("delay_dev", interface_ip.delay_dev);
+        ASSIGN_INT_IF("area_dev", interface_ip.area_dev);
+        ASSIGN_INT_IF("dynamic_power_dev", interface_ip.dynamic_power_dev);
+        ASSIGN_INT_IF("leakage_power_dev", interface_ip.leakage_power_dev);
+        ASSIGN_INT_IF("cycle_time_dev", interface_ip.cycle_time_dev);
+        ASSIGN_INT_IF("ed", interface_ip.ed);
+        ASSIGN_INT_IF("burst_len", interface_ip.burst_len);
+        ASSIGN_INT_IF("int_prefetch_w", interface_ip.int_prefetch_w);
+        ASSIGN_INT_IF("page_sz_bits", interface_ip.page_sz_bits);
+        ASSIGN_ENUM_IF("rpters_in_htree", interface_ip.rpters_in_htree, bool);
+        ASSIGN_INT_IF("ver_htree_wires_over_array",
+                      interface_ip.ver_htree_wires_over_array);
+        ASSIGN_INT_IF("broadcast_addr_din_over_ver_htrees",
+                      interface_ip.broadcast_addr_din_over_ver_htrees);
+        ASSIGN_INT_IF("nuca", interface_ip.nuca);
+        ASSIGN_INT_IF("nuca_bank_count", interface_ip.nuca_bank_count);
+        ASSIGN_ENUM_IF("force_cache_config",
+                       interface_ip.force_cache_config, bool);
+        ASSIGN_ENUM_IF("wt", interface_ip.wt, Wire_type);
+        ASSIGN_INT_IF("force_wiretype", interface_ip.force_wiretype);
+        ASSIGN_INT_IF("print_detail", interface_ip.print_detail);
+        ASSIGN_ENUM_IF("add_ecc_b_", interface_ip.add_ecc_b_, bool);
+
+        else {
+            warnUnrecognizedParam(node_name);
+        }
+    }
+
+    // Change from MHz to Hz
+    target_core_clockrate *= 1e6;
+
+    num_children = xml_data->nChildNode("stat");
+    for (i = 0; i < num_children; i++) {
+        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+        XMLCSTR node_name = statNode->getAttribute("name");
+        XMLCSTR value = statNode->getAttribute("value");
+
+        // Same guard as for params: nameless stats are reported and skipped.
+        if (!node_name) {
+            warnMissingStatName(statNode->getAttribute("id"));
+            continue;
+        }
+
+        ASSIGN_FP_IF("total_cycles", total_cycles);
+
+        else {
+            warnUnrecognizedStat(node_name);
+        }
+    }
+
+    if (temperature < 0) {
+        errorUnspecifiedParam("temperature");
+    }
+
+    if (core_tech_node < 0) {
+        errorUnspecifiedParam("core_tech_node");
+    }
+
+    if (interconnect_projection_type < 0) {
+        errorUnspecifiedParam("interconnect_projection_type");
+    }
+    // Collapse to 0 or 1 only after the sentinel check above; normalizing
+    // first would turn the -1 "unspecified" marker into 1 and hide the error.
+    interconnect_projection_type =
+        (interconnect_projection_type == 0) ? 0 : 1;
+
+    if (device_type < 0) {
+        errorUnspecifiedParam("device_type");
+    }
+
+    if (physical_address_width <= 0) {
+        errorNonPositiveParam("physical_address_width");
+    }
+
+    if (data_path_width <= 0) {
+        errorNonPositiveParam("machine_bits");
+    }
+
+    if (total_cycles <= 0) {
+        // Adjacent literals form one format string, so the whole message
+        // (including the trailing newline) is printed.
+        fprintf(stderr, "WARNING: total_cycles <= 0 in system component, "
+                "power numbers will be funky...\n");
+    }
+
+    clockRate = target_core_clockrate;
+    execution_time = total_cycles / (target_core_clockrate);
+
+    /* Basic parameters*/
+    interface_ip.data_arr_ram_cell_tech_type = device_type;
+    interface_ip.data_arr_peri_global_tech_type = device_type;
+    interface_ip.tag_arr_ram_cell_tech_type = device_type;
+    interface_ip.tag_arr_peri_global_tech_type = device_type;
+
+    interface_ip.ic_proj_type = interconnect_projection_type;
+    interface_ip.temp = temperature;
+    interface_ip.F_sz_nm = core_tech_node;
+    // nm -> um
+    interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000;
+    interface_ip.is_main_mem = false;
+
+    // These are there just to make CACTI's error_checking() happy.
+    // They are either not actually used or overwritten by each component.
+    interface_ip.cache_sz = MIN_BUFFER_SIZE;
+    interface_ip.nbanks = 1;
+    interface_ip.out_w = 0;
+    interface_ip.line_sz = 1;
+    interface_ip.assoc = 1;
+    interface_ip.num_rw_ports = 1;
+    interface_ip.num_search_ports = 1;
+    interface_ip.is_cache = true;
+    interface_ip.pure_ram = false;
+    interface_ip.pure_cam = false;
+
+
+    // This section of code does not have real meaning; it is just to ensure
+    // all data will have an initial value to prevent errors.
+    // They will be overridden during each component's initialization.
+    interface_ip.specific_tag = 1;
+    interface_ip.tag_w = 64;
+    interface_ip.access_mode = 2;
+
+    interface_ip.obj_func_dyn_energy = 0;
+    interface_ip.obj_func_dyn_power = 0;
+    interface_ip.obj_func_leak_power = 0;
+    interface_ip.obj_func_cycle_t = 1;
+    interface_ip.num_rd_ports = 0;
+    interface_ip.num_wr_ports = 0;
+    interface_ip.num_se_rd_ports = 0;
+}
+
+// The destructor body is currently empty; nothing is released here.
+System::~System() {
+    // TODO: Delete children... do this in McPATComponent
+}
diff --git a/ext/mcpat/processor.h b/ext/mcpat/system.h
index 5a7a2f7f5..d2e263720 100644
--- a/ext/mcpat/processor.h
+++ b/ext/mcpat/system.h
@@ -1,7 +1,7 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,19 +25,23 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Joel Hestness
+ * Yasuko Eckert
*
***************************************************************************/
-#ifndef PROCESSOR_H_
-#define PROCESSOR_H_
-#include <vector>
+#ifndef SYSTEM_H_
+#define SYSTEM_H_
-#include "XML_Parse.h"
#include "arbiter.h"
#include "area.h"
#include "array.h"
#include "basic_components.h"
+#include "bus_interconnect.h"
+#include "cachecontroller.h"
+#include "cacheunit.h"
#include "core.h"
#include "decoder.h"
#include "iocontrollers.h"
@@ -45,35 +49,23 @@
#include "noc.h"
#include "parameter.h"
#include "router.h"
-#include "sharedcache.h"
-class Processor : public Component
-{
- public:
- ParseXML *XML;
- vector<Core *> cores;
- vector<SharedCache *> l2array;
- vector<SharedCache *> l3array;
- vector<SharedCache *> l1dirarray;
- vector<SharedCache *> l2dirarray;
- vector<NoC *> nocs;
- MemoryController * mc;
- NIUController * niu;
- PCIeController * pcie;
- FlashController * flashcontroller;
+class System : public McPATComponent { // holds the system-wide parameters that set_proc_param() reads from the XML input
+public:
InputParameter interface_ip;
- ProcParam procdynp;
- //wire globalInterconnect;
- //clock_network globalClock;
- Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies,flashcontrollers;
- int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir;
- Processor(ParseXML *XML_interface);
- void compute();
+
+ int device_type; // copied by set_proc_param() into interface_ip's data/tag array tech-type fields; negative means unspecified
+ double core_tech_node; // feature size, printed with an "nm" suffix and copied to interface_ip.F_sz_nm; negative means unspecified
+ int interconnect_projection_type; // normalized to 0 or 1 by set_proc_param(), then copied to interface_ip.ic_proj_type
+ int temperature; // copied to interface_ip.temp (units are not visible here -- TODO confirm); negative means unspecified
+
+ System(XMLNode* _xml_data); // _xml_data: presumably the XML node describing this system -- verify against the constructor
void set_proc_param();
- void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
+ // TODO: make this recursively compute energy on subcomponents
+ void displayData(uint32_t indent = 0, int plevel = 100); // prints the results banner and system settings; plevel < 5 adds a hint to raise the print level
void displayDeviceType(int device_type_, uint32_t indent = 0);
void displayInterconnectType(int interconnect_type_, uint32_t indent = 0);
- ~Processor();
+ ~System(); // body is currently empty (see the TODO in its definition)
};
-#endif /* PROCESSOR_H_ */
+#endif /* SYSTEM_H_ */
diff --git a/ext/mcpat/technology_xeon_core.cc b/ext/mcpat/technology_xeon_core.cc
deleted file mode 100644
index 4e60edc1b..000000000
--- a/ext/mcpat/technology_xeon_core.cc
+++ /dev/null
@@ -1,2772 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-#include "basic_circuit.h"
-
-#include "parameter.h"
-
-double wire_resistance(double resistivity, double wire_width, double wire_thickness,
- double barrier_thickness, double dishing_thickness, double alpha_scatter)
-{
- double resistance;
- resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness));
- return(resistance);
-}
-
-double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing,
- double ild_thickness, double miller_value, double horiz_dielectric_constant,
- double vert_dielectric_constant, double fringe_cap)
-{
- double vertical_cap, sidewall_cap, total_cap;
- vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness;
- sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing;
- total_cap = vertical_cap + sidewall_cap + fringe_cap;
- return(total_cap);
-}
-
-
-void init_tech_params(double technology, bool is_tag)
-{
- int iter, tech, tech_lo, tech_hi;
- double curr_alpha, curr_vpp;
- double wire_width, wire_thickness, wire_spacing,
- fringe_cap, pmos_to_nmos_sizing_r;
-// double aspect_ratio,ild_thickness, miller_value = 1.5, horiz_dielectric_constant, vert_dielectric_constant;
- double barrier_thickness, dishing_thickness, alpha_scatter;
- double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell;
-
- uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
- uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type;
-
- technology = technology * 1000.0; // in the unit of nm
-
- // initialize parameters
- g_tp.reset();
- double gmp_to_gmn_multiplier_periph_global = 0;
-
- double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram,
- curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram,
- curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram,
- curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp;
- double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data
- curr_asp_ratio_cell_cam;
- double SENSE_AMP_D, SENSE_AMP_P; // J
- double area_cell_dram = 0;
- double asp_ratio_cell_dram = 0;
- double area_cell_sram = 0;
- double asp_ratio_cell_sram = 0;
- double area_cell_cam = 0;
- double asp_ratio_cell_cam = 0;
- double mobility_eff_periph_global = 0;
- double Vdsat_periph_global = 0;
- double nmos_effective_resistance_multiplier;
- double width_dram_access_transistor;
-
- double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date
- double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn
- double curr_chip_layout_overhead = 0;
- double curr_macro_layout_overhead = 0;
- double curr_sckt_co_eff = 0;
-
- if (technology < 91 && technology > 89)
- {
- tech_lo = 90;
- tech_hi = 90;
- }
- else if (technology < 66 && technology > 64)
- {
- tech_lo = 65;
- tech_hi = 65;
- }
- else if (technology < 46 && technology > 44)
- {
- tech_lo = 45;
- tech_hi = 45;
- }
- else if (technology < 33 && technology > 31)
- {
- tech_lo = 32;
- tech_hi = 32;
- }
- else if (technology < 23 && technology > 21)
- {
- tech_lo = 22;
- tech_hi = 22;
- if (ram_cell_tech_type == 3)
- {
- cout<<"current version does not support eDRAM technologies at 22nm"<<endl;
- exit(0);
- }
- }
-// else if (technology < 17 && technology > 15)
-// {
-// tech_lo = 16;
-// tech_hi = 16;
-// }
- else if (technology < 90 && technology > 65)
- {
- tech_lo = 90;
- tech_hi = 65;
- }
- else if (technology < 65 && technology > 45)
- {
- tech_lo = 65;
- tech_hi = 45;
- }
- else if (technology < 45 && technology > 32)
- {
- tech_lo = 45;
- tech_hi = 32;
- }
- else if (technology < 32 && technology > 22)
- {
- tech_lo = 32;
- tech_hi = 22;
- }
-// else if (technology < 22 && technology > 16)
-// {
-// tech_lo = 22;
-// tech_hi = 16;
-// }
- else
- {
- cout<<"Invalid technology nodes"<<endl;
- exit(0);
- }
-
- double vdd[NUMBER_TECH_FLAVORS];
- double Lphy[NUMBER_TECH_FLAVORS];
- double Lelec[NUMBER_TECH_FLAVORS];
- double t_ox[NUMBER_TECH_FLAVORS];
- double v_th[NUMBER_TECH_FLAVORS];
- double c_ox[NUMBER_TECH_FLAVORS];
- double mobility_eff[NUMBER_TECH_FLAVORS];
- double Vdsat[NUMBER_TECH_FLAVORS];
- double c_g_ideal[NUMBER_TECH_FLAVORS];
- double c_fringe[NUMBER_TECH_FLAVORS];
- double c_junc[NUMBER_TECH_FLAVORS];
- double I_on_n[NUMBER_TECH_FLAVORS];
- double I_on_p[NUMBER_TECH_FLAVORS];
- double Rnchannelon[NUMBER_TECH_FLAVORS];
- double Rpchannelon[NUMBER_TECH_FLAVORS];
- double n_to_p_eff_curr_drv_ratio[NUMBER_TECH_FLAVORS];
- double I_off_n[NUMBER_TECH_FLAVORS][101];
- double I_g_on_n[NUMBER_TECH_FLAVORS][101];
- //double I_off_p[NUMBER_TECH_FLAVORS][101];
- double gmp_to_gmn_multiplier[NUMBER_TECH_FLAVORS];
- //double curr_sckt_co_eff[NUMBER_TECH_FLAVORS];
- double long_channel_leakage_reduction[NUMBER_TECH_FLAVORS];
-
- for (iter = 0; iter <= 1; ++iter)
- {
- // linear interpolation
- if (iter == 0)
- {
- tech = tech_lo;
- if (tech_lo == tech_hi)
- {
- curr_alpha = 1;
- }
- else
- {
- curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
- }
- }
- else
- {
- tech = tech_hi;
- if (tech_lo == tech_hi)
- {
- break;
- }
- else
- {
- curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
- }
- }
-
- if (tech == 90)
- {
- SENSE_AMP_D = .28e-9; // s
- SENSE_AMP_P = 14.7e-15; // J
- //90nm technology-node. Corresponds to year 2004 in ITRS
- //ITRS HP device type
- vdd[0] = 1.2;
- Lphy[0] = 0.037;//Lphy is the physical gate-length. micron
- Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron
- t_ox[0] = 1.2e-3;//micron
- v_th[0] = 0.23707;//V
- c_ox[0] = 1.79e-14;//F/micron2
- mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 0.128; //V
- c_g_ideal[0] = 6.64e-16;//F/micron
- c_fringe[0] = 0.08e-15;//F/micron
- c_junc[0] = 1e-15;//F/micron2
- I_on_n[0] = 1076.9e-6;//A/micron
- I_on_p[0] = 712.6e-6;//A/micron
- //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
- nmos_effective_resistance_multiplier = 1.54;
- n_to_p_eff_curr_drv_ratio[0] = 2.45;
- gmp_to_gmn_multiplier[0] = 1.22;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1;
- I_off_n[0][0] = 3.24e-8;//A/micron
- I_off_n[0][10] = 4.01e-8;
- I_off_n[0][20] = 4.90e-8;
- I_off_n[0][30] = 5.92e-8;
- I_off_n[0][40] = 7.08e-8;
- I_off_n[0][50] = 8.38e-8;
- I_off_n[0][60] = 9.82e-8;
- I_off_n[0][70] = 1.14e-7;
- I_off_n[0][80] = 1.29e-7;
- I_off_n[0][90] = 1.43e-7;
- I_off_n[0][100] = 1.54e-7;
-
- I_g_on_n[0][0] = 1.65e-8;//A/micron
- I_g_on_n[0][10] = 1.65e-8;
- I_g_on_n[0][20] = 1.65e-8;
- I_g_on_n[0][30] = 1.65e-8;
- I_g_on_n[0][40] = 1.65e-8;
- I_g_on_n[0][50] = 1.65e-8;
- I_g_on_n[0][60] = 1.65e-8;
- I_g_on_n[0][70] = 1.65e-8;
- I_g_on_n[0][80] = 1.65e-8;
- I_g_on_n[0][90] = 1.65e-8;
- I_g_on_n[0][100] = 1.65e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.3;
- Lphy[1] = 0.075;
- Lelec[1] = 0.0486;
- t_ox[1] = 2.2e-3;
- v_th[1] = 0.48203;
- c_ox[1] = 1.22e-14;
- mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 0.373;
- c_g_ideal[1] = 9.15e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 503.6e-6;
- I_on_p[1] = 235.1e-6;
- nmos_effective_resistance_multiplier = 1.92;
- n_to_p_eff_curr_drv_ratio[1] = 2.44;
- gmp_to_gmn_multiplier[1] =0.88;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1;
- I_off_n[1][0] = 2.81e-12;
- I_off_n[1][10] = 4.76e-12;
- I_off_n[1][20] = 7.82e-12;
- I_off_n[1][30] = 1.25e-11;
- I_off_n[1][40] = 1.94e-11;
- I_off_n[1][50] = 2.94e-11;
- I_off_n[1][60] = 4.36e-11;
- I_off_n[1][70] = 6.32e-11;
- I_off_n[1][80] = 8.95e-11;
- I_off_n[1][90] = 1.25e-10;
- I_off_n[1][100] = 1.7e-10;
-
- I_g_on_n[1][0] = 3.87e-11;//A/micron
- I_g_on_n[1][10] = 3.87e-11;
- I_g_on_n[1][20] = 3.87e-11;
- I_g_on_n[1][30] = 3.87e-11;
- I_g_on_n[1][40] = 3.87e-11;
- I_g_on_n[1][50] = 3.87e-11;
- I_g_on_n[1][60] = 3.87e-11;
- I_g_on_n[1][70] = 3.87e-11;
- I_g_on_n[1][80] = 3.87e-11;
- I_g_on_n[1][90] = 3.87e-11;
- I_g_on_n[1][100] = 3.87e-11;
-
- //ITRS LOP device type
- vdd[2] = 0.9;
- Lphy[2] = 0.053;
- Lelec[2] = 0.0354;
- t_ox[2] = 1.5e-3;
- v_th[2] = 0.30764;
- c_ox[2] = 1.59e-14;
- mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 0.113;
- c_g_ideal[2] = 8.45e-16;
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 386.6e-6;
- I_on_p[2] = 209.7e-6;
- nmos_effective_resistance_multiplier = 1.77;
- n_to_p_eff_curr_drv_ratio[2] = 2.54;
- gmp_to_gmn_multiplier[2] = 0.98;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1;
- I_off_n[2][0] = 2.14e-9;
- I_off_n[2][10] = 2.9e-9;
- I_off_n[2][20] = 3.87e-9;
- I_off_n[2][30] = 5.07e-9;
- I_off_n[2][40] = 6.54e-9;
- I_off_n[2][50] = 8.27e-8;
- I_off_n[2][60] = 1.02e-7;
- I_off_n[2][70] = 1.20e-7;
- I_off_n[2][80] = 1.36e-8;
- I_off_n[2][90] = 1.52e-8;
- I_off_n[2][100] = 1.73e-8;
-
- I_g_on_n[2][0] = 4.31e-8;//A/micron
- I_g_on_n[2][10] = 4.31e-8;
- I_g_on_n[2][20] = 4.31e-8;
- I_g_on_n[2][30] = 4.31e-8;
- I_g_on_n[2][40] = 4.31e-8;
- I_g_on_n[2][50] = 4.31e-8;
- I_g_on_n[2][60] = 4.31e-8;
- I_g_on_n[2][70] = 4.31e-8;
- I_g_on_n[2][80] = 4.31e-8;
- I_g_on_n[2][90] = 4.31e-8;
- I_g_on_n[2][100] = 4.31e-8;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.2;
- Lphy[3] = 0.12;
- Lelec[3] = 0.0756;
- curr_v_th_dram_access_transistor = 0.4545;
- width_dram_access_transistor = 0.14;
- curr_I_on_dram_cell = 45e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 0.168;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.6;
- t_ox[3] = 2.2e-3;
- v_th[3] = 0.4545;
- c_ox[3] = 1.22e-14;
- mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.3;
- c_g_ideal[3] = 1.47e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 321.6e-6;
- I_on_p[3] = 203.3e-6;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.42e-11;
- I_off_n[3][10] = 2.25e-11;
- I_off_n[3][20] = 3.46e-11;
- I_off_n[3][30] = 5.18e-11;
- I_off_n[3][40] = 7.58e-11;
- I_off_n[3][50] = 1.08e-10;
- I_off_n[3][60] = 1.51e-10;
- I_off_n[3][70] = 2.02e-10;
- I_off_n[3][80] = 2.57e-10;
- I_off_n[3][90] = 3.14e-10;
- I_off_n[3][100] = 3.85e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.6;
- Lphy[3] = 0.09;
- Lelec[3] = 0.0576;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.09;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.09*0.09;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 3.7;
- t_ox[3] = 5.5e-3;
- v_th[3] = 1.0;
- c_ox[3] = 5.65e-15;
- mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.32;
- c_g_ideal[3] = 5.08e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1094.3e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.62;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 5.80e-15;
- I_off_n[3][10] = 1.21e-14;
- I_off_n[3][20] = 2.42e-14;
- I_off_n[3][30] = 4.65e-14;
- I_off_n[3][40] = 8.60e-14;
- I_off_n[3][50] = 1.54e-13;
- I_off_n[3][60] = 2.66e-13;
- I_off_n[3][70] = 4.45e-13;
- I_off_n[3][80] = 7.17e-13;
- I_off_n[3][90] = 1.11e-12;
- I_off_n[3][100] = 1.67e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
- curr_asp_ratio_cell_cam = 2.92;//2.5
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 1;
- curr_core_tx_density = 1.25*0.7*0.7;
- curr_sckt_co_eff = 1.1539;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
-
-
- }
-
- if (tech == 65)
- { //65nm technology-node. Corresponds to year 2007 in ITRS
- //ITRS HP device type
-// SENSE_AMP_D = .2e-9; // s
-// SENSE_AMP_P = 5.7e-15; // J
-// vdd[0] = 1.1;
-// Lphy[0] = 0.025;
-// Lelec[0] = 0.019;
-// t_ox[0] = 1.1e-3;
-// v_th[0] = .19491;
-// c_ox[0] = 1.88e-14;
-// mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6);
-// Vdsat[0] = 7.71e-2;
-// c_g_ideal[0] = 4.69e-16;
-// c_fringe[0] = 0.077e-15;
-// c_junc[0] = 1e-15;
-// I_on_n[0] = 1197.2e-6;
-// I_on_p[0] = 870.8e-6;
-// nmos_effective_resistance_multiplier = 1.50;
-// n_to_p_eff_curr_drv_ratio[0] = 2.41;
-// gmp_to_gmn_multiplier[0] = 1.38;
-// Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
-// Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
-// long_channel_leakage_reduction[0] = 1/3.74;
-// //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first
-// //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74.
-// I_off_n[0][0] = 1.96e-7;
-// I_off_n[0][10] = 2.29e-7;
-// I_off_n[0][20] = 2.66e-7;
-// I_off_n[0][30] = 3.05e-7;
-// I_off_n[0][40] = 3.49e-7;
-// I_off_n[0][50] = 3.95e-7;
-// I_off_n[0][60] = 4.45e-7;
-// I_off_n[0][70] = 4.97e-7;
-// I_off_n[0][80] = 5.48e-7;
-// I_off_n[0][90] = 5.94e-7;
-// I_off_n[0][100] = 6.3e-7;
-// I_g_on_n[0][0] = 4.09e-8;//A/micron
-// I_g_on_n[0][10] = 4.09e-8;
-// I_g_on_n[0][20] = 4.09e-8;
-// I_g_on_n[0][30] = 4.09e-8;
-// I_g_on_n[0][40] = 4.09e-8;
-// I_g_on_n[0][50] = 4.09e-8;
-// I_g_on_n[0][60] = 4.09e-8;
-// I_g_on_n[0][70] = 4.09e-8;
-// I_g_on_n[0][80] = 4.09e-8;
-// I_g_on_n[0][90] = 4.09e-8;
-// I_g_on_n[0][100] = 4.09e-8;
-
- SENSE_AMP_D = .2e-9; // s
- SENSE_AMP_P = 5.7e-15; // J
- vdd[0] = 1.25;
- Lphy[0] = 0.025;
- Lelec[0] = 0.019;
- t_ox[0] = 1.1e-3;
- v_th[0] = .12491;
- c_ox[0] = 1.88e-14;
- mobility_eff[0] = 409.31 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 9.08e-2;
- c_g_ideal[0] = 4.72e-16;
- c_fringe[0] = 0.08e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 1486.4e-6;
- I_on_p[0] = 1131.5e-6;
- nmos_effective_resistance_multiplier = 1.57;
- n_to_p_eff_curr_drv_ratio[0] = 2;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
- long_channel_leakage_reduction[0] = 1.0/4.97;
- //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first
- //Ioff(Lgate normal)/Ioff(Lgate long)= 4.97@Vdd=1.25; (3.74@Vdd=1.1), however, Intel paper suggest the reduction factor is 3.
- I_off_n[0][0] = 8.62e-7;
- I_off_n[0][10] = 9.08e-7;
- I_off_n[0][20] = 9.55e-7;
- I_off_n[0][30] = 1.00e-6;
- I_off_n[0][40] = 1.05e-6;
- I_off_n[0][50] = 1.09e-6;
- I_off_n[0][60] = 1.14e-6;
- I_off_n[0][70] = 1.18e-6;
- I_off_n[0][80] = 1.23e-6;
- I_off_n[0][90] = 1.27e-6;
- I_off_n[0][100] = 1.31e-6;
-
-
- I_g_on_n[0][0] = 7.02e-8;//A/micron
- I_g_on_n[0][10] = 7.02e-8;
- I_g_on_n[0][20] = 7.02e-8;
- I_g_on_n[0][30] = 7.02e-8;
- I_g_on_n[0][40] = 7.02e-8;
- I_g_on_n[0][50] = 7.02e-8;
- I_g_on_n[0][60] = 7.02e-8;
- I_g_on_n[0][70] = 7.02e-8;
- I_g_on_n[0][80] = 7.02e-8;
- I_g_on_n[0][90] = 7.02e-8;
- I_g_on_n[0][100] = 7.02e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.2;
- Lphy[1] = 0.045;
- Lelec[1] = 0.0298;
- t_ox[1] = 1.9e-3;
- v_th[1] = 0.52354;
- c_ox[1] = 1.36e-14;
- mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 0.128;
- c_g_ideal[1] = 6.14e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 519.2e-6;
- I_on_p[1] = 266e-6;
- nmos_effective_resistance_multiplier = 1.96;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/2.82;
- I_off_n[1][0] = 9.12e-12;
- I_off_n[1][10] = 1.49e-11;
- I_off_n[1][20] = 2.36e-11;
- I_off_n[1][30] = 3.64e-11;
- I_off_n[1][40] = 5.48e-11;
- I_off_n[1][50] = 8.05e-11;
- I_off_n[1][60] = 1.15e-10;
- I_off_n[1][70] = 1.59e-10;
- I_off_n[1][80] = 2.1e-10;
- I_off_n[1][90] = 2.62e-10;
- I_off_n[1][100] = 3.21e-10;
-
- I_g_on_n[1][0] = 1.09e-10;//A/micron
- I_g_on_n[1][10] = 1.09e-10;
- I_g_on_n[1][20] = 1.09e-10;
- I_g_on_n[1][30] = 1.09e-10;
- I_g_on_n[1][40] = 1.09e-10;
- I_g_on_n[1][50] = 1.09e-10;
- I_g_on_n[1][60] = 1.09e-10;
- I_g_on_n[1][70] = 1.09e-10;
- I_g_on_n[1][80] = 1.09e-10;
- I_g_on_n[1][90] = 1.09e-10;
- I_g_on_n[1][100] = 1.09e-10;
-
- //ITRS LOP device type
- vdd[2] = 0.8;
- Lphy[2] = 0.032;
- Lelec[2] = 0.0216;
- t_ox[2] = 1.2e-3;
- v_th[2] = 0.28512;
- c_ox[2] = 1.87e-14;
- mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 0.292;
- c_g_ideal[2] = 6e-16;
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 573.1e-6;
- I_on_p[2] = 340.6e-6;
- nmos_effective_resistance_multiplier = 1.82;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/2.05;
- I_off_n[2][0] = 4.9e-9;
- I_off_n[2][10] = 6.49e-9;
- I_off_n[2][20] = 8.45e-9;
- I_off_n[2][30] = 1.08e-8;
- I_off_n[2][40] = 1.37e-8;
- I_off_n[2][50] = 1.71e-8;
- I_off_n[2][60] = 2.09e-8;
- I_off_n[2][70] = 2.48e-8;
- I_off_n[2][80] = 2.84e-8;
- I_off_n[2][90] = 3.13e-8;
- I_off_n[2][100] = 3.42e-8;
-
- I_g_on_n[2][0] = 9.61e-9;//A/micron
- I_g_on_n[2][10] = 9.61e-9;
- I_g_on_n[2][20] = 9.61e-9;
- I_g_on_n[2][30] = 9.61e-9;
- I_g_on_n[2][40] = 9.61e-9;
- I_g_on_n[2][50] = 9.61e-9;
- I_g_on_n[2][60] = 9.61e-9;
- I_g_on_n[2][70] = 9.61e-9;
- I_g_on_n[2][80] = 9.61e-9;
- I_g_on_n[2][90] = 9.61e-9;
- I_g_on_n[2][100] = 9.61e-9;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.2;
- Lphy[3] = 0.12;
- Lelec[3] = 0.0756;
- curr_v_th_dram_access_transistor = 0.43806;
- width_dram_access_transistor = 0.09;
- curr_I_on_dram_cell = 36e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 0.11;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.6;
- t_ox[3] = 2.2e-3;
- v_th[3] = 0.43806;
- c_ox[3] = 1.22e-14;
- mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.43806;
- c_g_ideal[3] = 1.46e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15 ;
- I_on_n[3] = 399.8e-6;
- I_on_p[3] = 243.4e-6;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 2.23e-11;
- I_off_n[3][10] = 3.46e-11;
- I_off_n[3][20] = 5.24e-11;
- I_off_n[3][30] = 7.75e-11;
- I_off_n[3][40] = 1.12e-10;
- I_off_n[3][50] = 1.58e-10;
- I_off_n[3][60] = 2.18e-10;
- I_off_n[3][70] = 2.88e-10;
- I_off_n[3][80] = 3.63e-10;
- I_off_n[3][90] = 4.41e-10;
- I_off_n[3][100] = 5.36e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.3;
- Lphy[3] = 0.065;
- Lelec[3] = 0.0426;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.065;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.065*0.065;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 3.3;
- t_ox[3] = 5e-3;
- v_th[3] = 1.0;
- c_ox[3] = 6.16e-15;
- mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.385;
- c_g_ideal[3] = 4e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15 ;
- I_on_n[3] = 1031e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 2.39;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.80e-14;
- I_off_n[3][10] = 3.64e-14;
- I_off_n[3][20] = 7.03e-14;
- I_off_n[3][30] = 1.31e-13;
- I_off_n[3][40] = 2.35e-13;
- I_off_n[3][50] = 4.09e-13;
- I_off_n[3][60] = 6.89e-13;
- I_off_n[3][70] = 1.13e-12;
- I_off_n[3][80] = 1.78e-12;
- I_off_n[3][90] = 2.71e-12;
- I_off_n[3][100] = 3.99e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7;
- curr_core_tx_density = 1.25*0.7;
- curr_sckt_co_eff = 1.1359;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if (tech == 45)
- { //45nm technology-node. Corresponds to year 2010 in ITRS
- //ITRS HP device type
- SENSE_AMP_D = .04e-9; // s
- SENSE_AMP_P = 2.7e-15; // J
- vdd[0] = 1.0;
- Lphy[0] = 0.018;
- Lelec[0] = 0.01345;
- t_ox[0] = 0.65e-3;
- v_th[0] = .18035;
- c_ox[0] = 3.77e-14;
- mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 9.38E-2;
- c_g_ideal[0] = 6.78e-16;
- c_fringe[0] = 0.05e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 2046.6e-6;
- //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of
- //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm
- I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI
- nmos_effective_resistance_multiplier = 1.51;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
- long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74
- I_off_n[0][0] = 2.8e-7;
- I_off_n[0][10] = 3.28e-7;
- I_off_n[0][20] = 3.81e-7;
- I_off_n[0][30] = 4.39e-7;
- I_off_n[0][40] = 5.02e-7;
- I_off_n[0][50] = 5.69e-7;
- I_off_n[0][60] = 6.42e-7;
- I_off_n[0][70] = 7.2e-7;
- I_off_n[0][80] = 8.03e-7;
- I_off_n[0][90] = 8.91e-7;
- I_off_n[0][100] = 9.84e-7;
-
- I_g_on_n[0][0] = 3.59e-8;//A/micron
- I_g_on_n[0][10] = 3.59e-8;
- I_g_on_n[0][20] = 3.59e-8;
- I_g_on_n[0][30] = 3.59e-8;
- I_g_on_n[0][40] = 3.59e-8;
- I_g_on_n[0][50] = 3.59e-8;
- I_g_on_n[0][60] = 3.59e-8;
- I_g_on_n[0][70] = 3.59e-8;
- I_g_on_n[0][80] = 3.59e-8;
- I_g_on_n[0][90] = 3.59e-8;
- I_g_on_n[0][100] = 3.59e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.1;
- Lphy[1] = 0.028;
- Lelec[1] = 0.0212;
- t_ox[1] = 1.4e-3;
- v_th[1] = 0.50245;
- c_ox[1] = 2.01e-14;
- mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 9.12e-2;
- c_g_ideal[1] = 5.18e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 666.2e-6;
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/2.08;
- I_off_n[1][0] = 1.01e-11;
- I_off_n[1][10] = 1.65e-11;
- I_off_n[1][20] = 2.62e-11;
- I_off_n[1][30] = 4.06e-11;
- I_off_n[1][40] = 6.12e-11;
- I_off_n[1][50] = 9.02e-11;
- I_off_n[1][60] = 1.3e-10;
- I_off_n[1][70] = 1.83e-10;
- I_off_n[1][80] = 2.51e-10;
- I_off_n[1][90] = 3.29e-10;
- I_off_n[1][100] = 4.1e-10;
-
- I_g_on_n[1][0] = 9.47e-12;//A/micron
- I_g_on_n[1][10] = 9.47e-12;
- I_g_on_n[1][20] = 9.47e-12;
- I_g_on_n[1][30] = 9.47e-12;
- I_g_on_n[1][40] = 9.47e-12;
- I_g_on_n[1][50] = 9.47e-12;
- I_g_on_n[1][60] = 9.47e-12;
- I_g_on_n[1][70] = 9.47e-12;
- I_g_on_n[1][80] = 9.47e-12;
- I_g_on_n[1][90] = 9.47e-12;
- I_g_on_n[1][100] = 9.47e-12;
-
- //ITRS LOP device type
- vdd[2] = 0.7;
- Lphy[2] = 0.022;
- Lelec[2] = 0.016;
- t_ox[2] = 0.9e-3;
- v_th[2] = 0.22599;
- c_ox[2] = 2.82e-14;//F/micron2
- mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 5.71e-2;
- c_g_ideal[2] = 6.2e-16;
- c_fringe[2] = 0.073e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 748.9e-6;
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.76;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/1.92;
- I_off_n[2][0] = 4.03e-9;
- I_off_n[2][10] = 5.02e-9;
- I_off_n[2][20] = 6.18e-9;
- I_off_n[2][30] = 7.51e-9;
- I_off_n[2][40] = 9.04e-9;
- I_off_n[2][50] = 1.08e-8;
- I_off_n[2][60] = 1.27e-8;
- I_off_n[2][70] = 1.47e-8;
- I_off_n[2][80] = 1.66e-8;
- I_off_n[2][90] = 1.84e-8;
- I_off_n[2][100] = 2.03e-8;
-
- I_g_on_n[2][0] = 3.24e-8;//A/micron
- I_g_on_n[2][10] = 4.01e-8;
- I_g_on_n[2][20] = 4.90e-8;
- I_g_on_n[2][30] = 5.92e-8;
- I_g_on_n[2][40] = 7.08e-8;
- I_g_on_n[2][50] = 8.38e-8;
- I_g_on_n[2][60] = 9.82e-8;
- I_g_on_n[2][70] = 1.14e-7;
- I_g_on_n[2][80] = 1.29e-7;
- I_g_on_n[2][90] = 1.43e-7;
- I_g_on_n[2][100] = 1.54e-7;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.1;
- Lphy[3] = 0.078;
- Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 0.44559;
- width_dram_access_transistor = 0.079;
- curr_I_on_dram_cell = 36e-6;//A
- curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.5;
- t_ox[3] = 2.1e-3;
- v_th[3] = 0.44559;
- c_ox[3] = 1.41e-14;
- mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.181;
- c_g_ideal[3] = 1.10e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 456e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 2.54e-11;
- I_off_n[3][10] = 3.94e-11;
- I_off_n[3][20] = 5.95e-11;
- I_off_n[3][30] = 8.79e-11;
- I_off_n[3][40] = 1.27e-10;
- I_off_n[3][50] = 1.79e-10;
- I_off_n[3][60] = 2.47e-10;
- I_off_n[3][70] = 3.31e-10;
- I_off_n[3][80] = 4.26e-10;
- I_off_n[3][90] = 5.27e-10;
- I_off_n[3][100] = 6.46e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.1;
- Lphy[3] = 0.045;
- Lelec[3] = 0.0298;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.045;
- curr_I_on_dram_cell = 20e-6;//A
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.045*0.045;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 2.7;
- t_ox[3] = 4e-3;
- v_th[3] = 1.0;
- c_ox[3] = 7.98e-15;
- mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.147;
- c_g_ideal[3] = 3.59e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 999.4e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.31e-14;
- I_off_n[3][10] = 2.68e-14;
- I_off_n[3][20] = 5.25e-14;
- I_off_n[3][30] = 9.88e-14;
- I_off_n[3][40] = 1.79e-13;
- I_off_n[3][50] = 3.15e-13;
- I_off_n[3][60] = 5.36e-13;
- I_off_n[3][70] = 8.86e-13;
- I_off_n[3][80] = 1.42e-12;
- I_off_n[3][90] = 2.20e-12;
- I_off_n[3][100] = 3.29e-12;
- }
-
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7;
- curr_core_tx_density = 1.25;
- curr_sckt_co_eff = 1.1387;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if (tech == 32)
- {
- SENSE_AMP_D = .03e-9; // s
- SENSE_AMP_P = 2.16e-15; // J
- //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm
- //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for
- //HP and LSTP.
- vdd[0] = 0.9;
- Lphy[0] = 0.013;
- Lelec[0] = 0.01013;
- t_ox[0] = 0.5e-3;
- v_th[0] = 0.21835;
- c_ox[0] = 4.11e-14;
- mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 5.09E-2;
- c_g_ideal[0] = 5.34e-16;
- c_fringe[0] = 0.04e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 2211.7e-6;
- I_on_p[0] = I_on_n[0] / 2;
- nmos_effective_resistance_multiplier = 1.49;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/3.706;
- //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%),
- //whichever comes first
- I_off_n[0][0] = 1.52e-7;
- I_off_n[0][10] = 1.55e-7;
- I_off_n[0][20] = 1.59e-7;
- I_off_n[0][30] = 1.68e-7;
- I_off_n[0][40] = 1.90e-7;
- I_off_n[0][50] = 2.69e-7;
- I_off_n[0][60] = 5.32e-7;
- I_off_n[0][70] = 1.02e-6;
- I_off_n[0][80] = 1.62e-6;
- I_off_n[0][90] = 2.73e-6;
- I_off_n[0][100] = 6.1e-6;
-
- I_g_on_n[0][0] = 6.55e-8;//A/micron
- I_g_on_n[0][10] = 6.55e-8;
- I_g_on_n[0][20] = 6.55e-8;
- I_g_on_n[0][30] = 6.55e-8;
- I_g_on_n[0][40] = 6.55e-8;
- I_g_on_n[0][50] = 6.55e-8;
- I_g_on_n[0][60] = 6.55e-8;
- I_g_on_n[0][70] = 6.55e-8;
- I_g_on_n[0][80] = 6.55e-8;
- I_g_on_n[0][90] = 6.55e-8;
- I_g_on_n[0][100] = 6.55e-8;
-
-// 32 DG
-// I_g_on_n[0][0] = 2.71e-9;//A/micron
-// I_g_on_n[0][10] = 2.71e-9;
-// I_g_on_n[0][20] = 2.71e-9;
-// I_g_on_n[0][30] = 2.71e-9;
-// I_g_on_n[0][40] = 2.71e-9;
-// I_g_on_n[0][50] = 2.71e-9;
-// I_g_on_n[0][60] = 2.71e-9;
-// I_g_on_n[0][70] = 2.71e-9;
-// I_g_on_n[0][80] = 2.71e-9;
-// I_g_on_n[0][90] = 2.71e-9;
-// I_g_on_n[0][100] = 2.71e-9;
-
- //LSTP device type
- vdd[1] = 1;
- Lphy[1] = 0.020;
- Lelec[1] = 0.0173;
- t_ox[1] = 1.2e-3;
- v_th[1] = 0.513;
- c_ox[1] = 2.29e-14;
- mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 8.64e-2;
- c_g_ideal[1] = 4.58e-16;
- c_fringe[1] = 0.053e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 683.6e-6;
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/1.93;
- I_off_n[1][0] = 2.06e-11;
- I_off_n[1][10] = 3.30e-11;
- I_off_n[1][20] = 5.15e-11;
- I_off_n[1][30] = 7.83e-11;
- I_off_n[1][40] = 1.16e-10;
- I_off_n[1][50] = 1.69e-10;
- I_off_n[1][60] = 2.40e-10;
- I_off_n[1][70] = 3.34e-10;
- I_off_n[1][80] = 4.54e-10;
- I_off_n[1][90] = 5.96e-10;
- I_off_n[1][100] = 7.44e-10;
-
- I_g_on_n[1][0] = 3.73e-11;//A/micron
- I_g_on_n[1][10] = 3.73e-11;
- I_g_on_n[1][20] = 3.73e-11;
- I_g_on_n[1][30] = 3.73e-11;
- I_g_on_n[1][40] = 3.73e-11;
- I_g_on_n[1][50] = 3.73e-11;
- I_g_on_n[1][60] = 3.73e-11;
- I_g_on_n[1][70] = 3.73e-11;
- I_g_on_n[1][80] = 3.73e-11;
- I_g_on_n[1][90] = 3.73e-11;
- I_g_on_n[1][100] = 3.73e-11;
-
-
- //LOP device type
- vdd[2] = 0.6;
- Lphy[2] = 0.016;
- Lelec[2] = 0.01232;
- t_ox[2] = 0.9e-3;
- v_th[2] = 0.24227;
- c_ox[2] = 2.84e-14;
- mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 4.64e-2;
- c_g_ideal[2] = 4.54e-16;
- c_fringe[2] = 0.057e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 827.8e-6;
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.73;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/1.89;
- I_off_n[2][0] = 5.94e-8;
- I_off_n[2][10] = 7.23e-8;
- I_off_n[2][20] = 8.7e-8;
- I_off_n[2][30] = 1.04e-7;
- I_off_n[2][40] = 1.22e-7;
- I_off_n[2][50] = 1.43e-7;
- I_off_n[2][60] = 1.65e-7;
- I_off_n[2][70] = 1.90e-7;
- I_off_n[2][80] = 2.15e-7;
- I_off_n[2][90] = 2.39e-7;
- I_off_n[2][100] = 2.63e-7;
-
- I_g_on_n[2][0] = 2.93e-9;//A/micron
- I_g_on_n[2][10] = 2.93e-9;
- I_g_on_n[2][20] = 2.93e-9;
- I_g_on_n[2][30] = 2.93e-9;
- I_g_on_n[2][40] = 2.93e-9;
- I_g_on_n[2][50] = 2.93e-9;
- I_g_on_n[2][60] = 2.93e-9;
- I_g_on_n[2][70] = 2.93e-9;
- I_g_on_n[2][80] = 2.93e-9;
- I_g_on_n[2][90] = 2.93e-9;
- I_g_on_n[2][100] = 2.93e-9;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.0;
- Lphy[3] = 0.056;
- Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 0.44129;
- width_dram_access_transistor = 0.056;
- curr_I_on_dram_cell = 36e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.5;
- t_ox[3] = 2e-3;
- v_th[3] = 0.44467;
- c_ox[3] = 1.48e-14;
- mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.174;
- c_g_ideal[3] = 7.45e-16;
- c_fringe[3] = 0.053e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1055.4e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 3.57e-11;
- I_off_n[3][10] = 5.51e-11;
- I_off_n[3][20] = 8.27e-11;
- I_off_n[3][30] = 1.21e-10;
- I_off_n[3][40] = 1.74e-10;
- I_off_n[3][50] = 2.45e-10;
- I_off_n[3][60] = 3.38e-10;
- I_off_n[3][70] = 4.53e-10;
- I_off_n[3][80] = 5.87e-10;
- I_off_n[3][90] = 7.29e-10;
- I_off_n[3][100] = 8.87e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.0;
- Lphy[3] = 0.032;
- Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.032;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.032*0.032;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 2.6;
- t_ox[3] = 4e-3;
- v_th[3] = 1.0;
- c_ox[3] = 7.99e-15;
- mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.129;
- c_g_ideal[3] = 2.56e-16;
- c_fringe[3] = 0.053e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1024.5e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 3.63e-14;
- I_off_n[3][10] = 7.18e-14;
- I_off_n[3][20] = 1.36e-13;
- I_off_n[3][30] = 2.49e-13;
- I_off_n[3][40] = 4.41e-13;
- I_off_n[3][50] = 7.55e-13;
- I_off_n[3][60] = 1.26e-12;
- I_off_n[3][70] = 2.03e-12;
- I_off_n[3][80] = 3.19e-12;
- I_off_n[3][90] = 4.87e-12;
- I_off_n[3][100] = 7.16e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7;
- curr_sckt_co_eff = 1.1111;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if(tech == 22){
- //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm
- //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP.
- //22 nm HP
- vdd[0] = 0.8;
- Lphy[0] = 0.009;//Lphy is the physical gate-length.
- Lelec[0] = 0.00468;//Lelec is the electrical gate-length.
- t_ox[0] = 0.55e-3;//micron
- v_th[0] = 0.1395;//V
- c_ox[0] = 3.63e-14;//F/micron2
- mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 2.33e-2; //V/micron
- c_g_ideal[0] = 3.27e-16;//F/micron
- c_fringe[0] = 0.06e-15;//F/micron
- c_junc[0] = 0;//F/micron2
- I_on_n[0] = 2626.4e-6;//A/micron
- I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.45;
- n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
- //"Dynamic" tab of Device workspace.
- gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/3.274;
- I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there.
- I_off_n[0][10] = 1.55e-7/1.5*1.2;
- I_off_n[0][20] = 1.59e-7/1.5*1.2;
- I_off_n[0][30] = 1.68e-7/1.5*1.2;
- I_off_n[0][40] = 1.90e-7/1.5*1.2;
- I_off_n[0][50] = 2.69e-7/1.5*1.2;
- I_off_n[0][60] = 5.32e-7/1.5*1.2;
- I_off_n[0][70] = 1.02e-6/1.5*1.2;
- I_off_n[0][80] = 1.62e-6/1.5*1.2;
- I_off_n[0][90] = 2.73e-6/1.5*1.2;
- I_off_n[0][100] = 6.1e-6/1.5*1.2;
- //for 22nm DG HP
- I_g_on_n[0][0] = 1.81e-9;//A/micron
- I_g_on_n[0][10] = 1.81e-9;
- I_g_on_n[0][20] = 1.81e-9;
- I_g_on_n[0][30] = 1.81e-9;
- I_g_on_n[0][40] = 1.81e-9;
- I_g_on_n[0][50] = 1.81e-9;
- I_g_on_n[0][60] = 1.81e-9;
- I_g_on_n[0][70] = 1.81e-9;
- I_g_on_n[0][80] = 1.81e-9;
- I_g_on_n[0][90] = 1.81e-9;
- I_g_on_n[0][100] = 1.81e-9;
-
- //22 nm LSTP DG
- vdd[1] = 0.8;
- Lphy[1] = 0.014;
- Lelec[1] = 0.008;//Lelec is the electrical gate-length.
- t_ox[1] = 1.1e-3;//micron
- v_th[1] = 0.40126;//V
- c_ox[1] = 2.30e-14;//F/micron2
- mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[1] = 6.64e-2; //V/micron
- c_g_ideal[1] = 3.22e-16;//F/micron
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 0;//F/micron2
- I_on_n[1] = 727.6e-6;//A/micron
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
- long_channel_leakage_reduction[1] = 1/1.89;
- I_off_n[1][0] = 2.43e-11;
- I_off_n[1][10] = 4.85e-11;
- I_off_n[1][20] = 9.68e-11;
- I_off_n[1][30] = 1.94e-10;
- I_off_n[1][40] = 3.87e-10;
- I_off_n[1][50] = 7.73e-10;
- I_off_n[1][60] = 3.55e-10;
- I_off_n[1][70] = 3.09e-9;
- I_off_n[1][80] = 6.19e-9;
- I_off_n[1][90] = 1.24e-8;
- I_off_n[1][100]= 2.48e-8;
-
- I_g_on_n[1][0] = 4.51e-10;//A/micron
- I_g_on_n[1][10] = 4.51e-10;
- I_g_on_n[1][20] = 4.51e-10;
- I_g_on_n[1][30] = 4.51e-10;
- I_g_on_n[1][40] = 4.51e-10;
- I_g_on_n[1][50] = 4.51e-10;
- I_g_on_n[1][60] = 4.51e-10;
- I_g_on_n[1][70] = 4.51e-10;
- I_g_on_n[1][80] = 4.51e-10;
- I_g_on_n[1][90] = 4.51e-10;
- I_g_on_n[1][100] = 4.51e-10;
-
- //22 nm LOP
- vdd[2] = 0.6;
- Lphy[2] = 0.011;
- Lelec[2] = 0.00604;//Lelec is the electrical gate-length.
- t_ox[2] = 0.8e-3;//micron
- v_th[2] = 0.2315;//V
- c_ox[2] = 2.87e-14;//F/micron2
- mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[2] = 1.81e-2; //V/micron
- c_g_ideal[2] = 3.16e-16;//F/micron
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab
- I_on_n[2] = 916.1e-6;//A/micron
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.73;
- n_to_p_eff_curr_drv_ratio[2] = 2;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron
- long_channel_leakage_reduction[2] = 1/2.38;
-
- I_off_n[2][0] = 1.31e-8;
- I_off_n[2][10] = 2.60e-8;
- I_off_n[2][20] = 5.14e-8;
- I_off_n[2][30] = 1.02e-7;
- I_off_n[2][40] = 2.02e-7;
- I_off_n[2][50] = 3.99e-7;
- I_off_n[2][60] = 7.91e-7;
- I_off_n[2][70] = 1.09e-6;
- I_off_n[2][80] = 2.09e-6;
- I_off_n[2][90] = 4.04e-6;
- I_off_n[2][100]= 4.48e-6;
-
- I_g_on_n[2][0] = 2.74e-9;//A/micron
- I_g_on_n[2][10] = 2.74e-9;
- I_g_on_n[2][20] = 2.74e-9;
- I_g_on_n[2][30] = 2.74e-9;
- I_g_on_n[2][40] = 2.74e-9;
- I_g_on_n[2][50] = 2.74e-9;
- I_g_on_n[2][60] = 2.74e-9;
- I_g_on_n[2][70] = 2.74e-9;
- I_g_on_n[2][80] = 2.74e-9;
- I_g_on_n[2][90] = 2.74e-9;
- I_g_on_n[2][100] = 2.74e-9;
-
-
-
- if (ram_cell_tech_type == 3)
- {}
- else if (ram_cell_tech_type == 4)
- {
- //22 nm commodity DRAM cell access transistor technology parameters.
- //parameters
- curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
- //2005 ITRS, the value was about twice the value in 2007 ITRS
- Lphy[3] = 0.022;//micron
- Lelec[3] = 0.0181;//micron.
- curr_v_th_dram_access_transistor = 1;//V
- width_dram_access_transistor = 0.022;//micron
- curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always
- //kept constant. In reality this could perhaps be lower
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.022*0.022;//micron2.
- curr_asp_ratio_cell_dram = 0.667;
- curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
- //kept constant.
-
- //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
- curr_vpp = 2.3;//vpp. V
- t_ox[3] = 3.5e-3;//micron
- v_th[3] = 1.0;//V
- c_ox[3] = 9.06e-15;//F/micron2
- mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs
- Vdsat[3] = 0.0972; //V/micron
- c_g_ideal[3] = 1.99e-16;//F/micron
- c_fringe[3] = 0.053e-15;//F/micron
- c_junc[3] = 1e-15;//F/micron2
- I_on_n[3] = 910.5e-6;//A/micron
- I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm.
- //
- n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.1e-13; //A/micron
- I_off_n[3][10] = 2.11e-13;
- I_off_n[3][20] = 3.88e-13;
- I_off_n[3][30] = 6.9e-13;
- I_off_n[3][40] = 1.19e-12;
- I_off_n[3][50] = 1.98e-12;
- I_off_n[3][60] = 3.22e-12;
- I_off_n[3][70] = 5.09e-12;
- I_off_n[3][80] = 7.85e-12;
- I_off_n[3][90] = 1.18e-11;
- I_off_n[3][100] = 1.72e-11;
-
- }
- else
- {
- //some error handler
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7/0.7;
- curr_sckt_co_eff = 1.1296;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if(tech == 16){
- //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm
- //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP.
- //16 nm HP
- vdd[0] = 0.7;
- Lphy[0] = 0.006;//Lphy is the physical gate-length.
- Lelec[0] = 0.00315;//Lelec is the electrical gate-length.
- t_ox[0] = 0.5e-3;//micron
- v_th[0] = 0.1489;//V
- c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR
- mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet
- c_g_ideal[0] = 2.30e-16;//F/micron
- c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3
- c_junc[0] = 0;//F/micron2 MASTAR result dynamic
- I_on_n[0] = 2768.4e-6;//A/micron
- I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current.
- n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
- //"Dynamic" tab of Device workspace.
- gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/2.655;
- I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07;
- I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07;
- I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07;
- I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07;
- I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07;
- I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07;
- I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07;
- I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07;
- I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07;
- I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07;
- I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07;
- //for 16nm DG HP
- I_g_on_n[0][0] = 1.07e-9;//A/micron
- I_g_on_n[0][10] = 1.07e-9;
- I_g_on_n[0][20] = 1.07e-9;
- I_g_on_n[0][30] = 1.07e-9;
- I_g_on_n[0][40] = 1.07e-9;
- I_g_on_n[0][50] = 1.07e-9;
- I_g_on_n[0][60] = 1.07e-9;
- I_g_on_n[0][70] = 1.07e-9;
- I_g_on_n[0][80] = 1.07e-9;
- I_g_on_n[0][90] = 1.07e-9;
- I_g_on_n[0][100] = 1.07e-9;
-
-// //16 nm LSTP DG
-// vdd[1] = 0.8;
-// Lphy[1] = 0.014;
-// Lelec[1] = 0.008;//Lelec is the electrical gate-length.
-// t_ox[1] = 1.1e-3;//micron
-// v_th[1] = 0.40126;//V
-// c_ox[1] = 2.30e-14;//F/micron2
-// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
-// Vdsat[1] = 6.64e-2; //V/micron
-// c_g_ideal[1] = 3.22e-16;//F/micron
-// c_fringe[1] = 0.008e-15;
-// c_junc[1] = 0;//F/micron2
-// I_on_n[1] = 727.6e-6;//A/micron
-// I_on_p[1] = I_on_n[1] / 2;
-// nmos_effective_resistance_multiplier = 1.99;
-// n_to_p_eff_curr_drv_ratio[1] = 2;
-// gmp_to_gmn_multiplier[1] = 0.99;
-// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
-// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
-// I_off_n[1][0] = 2.43e-11;
-// I_off_n[1][10] = 4.85e-11;
-// I_off_n[1][20] = 9.68e-11;
-// I_off_n[1][30] = 1.94e-10;
-// I_off_n[1][40] = 3.87e-10;
-// I_off_n[1][50] = 7.73e-10;
-// I_off_n[1][60] = 3.55e-10;
-// I_off_n[1][70] = 3.09e-9;
-// I_off_n[1][80] = 6.19e-9;
-// I_off_n[1][90] = 1.24e-8;
-// I_off_n[1][100]= 2.48e-8;
-//
-// // for 22nm LSTP HP
-// I_g_on_n[1][0] = 4.51e-10;//A/micron
-// I_g_on_n[1][10] = 4.51e-10;
-// I_g_on_n[1][20] = 4.51e-10;
-// I_g_on_n[1][30] = 4.51e-10;
-// I_g_on_n[1][40] = 4.51e-10;
-// I_g_on_n[1][50] = 4.51e-10;
-// I_g_on_n[1][60] = 4.51e-10;
-// I_g_on_n[1][70] = 4.51e-10;
-// I_g_on_n[1][80] = 4.51e-10;
-// I_g_on_n[1][90] = 4.51e-10;
-// I_g_on_n[1][100] = 4.51e-10;
-
-
- if (ram_cell_tech_type == 3)
- {}
- else if (ram_cell_tech_type == 4)
- {
- //22 nm commodity DRAM cell access transistor technology parameters.
- //parameters
- curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
- //2005 ITRS, the value was about twice the value in 2007 ITRS
- Lphy[3] = 0.022;//micron
- Lelec[3] = 0.0181;//micron.
- curr_v_th_dram_access_transistor = 1;//V
- width_dram_access_transistor = 0.022;//micron
- curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always
- //kept constant. In reality this could perhaps be lower
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.022*0.022;//micron2.
- curr_asp_ratio_cell_dram = 0.667;
- curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
- //kept constant.
-
- //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
- curr_vpp = 2.3;//vpp. V
- t_ox[3] = 3.5e-3;//micron
- v_th[3] = 1.0;//V
- c_ox[3] = 9.06e-15;//F/micron2
- mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs
- Vdsat[3] = 0.0972; //V/micron
- c_g_ideal[3] = 1.99e-16;//F/micron
- c_fringe[3] = 0.053e-15;//F/micron
- c_junc[3] = 1e-15;//F/micron2
- I_on_n[3] = 910.5e-6;//A/micron
- I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm.
- //
- n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.1e-13; //A/micron
- I_off_n[3][10] = 2.11e-13;
- I_off_n[3][20] = 3.88e-13;
- I_off_n[3][30] = 6.9e-13;
- I_off_n[3][40] = 1.19e-12;
- I_off_n[3][50] = 1.98e-12;
- I_off_n[3][60] = 3.22e-12;
- I_off_n[3][70] = 5.09e-12;
- I_off_n[3][80] = 7.85e-12;
- I_off_n[3][90] = 1.18e-11;
- I_off_n[3][100] = 1.72e-11;
-
- }
- else
- {
- //some error handler
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferentiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7/0.7/0.7;
- curr_sckt_co_eff = 1.1296;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
-
- g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type];
- g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type];
- g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type];
- g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type];
- g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type];
- g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type];
- g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type];
- g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type];
- g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type];
- g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type];
- g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type];
- g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type];
- g_tp.peri_global.n_to_p_eff_curr_drv_ratio
- += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type];
- g_tp.peri_global.long_channel_leakage_reduction
- += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type];
- g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
- gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type];
-
- g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
- g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
- g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
- g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
- g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
- g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
- g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
- g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
- g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
- g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
- g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
- g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
- g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
- g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
-
- g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell;
- g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor;
- g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
- g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
- g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
- g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
- g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
- g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell;
- g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp;
- g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
- g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell;
- g_tp.vpp += curr_alpha * curr_vpp;
- g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
- g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
- g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
- g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
- g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
- g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
- g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor];
- g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor];
- g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor];
- g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor];
- g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
- g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
-
- g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type];
- g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
- g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
- g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
- g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
- g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
- g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
- g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
- g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
- g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
- g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
- g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
- g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
- g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
-
- g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram;
- g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram;
- g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram;
- area_cell_dram += curr_alpha * curr_area_cell_dram;
- asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram;
-
- g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram;
- g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram;
- g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram;
- area_cell_sram += curr_alpha * curr_area_cell_sram;
- asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram;
-
- g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng
- g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam;
- g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam;
- area_cell_cam += curr_alpha * curr_area_cell_cam;
- asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam;
-
- //Sense amplifier latch Gm calculation
- mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type];
- Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type];
-
- //Empirical undifferentiated core/FU coefficient
- g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff;
- g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density;
- g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead;
- g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead;
- g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff;
- }
-
-
- //Currently we are not modeling the resistance/capacitance of poly anywhere.
- //Continuous functions (or data that have already been processed) do not need linear interpolation
- g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process
- g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process
- g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um;
- g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um;
- g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um;
- g_tp.cell_h_def = 50 * g_ip->F_sz_um;
- g_tp.w_poly_contact = g_ip->F_sz_um;
- g_tp.spacing_poly_to_contact = g_ip->F_sz_um;
- g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um;
- g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um;
-
- g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2;
- g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um;
- g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process
- g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process
- g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process
- g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process
- g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_;
- g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_;
-
- if (ram_cell_tech_type == comm_dram)
- {
- g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um;
- g_tp.h_dec = 8; // in the unit of memory cell height
- }
- else
- {
- g_tp.max_w_nmos_dec = g_tp.max_w_nmos_;
- g_tp.h_dec = 4; // in the unit of memory cell height
- }
-
- g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal;
- g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal;
- g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal;
-
- g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal;
- g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n;
- //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p;
-
- g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal;
-
- double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global;
- double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch;
- g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch;
-
- g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram));
- g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w;
- g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram));
- g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w;
- g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng
- g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w;
-
- g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd;
- g_tp.sram.Vbitpre = vdd[ram_cell_tech_type];
- g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng
- pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
- g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
-
-
- double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES];
-
- for (iter=0; iter<=1; ++iter)
- {
- // linear interpolation
- if (iter == 0)
- {
- tech = tech_lo;
- if (tech_lo == tech_hi)
- {
- curr_alpha = 1;
- }
- else
- {
- curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
- }
- }
- else
- {
- tech = tech_hi;
- if (tech_lo == tech_hi)
- {
- break;
- }
- else
- {
- curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
- }
- }
-
- if (tech == 90)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
- aspect_ratio[0][0] = 2.4;
- wire_width = wire_pitch[0][0] / 2; //micron
- wire_thickness = aspect_ratio[0][0] * wire_width;//micron
- wire_spacing = wire_pitch[0][0] - wire_width;//micron
- barrier_thickness = 0.01;//micron
- dishing_thickness = 0;//micron
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
- ild_thickness[0][0] = 0.48;//micron
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.709;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
- vert_dielectric_constant[0][0],
- fringe_cap);//F/micron.
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.4;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.48;//micron
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.709;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.7;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.96;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.709;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.008;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.48;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 3.038;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
- vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.48;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 3.038;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
- vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 1.1;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 3.038;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.09;
- wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09);
- wire_r_per_micron[1][3] = 12 / 0.09;
- }
- else if (tech == 65)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 2.7;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.405;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.303;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.7;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.405;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.303;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.8;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.81;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.303;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.006;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.405;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.734;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.405;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.734;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.77;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.734;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.065;
- wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065);
- wire_r_per_micron[1][3] = 12 / 0.065;
- }
- else if (tech == 45)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.315;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.958;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.315;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.958;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.63;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.958;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.004;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.315;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.46;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.315;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.46;
- vert_dielectric_constant[1][1] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.55;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.46;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.045;
- wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045);
- wire_r_per_micron[1][3] = 12 / 0.045;
- }
- else if (tech == 32)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.21;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.664;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.21;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.664;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.42;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.664;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.003;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.21;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.214;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- aspect_ratio[1][1] = 2.0;
- wire_width = wire_pitch[1][1] / 2;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.21;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.214;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.385;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.214;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.032;//micron
- wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron
- wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron
- }
- else if (tech == 22)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.15;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.414;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.15;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.414;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.3;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.414;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
-// //*************************
-// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][4] - wire_width;
-// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][5] - wire_width;
-// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][6] - wire_width;
-// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- //*************************
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.003;
- dishing_thickness = 0;
- alpha_scatter = 1.05;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.15;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.104;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.15;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.104;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.275;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.104;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.022;//micron
- wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron
- wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron
-
- //******************
-// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][4] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][5] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][6] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- }
-
- else if (tech == 16)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.108;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.202;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
- aspect_ratio[0][1] = 3.0;
- wire_width = wire_pitch[0][1] / 2;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.108;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.202;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.216;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.202;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
-// //*************************
-// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][4] - wire_width;
-// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][5] - wire_width;
-// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][6] - wire_width;
-// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- //*************************
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.002;
- dishing_thickness = 0;
- alpha_scatter = 1.05;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.108;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 1.998;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.108;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 1.998;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.198;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 1.998;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.016;//micron
- wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron
- wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron
-
- //******************
-// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][4] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][5] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][6] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- }
- g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
-
- g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type];
-
- g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type];
-
- g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2;
-
- g_tp.sense_delay += curr_alpha *SENSE_AMP_D;
- g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P;
-// g_tp.horiz_dielectric_constant += horiz_dielectric_constant;
-// g_tp.vert_dielectric_constant += vert_dielectric_constant;
-// g_tp.aspect_ratio += aspect_ratio;
-// g_tp.miller_value += miller_value;
-// g_tp.ild_thickness += ild_thickness;
-
- }
- g_tp.fringe_cap = fringe_cap;
-
- double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- double p_to_n_sizing_r = pmos_to_nmos_sz_ratio();
- double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0);
- double tf = rd * c_load;
- g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE);
- double KLOAD = 1;
- c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0));
- tf = rd * c_load;
- g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE);
-}
-
diff --git a/ext/mcpat/xmlParser.cc b/ext/mcpat/xmlParser.cc
index 5ac45edae..97532d506 100644
--- a/ext/mcpat/xmlParser.cc
+++ b/ext/mcpat/xmlParser.cc
@@ -75,6 +75,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Copyright (c) 2002, Business-Insight
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* <a href="http://www.Business-Insight.com">Business-Insight</a>
* All rights reserved.
*
@@ -91,7 +92,7 @@
//#endif
#define WIN32_LEAN_AND_MEAN
#include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files
- // to have "MessageBoxA" to display error messages for openFilHelper
+// to have "MessageBoxA" to display error messages for openFilHelper
#endif
#include <memory.h>
@@ -101,37 +102,49 @@
#include <cstdlib>
#include <cstring>
-XMLCSTR XMLNode::getVersion() { return _CXML("v2.39"); }
-void freeXMLString(XMLSTR t){if(t)free(t);}
+XMLCSTR XMLNode::getVersion() {
+ return _CXML("v2.39");
+}
+void freeXMLString(XMLSTR t) {
+ if (t)free(t);
+}
static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8;
static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1;
-inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; }
+inline int mmin( const int t1, const int t2 ) {
+ return t1 < t2 ? t1 : t2;
+}
// You can modify the initialization of the variable "XMLClearTags" below
// to change the clearTags that are currently recognized by the library.
// The number on the second columns is the length of the string inside the
// first column. The "<!DOCTYPE" declaration must be the second in the list.
// The "<!--" declaration must be the third in the list.
-typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag;
-static ALLXMLClearTag XMLClearTags[] =
-{
- { _CXML("<![CDATA["),9, _CXML("]]>") },
- { _CXML("<!DOCTYPE"),9, _CXML(">") },
- { _CXML("<!--") ,4, _CXML("-->") },
- { _CXML("<PRE>") ,5, _CXML("</PRE>") },
+typedef struct {
+ XMLCSTR lpszOpen;
+ int openTagLen;
+ XMLCSTR lpszClose;
+} ALLXMLClearTag;
+static ALLXMLClearTag XMLClearTags[] = {
+ { _CXML("<![CDATA["), 9, _CXML("]]>") },
+ { _CXML("<!DOCTYPE"), 9, _CXML(">") },
+ { _CXML("<!--") , 4, _CXML("-->") },
+ { _CXML("<PRE>") , 5, _CXML("</PRE>") },
// { _CXML("<Script>") ,8, _CXML("</Script>")},
- { NULL ,0, NULL }
+ { NULL , 0, NULL }
};
// You can modify the initialization of the variable "XMLEntities" below
// to change the character entities that are currently recognized by the library.
// The number on the second columns is the length of the string inside the
// first column. Additionally, the syntaxes "&#xA0;" and "&#160;" are recognized.
-typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;
-static XMLCharacterEntity XMLEntities[] =
-{
+typedef struct {
+ XMLCSTR s;
+ int l;
+ XMLCHAR c;
+} XMLCharacterEntity;
+static XMLCharacterEntity XMLEntities[] = {
{ _CXML("&amp;" ), 5, _CXML('&' )},
{ _CXML("&lt;" ), 4, _CXML('<' )},
{ _CXML("&gt;" ), 4, _CXML('>' )},
@@ -147,32 +160,51 @@ static XMLCharacterEntity XMLEntities[] =
// The following function parses the XML errors into a user friendly string.
// You can edit this to change the output language of the library to something else.
-XMLCSTR XMLNode::getError(XMLError xerror)
-{
- switch (xerror)
- {
- case eXMLErrorNone: return _CXML("No error");
- case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag");
- case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found");
- case eXMLErrorEmpty: return _CXML("Error: No XML data");
- case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name");
- case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name");
- case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag");
- case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end");
- case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found");
- case eXMLErrorNoElements: return _CXML("Error: No elements found");
- case eXMLErrorFileNotFound: return _CXML("Error: File not found");
- case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found");
- case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity");
- case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode.");
- case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars");
- case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing");
- case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file");
-
- case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4");
- case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated");
- case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character");
- case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small");
+XMLCSTR XMLNode::getError(XMLError xerror) {
+ switch (xerror) {
+ case eXMLErrorNone:
+ return _CXML("No error");
+ case eXMLErrorMissingEndTag:
+ return _CXML("Warning: Unmatched end tag");
+ case eXMLErrorNoXMLTagFound:
+ return _CXML("Warning: No XML tag found");
+ case eXMLErrorEmpty:
+ return _CXML("Error: No XML data");
+ case eXMLErrorMissingTagName:
+ return _CXML("Error: Missing start tag name");
+ case eXMLErrorMissingEndTagName:
+ return _CXML("Error: Missing end tag name");
+ case eXMLErrorUnmatchedEndTag:
+ return _CXML("Error: Unmatched end tag");
+ case eXMLErrorUnmatchedEndClearTag:
+ return _CXML("Error: Unmatched clear tag end");
+ case eXMLErrorUnexpectedToken:
+ return _CXML("Error: Unexpected token found");
+ case eXMLErrorNoElements:
+ return _CXML("Error: No elements found");
+ case eXMLErrorFileNotFound:
+ return _CXML("Error: File not found");
+ case eXMLErrorFirstTagNotFound:
+ return _CXML("Error: First Tag not found");
+ case eXMLErrorUnknownCharacterEntity:
+ return _CXML("Error: Unknown character entity");
+ case eXMLErrorCharacterCodeAbove255:
+ return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode.");
+ case eXMLErrorCharConversionError:
+ return _CXML("Error: unable to convert between WideChar and MultiByte chars");
+ case eXMLErrorCannotOpenWriteFile:
+ return _CXML("Error: unable to open file for writing");
+ case eXMLErrorCannotWriteFile:
+ return _CXML("Error: cannot write into file");
+
+ case eXMLErrorBase64DataSizeIsNotMultipleOf4:
+ return _CXML("Warning: Base64-string length is not a multiple of 4");
+ case eXMLErrorBase64DecodeTruncatedData:
+ return _CXML("Warning: Base64-string is truncated");
+ case eXMLErrorBase64DecodeIllegalCharacter:
+ return _CXML("Error: Base64-string contains an illegal character");
+ case eXMLErrorBase64DecodeBufferTooSmall:
+ return _CXML("Error: Base64 decode output buffer is too small");
};
return _CXML("Unknown");
}
@@ -187,168 +219,244 @@ XMLCSTR XMLNode::getError(XMLError xerror)
// If you plan to "port" the library to a new system/compiler, all you have to do is
// to edit the following lines.
#ifdef XML_NO_WIDE_CHAR
-char myIsTextWideChar(const void *b, int len) { return FALSE; }
+char myIsTextWideChar(const void *b, int len) {
+ return FALSE;
+}
#else
- #if defined (UNDER_CE) || !defined(_XMLWINDOWS)
- char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode
- {
+#if defined (UNDER_CE) || !defined(_XMLWINDOWS)
+// inspired by the Wine API: RtlIsTextUnicode
+char myIsTextWideChar(const void *b, int len) {
#ifdef sun
- // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer.
- if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE;
+ // for SPARC processors: wchar_t* buffers must always be aligned, otherwise it's a char* buffer.
+ if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE;
#endif
- const wchar_t *s=(const wchar_t*)b;
+ const wchar_t *s = (const wchar_t*)b;
- // buffer too small:
- if (len<(int)sizeof(wchar_t)) return FALSE;
+ // buffer too small:
+ if (len < (int)sizeof(wchar_t)) return FALSE;
- // odd length test
- if (len&1) return FALSE;
+ // odd length test
+ if (len&1) return FALSE;
- /* only checks the first 256 characters */
- len=mmin(256,len/sizeof(wchar_t));
+ /* only checks the first 256 characters */
+ len = mmin(256, len / sizeof(wchar_t));
- // Check for the special byte order:
- if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
- if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE
+ // Check for the special byte order:
+ if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
+ if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE
- // checks for ASCII characters in the UNICODE stream
- int i,stats=0;
- for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
- if (stats>len/2) return TRUE;
+ // checks for ASCII characters in the UNICODE stream
+ int i, stats=0;
+ for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
+ if (stats>len/2) return TRUE;
- // Check for UNICODE NULL chars
- for (i=0; i<len; i++) if (!s[i]) return TRUE;
+ // Check for UNICODE NULL chars
+ for (i=0; i<len; i++) if (!s[i]) return TRUE;
- return FALSE;
- }
- #else
- char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); };
- #endif
+ return FALSE;
+}
+#else
+char myIsTextWideChar(const void *b, int l) {
+ return (char)IsTextUnicode((CONST LPVOID)b, l, NULL);
+};
+#endif
#endif
#ifdef _XMLWINDOWS
// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
- #ifdef _XMLWIDECHAR
- wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
- {
- int i;
- if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,NULL,0);
- else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0);
- if (i<0) return NULL;
- wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR));
- if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,d,i);
- else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i);
- d[i]=0;
- return d;
- }
- static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); }
- static inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); }
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);}
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); }
- static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
- static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
- #else
- char *myWideCharToMultiByte(const wchar_t *s)
- {
- UINT codePage=CP_ACP; if (characterEncoding==XMLNode::char_encoding_UTF8) codePage=CP_UTF8;
- int i=(int)WideCharToMultiByte(codePage, // code page
- 0, // performance and mapping flags
- s, // wide-character string
- -1, // number of chars in string
- NULL, // buffer for new string
- 0, // size of buffer
- NULL, // default for unmappable chars
- NULL // set when default char used
- );
- if (i<0) return NULL;
- char *d=(char*)malloc(i+1);
- WideCharToMultiByte(codePage, // code page
- 0, // performance and mapping flags
- s, // wide-character string
- -1, // number of chars in string
- d, // buffer for new string
- i, // size of buffer
- NULL, // default for unmappable chars
- NULL // set when default char used
- );
- d[i]=0;
- return d;
- }
- static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
- static inline int xstrlen(XMLCSTR c) { return (int)strlen(c); }
- #ifdef __BORLANDC__
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strnicmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return stricmp(c1,c2); }
- #else
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); }
- #endif
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
- static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
- static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
- #endif
+#ifdef _XMLWIDECHAR
+wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) {
+ int i;
+ if (ce == XMLNode::char_encoding_UTF8) {
+ i = (int)MultiByteToWideChar(CP_UTF8, 0, s, -1, NULL, 0);
+ } else {
+ i = (int)MultiByteToWideChar(CP_ACP , MB_PRECOMPOSED, s, -1, NULL, 0);
+ }
+ if (i < 0) {
+ return NULL;
+ }
+ wchar_t *d = (wchar_t *)malloc((i + 1) * sizeof(XMLCHAR));
+ if (ce == XMLNode::char_encoding_UTF8) {
+ i = (int)MultiByteToWideChar(CP_UTF8, 0, s, -1, d, i);
+ } else {
+ i = (int)MultiByteToWideChar(CP_ACP , MB_PRECOMPOSED, s, -1, d, i);
+ }
+ d[i] = 0;
+ return d;
+}
+static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) {
+ return _wfopen(filename, mode);
+}
+static inline int xstrlen(XMLCSTR c) {
+ return (int)wcslen(c);
+}
+static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return _wcsnicmp(c1, c2, l);
+}
+static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return wcsncmp(c1, c2, l);
+}
+static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) {
+ return _wcsicmp(c1, c2);
+}
+static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) {
+ return (XMLSTR)wcsstr(c1, c2);
+}
+static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) {
+ return (XMLSTR)wcscpy(c1, c2);
+}
+#else
+char *myWideCharToMultiByte(const wchar_t *s) {
+ UINT codePage = CP_ACP;
+ if (characterEncoding == XMLNode::char_encoding_UTF8) codePage = CP_UTF8;
+ int i = (int)WideCharToMultiByte(codePage, // code page
+ 0, // performance and mapping flags
+ s, // wide-character string
+ -1, // number of chars in string
+ NULL, // buffer for new string
+ 0, // size of buffer
+ NULL, // default for unmappable chars
+ NULL // set when default char used
+ );
+ if (i<0) return NULL;
+ char *d=(char*)malloc(i+1);
+ WideCharToMultiByte(codePage, // code page
+ 0, // performance and mapping flags
+ s, // wide-character string
+ -1, // number of chars in string
+ d, // buffer for new string
+ i, // size of buffer
+ NULL, // default for unmappable chars
+ NULL // set when default char used
+ );
+ d[i] = 0;
+ return d;
+}
+static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) {
+ return fopen(filename, mode);
+}
+static inline int xstrlen(XMLCSTR c) {
+ return (int)strlen(c);
+}
+#ifdef __BORLANDC__
+static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return strnicmp(c1, c2, l);
+}
+static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) {
+ return stricmp(c1, c2);
+}
+#else
+static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return _strnicmp(c1, c2, l);
+}
+static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) {
+ return _stricmp(c1, c2);
+}
+#endif
+static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return strncmp(c1, c2, l);
+}
+static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) {
+ return (XMLSTR)strstr(c1, c2);
+}
+static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) {
+ return (XMLSTR)strcpy(c1, c2);
+}
+#endif
#else
// for gcc and CC
- #ifdef XML_NO_WIDE_CHAR
- char *myWideCharToMultiByte(const wchar_t *s) { return NULL; }
- #else
- char *myWideCharToMultiByte(const wchar_t *s)
- {
- const wchar_t *ss=s;
- int i=(int)wcsrtombs(NULL,&ss,0,NULL);
- if (i<0) return NULL;
- char *d=(char *)malloc(i+1);
- wcsrtombs(d,&s,i,NULL);
- d[i]=0;
- return d;
- }
- #endif
- #ifdef _XMLWIDECHAR
- wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
- {
- const char *ss=s;
- int i=(int)mbsrtowcs(NULL,&ss,0,NULL);
- if (i<0) return NULL;
- wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t));
- mbsrtowcs(d,&s,i,NULL);
- d[i]=0;
- return d;
- }
- int xstrlen(XMLCSTR c) { return wcslen(c); }
- #ifdef sun
- // for CC
- #include <widec.h>
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);}
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); }
- #else
- // for gcc
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);}
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); }
- #endif
- static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
- static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
- static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode)
- {
- char *filenameAscii=myWideCharToMultiByte(filename);
- FILE *f;
- if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb");
- else f=fopen(filenameAscii,"wb");
- free(filenameAscii);
- return f;
- }
- #else
- static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
- static inline int xstrlen(XMLCSTR c) { return strlen(c); }
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);}
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); }
- static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
- static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
- #endif
- static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);}
+#ifdef XML_NO_WIDE_CHAR
+char *myWideCharToMultiByte(const wchar_t *s) {
+ return NULL;
+}
+#else
+char *myWideCharToMultiByte(const wchar_t *s) {
+ const wchar_t *ss = s;
+ int i = (int)wcsrtombs(NULL, &ss, 0, NULL);
+ if (i < 0) return NULL;
+ char *d = (char *)malloc(i + 1);
+ wcsrtombs(d, &s, i, NULL);
+ d[i] = 0;
+ return d;
+}
+#endif
+#ifdef _XMLWIDECHAR
+wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) {
+ const char *ss = s;
+ int i = (int)mbsrtowcs(NULL, &ss, 0, NULL);
+ if (i < 0) return NULL;
+ wchar_t *d = (wchar_t *)malloc((i + 1) * sizeof(wchar_t));
+ mbsrtowcs(d, &s, i, NULL);
+ d[i] = 0;
+ return d;
+}
+int xstrlen(XMLCSTR c) {
+ return wcslen(c);
+}
+#ifdef sun
+// for CC
+#include <widec.h>
+static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return wsncasecmp(c1, c2, l);
+}
+static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return wsncmp(c1, c2, l);
+}
+static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) {
+ return wscasecmp(c1, c2);
+}
+#else
+// for gcc
+static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return wcsncasecmp(c1, c2, l);
+}
+static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return wcsncmp(c1, c2, l);
+}
+static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) {
+ return wcscasecmp(c1, c2);
+}
+#endif
+static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) {
+ return (XMLSTR)wcsstr(c1, c2);
+}
+static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) {
+ return (XMLSTR)wcscpy(c1, c2);
+}
+static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) {
+ char *filenameAscii = myWideCharToMultiByte(filename);
+ FILE *f;
+ if (mode[0] == _CXML('r')) f = fopen(filenameAscii, "rb");
+ else f = fopen(filenameAscii, "wb");
+ free(filenameAscii);
+ return f;
+}
+#else
+static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) {
+ return fopen(filename, mode);
+}
+static inline int xstrlen(XMLCSTR c) {
+ return strlen(c);
+}
+static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return strncasecmp(c1, c2, l);
+}
+static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) {
+ return strncmp(c1, c2, l);
+}
+static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) {
+ return strcasecmp(c1, c2);
+}
+static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) {
+ return (XMLSTR)strstr(c1, c2);
+}
+static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) {
+ return (XMLSTR)strcpy(c1, c2);
+}
+#endif
+static inline int _strnicmp(const char *c1, const char *c2, int l) {
+ return strncasecmp(c1, c2, l);
+}
#endif
@@ -359,35 +467,86 @@ char myIsTextWideChar(const void *b, int len) { return FALSE; }
// There are only here as "convenience" functions for the user.
// If you don't need them, you can delete them without any trouble.
#ifdef _XMLWIDECHAR
- #ifdef _XMLWINDOWS
- // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
- char xmltob(XMLCSTR t,int v){ if (t&&(*t)) return (char)_wtoi(t); return v; }
- int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; }
- long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; }
- double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; }
- #else
- #ifdef sun
- // for CC
- #include <widec.h>
- char xmltob(XMLCSTR t,int v){ if (t) return (char)wstol(t,NULL,10); return v; }
- int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; }
- long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; }
- #else
- // for gcc
- char xmltob(XMLCSTR t,int v){ if (t) return (char)wcstol(t,NULL,10); return v; }
- int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; }
- long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; }
- #endif
- double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; }
- #endif
+#ifdef _XMLWINDOWS
+// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
+char xmltob(XMLCSTR t, int v) {
+ if (t && (*t)) return (char)_wtoi(t);
+ return v;
+}
+int xmltoi(XMLCSTR t, int v) {
+ if (t && (*t)) return _wtoi(t);
+ return v;
+}
+long xmltol(XMLCSTR t, long v) {
+ if (t && (*t)) return _wtol(t);
+ return v;
+}
+// Convert the wide-character text `t` to a double.
+// Returns the default `v` when `t` is NULL or empty; `v` is also left
+// untouched when swscanf cannot convert anything from `t`.
+// swscanf (not wscanf) is required here: `t` is the input buffer to parse,
+// whereas wscanf would use `t` as a format string and read from stdin.
+// "%lf" is the conversion that matches the double* argument.
+double xmltof(XMLCSTR t, double v) {
+ if (t && (*t)) swscanf(t, L"%lf", &v); /*v=_wtof(t);*/
+ return v;
+}
#else
- char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; }
- int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; }
- long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; }
- double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; }
+#ifdef sun
+// for CC
+#include <widec.h>
+char xmltob(XMLCSTR t, int v) {
+ if (t) return (char)wstol(t, NULL, 10);
+ return v;
+}
+int xmltoi(XMLCSTR t, int v) {
+ if (t) return (int)wstol(t, NULL, 10);
+ return v;
+}
+long xmltol(XMLCSTR t, long v) {
+ if (t) return wstol(t, NULL, 10);
+ return v;
+}
+#else
+// for gcc
+char xmltob(XMLCSTR t, int v) {
+ if (t) return (char)wcstol(t, NULL, 10);
+ return v;
+}
+int xmltoi(XMLCSTR t, int v) {
+ if (t) return (int)wcstol(t, NULL, 10);
+ return v;
+}
+long xmltol(XMLCSTR t, long v) {
+ if (t) return wcstol(t, NULL, 10);
+ return v;
+}
+#endif
+// Convert the wide-character text `t` to a double.
+// Returns the default `v` when `t` is NULL or empty; `v` is also left
+// untouched when swscanf cannot convert anything from `t`.
+// swscanf (not wscanf) is required here: `t` is the input buffer to parse,
+// whereas wscanf would use `t` as a format string and read from stdin.
+// "%lf" is the conversion that matches the double* argument.
+double xmltof(XMLCSTR t, double v) {
+ if (t && (*t)) swscanf(t, L"%lf", &v); /*v=_wtof(t);*/
+ return v;
+}
+#endif
+#else
+char xmltob(XMLCSTR t, char v) {
+ if (t && (*t)) return (char)atoi(t);
+ return v;
+}
+int xmltoi(XMLCSTR t, int v) {
+ if (t && (*t)) return atoi(t);
+ return v;
+}
+long xmltol(XMLCSTR t, long v) {
+ if (t && (*t)) return atol(t);
+ return v;
+}
+double xmltof(XMLCSTR t, double v) {
+ if (t && (*t)) return atof(t);
+ return v;
+}
#endif
-XMLCSTR xmltoa(XMLCSTR t,XMLCSTR v){ if (t) return t; return v; }
-XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; }
+XMLCSTR xmltoa(XMLCSTR t, XMLCSTR v) {
+ if (t) return t;
+ return v;
+}
+XMLCHAR xmltoc(XMLCSTR t, XMLCHAR v) {
+ if (t && (*t)) return *t;
+ return v;
+}
/////////////////////////////////////////////////////////////////////////
// the "openFileHelper" function //
@@ -395,42 +554,47 @@ XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; }
// Since each application has its own way to report and deal with errors, you should modify & rewrite
// the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs.
-XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
-{
+XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) {
// guess the value of the global parameter "characterEncoding"
// (the guess is based on the first 200 bytes of the file).
- FILE *f=xfopen(filename,_CXML("rb"));
- if (f)
- {
+ FILE *f = xfopen(filename, _CXML("rb"));
+ if (f) {
char bb[205];
- int l=(int)fread(bb,1,200,f);
- setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText);
+ int l = (int)fread(bb, 1, 200, f);
+ setGlobalOptions(guessCharEncoding(bb, l), guessWideCharChars,
+ dropWhiteSpace, removeCommentsInMiddleOfText);
fclose(f);
}
// parse the file
XMLResults pResults;
- XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);
+ XMLNode xnode = XMLNode::parseFile(filename, tag, &pResults);
// display error message (if any)
- if (pResults.error != eXMLErrorNone)
- {
+ if (pResults.error != eXMLErrorNone) {
// create message
- char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML("");
- if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; }
+ char message[2000], *s1 = (char*)"", *s3 = (char*)"";
+ XMLCSTR s2 = _CXML("");
+ if (pResults.error == eXMLErrorFirstTagNotFound) {
+ s1 = (char*)"First Tag should be '";
+ s2 = tag;
+ s3 = (char*)"'.\n";
+ }
sprintf(message,
#ifdef _XMLWIDECHAR
- "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"
+ "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"
#else
- "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"
+ "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"
#endif
- ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);
+ , filename, XMLNode::getError(pResults.error), pResults.nLine,
+ pResults.nColumn, s1, s2, s3);
// display message
#if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_)
- MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);
+ MessageBoxA(NULL, message, "XML Parsing error", MB_OK | MB_ICONERROR |
+ MB_TOPMOST);
#else
- printf("%s",message);
+ printf("%s", message);
#endif
exit(255);
}
@@ -450,106 +614,101 @@ XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
// This table is used as lookup-table to know the length of a character (in byte) based on the
// content of the first byte of the character.
// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
-static const char XML_utf8ByteTable[256] =
-{
+static const char XML_utf8ByteTable[256] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
- 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte
- 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 End of ASCII range
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x80 0x80 to 0xc1 invalid
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x90
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xa0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xb0
+ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xc0 0xc2 to 0xdf 2 byte
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,// 0xe0 0xe0 to 0xef 3 byte
+ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
-static const char XML_legacyByteTable[256] =
-{
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+static const char XML_legacyByteTable[256] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
-static const char XML_sjisByteTable[256] =
-{
+static const char XML_sjisByteTable[256] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
- 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x80 0x81 to 0x9F 2 bytes
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x90
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xa0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xb0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xc0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xd0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0 0xe0 to 0xef 2 bytes
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0
};
-static const char XML_gb2312ByteTable[256] =
-{
+static const char XML_gb2312ByteTable[256] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
- 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
- 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x80
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x90
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xa0 0xa1 to 0xf7 2 bytes
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xb0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xc0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0
+ 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0
};
-static const char XML_gbk_big5_ByteTable[256] =
-{
+static const char XML_gbk_big5_ByteTable[256] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
- 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x80 0x81 to 0xfe 2 bytes
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x90
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xa0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xb0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xc0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 // 0xf0
};
-static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8"
+// the default is "characterEncoding=XMLNode::char_encoding_UTF8"
+static const char *XML_ByteTable = (const char *)XML_utf8ByteTable;
#endif
XMLNode XMLNode::emptyXMLNode;
-XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
-XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};
+XMLClear XMLNode::emptyXMLClear = { NULL, NULL, NULL};
+XMLAttribute XMLNode::emptyXMLAttribute = { NULL, NULL};
// Enumeration used to decipher what type a token is
-typedef enum XMLTokenTypeTag
-{
+typedef enum XMLTokenTypeTag {
eTokenText = 0,
eTokenQuotedText,
eTokenTagStart, /* "<" */
@@ -563,8 +722,7 @@ typedef enum XMLTokenTypeTag
} XMLTokenType;
// Main structure used for parsing XML
-typedef struct XML
-{
+typedef struct XML {
XMLCSTR lpXML;
XMLCSTR lpszText;
int nIndex,nIndexMissigEndTag;
@@ -576,15 +734,13 @@ typedef struct XML
int nFirst;
} XML;
-typedef struct
-{
+typedef struct {
ALLXMLClearTag *pClr;
XMLCSTR pStr;
} NextToken;
// Enumeration used when parsing attributes
-typedef enum Attrib
-{
+typedef enum Attrib {
eAttribName = 0,
eAttribEquals,
eAttribValue
@@ -592,118 +748,126 @@ typedef enum Attrib
// Enumeration used when parsing elements to dictate whether we are currently
// inside a tag
-typedef enum Status
-{
+typedef enum Status {
eInsideTag = 0,
eOutsideTag
} Status;
-XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const
-{
+XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const {
if (!d) return eXMLErrorNone;
- FILE *f=xfopen(filename,_CXML("wb"));
+ FILE *f = xfopen(filename, _CXML("wb"));
if (!f) return eXMLErrorCannotOpenWriteFile;
#ifdef _XMLWIDECHAR
- unsigned char h[2]={ 0xFF, 0xFE };
- if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile;
- if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
- {
- if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",sizeof(wchar_t)*40,1,f))
+ unsigned char h[2] = { 0xFF, 0xFE };
+ if (!fwrite(h, 2, 1, f)) return eXMLErrorCannotWriteFile;
+ if ((!isDeclaration()) && ((d->lpszName) ||
+ (!getChildNode().isDeclaration()))) {
+ if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",
+ sizeof(wchar_t)*40, 1, f))
return eXMLErrorCannotWriteFile;
}
#else
- if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
- {
- if (characterEncoding==char_encoding_UTF8)
- {
+ if ((!isDeclaration()) && ((d->lpszName) ||
+ (!getChildNode().isDeclaration()))) {
+ if (characterEncoding == char_encoding_UTF8) {
// header so that windows recognize the file as UTF-8:
- unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
- encoding="utf-8";
- } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS";
-
- if (!encoding) encoding="ISO-8859-1";
- if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) return eXMLErrorCannotWriteFile;
- } else
- {
- if (characterEncoding==char_encoding_UTF8)
- {
- unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
+ unsigned char h[3] = {0xEF, 0xBB, 0xBF};
+ if (!fwrite(h, 3, 1, f)) return eXMLErrorCannotWriteFile;
+ encoding = "utf-8";
+ } else if (characterEncoding == char_encoding_ShiftJIS)
+ encoding = "SHIFT-JIS";
+
+ if (!encoding) encoding = "ISO-8859-1";
+ if (fprintf(f, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", encoding)
+ < 0)
+ return eXMLErrorCannotWriteFile;
+ } else {
+ if (characterEncoding == char_encoding_UTF8) {
+ unsigned char h[3] = {0xEF, 0xBB, 0xBF};
+ if (!fwrite(h, 3, 1, f)) return eXMLErrorCannotWriteFile;
}
}
#endif
int i;
- XMLSTR t=createXMLString(nFormat,&i);
- if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile;
- if (fclose(f)!=0) return eXMLErrorCannotWriteFile;
+ XMLSTR t = createXMLString(nFormat, &i);
+ if (!fwrite(t, sizeof(XMLCHAR)*i, 1, f)) return eXMLErrorCannotWriteFile;
+ if (fclose(f) != 0) return eXMLErrorCannotWriteFile;
free(t);
return eXMLErrorNone;
}
// Duplicate a given string.
-XMLSTR stringDup(XMLCSTR lpszData, int cbData)
-{
- if (lpszData==NULL) return NULL;
+XMLSTR stringDup(XMLCSTR lpszData, int cbData) {
+ if (lpszData == NULL) return NULL;
XMLSTR lpszNew;
- if (cbData==-1) cbData=(int)xstrlen(lpszData);
- lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));
- if (lpszNew)
- {
+ if (cbData == -1) cbData = (int)xstrlen(lpszData);
+ lpszNew = (XMLSTR)malloc((cbData + 1) * sizeof(XMLCHAR));
+ if (lpszNew) {
memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));
lpszNew[cbData] = (XMLCHAR)NULL;
}
return lpszNew;
}
-XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source)
-{
- XMLSTR dd=dest;
+XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest, XMLCSTR source) {
+ XMLSTR dd = dest;
XMLCHAR ch;
XMLCharacterEntity *entity;
- while ((ch=*source))
- {
- entity=XMLEntities;
- do
- {
- if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }
+ while ((ch = *source)) {
+ entity = XMLEntities;
+ do {
+ if (ch == entity->c) {
+ xstrcpy(dest, entity->s);
+ dest += entity->l;
+ source++;
+ goto out_of_loop1;
+ }
entity++;
- } while(entity->s);
+ } while (entity->s);
#ifdef _XMLWIDECHAR
- *(dest++)=*(source++);
+ *(dest++) = *(source++);
#else
- switch(XML_ByteTable[(unsigned char)ch])
- {
- case 4: *(dest++)=*(source++);
- case 3: *(dest++)=*(source++);
- case 2: *(dest++)=*(source++);
- case 1: *(dest++)=*(source++);
+ switch (XML_ByteTable[(unsigned char)ch]) {
+ case 4:
+ *(dest++) = *(source++);
+ case 3:
+ *(dest++) = *(source++);
+ case 2:
+ *(dest++) = *(source++);
+ case 1:
+ *(dest++) = *(source++);
}
#endif
out_of_loop1:
;
}
- *dest=0;
+ *dest = 0;
return dd;
}
// private (used while rendering):
-int ToXMLStringTool::lengthXMLString(XMLCSTR source)
-{
- int r=0;
+int ToXMLStringTool::lengthXMLString(XMLCSTR source) {
+ int r = 0;
XMLCharacterEntity *entity;
XMLCHAR ch;
- while ((ch=*source))
- {
- entity=XMLEntities;
- do
- {
- if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; }
+ while ((ch = *source)) {
+ entity = XMLEntities;
+ do {
+ if (ch == entity->c) {
+ r += entity->l;
+ source++;
+ goto out_of_loop1;
+ }
entity++;
- } while(entity->s);
+ } while (entity->s);
#ifdef _XMLWIDECHAR
- r++; source++;
+ r++;
+ source++;
#else
- ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch;
+ ch = XML_ByteTable[(unsigned char)ch];
+ r += ch;
+ source += ch;
#endif
out_of_loop1:
;
@@ -711,18 +875,25 @@ out_of_loop1:
return r;
}
-ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); }
-void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; }
-XMLSTR ToXMLStringTool::toXML(XMLCSTR source)
-{
- int l=lengthXMLString(source)+1;
- if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); }
- return toXMLUnSafe(buf,source);
+ToXMLStringTool::~ToXMLStringTool() {
+ freeBuffer();
+}
+void ToXMLStringTool::freeBuffer() {
+ if (buf) free(buf);
+ buf = NULL;
+ buflen = 0;
+}
+XMLSTR ToXMLStringTool::toXML(XMLCSTR source) {
+ int l = lengthXMLString(source) + 1;
+ if (l > buflen) {
+ buflen = l;
+ buf = (XMLSTR)realloc(buf, l * sizeof(XMLCHAR));
+ }
+ return toXMLUnSafe(buf, source);
}
// private:
-XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
-{
+XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) {
// This function is the opposite of the function "toXMLString". It decodes the escape
// sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters
// &,",',<,>. This function is used internally by the XML Parser. All the calls to
@@ -732,108 +903,134 @@ XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
// out: new allocated string converted from xml
if (!s) return NULL;
- int ll=0,j;
+ int ll = 0, j;
XMLSTR d;
- XMLCSTR ss=s;
+ XMLCSTR ss = s;
XMLCharacterEntity *entity;
- while ((lo>0)&&(*s))
- {
- if (*s==_CXML('&'))
- {
- if ((lo>2)&&(s[1]==_CXML('#')))
- {
- s+=2; lo-=2;
- if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; }
- while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++;
- if (*s!=_CXML(';'))
- {
- pXML->error=eXMLErrorUnknownCharacterEntity;
+ while ((lo > 0) && (*s)) {
+ if (*s == _CXML('&')) {
+ if ((lo > 2) && (s[1] == _CXML('#'))) {
+ s += 2;
+ lo -= 2;
+ if ((*s == _CXML('X')) || (*s == _CXML('x'))) {
+ s++;
+ lo--;
+ }
+ while ((*s) && (*s != _CXML(';')) && ((lo--) > 0)) {
+ s++;
+ }
+ if (*s != _CXML(';')) {
+ pXML->error = eXMLErrorUnknownCharacterEntity;
return NULL;
}
- s++; lo--;
- } else
- {
- entity=XMLEntities;
- do
- {
- if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }
+ s++;
+ lo--;
+ } else {
+ entity = XMLEntities;
+ do {
+ if ((lo >= entity->l) &&
+ (xstrnicmp(s, entity->s, entity->l) == 0)) {
+ s += entity->l;
+ lo -= entity->l;
+ break;
+ }
entity++;
- } while(entity->s);
- if (!entity->s)
- {
- pXML->error=eXMLErrorUnknownCharacterEntity;
+ } while (entity->s);
+ if (!entity->s) {
+ pXML->error = eXMLErrorUnknownCharacterEntity;
return NULL;
}
}
- } else
- {
+ } else {
#ifdef _XMLWIDECHAR
- s++; lo--;
+ s++;
+ lo--;
#else
- j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;
+ j = XML_ByteTable[(unsigned char)*s];
+ s += j;
+ lo -= j;
+ ll += j - 1;
#endif
}
ll++;
}
- d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));
- s=d;
- while (ll-->0)
- {
- if (*ss==_CXML('&'))
- {
- if (ss[1]==_CXML('#'))
- {
- ss+=2; j=0;
- if ((*ss==_CXML('X'))||(*ss==_CXML('x')))
- {
+ d = (XMLSTR)malloc((ll + 1) * sizeof(XMLCHAR));
+ s = d;
+ while (ll-- > 0) {
+ if (*ss == _CXML('&')) {
+ if (ss[1] == _CXML('#')) {
+ ss += 2;
+ j = 0;
+ if ((*ss == _CXML('X')) || (*ss == _CXML('x'))) {
ss++;
- while (*ss!=_CXML(';'))
- {
- if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0');
- else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10;
- else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10;
- else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
+ while (*ss != _CXML(';')) {
+ if ((*ss >= _CXML('0')) && (*ss <= _CXML('9'))) {
+ j = (j << 4) + *ss - _CXML('0');
+ } else if ((*ss >= _CXML('A')) && (*ss <= _CXML('F'))) {
+ j = (j << 4) + *ss - _CXML('A') + 10;
+ } else if ((*ss >= _CXML('a')) && (*ss <= _CXML('f'))) {
+ j = (j << 4) + *ss - _CXML('a') + 10;
+ } else {
+ free((void*)s);
+ pXML->error = eXMLErrorUnknownCharacterEntity;
+ return NULL;
+ }
ss++;
}
- } else
- {
- while (*ss!=_CXML(';'))
- {
- if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0');
- else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
+ } else {
+ while (*ss != _CXML(';')) {
+ if ((*ss >= _CXML('0')) && (*ss <= _CXML('9'))) {
+ j = (j * 10) + *ss - _CXML('0');
+ } else {
+ free((void*)s);
+ pXML->error = eXMLErrorUnknownCharacterEntity;
+ return NULL;
+ }
ss++;
}
}
#ifndef _XMLWIDECHAR
- if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;}
+ if (j > 255) {
+ free((void*)s);
+ pXML->error = eXMLErrorCharacterCodeAbove255;
+ return NULL;
+ }
#endif
- (*d++)=(XMLCHAR)j; ss++;
- } else
- {
- entity=XMLEntities;
- do
- {
- if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; }
+ (*d++) = (XMLCHAR)j;
+ ss++;
+ } else {
+ entity = XMLEntities;
+ do {
+ if (xstrnicmp(ss, entity->s, entity->l) == 0) {
+ *(d++) = entity->c;
+ ss += entity->l;
+ break;
+ }
entity++;
- } while(entity->s);
+ } while (entity->s);
}
- } else
- {
+ } else {
#ifdef _XMLWIDECHAR
- *(d++)=*(ss++);
+ *(d++) = *(ss++);
#else
- switch(XML_ByteTable[(unsigned char)*ss])
- {
- case 4: *(d++)=*(ss++); ll--;
- case 3: *(d++)=*(ss++); ll--;
- case 2: *(d++)=*(ss++); ll--;
- case 1: *(d++)=*(ss++);
+ switch (XML_ByteTable[(unsigned char)*ss]) {
+ case 4:
+ *(d++) = *(ss++);
+ ll--;
+ case 3:
+ *(d++) = *(ss++);
+ ll--;
+ case 2:
+ *(d++) = *(ss++);
+ ll--;
+ case 1:
+ *(d++) = *(ss++);
}
#endif
}
}
- *d=0;
+ *d = 0;
return (XMLSTR)s;
}
@@ -846,66 +1043,64 @@ char myTagCompare(XMLCSTR cclose, XMLCSTR copen)
// return 1 if different
{
if (!cclose) return 1;
- int l=(int)xstrlen(cclose);
- if (xstrnicmp(cclose, copen, l)!=0) return 1;
- const XMLCHAR c=copen[l];
- if (XML_isSPACECHAR(c)||
- (c==_CXML('/' ))||
- (c==_CXML('<' ))||
- (c==_CXML('>' ))||
- (c==_CXML('=' ))) return 0;
+ int l = (int)xstrlen(cclose);
+ if (xstrnicmp(cclose, copen, l) != 0) return 1;
+ const XMLCHAR c = copen[l];
+ if (XML_isSPACECHAR(c) ||
+ (c == _CXML('/' )) ||
+ (c == _CXML('<' )) ||
+ (c == _CXML('>' )) ||
+ (c == _CXML('=' ))) return 0;
return 1;
}
// Obtain the next character from the string.
-static inline XMLCHAR getNextChar(XML *pXML)
-{
+static inline XMLCHAR getNextChar(XML *pXML) {
XMLCHAR ch = pXML->lpXML[pXML->nIndex];
#ifdef _XMLWIDECHAR
- if (ch!=0) pXML->nIndex++;
+ if (ch != 0) pXML->nIndex++;
#else
- pXML->nIndex+=XML_ByteTable[(unsigned char)ch];
+ pXML->nIndex += XML_ByteTable[(unsigned char)ch];
#endif
return ch;
}
// Find the next token in a string.
// pcbToken contains the number of characters that have been read.
-static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType)
-{
+static NextToken GetNextToken(XML *pXML, int *pcbToken,
+ enum XMLTokenTypeTag *pType) {
NextToken result;
XMLCHAR ch;
XMLCHAR chTemp;
- int indexStart,nFoundMatch,nIsText=FALSE;
- result.pClr=NULL; // prevent warning
+ int indexStart, nFoundMatch, nIsText = FALSE;
+ result.pClr = NULL; // prevent warning
// Find next non-white space character
- do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch);
+ do {
+ indexStart = pXML->nIndex;
+ ch = getNextChar(pXML);
+ } while XML_isSPACECHAR(ch);
- if (ch)
- {
+ if (ch) {
// Cache the current string pointer
result.pStr = &pXML->lpXML[indexStart];
// First check whether the token is in the clear tag list (meaning it
// does not need formatting).
- ALLXMLClearTag *ctag=XMLClearTags;
- do
- {
- if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0)
- {
- result.pClr=ctag;
- pXML->nIndex+=ctag->openTagLen-1;
- *pType=eTokenClear;
+ ALLXMLClearTag *ctag = XMLClearTags;
+ do {
+ if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen) == 0) {
+ result.pClr = ctag;
+ pXML->nIndex += ctag->openTagLen - 1;
+ *pType = eTokenClear;
return result;
}
ctag++;
- } while(ctag->lpszOpen);
+ } while (ctag->lpszOpen);
// If we didn't find a clear tag then check for standard tokens
- switch(ch)
- {
- // Check for quotes
+ switch (ch) {
+ // Check for quotes
case _CXML('\''):
case _CXML('\"'):
// Type of token
@@ -916,17 +1111,20 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT
nFoundMatch = FALSE;
// Search through the string to find a matching quote
- while((ch = getNextChar(pXML)))
- {
- if (ch==chTemp) { nFoundMatch = TRUE; break; }
- if (ch==_CXML('<')) break;
+ while ((ch = getNextChar(pXML))) {
+ if (ch == chTemp) {
+ nFoundMatch = TRUE;
+ break;
+ }
+ if (ch == _CXML('<')) {
+ break;
+ }
}
// If we failed to find a matching quote
- if (nFoundMatch == FALSE)
- {
- pXML->nIndex=indexStart+1;
- nIsText=TRUE;
+ if (nFoundMatch == FALSE) {
+ pXML->nIndex = indexStart + 1;
+ nIsText = TRUE;
break;
}
@@ -935,17 +1133,17 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT
break;
- // Equals (used with attribute values)
+ // Equals (used with attribute values)
case _CXML('='):
*pType = eTokenEquals;
break;
- // Close tag
+ // Close tag
case _CXML('>'):
*pType = eTokenCloseTag;
break;
- // Check for tag start and tag end
+ // Check for tag start and tag end
case _CXML('<'):
// Peek at the next character to see if we have an end tag '</',
@@ -953,16 +1151,14 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT
chTemp = pXML->lpXML[pXML->nIndex];
// If we have a tag end...
- if (chTemp == _CXML('/'))
- {
+ if (chTemp == _CXML('/')) {
// Set the type and ensure we point at the next character
getNextChar(pXML);
*pType = eTokenTagEnd;
}
// If we have an XML declaration tag
- else if (chTemp == _CXML('?'))
- {
+ else if (chTemp == _CXML('?')) {
// Set the type and ensure we point at the next character
getNextChar(pXML);
@@ -970,21 +1166,19 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT
}
// Otherwise we must have a start tag
- else
- {
+ else {
*pType = eTokenTagStart;
}
break;
- // Check to see if we have a short hand type end tag ('/>').
+ // Check to see if we have a short hand type end tag ('/>').
case _CXML('/'):
// Peek at the next character to see if we have a short end tag '/>'
chTemp = pXML->lpXML[pXML->nIndex];
// If we have a short hand end tag...
- if (chTemp == _CXML('>'))
- {
+ if (chTemp == _CXML('>')) {
// Set the type and ensure we point at the next character
getNextChar(pXML);
*pType = eTokenShortHandClose;
@@ -994,65 +1188,69 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT
// If we haven't found a short hand closing tag then drop into the
// text process
- // Other characters
+ // Other characters
default:
nIsText = TRUE;
}
// If this is a TEXT node
- if (nIsText)
- {
+ if (nIsText) {
// Indicate we are dealing with text
*pType = eTokenText;
- while((ch = getNextChar(pXML)))
- {
- if XML_isSPACECHAR(ch)
- {
- indexStart++; break;
-
- } else if (ch==_CXML('/'))
- {
+ while ((ch = getNextChar(pXML))) {
+ if XML_isSPACECHAR(ch) {
+ indexStart++;
+ break;
+
+ } else if (ch == _CXML('/')) {
// If we find a slash then this maybe text or a short hand end tag
// Peek at the next character to see it we have short hand end tag
- ch=pXML->lpXML[pXML->nIndex];
+ ch = pXML->lpXML[pXML->nIndex];
// If we found a short hand end tag then we need to exit the loop
- if (ch==_CXML('>')) { pXML->nIndex--; break; }
+ if (ch == _CXML('>')) {
+ pXML->nIndex--;
+ break;
+ }
- } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('=')))
- {
- pXML->nIndex--; break;
+ } else if ((ch == _CXML('<')) || (ch == _CXML('>')) ||
+ (ch == _CXML('='))) {
+ pXML->nIndex--;
+ break;
}
}
}
- *pcbToken = pXML->nIndex-indexStart;
- } else
- {
+ *pcbToken = pXML->nIndex - indexStart;
+ } else {
// If we failed to obtain a valid character
*pcbToken = 0;
*pType = eTokenError;
- result.pStr=NULL;
+ result.pStr = NULL;
}
return result;
}
-XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName)
-{
- if (!d) { free(lpszName); return NULL; }
- if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName);
- d->lpszName=lpszName;
+XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) {
+ if (!d) {
+ free(lpszName);
+ return NULL;
+ }
+ if (d->lpszName && (lpszName != d->lpszName)) free((void*)d->lpszName);
+ d->lpszName = lpszName;
return lpszName;
}
// private:
-XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; }
-XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration)
-{
- d=(XMLNodeData*)malloc(sizeof(XMLNodeData));
- d->ref_count=1;
+XMLNode::XMLNode(struct XMLNodeDataTag *p) {
+ d = p;
+ (p->ref_count)++;
+}
+XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) {
+ d = (XMLNodeData*)malloc(sizeof(XMLNodeData));
+ d->ref_count = 1;
- d->lpszName=NULL;
- d->nChild= 0;
+ d->lpszName = NULL;
+ d->nChild = 0;
d->nText = 0;
d->nClear = 0;
d->nAttribute = 0;
@@ -1060,25 +1258,35 @@ XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration)
d->isDeclaration = isDeclaration;
d->pParent = pParent;
- d->pChild= NULL;
- d->pText= NULL;
- d->pClear= NULL;
- d->pAttribute= NULL;
- d->pOrder= NULL;
+ d->pChild = NULL;
+ d->pText = NULL;
+ d->pClear = NULL;
+ d->pAttribute = NULL;
+ d->pOrder = NULL;
updateName_WOSD(lpszName);
}
-XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); }
-XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); }
+XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) {
+ return XMLNode(NULL, lpszName, isDeclaration);
+}
+XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) {
+ return XMLNode(NULL, stringDup(lpszName), isDeclaration);
+}
#define MEMORYINCREASE 50
-static inline void myFree(void *p) { if (p) free(p); }
-static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem)
-{
- if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); }
- if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem);
+static inline void myFree(void *p) {
+ if (p) free(p);
+}
+static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) {
+ if (p == NULL) {
+ if (memInc) return malloc(memInc*sizeofElem);
+ return malloc(sizeofElem);
+ }
+ if ((memInc == 0) || ((newsize % memInc) == 0)) {
+ p = realloc(p, (newsize + memInc) * sizeofElem);
+ }
// if (!p)
// {
// printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220);
@@ -1087,20 +1295,23 @@ static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem)
}
// private:
-XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype)
-{
- if (index<0) return -1;
- int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i;
+XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index,
+ XMLElementType xxtype) {
+ if (index < 0) return -1;
+ int i = 0, j = (int)((index << 2) + xxtype), *o = d->pOrder;
+ while (o[i] != j) i++;
+ return i;
}
// private:
// update "order" information when deleting a content of a XMLNode
-int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
-{
- int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t);
- memmove(o+i, o+i+1, (n-i)*sizeof(int));
- for (;i<n;i++)
- if ((o[i]&3)==(int)t) o[i]-=4;
+int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) {
+ int n = d->nChild + d->nText + d->nClear;
+ int *o = d->pOrder;
+ int i = findPosition(d, index, t);
+ memmove(o + i, o + i + 1, (n - i)*sizeof(int));
+ for (; i < n; i++)
+ if ((o[i]&3) == (int)t) o[i] -= 4;
// We should normally do:
// d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));
// but we skip reallocation because it's too time consuming.
@@ -1108,51 +1319,67 @@ int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
return i;
}
-void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype)
-{
+void *XMLNode::addToOrder(int memoryIncrease, int *_pos, int nc, void *p,
+ int size, XMLElementType xtype) {
// in: *_pos is the position inside d->pOrder ("-1" means "EndOf")
// out: *_pos is the index inside p
- p=myRealloc(p,(nc+1),memoryIncrease,size);
- int n=d->nChild+d->nText+d->nClear;
- d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int));
- int pos=*_pos,*o=d->pOrder;
-
- if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
+ p = myRealloc(p, (nc + 1), memoryIncrease, size);
+ int n = d->nChild + d->nText + d->nClear;
+ d->pOrder = (int*)myRealloc(d->pOrder, n + 1, memoryIncrease * 3,
+ sizeof(int));
+ int pos = *_pos, *o = d->pOrder;
+
+ if ((pos < 0) || (pos >= n)) {
+ *_pos = nc;
+ o[n] = (int)((nc << 2) + xtype);
+ return p;
+ }
- int i=pos;
- memmove(o+i+1, o+i, (n-i)*sizeof(int));
+ int i = pos;
+ memmove(o + i + 1, o + i, (n - i)*sizeof(int));
- while ((pos<n)&&((o[pos]&3)!=(int)xtype)) pos++;
- if (pos==n) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
+ while ((pos < n) && ((o[pos]&3) != (int)xtype)) pos++;
+ if (pos == n) {
+ *_pos = nc;
+ o[n] = (int)((nc << 2) + xtype);
+ return p;
+ }
- o[i]=o[pos];
- for (i=pos+1;i<=n;i++) if ((o[i]&3)==(int)xtype) o[i]+=4;
+ o[i] = o[pos];
+ for (i = pos + 1; i <= n; i++) if ((o[i]&3) == (int)xtype) o[i] += 4;
- *_pos=pos=o[pos]>>2;
- memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size);
+ *_pos = pos = o[pos] >> 2;
+ memmove(((char*)p) + (pos + 1)*size, ((char*)p) + pos*size, (nc - pos)*size);
return p;
}
// Add a child node to the given element.
-XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos)
-{
+XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName,
+ char isDeclaration, int pos) {
if (!lpszName) return emptyXMLNode;
- d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);
- d->pChild[pos].d=NULL;
- d->pChild[pos]=XMLNode(d,lpszName,isDeclaration);
+ d->pChild = (XMLNode*)addToOrder(memoryIncrease, &pos, d->nChild,
+ d->pChild, sizeof(XMLNode), eNodeChild);
+ d->pChild[pos].d = NULL;
+ d->pChild[pos] = XMLNode(d, lpszName, isDeclaration);
d->nChild++;
return d->pChild[pos];
}
// Add an attribute to an element.
-XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev)
-{
+XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease, XMLSTR lpszName,
+ XMLSTR lpszValuev) {
if (!lpszName) return &emptyXMLAttribute;
- if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; }
- int nc=d->nAttribute;
- d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute));
- XMLAttribute *pAttr=d->pAttribute+nc;
+ if (!d) {
+ myFree(lpszName);
+ myFree(lpszValuev);
+ return &emptyXMLAttribute;
+ }
+ int nc = d->nAttribute;
+ d->pAttribute = (XMLAttribute*)myRealloc(d->pAttribute, (nc + 1),
+ memoryIncrease,
+ sizeof(XMLAttribute));
+ XMLAttribute *pAttr = d->pAttribute + nc;
pAttr->lpszName = lpszName;
pAttr->lpszValue = lpszValuev;
d->nAttribute++;
@@ -1160,26 +1387,35 @@ XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XML
}
// Add text to the element.
-XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos)
-{
+XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) {
if (!lpszValue) return NULL;
- if (!d) { myFree(lpszValue); return NULL; }
- d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText);
- d->pText[pos]=lpszValue;
+ if (!d) {
+ myFree(lpszValue);
+ return NULL;
+ }
+ d->pText = (XMLCSTR*)addToOrder(memoryIncrease, &pos, d->nText, d->pText,
+ sizeof(XMLSTR), eNodeText);
+ d->pText[pos] = lpszValue;
d->nText++;
return lpszValue;
}
// Add clear (unformatted) text to the element.
-XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos)
-{
+XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue,
+ XMLCSTR lpszOpen, XMLCSTR lpszClose,
+ int pos) {
if (!lpszValue) return &emptyXMLClear;
- if (!d) { myFree(lpszValue); return &emptyXMLClear; }
- d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear);
- XMLClear *pNewClear=d->pClear+pos;
+ if (!d) {
+ myFree(lpszValue);
+ return &emptyXMLClear;
+ }
+ d->pClear = (XMLClear *)addToOrder(memoryIncrease, &pos, d->nClear,
+ d->pClear, sizeof(XMLClear),
+ eNodeClear);
+ XMLClear *pNewClear = d->pClear + pos;
pNewClear->lpszValue = lpszValue;
- if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen;
- if (!lpszClose) lpszClose=XMLClearTags->lpszClose;
+ if (!lpszOpen) lpszOpen = XMLClearTags->lpszOpen;
+ if (!lpszClose) lpszClose = XMLClearTags->lpszClose;
pNewClear->lpszOpenTag = lpszOpen;
pNewClear->lpszCloseTag = lpszClose;
d->nClear++;
@@ -1188,41 +1424,44 @@ XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR l
// private:
// Parse a clear (unformatted) type node.
-char XMLNode::parseClearTag(void *px, void *_pClear)
-{
- XML *pXML=(XML *)px;
- ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear);
- int cbTemp=0;
- XMLCSTR lpszTemp=NULL;
- XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex];
- static XMLCSTR docTypeEnd=_CXML("]>");
+char XMLNode::parseClearTag(void *px, void *_pClear) {
+ XML *pXML = (XML *)px;
+ ALLXMLClearTag pClear = *((ALLXMLClearTag*)_pClear);
+ int cbTemp = 0;
+ XMLCSTR lpszTemp = NULL;
+ XMLCSTR lpXML = &pXML->lpXML[pXML->nIndex];
+ static XMLCSTR docTypeEnd = _CXML("]>");
// Find the closing tag
// Seems the <!DOCTYPE need a better treatment so lets handle it
- if (pClear.lpszOpen==XMLClearTags[1].lpszOpen)
- {
- XMLCSTR pCh=lpXML;
- while (*pCh)
- {
- if (*pCh==_CXML('<')) { pClear.lpszClose=docTypeEnd; lpszTemp=xstrstr(lpXML,docTypeEnd); break; }
- else if (*pCh==_CXML('>')) { lpszTemp=pCh; break; }
+ if (pClear.lpszOpen == XMLClearTags[1].lpszOpen) {
+ XMLCSTR pCh = lpXML;
+ while (*pCh) {
+ if (*pCh == _CXML('<')) {
+ pClear.lpszClose = docTypeEnd;
+ lpszTemp = xstrstr(lpXML, docTypeEnd);
+ break;
+ } else if (*pCh == _CXML('>')) {
+ lpszTemp = pCh;
+ break;
+ }
#ifdef _XMLWIDECHAR
pCh++;
#else
- pCh+=XML_ByteTable[(unsigned char)(*pCh)];
+ pCh += XML_ByteTable[(unsigned char)(*pCh)];
#endif
}
- } else lpszTemp=xstrstr(lpXML, pClear.lpszClose);
+ } else lpszTemp = xstrstr(lpXML, pClear.lpszClose);
- if (lpszTemp)
- {
+ if (lpszTemp) {
// Cache the size and increment the index
cbTemp = (int)(lpszTemp - lpXML);
- pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose);
+ pXML->nIndex += cbTemp + (int)xstrlen(pClear.lpszClose);
// Add the clear node to the current element
- addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1);
+ addClear_priv(MEMORYINCREASE, stringDup(lpXML, cbTemp),
+ pClear.lpszOpen, pClear.lpszClose, -1);
return 0;
}
@@ -1231,63 +1470,81 @@ char XMLNode::parseClearTag(void *px, void *_pClear)
return 1;
}
-void XMLNode::exactMemory(XMLNodeData *d)
-{
- if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int));
- if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode));
- if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute));
- if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR));
- if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear));
+void XMLNode::exactMemory(XMLNodeData *d) {
+ if (d->pOrder) {
+ d->pOrder = (int*)realloc(d->pOrder, (d->nChild + d->nText + d->nClear)
+ * sizeof(int));
+ }
+ if (d->pChild) {
+ d->pChild = (XMLNode*)realloc(d->pChild, d->nChild * sizeof(XMLNode));
+ }
+ if (d->pAttribute) {
+ d->pAttribute = (XMLAttribute*)realloc(d->pAttribute, d->nAttribute *
+ sizeof(XMLAttribute));
+ }
+ if (d->pText) {
+ d->pText = (XMLCSTR*)realloc(d->pText, d->nText * sizeof(XMLSTR));
+ }
+ if (d->pClear) {
+ d->pClear = (XMLClear *)realloc(d->pClear, d->nClear * sizeof(XMLClear));
+ }
}
-char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr)
-{
- XML *pXML=(XML *)pa;
- XMLCSTR lpszText=pXML->lpszText;
+char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) {
+ XML *pXML = (XML *)pa;
+ XMLCSTR lpszText = pXML->lpszText;
if (!lpszText) return 0;
- if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++;
+ if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText) &&
+ (lpszText != tokenPStr)) lpszText++;
int cbText = (int)(tokenPStr - lpszText);
- if (!cbText) { pXML->lpszText=NULL; return 0; }
- if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; }
- if (!cbText) { pXML->lpszText=NULL; return 0; }
- XMLSTR lpt=fromXMLString(lpszText,cbText,pXML);
+ if (!cbText) {
+ pXML->lpszText = NULL;
+ return 0;
+ }
+ if (dropWhiteSpace) {
+ cbText--;
+ while ((cbText) && XML_isSPACECHAR(lpszText[cbText])) cbText--;
+ cbText++;
+ }
+ if (!cbText) {
+ pXML->lpszText = NULL;
+ return 0;
+ }
+ XMLSTR lpt = fromXMLString(lpszText, cbText, pXML);
if (!lpt) return 1;
- pXML->lpszText=NULL;
- if (removeCommentsInMiddleOfText && d->nText && d->nClear)
- {
+ pXML->lpszText = NULL;
+ if (removeCommentsInMiddleOfText && d->nText && d->nClear) {
// if the previous insertion was a comment (<!-- -->) AND
// if the previous previous insertion was a text then, delete the comment and append the text
- int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder;
- if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText))
- {
- int i=o[n]>>2;
- if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen)
- {
+ int n = d->nChild + d->nText + d->nClear - 1, *o = d->pOrder;
+ if (((o[n]&3) == eNodeClear) && ((o[n-1]&3) == eNodeText)) {
+ int i = o[n] >> 2;
+ if (d->pClear[i].lpszOpenTag == XMLClearTags[2].lpszOpen) {
deleteClear(i);
- i=o[n-1]>>2;
- n=xstrlen(d->pText[i]);
- int n2=xstrlen(lpt)+1;
- d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR));
+ i = o[n-1] >> 2;
+ n = xstrlen(d->pText[i]);
+ int n2 = xstrlen(lpt) + 1;
+ d->pText[i] = (XMLSTR)realloc((void*)d->pText[i], (n + n2) *
+ sizeof(XMLCHAR));
if (!d->pText[i]) return 1;
- memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR));
+ memcpy((void*)(d->pText[i] + n), lpt, n2*sizeof(XMLCHAR));
free(lpt);
return 0;
}
}
}
- addText_priv(MEMORYINCREASE,lpt,-1);
+ addText_priv(MEMORYINCREASE, lpt, -1);
return 0;
}
// private:
// Recursively parse an XML element.
-int XMLNode::ParseXMLElement(void *pa)
-{
- XML *pXML=(XML *)pa;
+int XMLNode::ParseXMLElement(void *pa) {
+ XML *pXML = (XML *)pa;
int cbToken;
enum XMLTokenTypeTag xtype;
NextToken token;
- XMLCSTR lpszTemp=NULL;
- int cbTemp=0;
+ XMLCSTR lpszTemp = NULL;
+ int cbTemp = 0;
char nDeclaration;
XMLNode pNew;
enum Status status; // inside or outside a tag
@@ -1296,36 +1553,30 @@ int XMLNode::ParseXMLElement(void *pa)
assert(pXML);
// If this is the first call to the function
- if (pXML->nFirst)
- {
+ if (pXML->nFirst) {
// Assume we are outside of a tag definition
pXML->nFirst = FALSE;
status = eOutsideTag;
- } else
- {
+ } else {
// If this is not the first call then we should only be called when inside a tag.
status = eInsideTag;
}
// Iterate through the tokens in the document
- for(;;)
- {
+ for (;;) {
// Obtain the next token
token = GetNextToken(pXML, &cbToken, &xtype);
- if (xtype != eTokenError)
- {
+ if (xtype != eTokenError) {
// Check the current status
- switch(status)
- {
+ switch (status) {
- // If we are outside of a tag definition
+ // If we are outside of a tag definition
case eOutsideTag:
// Check what type of token we obtained
- switch(xtype)
- {
- // If we have found text or quoted text
+ switch (xtype) {
+ // If we have found text or quoted text
case eTokenText:
case eTokenCloseTag: /* '>' */
case eTokenShortHandClose: /* '/>' */
@@ -1333,7 +1584,7 @@ int XMLNode::ParseXMLElement(void *pa)
case eTokenEquals:
break;
- // If we found a start tag '<' and declarations '<?'
+ // If we found a start tag '<' and declarations '<?'
case eTokenTagStart:
case eTokenDeclaration:
@@ -1341,15 +1592,14 @@ int XMLNode::ParseXMLElement(void *pa)
nDeclaration = (xtype == eTokenDeclaration);
// If we have node text then add this to the element
- if (maybeAddTxT(pXML,token.pStr)) return FALSE;
+ if (maybeAddTxT(pXML, token.pStr)) return FALSE;
// Find the name of the tag
token = GetNextToken(pXML, &cbToken, &xtype);
// Return an error if we couldn't obtain the next token or
// it wasnt text
- if (xtype != eTokenText)
- {
+ if (xtype != eTokenText) {
pXML->error = eXMLErrorMissingTagName;
return FALSE;
}
@@ -1359,8 +1609,7 @@ int XMLNode::ParseXMLElement(void *pa)
#ifdef APPROXIMATE_PARSING
if (d->lpszName &&
- myTagCompare(d->lpszName, token.pStr) == 0)
- {
+ myTagCompare(d->lpszName, token.pStr) == 0) {
// Indicate to the caller that it needs to create a
// new element.
pXML->lpNewElement = token.pStr;
@@ -1372,30 +1621,28 @@ int XMLNode::ParseXMLElement(void *pa)
// If the name of the new element differs from the name of
// the current element we need to add the new element to
// the current one and recurse
- pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1);
+ pNew = addChild_priv(MEMORYINCREASE,
+ stringDup(token.pStr, cbToken),
+ nDeclaration, -1);
- while (!pNew.isEmpty())
- {
+ while (!pNew.isEmpty()) {
// Callself to process the new node. If we return
// FALSE this means we dont have any more
// processing to do...
if (!pNew.ParseXMLElement(pXML)) return FALSE;
- else
- {
+ else {
// If the call to recurse this function
// evented in a end tag specified in XML then
// we need to unwind the calls to this
// function until we find the appropriate node
// (the element name and end tag name must
// match)
- if (pXML->cbEndTag)
- {
+ if (pXML->cbEndTag) {
// If we are back at the root node then we
// have an unmatched end tag
- if (!d->lpszName)
- {
- pXML->error=eXMLErrorUnmatchedEndTag;
+ if (!d->lpszName) {
+ pXML->error = eXMLErrorUnmatchedEndTag;
return FALSE;
}
@@ -1403,55 +1650,56 @@ int XMLNode::ParseXMLElement(void *pa)
// element then we only need to unwind
// once more...
- if (myTagCompare(d->lpszName, pXML->lpEndTag)==0)
- {
+ if (myTagCompare(d->lpszName,
+ pXML->lpEndTag) == 0) {
pXML->cbEndTag = 0;
}
return TRUE;
- } else
- if (pXML->cbNewElement)
- {
- // If the call indicated a new element is to
- // be created on THIS element.
-
- // If the name of this element matches the
- // name of the element we need to create
- // then we need to return to the caller
- // and let it process the element.
-
- if (myTagCompare(d->lpszName, pXML->lpNewElement)==0)
- {
- return TRUE;
- }
-
- // Add the new element and recurse
- pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1);
- pXML->cbNewElement = 0;
+ } else if (pXML->cbNewElement) {
+ // If the call indicated a new element is to
+ // be created on THIS element.
+
+ // If the name of this element matches the
+ // name of the element we need to create
+ // then we need to return to the caller
+ // and let it process the element.
+
+ if (myTagCompare(d->lpszName,
+ pXML->lpNewElement) == 0) {
+ return TRUE;
}
- else
- {
- // If we didn't have a new element to create
- pNew = emptyXMLNode;
- }
+ // Add the new element and recurse
+ pNew =
+ addChild_priv(MEMORYINCREASE,
+ stringDup(pXML->
+ lpNewElement,
+ pXML->
+ cbNewElement),
+ 0, -1);
+ pXML->cbNewElement = 0;
+ } else {
+ // If we didn't have a new element to create
+ pNew = emptyXMLNode;
+
+ }
}
}
}
break;
- // If we found an end tag
+ // If we found an end tag
case eTokenTagEnd:
// If we have node text then add this to the element
- if (maybeAddTxT(pXML,token.pStr)) return FALSE;
+ if (maybeAddTxT(pXML, token.pStr)) return FALSE;
// Find the name of the end tag
token = GetNextToken(pXML, &cbTemp, &xtype);
// The end tag should be text
- if (xtype != eTokenText)
- {
+ if (xtype != eTokenText) {
pXML->error = eXMLErrorMissingEndTagName;
return FALSE;
}
@@ -1459,12 +1707,11 @@ int XMLNode::ParseXMLElement(void *pa)
// After the end tag we should find a closing tag
token = GetNextToken(pXML, &cbToken, &xtype);
- if (xtype != eTokenCloseTag)
- {
+ if (xtype != eTokenCloseTag) {
pXML->error = eXMLErrorMissingEndTagName;
return FALSE;
}
- pXML->lpszText=pXML->lpXML+pXML->nIndex;
+ pXML->lpszText = pXML->lpXML + pXML->nIndex;
// We need to return to the previous caller. If the name
// of the tag cannot be found we need to keep returning to
@@ -1472,14 +1719,14 @@ int XMLNode::ParseXMLElement(void *pa)
if (myTagCompare(d->lpszName, lpszTemp) != 0)
#ifdef STRICT_PARSING
{
- pXML->error=eXMLErrorUnmatchedEndTag;
- pXML->nIndexMissigEndTag=pXML->nIndex;
+ pXML->error = eXMLErrorUnmatchedEndTag;
+ pXML->nIndexMissigEndTag = pXML->nIndex;
return FALSE;
}
#else
{
- pXML->error=eXMLErrorMissingEndTag;
- pXML->nIndexMissigEndTag=pXML->nIndex;
+ pXML->error = eXMLErrorMissingEndTag;
+ pXML->nIndexMissigEndTag = pXML->nIndex;
pXML->lpEndTag = lpszTemp;
pXML->cbEndTag = cbTemp;
}
@@ -1489,12 +1736,12 @@ int XMLNode::ParseXMLElement(void *pa)
exactMemory(d);
return TRUE;
- // If we found a clear (unformatted) token
+ // If we found a clear (unformatted) token
case eTokenClear:
// If we have node text then add this to the element
- if (maybeAddTxT(pXML,token.pStr)) return FALSE;
+ if (maybeAddTxT(pXML, token.pStr)) return FALSE;
if (parseClearTag(pXML, token.pClr)) return FALSE;
- pXML->lpszText=pXML->lpXML+pXML->nIndex;
+ pXML->lpszText = pXML->lpXML + pXML->nIndex;
break;
default:
@@ -1502,21 +1749,19 @@ int XMLNode::ParseXMLElement(void *pa)
}
break;
- // If we are inside a tag definition we need to search for attributes
+ // If we are inside a tag definition we need to search for attributes
case eInsideTag:
// Check what part of the attribute (name, equals, value) we
// are looking for.
- switch(attrib)
- {
- // If we are looking for a new attribute
+ switch (attrib) {
+ // If we are looking for a new attribute
case eAttribName:
// Check what the current token type is
- switch(xtype)
- {
- // If the current type is text...
- // Eg. 'attribute'
+ switch (xtype) {
+ // If the current type is text...
+ // Eg. 'attribute'
case eTokenText:
// Cache the token then indicate that we are next to
// look for the equals
@@ -1525,22 +1770,22 @@ int XMLNode::ParseXMLElement(void *pa)
attrib = eAttribEquals;
break;
- // If we found a closing tag...
- // Eg. '>'
+ // If we found a closing tag...
+ // Eg. '>'
case eTokenCloseTag:
// We are now outside the tag
status = eOutsideTag;
- pXML->lpszText=pXML->lpXML+pXML->nIndex;
+ pXML->lpszText = pXML->lpXML + pXML->nIndex;
break;
- // If we found a short hand '/>' closing tag then we can
- // return to the caller
+ // If we found a short hand '/>' closing tag then we can
+ // return to the caller
case eTokenShortHandClose:
exactMemory(d);
- pXML->lpszText=pXML->lpXML+pXML->nIndex;
+ pXML->lpszText = pXML->lpXML + pXML->nIndex;
return TRUE;
- // Errors...
+ // Errors...
case eTokenQuotedText: /* '"SomeText"' */
case eTokenTagStart: /* '<' */
case eTokenTagEnd: /* '</' */
@@ -1549,50 +1794,51 @@ int XMLNode::ParseXMLElement(void *pa)
case eTokenClear:
pXML->error = eXMLErrorUnexpectedToken;
return FALSE;
- default: break;
+ default:
+ break;
}
break;
- // If we are looking for an equals
+ // If we are looking for an equals
case eAttribEquals:
// Check what the current token type is
- switch(xtype)
- {
- // If the current type is text...
- // Eg. 'Attribute AnotherAttribute'
+ switch (xtype) {
+ // If the current type is text...
+ // Eg. 'Attribute AnotherAttribute'
case eTokenText:
// Add the unvalued attribute to the list
- addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL);
+ addAttribute_priv(MEMORYINCREASE,
+ stringDup(lpszTemp, cbTemp), NULL);
// Cache the token then indicate. We are next to
// look for the equals attribute
lpszTemp = token.pStr;
cbTemp = cbToken;
break;
- // If we found a closing tag 'Attribute >' or a short hand
- // closing tag 'Attribute />'
+ // If we found a closing tag 'Attribute >' or a short hand
+ // closing tag 'Attribute />'
case eTokenShortHandClose:
case eTokenCloseTag:
// If we are a declaration element '<?' then we need
// to remove extra closing '?' if it exists
- pXML->lpszText=pXML->lpXML+pXML->nIndex;
+ pXML->lpszText = pXML->lpXML + pXML->nIndex;
if (d->isDeclaration &&
- (lpszTemp[cbTemp-1]) == _CXML('?'))
- {
+ (lpszTemp[cbTemp-1]) == _CXML('?')) {
cbTemp--;
- if (d->pParent && d->pParent->pParent) xtype = eTokenShortHandClose;
+ if (d->pParent && d->pParent->pParent) {
+ xtype = eTokenShortHandClose;
+ }
}
- if (cbTemp)
- {
+ if (cbTemp) {
// Add the unvalued attribute to the list
- addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL);
+ addAttribute_priv(MEMORYINCREASE,
+ stringDup(lpszTemp, cbTemp), NULL);
}
// If this is the end of the tag then return to the caller
- if (xtype == eTokenShortHandClose)
- {
+ if (xtype == eTokenShortHandClose) {
exactMemory(d);
return TRUE;
}
@@ -1601,15 +1847,15 @@ int XMLNode::ParseXMLElement(void *pa)
status = eOutsideTag;
break;
- // If we found the equals token...
- // Eg. 'Attribute ='
+ // If we found the equals token...
+ // Eg. 'Attribute ='
case eTokenEquals:
// Indicate that we next need to search for the value
// for the attribute
attrib = eAttribValue;
break;
- // Errors...
+ // Errors...
case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/
case eTokenTagStart: /* 'Attribute <' */
case eTokenTagEnd: /* 'Attribute </' */
@@ -1617,46 +1863,48 @@ int XMLNode::ParseXMLElement(void *pa)
case eTokenClear:
pXML->error = eXMLErrorUnexpectedToken;
return FALSE;
- default: break;
+ default:
+ break;
}
break;
- // If we are looking for an attribute value
+ // If we are looking for an attribute value
case eAttribValue:
// Check what the current token type is
- switch(xtype)
- {
- // If the current type is text or quoted text...
- // Eg. 'Attribute = "Value"' or 'Attribute = Value' or
- // 'Attribute = 'Value''.
+ switch (xtype) {
+ // If the current type is text or quoted text...
+ // Eg. 'Attribute = "Value"' or 'Attribute = Value' or
+ // 'Attribute = 'Value''.
case eTokenText:
case eTokenQuotedText:
// If we are a declaration element '<?' then we need
// to remove extra closing '?' if it exists
if (d->isDeclaration &&
- (token.pStr[cbToken-1]) == _CXML('?'))
- {
+ (token.pStr[cbToken-1]) == _CXML('?')) {
cbToken--;
}
- if (cbTemp)
- {
+ if (cbTemp) {
// Add the valued attribute to the list
- if (xtype==eTokenQuotedText) { token.pStr++; cbToken-=2; }
- XMLSTR attrVal=(XMLSTR)token.pStr;
- if (attrVal)
- {
- attrVal=fromXMLString(attrVal,cbToken,pXML);
+ if (xtype == eTokenQuotedText) {
+ token.pStr++;
+ cbToken -= 2;
+ }
+ XMLSTR attrVal = (XMLSTR)token.pStr;
+ if (attrVal) {
+ attrVal = fromXMLString(attrVal, cbToken, pXML);
if (!attrVal) return FALSE;
}
- addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp),attrVal);
+ addAttribute_priv(MEMORYINCREASE,
+ stringDup(lpszTemp, cbTemp),
+ attrVal);
}
// Indicate we are searching for a new attribute
attrib = eAttribName;
break;
- // Errors...
+ // Errors...
case eTokenTagStart: /* 'Attr = <' */
case eTokenTagEnd: /* 'Attr = </' */
case eTokenCloseTag: /* 'Attr = >' */
@@ -1667,109 +1915,104 @@ int XMLNode::ParseXMLElement(void *pa)
pXML->error = eXMLErrorUnexpectedToken;
return FALSE;
break;
- default: break;
+ default:
+ break;
}
}
}
}
// If we failed to obtain the next token
- else
- {
- if ((!d->isDeclaration)&&(d->pParent))
- {
+ else {
+ if ((!d->isDeclaration) && (d->pParent)) {
#ifdef STRICT_PARSING
- pXML->error=eXMLErrorUnmatchedEndTag;
+ pXML->error = eXMLErrorUnmatchedEndTag;
#else
- pXML->error=eXMLErrorMissingEndTag;
+ pXML->error = eXMLErrorMissingEndTag;
#endif
- pXML->nIndexMissigEndTag=pXML->nIndex;
+ pXML->nIndexMissigEndTag = pXML->nIndex;
}
- maybeAddTxT(pXML,pXML->lpXML+pXML->nIndex);
+ maybeAddTxT(pXML, pXML->lpXML + pXML->nIndex);
return FALSE;
}
}
}
// Count the number of lines and columns in an XML string.
-static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults)
-{
+static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto,
+ XMLResults *pResults) {
XMLCHAR ch;
assert(lpXML);
assert(pResults);
- struct XML xml={ lpXML,lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE };
+ struct XML xml = { lpXML, lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0,
+ TRUE };
pResults->nLine = 1;
pResults->nColumn = 1;
- while (xml.nIndex<nUpto)
- {
+ while (xml.nIndex < nUpto) {
ch = getNextChar(&xml);
if (ch != _CXML('\n')) pResults->nColumn++;
- else
- {
+ else {
pResults->nLine++;
- pResults->nColumn=1;
+ pResults->nColumn = 1;
}
}
}
// Parse XML and return the root element.
-XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults)
-{
- if (!lpszXML)
- {
- if (pResults)
- {
- pResults->error=eXMLErrorNoElements;
- pResults->nLine=0;
- pResults->nColumn=0;
+XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag,
+ XMLResults *pResults) {
+ if (!lpszXML) {
+ if (pResults) {
+ pResults->error = eXMLErrorNoElements;
+ pResults->nLine = 0;
+ pResults->nColumn = 0;
}
return emptyXMLNode;
}
- XMLNode xnode(NULL,NULL,FALSE);
- struct XML xml={ lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE };
+ XMLNode xnode(NULL, NULL, FALSE);
+ struct XML xml = { lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0,
+ TRUE };
// Create header element
xnode.ParseXMLElement(&xml);
enum XMLError error = xml.error;
- if (!xnode.nChildNode()) error=eXMLErrorNoXMLTagFound;
- if ((xnode.nChildNode()==1)&&(xnode.nElement()==1)) xnode=xnode.getChildNode(); // skip the empty node
+ if (!xnode.nChildNode()) error = eXMLErrorNoXMLTagFound;
+ if ((xnode.nChildNode() == 1) && (xnode.nElement() == 1)) {
+ xnode = xnode.getChildNode(); // skip the empty node
+ }
// If no error occurred
- if ((error==eXMLErrorNone)||(error==eXMLErrorMissingEndTag)||(error==eXMLErrorNoXMLTagFound))
- {
- XMLCSTR name=xnode.getName();
- if (tag&&(*tag)&&((!name)||(xstricmp(name,tag))))
- {
- xnode=xnode.getChildNode(tag);
- if (xnode.isEmpty())
- {
- if (pResults)
- {
- pResults->error=eXMLErrorFirstTagNotFound;
- pResults->nLine=0;
- pResults->nColumn=0;
+ if ((error == eXMLErrorNone) || (error == eXMLErrorMissingEndTag) ||
+ (error == eXMLErrorNoXMLTagFound)) {
+ XMLCSTR name = xnode.getName();
+ if (tag && (*tag) && ((!name) || (xstricmp(name, tag)))) {
+ xnode = xnode.getChildNode(tag);
+ if (xnode.isEmpty()) {
+ if (pResults) {
+ pResults->error = eXMLErrorFirstTagNotFound;
+ pResults->nLine = 0;
+ pResults->nColumn = 0;
}
return emptyXMLNode;
}
}
- } else
- {
+ } else {
// Cleanup: this will destroy all the nodes
xnode = emptyXMLNode;
}
// If we have been given somewhere to place results
- if (pResults)
- {
+ if (pResults) {
pResults->error = error;
// If we have an error
- if (error!=eXMLErrorNone)
- {
- if (error==eXMLErrorMissingEndTag) xml.nIndex=xml.nIndexMissigEndTag;
+ if (error != eXMLErrorNone) {
+ if (error == eXMLErrorMissingEndTag) {
+ xml.nIndex = xml.nIndexMissigEndTag;
+ }
// Find which line and column it starts on.
CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults);
}
@@ -1777,72 +2020,95 @@ XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults)
return xnode;
}
-XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults)
-{
- if (pResults) { pResults->nLine=0; pResults->nColumn=0; }
- FILE *f=xfopen(filename,_CXML("rb"));
- if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; }
- fseek(f,0,SEEK_END);
- int l=ftell(f),headerSz=0;
- if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; fclose(f); return emptyXMLNode; }
- fseek(f,0,SEEK_SET);
- unsigned char *buf=(unsigned char*)malloc(l+4);
- l=fread(buf,1,l,f);
+XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) {
+ if (pResults) {
+ pResults->nLine = 0;
+ pResults->nColumn = 0;
+ }
+ FILE *f = xfopen(filename, _CXML("rb"));
+ if (f == NULL) {
+ if (pResults) pResults->error = eXMLErrorFileNotFound;
+ return emptyXMLNode;
+ }
+ fseek(f, 0, SEEK_END);
+ int l = ftell(f), headerSz = 0;
+ if (!l) {
+ if (pResults) pResults->error = eXMLErrorEmpty;
+ fclose(f);
+ return emptyXMLNode;
+ }
+ fseek(f, 0, SEEK_SET);
+ unsigned char *buf = (unsigned char*)malloc(l + 4);
+ l = fread(buf, 1, l, f);
fclose(f);
- buf[l]=0;buf[l+1]=0;buf[l+2]=0;buf[l+3]=0;
+ buf[l] = 0;
+ buf[l+1] = 0;
+ buf[l+2] = 0;
+ buf[l+3] = 0;
#ifdef _XMLWIDECHAR
- if (guessWideCharChars)
- {
- if (!myIsTextWideChar(buf,l))
- {
- XMLNode::XMLCharEncoding ce=XMLNode::char_encoding_legacy;
- if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) { headerSz=3; ce=XMLNode::char_encoding_UTF8; }
- XMLSTR b2=myMultiByteToWideChar((const char*)(buf+headerSz),ce);
- free(buf); buf=(unsigned char*)b2; headerSz=0;
- } else
- {
- if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2;
- if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2;
+ if (guessWideCharChars) {
+ if (!myIsTextWideChar(buf, l)) {
+ XMLNode::XMLCharEncoding ce = XMLNode::char_encoding_legacy;
+ if ((buf[0] == 0xef) && (buf[1] == 0xbb) && (buf[2] == 0xbf)) {
+ headerSz = 3;
+ ce = XMLNode::char_encoding_UTF8;
+ }
+ XMLSTR b2 = myMultiByteToWideChar((const char*)(buf + headerSz), ce);
+ free(buf);
+ buf = (unsigned char*)b2;
+ headerSz = 0;
+ } else {
+ if ((buf[0] == 0xef) && (buf[1] == 0xff)) headerSz = 2;
+ if ((buf[0] == 0xff) && (buf[1] == 0xfe)) headerSz = 2;
}
}
#else
- if (guessWideCharChars)
- {
- if (myIsTextWideChar(buf,l))
- {
- if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2;
- if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2;
- char *b2=myWideCharToMultiByte((const wchar_t*)(buf+headerSz));
- free(buf); buf=(unsigned char*)b2; headerSz=0;
- } else
- {
- if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3;
+ if (guessWideCharChars) {
+ if (myIsTextWideChar(buf, l)) {
+ if ((buf[0] == 0xef) && (buf[1] == 0xff)) headerSz = 2;
+ if ((buf[0] == 0xff) && (buf[1] == 0xfe)) headerSz = 2;
+ char *b2 = myWideCharToMultiByte((const wchar_t*)(buf + headerSz));
+ free(buf);
+ buf = (unsigned char*)b2;
+ headerSz = 0;
+ } else {
+ if ((buf[0] == 0xef) && (buf[1] == 0xbb) && (buf[2] == 0xbf)) {
+ headerSz = 3;
+ }
}
}
#endif
- if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; }
- XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults);
+ if (!buf) {
+ if (pResults) pResults->error = eXMLErrorCharConversionError;
+ return emptyXMLNode;
+ }
+ XMLNode x = parseString((XMLSTR)(buf + headerSz), tag, pResults);
free(buf);
return x;
}
-static inline void charmemset(XMLSTR dest,XMLCHAR c,int l) { while (l--) *(dest++)=c; }
+static inline void charmemset(XMLSTR dest, XMLCHAR c, int l) {
+ while (l--) *(dest++) = c;
+}
// private:
// Creates an user friendly XML string from a given element with
// appropriate white space and carriage returns.
//
// This recurses through all subnodes then adds contents of the nodes to the
// string.
-int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat)
-{
+int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker,
+ int nFormat) {
int nResult = 0;
- int cb=nFormat<0?0:nFormat;
+ int cb = nFormat < 0 ? 0 : nFormat;
int cbElement;
- int nChildFormat=-1;
- int nElementI=pEntry->nChild+pEntry->nText+pEntry->nClear;
- int i,j;
- if ((nFormat>=0)&&(nElementI==1)&&(pEntry->nText==1)&&(!pEntry->isDeclaration)) nFormat=-2;
+ int nChildFormat = -1;
+ int nElementI = pEntry->nChild + pEntry->nText + pEntry->nClear;
+ int i, j;
+ if ((nFormat >= 0) && (nElementI == 1) && (pEntry->nText == 1) &&
+ (!pEntry->isDeclaration)) {
+ nFormat = -2;
+ }
assert(pEntry);
@@ -1851,47 +2117,43 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma
// If the element has no name then assume this is the head node.
cbElement = (int)LENSTR(pEntry->lpszName);
- if (cbElement)
- {
+ if (cbElement) {
// "<elementname "
- if (lpszMarker)
- {
+ if (lpszMarker) {
if (cb) charmemset(lpszMarker, INDENTCHAR, cb);
nResult = cb;
- lpszMarker[nResult++]=_CXML('<');
- if (pEntry->isDeclaration) lpszMarker[nResult++]=_CXML('?');
+ lpszMarker[nResult++] = _CXML('<');
+ if (pEntry->isDeclaration) lpszMarker[nResult++] = _CXML('?');
xstrcpy(&lpszMarker[nResult], pEntry->lpszName);
- nResult+=cbElement;
- lpszMarker[nResult++]=_CXML(' ');
+ nResult += cbElement;
+ lpszMarker[nResult++] = _CXML(' ');
- } else
- {
- nResult+=cbElement+2+cb;
+ } else {
+ nResult += cbElement + 2 + cb;
if (pEntry->isDeclaration) nResult++;
}
// Enumerate attributes and add them to the string
- XMLAttribute *pAttr=pEntry->pAttribute;
- for (i=0; i<pEntry->nAttribute; i++)
- {
+ XMLAttribute *pAttr = pEntry->pAttribute;
+ for (i = 0; i < pEntry->nAttribute; i++) {
// "Attrib
cb = (int)LENSTR(pAttr->lpszName);
- if (cb)
- {
+ if (cb) {
if (lpszMarker) xstrcpy(&lpszMarker[nResult], pAttr->lpszName);
nResult += cb;
// "Attrib=Value "
- if (pAttr->lpszValue)
- {
- cb=(int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue);
- if (lpszMarker)
- {
- lpszMarker[nResult]=_CXML('=');
- lpszMarker[nResult+1]=_CXML('"');
- if (cb) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2],pAttr->lpszValue);
- lpszMarker[nResult+cb+2]=_CXML('"');
+ if (pAttr->lpszValue) {
+ cb = (int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue);
+ if (lpszMarker) {
+ lpszMarker[nResult] = _CXML('=');
+ lpszMarker[nResult+1] = _CXML('"');
+ if (cb) {
+ ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2],
+ pAttr->lpszValue);
+ }
+ lpszMarker[nResult+cb+2] = _CXML('"');
}
- nResult+=cb+3;
+ nResult += cb + 3;
}
if (lpszMarker) lpszMarker[nResult] = _CXML(' ');
nResult++;
@@ -1899,27 +2161,22 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma
pAttr++;
}
- if (pEntry->isDeclaration)
- {
- if (lpszMarker)
- {
- lpszMarker[nResult-1]=_CXML('?');
- lpszMarker[nResult]=_CXML('>');
+ if (pEntry->isDeclaration) {
+ if (lpszMarker) {
+ lpszMarker[nResult-1] = _CXML('?');
+ lpszMarker[nResult] = _CXML('>');
}
nResult++;
- if (nFormat!=-1)
- {
- if (lpszMarker) lpszMarker[nResult]=_CXML('\n');
+ if (nFormat != -1) {
+ if (lpszMarker) lpszMarker[nResult] = _CXML('\n');
nResult++;
}
} else
// If there are child nodes we need to terminate the start tag
- if (nElementI)
- {
- if (lpszMarker) lpszMarker[nResult-1]=_CXML('>');
- if (nFormat>=0)
- {
- if (lpszMarker) lpszMarker[nResult]=_CXML('\n');
+ if (nElementI) {
+ if (lpszMarker) lpszMarker[nResult-1] = _CXML('>');
+ if (nFormat >= 0) {
+ if (lpszMarker) lpszMarker[nResult] = _CXML('\n');
nResult++;
}
} else nResult--;
@@ -1927,145 +2184,137 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma
// Calculate the child format for when we recurse. This is used to
// determine the number of spaces used for prefixes.
- if (nFormat!=-1)
- {
- if (cbElement&&(!pEntry->isDeclaration)) nChildFormat=nFormat+1;
- else nChildFormat=nFormat;
+ if (nFormat != -1) {
+ if (cbElement && (!pEntry->isDeclaration)) nChildFormat = nFormat + 1;
+ else nChildFormat = nFormat;
}
// Enumerate through remaining children
- for (i=0; i<nElementI; i++)
- {
- j=pEntry->pOrder[i];
- switch((XMLElementType)(j&3))
- {
- // Text nodes
- case eNodeText:
- {
- // "Text"
- XMLCSTR pChild=pEntry->pText[j>>2];
- cb = (int)ToXMLStringTool::lengthXMLString(pChild);
- if (cb)
- {
- if (nFormat>=0)
- {
- if (lpszMarker)
- {
- charmemset(&lpszMarker[nResult],INDENTCHAR,nFormat+1);
- ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+nFormat+1],pChild);
- lpszMarker[nResult+nFormat+1+cb]=_CXML('\n');
- }
- nResult+=cb+nFormat+2;
- } else
- {
- if (lpszMarker) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], pChild);
- nResult += cb;
+ for (i = 0; i < nElementI; i++) {
+ j = pEntry->pOrder[i];
+ switch ((XMLElementType)(j&3)) {
+ // Text nodes
+ case eNodeText: {
+ // "Text"
+ XMLCSTR pChild = pEntry->pText[j>>2];
+ cb = (int)ToXMLStringTool::lengthXMLString(pChild);
+ if (cb) {
+ if (nFormat >= 0) {
+ if (lpszMarker) {
+ charmemset(&lpszMarker[nResult], INDENTCHAR,
+ nFormat + 1);
+ ToXMLStringTool::toXMLUnSafe(
+ &lpszMarker[nResult+nFormat+1], pChild);
+ lpszMarker[nResult+nFormat+1+cb] = _CXML('\n');
+ }
+ nResult += cb + nFormat + 2;
+ } else {
+ if (lpszMarker) {
+ ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult],
+ pChild);
}
+ nResult += cb;
}
- break;
}
+ break;
+ }
// Clear type nodes
- case eNodeClear:
- {
- XMLClear *pChild=pEntry->pClear+(j>>2);
- // "OpenTag"
- cb = (int)LENSTR(pChild->lpszOpenTag);
- if (cb)
- {
- if (nFormat!=-1)
- {
- if (lpszMarker)
- {
- charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat+1);
- xstrcpy(&lpszMarker[nResult+nFormat+1], pChild->lpszOpenTag);
- }
- nResult+=cb+nFormat+1;
+ case eNodeClear: {
+ XMLClear *pChild = pEntry->pClear + (j >> 2);
+ // "OpenTag"
+ cb = (int)LENSTR(pChild->lpszOpenTag);
+ if (cb) {
+ if (nFormat != -1) {
+ if (lpszMarker) {
+ charmemset(&lpszMarker[nResult], INDENTCHAR,
+ nFormat + 1);
+ xstrcpy(&lpszMarker[nResult+nFormat+1],
+ pChild->lpszOpenTag);
}
- else
- {
- if (lpszMarker)xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag);
- nResult += cb;
+ nResult += cb + nFormat + 1;
+ } else {
+ if (lpszMarker) {
+ xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag);
}
- }
-
- // "OpenTag Value"
- cb = (int)LENSTR(pChild->lpszValue);
- if (cb)
- {
- if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszValue);
nResult += cb;
}
+ }
- // "OpenTag Value CloseTag"
- cb = (int)LENSTR(pChild->lpszCloseTag);
- if (cb)
- {
- if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag);
- nResult += cb;
+ // "OpenTag Value"
+ cb = (int)LENSTR(pChild->lpszValue);
+ if (cb) {
+ if (lpszMarker) {
+ xstrcpy(&lpszMarker[nResult], pChild->lpszValue);
}
+ nResult += cb;
+ }
- if (nFormat!=-1)
- {
- if (lpszMarker) lpszMarker[nResult] = _CXML('\n');
- nResult++;
+ // "OpenTag Value CloseTag"
+ cb = (int)LENSTR(pChild->lpszCloseTag);
+ if (cb) {
+ if (lpszMarker) {
+ xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag);
}
- break;
+ nResult += cb;
}
- // Element nodes
- case eNodeChild:
- {
- // Recursively add child nodes
- nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, lpszMarker ? lpszMarker + nResult : 0, nChildFormat);
- break;
+ if (nFormat != -1) {
+ if (lpszMarker) lpszMarker[nResult] = _CXML('\n');
+ nResult++;
}
- default: break;
+ break;
+ }
+
+ // Element nodes
+ case eNodeChild: {
+ // Recursively add child nodes
+ nResult += CreateXMLStringR(pEntry->pChild[j>>2].d,
+ lpszMarker ? lpszMarker + nResult : 0,
+ nChildFormat);
+ break;
+ }
+ default:
+ break;
}
}
- if ((cbElement)&&(!pEntry->isDeclaration))
- {
+ if ((cbElement) && (!pEntry->isDeclaration)) {
// If we have child entries we need to use long XML notation for
// closing the element - "<elementname>blah blah blah</elementname>"
- if (nElementI)
- {
+ if (nElementI) {
// "</elementname>\0"
- if (lpszMarker)
- {
- if (nFormat >=0)
- {
- charmemset(&lpszMarker[nResult], INDENTCHAR,nFormat);
- nResult+=nFormat;
+ if (lpszMarker) {
+ if (nFormat >= 0) {
+ charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat);
+ nResult += nFormat;
}
- lpszMarker[nResult]=_CXML('<'); lpszMarker[nResult+1]=_CXML('/');
+ lpszMarker[nResult] = _CXML('<');
+ lpszMarker[nResult+1] = _CXML('/');
nResult += 2;
xstrcpy(&lpszMarker[nResult], pEntry->lpszName);
nResult += cbElement;
- lpszMarker[nResult]=_CXML('>');
+ lpszMarker[nResult] = _CXML('>');
if (nFormat == -1) nResult++;
- else
- {
- lpszMarker[nResult+1]=_CXML('\n');
- nResult+=2;
+ else {
+ lpszMarker[nResult+1] = _CXML('\n');
+ nResult += 2;
}
- } else
- {
- if (nFormat>=0) nResult+=cbElement+4+nFormat;
- else if (nFormat==-1) nResult+=cbElement+3;
- else nResult+=cbElement+4;
+ } else {
+ if (nFormat >= 0) nResult += cbElement + 4 + nFormat;
+ else if (nFormat == -1) nResult += cbElement + 3;
+ else nResult += cbElement + 4;
}
- } else
- {
+ } else {
// If there are no children we can use shorthand XML notation -
// "<elementname/>"
// "/>\0"
- if (lpszMarker)
- {
- lpszMarker[nResult]=_CXML('/'); lpszMarker[nResult+1]=_CXML('>');
- if (nFormat != -1) lpszMarker[nResult+2]=_CXML('\n');
+ if (lpszMarker) {
+ lpszMarker[nResult] = _CXML('/');
+ lpszMarker[nResult+1] = _CXML('>');
+ if (nFormat != -1) lpszMarker[nResult+2] = _CXML('\n');
}
nResult += nFormat == -1 ? 2 : 3;
}
@@ -2085,342 +2334,401 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma
// NULL terminator.
// @return XMLSTR - Allocated XML string, you must free
// this with free().
-XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const
-{
- if (!d) { if (pnSize) *pnSize=0; return NULL; }
+XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const {
+ if (!d) {
+ if (pnSize) *pnSize = 0;
+ return NULL;
+ }
XMLSTR lpszResult = NULL;
int cbStr;
// Recursively Calculate the size of the XML string
- if (!dropWhiteSpace) nFormat=0;
+ if (!dropWhiteSpace) nFormat = 0;
nFormat = nFormat ? 0 : -1;
cbStr = CreateXMLStringR(d, 0, nFormat);
// Alllocate memory for the XML string + the NULL terminator and
// create the recursively XML string.
- lpszResult=(XMLSTR)malloc((cbStr+1)*sizeof(XMLCHAR));
+ lpszResult = (XMLSTR)malloc((cbStr + 1) * sizeof(XMLCHAR));
CreateXMLStringR(d, lpszResult, nFormat);
- lpszResult[cbStr]=_CXML('\0');
+ lpszResult[cbStr] = _CXML('\0');
if (pnSize) *pnSize = cbStr;
return lpszResult;
}
-int XMLNode::detachFromParent(XMLNodeData *d)
-{
- XMLNode *pa=d->pParent->pChild;
- int i=0;
- while (((void*)(pa[i].d))!=((void*)d)) i++;
+int XMLNode::detachFromParent(XMLNodeData *d) {
+ XMLNode *pa = d->pParent->pChild;
+ int i = 0;
+ while (((void*)(pa[i].d)) != ((void*)d)) i++;
d->pParent->nChild--;
- if (d->pParent->nChild) memmove(pa+i,pa+i+1,(d->pParent->nChild-i)*sizeof(XMLNode));
- else { free(pa); d->pParent->pChild=NULL; }
- return removeOrderElement(d->pParent,eNodeChild,i);
+ if (d->pParent->nChild) {
+ memmove(pa + i, pa + i + 1, (d->pParent->nChild - i)*sizeof(XMLNode));
+ } else {
+ free(pa);
+ d->pParent->pChild = NULL;
+ }
+ return removeOrderElement(d->pParent, eNodeChild, i);
}
-XMLNode::~XMLNode()
-{
+XMLNode::~XMLNode() {
if (!d) return;
d->ref_count--;
emptyTheNode(0);
}
-void XMLNode::deleteNodeContent()
-{
+void XMLNode::deleteNodeContent() {
if (!d) return;
- if (d->pParent) { detachFromParent(d); d->pParent=NULL; d->ref_count--; }
+ if (d->pParent) {
+ detachFromParent(d);
+ d->pParent = NULL;
+ d->ref_count--;
+ }
emptyTheNode(1);
}
-void XMLNode::emptyTheNode(char force)
-{
- XMLNodeData *dd=d; // warning: must stay this way!
- if ((dd->ref_count==0)||force)
- {
+void XMLNode::emptyTheNode(char force) {
+ XMLNodeData *dd = d; // warning: must stay this way!
+ if ((dd->ref_count == 0) || force) {
if (d->pParent) detachFromParent(d);
int i;
XMLNode *pc;
- for(i=0; i<dd->nChild; i++)
- {
- pc=dd->pChild+i;
- pc->d->pParent=NULL;
+ for (i = 0; i < dd->nChild; i++) {
+ pc = dd->pChild + i;
+ pc->d->pParent = NULL;
pc->d->ref_count--;
pc->emptyTheNode(force);
}
myFree(dd->pChild);
- for(i=0; i<dd->nText; i++) free((void*)dd->pText[i]);
+ for (i = 0; i < dd->nText; i++) free((void*)dd->pText[i]);
myFree(dd->pText);
- for(i=0; i<dd->nClear; i++) free((void*)dd->pClear[i].lpszValue);
+ for (i = 0; i < dd->nClear; i++) free((void*)dd->pClear[i].lpszValue);
myFree(dd->pClear);
- for(i=0; i<dd->nAttribute; i++)
- {
+ for (i = 0; i < dd->nAttribute; i++) {
free((void*)dd->pAttribute[i].lpszName);
- if (dd->pAttribute[i].lpszValue) free((void*)dd->pAttribute[i].lpszValue);
+ if (dd->pAttribute[i].lpszValue) {
+ free((void*)dd->pAttribute[i].lpszValue);
+ }
}
myFree(dd->pAttribute);
myFree(dd->pOrder);
myFree((void*)dd->lpszName);
- dd->nChild=0; dd->nText=0; dd->nClear=0; dd->nAttribute=0;
- dd->pChild=NULL; dd->pText=NULL; dd->pClear=NULL; dd->pAttribute=NULL;
- dd->pOrder=NULL; dd->lpszName=NULL; dd->pParent=NULL;
+ dd->nChild = 0;
+ dd->nText = 0;
+ dd->nClear = 0;
+ dd->nAttribute = 0;
+ dd->pChild = NULL;
+ dd->pText = NULL;
+ dd->pClear = NULL;
+ dd->pAttribute = NULL;
+ dd->pOrder = NULL;
+ dd->lpszName = NULL;
+ dd->pParent = NULL;
}
- if (dd->ref_count==0)
- {
+ if (dd->ref_count == 0) {
free(dd);
- d=NULL;
+ d = NULL;
}
}
-XMLNode& XMLNode::operator=( const XMLNode& A )
-{
+XMLNode& XMLNode::operator=( const XMLNode & A ) {
// shallow copy
- if (this != &A)
- {
- if (d) { d->ref_count--; emptyTheNode(0); }
- d=A.d;
+ if (this != &A) {
+ if (d) {
+ d->ref_count--;
+ emptyTheNode(0);
+ }
+ d = A.d;
if (d) (d->ref_count) ++ ;
}
return *this;
}
-XMLNode::XMLNode(const XMLNode &A)
-{
+XMLNode::XMLNode(const XMLNode &A) {
// shallow copy
- d=A.d;
+ d = A.d;
if (d) (d->ref_count)++ ;
}
-XMLNode XMLNode::deepCopy() const
-{
+XMLNode XMLNode::deepCopy() const {
if (!d) return XMLNode::emptyXMLNode;
- XMLNode x(NULL,stringDup(d->lpszName),d->isDeclaration);
- XMLNodeData *p=x.d;
- int n=d->nAttribute;
- if (n)
- {
- p->nAttribute=n; p->pAttribute=(XMLAttribute*)malloc(n*sizeof(XMLAttribute));
- while (n--)
- {
- p->pAttribute[n].lpszName=stringDup(d->pAttribute[n].lpszName);
- p->pAttribute[n].lpszValue=stringDup(d->pAttribute[n].lpszValue);
+ XMLNode x(NULL, stringDup(d->lpszName), d->isDeclaration);
+ XMLNodeData *p = x.d;
+ int n = d->nAttribute;
+ if (n) {
+ p->nAttribute = n;
+ p->pAttribute = (XMLAttribute*)malloc(n * sizeof(XMLAttribute));
+ while (n--) {
+ p->pAttribute[n].lpszName = stringDup(d->pAttribute[n].lpszName);
+ p->pAttribute[n].lpszValue = stringDup(d->pAttribute[n].lpszValue);
}
}
- if (d->pOrder)
- {
- n=(d->nChild+d->nText+d->nClear)*sizeof(int); p->pOrder=(int*)malloc(n); memcpy(p->pOrder,d->pOrder,n);
- }
- n=d->nText;
- if (n)
- {
- p->nText=n; p->pText=(XMLCSTR*)malloc(n*sizeof(XMLCSTR));
- while(n--) p->pText[n]=stringDup(d->pText[n]);
- }
- n=d->nClear;
- if (n)
- {
- p->nClear=n; p->pClear=(XMLClear*)malloc(n*sizeof(XMLClear));
- while (n--)
- {
- p->pClear[n].lpszCloseTag=d->pClear[n].lpszCloseTag;
- p->pClear[n].lpszOpenTag=d->pClear[n].lpszOpenTag;
- p->pClear[n].lpszValue=stringDup(d->pClear[n].lpszValue);
+ if (d->pOrder) {
+ n = (d->nChild + d->nText + d->nClear) * sizeof(int);
+ p->pOrder = (int*)malloc(n);
+ memcpy(p->pOrder, d->pOrder, n);
+ }
+ n = d->nText;
+ if (n) {
+ p->nText = n;
+ p->pText = (XMLCSTR*)malloc(n * sizeof(XMLCSTR));
+ while (n--) p->pText[n] = stringDup(d->pText[n]);
+ }
+ n = d->nClear;
+ if (n) {
+ p->nClear = n;
+ p->pClear = (XMLClear*)malloc(n * sizeof(XMLClear));
+ while (n--) {
+ p->pClear[n].lpszCloseTag = d->pClear[n].lpszCloseTag;
+ p->pClear[n].lpszOpenTag = d->pClear[n].lpszOpenTag;
+ p->pClear[n].lpszValue = stringDup(d->pClear[n].lpszValue);
}
}
- n=d->nChild;
- if (n)
- {
- p->nChild=n; p->pChild=(XMLNode*)malloc(n*sizeof(XMLNode));
- while (n--)
- {
- p->pChild[n].d=NULL;
- p->pChild[n]=d->pChild[n].deepCopy();
- p->pChild[n].d->pParent=p;
+ n = d->nChild;
+ if (n) {
+ p->nChild = n;
+ p->pChild = (XMLNode*)malloc(n * sizeof(XMLNode));
+ while (n--) {
+ p->pChild[n].d = NULL;
+ p->pChild[n] = d->pChild[n].deepCopy();
+ p->pChild[n].d->pParent = p;
}
}
return x;
}
-XMLNode XMLNode::addChild(XMLNode childNode, int pos)
-{
- XMLNodeData *dc=childNode.d;
- if ((!dc)||(!d)) return childNode;
- if (!dc->lpszName)
- {
+XMLNode XMLNode::addChild(XMLNode childNode, int pos) {
+ XMLNodeData *dc = childNode.d;
+ if ((!dc) || (!d)) return childNode;
+ if (!dc->lpszName) {
// this is a root node: todo: correct fix
- int j=pos;
- while (dc->nChild)
- {
- addChild(dc->pChild[0],j);
- if (pos>=0) j++;
+ int j = pos;
+ while (dc->nChild) {
+ addChild(dc->pChild[0], j);
+ if (pos >= 0) j++;
}
return childNode;
}
- if (dc->pParent) { if ((detachFromParent(dc)<=pos)&&(dc->pParent==d)) pos--; } else dc->ref_count++;
- dc->pParent=d;
+ if (dc->pParent) {
+ if ((detachFromParent(dc) <= pos) && (dc->pParent == d)) pos--;
+ } else dc->ref_count++;
+ dc->pParent = d;
// int nc=d->nChild;
// d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode));
- d->pChild=(XMLNode*)addToOrder(0,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);
- d->pChild[pos].d=dc;
+ d->pChild = (XMLNode*)addToOrder(0, &pos, d->nChild, d->pChild,
+ sizeof(XMLNode), eNodeChild);
+ d->pChild[pos].d = dc;
d->nChild++;
return childNode;
}
-void XMLNode::deleteAttribute(int i)
-{
- if ((!d)||(i<0)||(i>=d->nAttribute)) return;
+void XMLNode::deleteAttribute(int i) {
+ if ((!d) || (i < 0) || (i >= d->nAttribute)) return;
d->nAttribute--;
- XMLAttribute *p=d->pAttribute+i;
+ XMLAttribute *p = d->pAttribute + i;
free((void*)p->lpszName);
if (p->lpszValue) free((void*)p->lpszValue);
- if (d->nAttribute) memmove(p,p+1,(d->nAttribute-i)*sizeof(XMLAttribute)); else { free(p); d->pAttribute=NULL; }
+ if (d->nAttribute) {
+ memmove(p, p + 1, (d->nAttribute - i)*sizeof(XMLAttribute));
+ }
+ else {
+ free(p);
+ d->pAttribute = NULL;
+ }
}
-void XMLNode::deleteAttribute(XMLAttribute *a){ if (a) deleteAttribute(a->lpszName); }
-void XMLNode::deleteAttribute(XMLCSTR lpszName)
-{
- int j=0;
- getAttribute(lpszName,&j);
- if (j) deleteAttribute(j-1);
+void XMLNode::deleteAttribute(XMLAttribute *a) {
+ if (a) deleteAttribute(a->lpszName);
+}
+void XMLNode::deleteAttribute(XMLCSTR lpszName) {
+ int j = 0;
+ getAttribute(lpszName, &j);
+ if (j) deleteAttribute(j - 1);
}
-XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,int i)
-{
- if (!d) { if (lpszNewValue) free(lpszNewValue); if (lpszNewName) free(lpszNewName); return NULL; }
- if (i>=d->nAttribute)
- {
- if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue);
+XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue,
+ XMLSTR lpszNewName, int i) {
+ if (!d) {
+ if (lpszNewValue) free(lpszNewValue);
+ if (lpszNewName) free(lpszNewName);
+ return NULL;
+ }
+ if (i >= d->nAttribute) {
+ if (lpszNewName) return addAttribute_WOSD(lpszNewName, lpszNewValue);
return NULL;
}
- XMLAttribute *p=d->pAttribute+i;
- if (p->lpszValue&&p->lpszValue!=lpszNewValue) free((void*)p->lpszValue);
- p->lpszValue=lpszNewValue;
- if (lpszNewName&&p->lpszName!=lpszNewName) { free((void*)p->lpszName); p->lpszName=lpszNewName; };
+ XMLAttribute *p = d->pAttribute + i;
+ if (p->lpszValue && p->lpszValue != lpszNewValue) {
+ free((void*)p->lpszValue);
+ }
+ p->lpszValue = lpszNewValue;
+ if (lpszNewName && p->lpszName != lpszNewName) {
+ free((void*)p->lpszName);
+ p->lpszName = lpszNewName;
+ };
return p;
}
-XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute)
-{
- if (oldAttribute) return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue,(XMLSTR)newAttribute->lpszName,oldAttribute->lpszName);
- return addAttribute_WOSD((XMLSTR)newAttribute->lpszName,(XMLSTR)newAttribute->lpszValue);
-}
-
-XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName)
-{
- int j=0;
- getAttribute(lpszOldName,&j);
- if (j) return updateAttribute_WOSD(lpszNewValue,lpszNewName,j-1);
- else
- {
- if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue);
- else return addAttribute_WOSD(stringDup(lpszOldName),lpszNewValue);
+XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute,
+ XMLAttribute *oldAttribute) {
+ if (oldAttribute) {
+ return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue,
+ (XMLSTR)newAttribute->lpszName,
+ oldAttribute->lpszName);
+ }
+ return addAttribute_WOSD((XMLSTR)newAttribute->lpszName,
+ (XMLSTR)newAttribute->lpszValue);
+}
+
+XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue,
+ XMLSTR lpszNewName,
+ XMLCSTR lpszOldName) {
+ int j = 0;
+ getAttribute(lpszOldName, &j);
+ if (j) return updateAttribute_WOSD(lpszNewValue, lpszNewName, j - 1);
+ else {
+ if (lpszNewName) {
+ return addAttribute_WOSD(lpszNewName, lpszNewValue);
+ } else {
+ return addAttribute_WOSD(stringDup(lpszOldName), lpszNewValue);
+ }
}
}
-int XMLNode::indexText(XMLCSTR lpszValue) const
-{
+int XMLNode::indexText(XMLCSTR lpszValue) const {
if (!d) return -1;
- int i,l=d->nText;
- if (!lpszValue) { if (l) return 0; return -1; }
- XMLCSTR *p=d->pText;
- for (i=0; i<l; i++) if (lpszValue==p[i]) return i;
+ int i, l = d->nText;
+ if (!lpszValue) {
+ if (l) return 0;
+ return -1;
+ }
+ XMLCSTR *p = d->pText;
+ for (i = 0; i < l; i++) if (lpszValue == p[i]) return i;
return -1;
}
-void XMLNode::deleteText(int i)
-{
- if ((!d)||(i<0)||(i>=d->nText)) return;
+void XMLNode::deleteText(int i) {
+ if ((!d) || (i < 0) || (i >= d->nText)) return;
d->nText--;
- XMLCSTR *p=d->pText+i;
+ XMLCSTR *p = d->pText + i;
free((void*)*p);
- if (d->nText) memmove(p,p+1,(d->nText-i)*sizeof(XMLCSTR)); else { free(p); d->pText=NULL; }
- removeOrderElement(d,eNodeText,i);
+ if (d->nText) memmove(p, p + 1, (d->nText - i)*sizeof(XMLCSTR));
+ else {
+ free(p);
+ d->pText = NULL;
+ }
+ removeOrderElement(d, eNodeText, i);
}
-void XMLNode::deleteText(XMLCSTR lpszValue) { deleteText(indexText(lpszValue)); }
+void XMLNode::deleteText(XMLCSTR lpszValue) {
+ deleteText(indexText(lpszValue));
+}
-XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i)
-{
- if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; }
- if (i>=d->nText) return addText_WOSD(lpszNewValue);
- XMLCSTR *p=d->pText+i;
- if (*p!=lpszNewValue) { free((void*)*p); *p=lpszNewValue; }
+XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) {
+ if (!d) {
+ if (lpszNewValue) free(lpszNewValue);
+ return NULL;
+ }
+ if (i >= d->nText) return addText_WOSD(lpszNewValue);
+ XMLCSTR *p = d->pText + i;
+ if (*p != lpszNewValue) {
+ free((void*)*p);
+ *p = lpszNewValue;
+ }
return lpszNewValue;
}
-XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue)
-{
- if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; }
- int i=indexText(lpszOldValue);
- if (i>=0) return updateText_WOSD(lpszNewValue,i);
+XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) {
+ if (!d) {
+ if (lpszNewValue) free(lpszNewValue);
+ return NULL;
+ }
+ int i = indexText(lpszOldValue);
+ if (i >= 0) return updateText_WOSD(lpszNewValue, i);
return addText_WOSD(lpszNewValue);
}
-void XMLNode::deleteClear(int i)
-{
- if ((!d)||(i<0)||(i>=d->nClear)) return;
+void XMLNode::deleteClear(int i) {
+ if ((!d) || (i < 0) || (i >= d->nClear)) return;
d->nClear--;
- XMLClear *p=d->pClear+i;
+ XMLClear *p = d->pClear + i;
free((void*)p->lpszValue);
- if (d->nClear) memmove(p,p+1,(d->nClear-i)*sizeof(XMLClear)); else { free(p); d->pClear=NULL; }
- removeOrderElement(d,eNodeClear,i);
+ if (d->nClear) memmove(p, p + 1, (d->nClear - i)*sizeof(XMLClear));
+ else {
+ free(p);
+ d->pClear = NULL;
+ }
+ removeOrderElement(d, eNodeClear, i);
}
-int XMLNode::indexClear(XMLCSTR lpszValue) const
-{
+int XMLNode::indexClear(XMLCSTR lpszValue) const {
if (!d) return -1;
- int i,l=d->nClear;
- if (!lpszValue) { if (l) return 0; return -1; }
- XMLClear *p=d->pClear;
- for (i=0; i<l; i++) if (lpszValue==p[i].lpszValue) return i;
+ int i, l = d->nClear;
+ if (!lpszValue) {
+ if (l) return 0;
+ return -1;
+ }
+ XMLClear *p = d->pClear;
+ for (i = 0; i < l; i++) if (lpszValue == p[i].lpszValue) return i;
return -1;
}
-void XMLNode::deleteClear(XMLCSTR lpszValue) { deleteClear(indexClear(lpszValue)); }
-void XMLNode::deleteClear(XMLClear *a) { if (a) deleteClear(a->lpszValue); }
+void XMLNode::deleteClear(XMLCSTR lpszValue) {
+ deleteClear(indexClear(lpszValue));
+}
+void XMLNode::deleteClear(XMLClear *a) {
+ if (a) deleteClear(a->lpszValue);
+}
-XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i)
-{
- if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; }
- if (i>=d->nClear) return addClear_WOSD(lpszNewContent);
- XMLClear *p=d->pClear+i;
- if (lpszNewContent!=p->lpszValue) { free((void*)p->lpszValue); p->lpszValue=lpszNewContent; }
+XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i) {
+ if (!d) {
+ if (lpszNewContent) free(lpszNewContent);
+ return NULL;
+ }
+ if (i >= d->nClear) return addClear_WOSD(lpszNewContent);
+ XMLClear *p = d->pClear + i;
+ if (lpszNewContent != p->lpszValue) {
+ free((void*)p->lpszValue);
+ p->lpszValue = lpszNewContent;
+ }
return p;
}
-XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, XMLCSTR lpszOldValue)
-{
- if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; }
- int i=indexClear(lpszOldValue);
- if (i>=0) return updateClear_WOSD(lpszNewContent,i);
+XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent,
+ XMLCSTR lpszOldValue) {
+ if (!d) {
+ if (lpszNewContent) free(lpszNewContent);
+ return NULL;
+ }
+ int i = indexClear(lpszOldValue);
+ if (i >= 0) return updateClear_WOSD(lpszNewContent, i);
return addClear_WOSD(lpszNewContent);
}
-XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP,XMLClear *oldP)
-{
- if (oldP) return updateClear_WOSD((XMLSTR)newP->lpszValue,(XMLSTR)oldP->lpszValue);
+XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP, XMLClear *oldP) {
+ if (oldP) {
+ return updateClear_WOSD((XMLSTR)newP->lpszValue,
+ (XMLSTR)oldP->lpszValue);
+ }
return NULL;
}
-int XMLNode::nChildNode(XMLCSTR name) const
-{
+int XMLNode::nChildNode(XMLCSTR name) const {
if (!d) return 0;
- int i,j=0,n=d->nChild;
- XMLNode *pc=d->pChild;
- for (i=0; i<n; i++)
- {
- if (xstricmp(pc->d->lpszName, name)==0) j++;
+ int i, j = 0, n = d->nChild;
+ XMLNode *pc = d->pChild;
+ for (i = 0; i < n; i++) {
+ if (xstricmp(pc->d->lpszName, name) == 0) j++;
pc++;
}
return j;
}
-XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const
-{
+XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const {
if (!d) return emptyXMLNode;
- int i=0,n=d->nChild;
- if (j) i=*j;
- XMLNode *pc=d->pChild+i;
- for (; i<n; i++)
- {
- if (!xstricmp(pc->d->lpszName, name))
- {
- if (j) *j=i+1;
+ int i = 0, n = d->nChild;
+ if (j) i = *j;
+ XMLNode *pc = d->pChild + i;
+ for (; i < n; i++) {
+ if (!xstricmp(pc->d->lpszName, name)) {
+ if (j) *j = i + 1;
return *pc;
}
pc++;
@@ -2428,117 +2736,149 @@ XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const
return emptyXMLNode;
}
-XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const
-{
+XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const {
if (!d) return emptyXMLNode;
- if (j>=0)
- {
- int i=0;
- while (j-->0) getChildNode(name,&i);
- return getChildNode(name,&i);
- }
- int i=d->nChild;
- while (i--) if (!xstricmp(name,d->pChild[i].d->lpszName)) break;
- if (i<0) return emptyXMLNode;
+ if (j >= 0) {
+ int i = 0;
+ while (j-- > 0) getChildNode(name, &i);
+ return getChildNode(name, &i);
+ }
+ int i = d->nChild;
+ while (i--) if (!xstricmp(name, d->pChild[i].d->lpszName)) break;
+ if (i < 0) return emptyXMLNode;
return getChildNode(i);
}
-XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, XMLCHAR sep)
-{
- XMLSTR path=stringDup(_path);
- XMLNode x=getChildNodeByPathNonConst(path,createMissing,sep);
+XMLNode* XMLNode::getChildNodePtr(XMLCSTR name, int *j) const {
+ if (!d) return &emptyXMLNode;
+ int i = 0, n = d->nChild;
+ int foundIndex = 0;
+ XMLNode *pc = d->pChild + i;
+ for (; i < n; i++) {
+ if (!xstricmp(pc->d->lpszName, name)) {
+ if (*j == foundIndex) return pc;
+ foundIndex++;
+ }
+ pc++;
+ }
+ return &emptyXMLNode;
+}
+
+XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing,
+ XMLCHAR sep) {
+ XMLSTR path = stringDup(_path);
+ XMLNode x = getChildNodeByPathNonConst(path, createMissing, sep);
if (path) free(path);
return x;
}
-XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, XMLCHAR sep)
-{
- if ((!path)||(!(*path))) return *this;
- XMLNode xn,xbase=*this;
- XMLCHAR *tend1,sepString[2]; sepString[0]=sep; sepString[1]=0;
- tend1=xstrstr(path,sepString);
- while(tend1)
- {
- *tend1=0;
- xn=xbase.getChildNode(path);
- if (xn.isEmpty())
- {
- if (createIfMissing) xn=xbase.addChild(path);
- else { *tend1=sep; return XMLNode::emptyXMLNode; }
+XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path,
+ char createIfMissing, XMLCHAR sep) {
+ if ((!path) || (!(*path))) return *this;
+ XMLNode xn, xbase = *this;
+ XMLCHAR *tend1, sepString[2];
+ sepString[0] = sep;
+ sepString[1] = 0;
+ tend1 = xstrstr(path, sepString);
+ while (tend1) {
+ *tend1 = 0;
+ xn = xbase.getChildNode(path);
+ if (xn.isEmpty()) {
+ if (createIfMissing) xn = xbase.addChild(path);
+ else {
+ *tend1 = sep;
+ return XMLNode::emptyXMLNode;
+ }
}
- *tend1=sep;
- xbase=xn;
- path=tend1+1;
- tend1=xstrstr(path,sepString);
+ *tend1 = sep;
+ xbase = xn;
+ path = tend1 + 1;
+ tend1 = xstrstr(path, sepString);
}
- xn=xbase.getChildNode(path);
- if (xn.isEmpty()&&createIfMissing) xn=xbase.addChild(path);
+ xn = xbase.getChildNode(path);
+ if (xn.isEmpty() && createIfMissing) xn = xbase.addChild(path);
return xn;
}
-XMLElementPosition XMLNode::positionOfText (int i) const { if (i>=d->nText ) i=d->nText-1; return findPosition(d,i,eNodeText ); }
-XMLElementPosition XMLNode::positionOfClear (int i) const { if (i>=d->nClear) i=d->nClear-1; return findPosition(d,i,eNodeClear); }
-XMLElementPosition XMLNode::positionOfChildNode(int i) const { if (i>=d->nChild) i=d->nChild-1; return findPosition(d,i,eNodeChild); }
-XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { return positionOfText (indexText (lpszValue)); }
-XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { return positionOfClear(indexClear(lpszValue)); }
-XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { if (a) return positionOfClear(a->lpszValue); return positionOfClear(); }
-XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const
-{
- if ((!d)||(!x.d)) return -1;
- XMLNodeData *dd=x.d;
- XMLNode *pc=d->pChild;
- int i=d->nChild;
- while (i--) if (pc[i].d==dd) return findPosition(d,i,eNodeChild);
+XMLElementPosition XMLNode::positionOfText (int i) const {
+ if (i >= d->nText ) i = d->nText - 1;
+ return findPosition(d, i, eNodeText );
+}
+XMLElementPosition XMLNode::positionOfClear (int i) const {
+ if (i >= d->nClear) i = d->nClear - 1;
+ return findPosition(d, i, eNodeClear);
+}
+XMLElementPosition XMLNode::positionOfChildNode(int i) const {
+ if (i >= d->nChild) i = d->nChild - 1;
+ return findPosition(d, i, eNodeChild);
+}
+XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const {
+ return positionOfText (indexText (lpszValue));
+}
+XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const {
+ return positionOfClear(indexClear(lpszValue));
+}
+XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const {
+ if (a) return positionOfClear(a->lpszValue);
+ return positionOfClear();
+}
+XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const {
+ if ((!d) || (!x.d)) return -1;
+ XMLNodeData *dd = x.d;
+ XMLNode *pc = d->pChild;
+ int i = d->nChild;
+ while (i--) if (pc[i].d == dd) return findPosition(d, i, eNodeChild);
return -1;
}
-XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const
-{
+XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const {
if (!name) return positionOfChildNode(count);
- int j=0;
- do { getChildNode(name,&j); if (j<0) return -1; } while (count--);
- return findPosition(d,j-1,eNodeChild);
-}
-
-XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name,XMLCSTR attributeName,XMLCSTR attributeValue, int *k) const
-{
- int i=0,j;
- if (k) i=*k;
- XMLNode x;
- XMLCSTR t;
- do
- {
- x=getChildNode(name,&i);
- if (!x.isEmpty())
- {
- if (attributeValue)
- {
- j=0;
- do
- {
- t=x.getAttribute(attributeName,&j);
- if (t&&(xstricmp(attributeValue,t)==0)) { if (k) *k=i; return x; }
- } while (t);
- } else
- {
- if (x.isAttributeSet(attributeName)) { if (k) *k=i; return x; }
- }
- }
- } while (!x.isEmpty());
- return emptyXMLNode;
+ int j = 0;
+ do {
+ getChildNode(name, &j);
+ if (j < 0) return -1;
+ } while (count--);
+ return findPosition(d, j - 1, eNodeChild);
+}
+
+XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name, XMLCSTR attributeName,
+ XMLCSTR attributeValue,
+ int *k) const {
+ int i = 0, j;
+ if (k) i = *k;
+ XMLNode x;
+ XMLCSTR t;
+ do {
+ x = getChildNode(name, &i);
+ if (!x.isEmpty()) {
+ if (attributeValue) {
+ j = 0;
+ do {
+ t = x.getAttribute(attributeName, &j);
+ if (t && (xstricmp(attributeValue, t) == 0)) {
+ if (k) *k = i;
+ return x;
+ }
+ } while (t);
+ } else {
+ if (x.isAttributeSet(attributeName)) {
+ if (k) *k = i;
+ return x;
+ }
+ }
+ }
+ } while (!x.isEmpty());
+ return emptyXMLNode;
}
// Find an attribute on an node.
-XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const
-{
+XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const {
if (!d) return NULL;
- int i=0,n=d->nAttribute;
- if (j) i=*j;
- XMLAttribute *pAttr=d->pAttribute+i;
- for (; i<n; i++)
- {
- if (xstricmp(pAttr->lpszName, lpszAttrib)==0)
- {
- if (j) *j=i+1;
+ int i = 0, n = d->nAttribute;
+ if (j) i = *j;
+ XMLAttribute *pAttr = d->pAttribute + i;
+ for (; i < n; i++) {
+ if (xstricmp(pAttr->lpszName, lpszAttrib) == 0) {
+ if (j) *j = i + 1;
return pAttr->lpszValue;
}
pAttr++;
@@ -2546,15 +2886,12 @@ XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const
return NULL;
}
-char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const
-{
+char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const {
if (!d) return FALSE;
- int i,n=d->nAttribute;
- XMLAttribute *pAttr=d->pAttribute;
- for (i=0; i<n; i++)
- {
- if (xstricmp(pAttr->lpszName, lpszAttrib)==0)
- {
+ int i, n = d->nAttribute;
+ XMLAttribute *pAttr = d->pAttribute;
+ for (i = 0; i < n; i++) {
+ if (xstricmp(pAttr->lpszName, lpszAttrib) == 0) {
return TRUE;
}
pAttr++;
@@ -2562,159 +2899,283 @@ char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const
return FALSE;
}
-XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const
-{
+XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const {
if (!d) return NULL;
- int i=0;
- while (j-->0) getAttribute(name,&i);
- return getAttribute(name,&i);
+ int i = 0;
+ while (j-- > 0) getAttribute(name, &i);
+ return getAttribute(name, &i);
}
-XMLNodeContents XMLNode::enumContents(int i) const
-{
+XMLNodeContents XMLNode::enumContents(int i) const {
XMLNodeContents c;
- if (!d) { c.etype=eNodeNULL; return c; }
- if (i<d->nAttribute)
- {
- c.etype=eNodeAttribute;
- c.attrib=d->pAttribute[i];
+ if (!d) {
+ c.etype = eNodeNULL;
+ return c;
+ }
+ if (i < d->nAttribute) {
+ c.etype = eNodeAttribute;
+ c.attrib = d->pAttribute[i];
return c;
}
- i-=d->nAttribute;
- c.etype=(XMLElementType)(d->pOrder[i]&3);
- i=(d->pOrder[i])>>2;
- switch (c.etype)
- {
- case eNodeChild: c.child = d->pChild[i]; break;
- case eNodeText: c.text = d->pText[i]; break;
- case eNodeClear: c.clear = d->pClear[i]; break;
- default: break;
+ i -= d->nAttribute;
+ c.etype = (XMLElementType)(d->pOrder[i] & 3);
+ i = (d->pOrder[i]) >> 2;
+ switch (c.etype) {
+ case eNodeChild:
+ c.child = d->pChild[i];
+ break;
+ case eNodeText:
+ c.text = d->pText[i];
+ break;
+ case eNodeClear:
+ c.clear = d->pClear[i];
+ break;
+ default:
+ break;
}
return c;
}
-XMLCSTR XMLNode::getName() const { if (!d) return NULL; return d->lpszName; }
-int XMLNode::nText() const { if (!d) return 0; return d->nText; }
-int XMLNode::nChildNode() const { if (!d) return 0; return d->nChild; }
-int XMLNode::nAttribute() const { if (!d) return 0; return d->nAttribute; }
-int XMLNode::nClear() const { if (!d) return 0; return d->nClear; }
-int XMLNode::nElement() const { if (!d) return 0; return d->nAttribute+d->nChild+d->nText+d->nClear; }
-XMLClear XMLNode::getClear (int i) const { if ((!d)||(i>=d->nClear )) return emptyXMLClear; return d->pClear[i]; }
-XMLAttribute XMLNode::getAttribute (int i) const { if ((!d)||(i>=d->nAttribute)) return emptyXMLAttribute; return d->pAttribute[i]; }
-XMLCSTR XMLNode::getAttributeName (int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszName; }
-XMLCSTR XMLNode::getAttributeValue(int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszValue; }
-XMLCSTR XMLNode::getText (int i) const { if ((!d)||(i>=d->nText )) return NULL; return d->pText[i]; }
-XMLNode XMLNode::getChildNode (int i) const { if ((!d)||(i>=d->nChild )) return emptyXMLNode; return d->pChild[i]; }
-XMLNode XMLNode::getParentNode ( ) const { if ((!d)||(!d->pParent )) return emptyXMLNode; return XMLNode(d->pParent); }
-char XMLNode::isDeclaration ( ) const { if (!d) return 0; return d->isDeclaration; }
-char XMLNode::isEmpty ( ) const { return (d==NULL); }
-XMLNode XMLNode::emptyNode ( ) { return XMLNode::emptyXMLNode; }
-
-XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, XMLElementPosition pos)
- { return addChild_priv(0,stringDup(lpszName),isDeclaration,pos); }
-XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, XMLElementPosition pos)
- { return addChild_priv(0,lpszName,isDeclaration,pos); }
-XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue)
- { return addAttribute_priv(0,stringDup(lpszName),stringDup(lpszValue)); }
-XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev)
- { return addAttribute_priv(0,lpszName,lpszValuev); }
-XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos)
- { return addText_priv(0,stringDup(lpszValue),pos); }
-XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos)
- { return addText_priv(0,lpszValue,pos); }
-XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos)
- { return addClear_priv(0,stringDup(lpszValue),lpszOpen,lpszClose,pos); }
-XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos)
- { return addClear_priv(0,lpszValue,lpszOpen,lpszClose,pos); }
-XMLCSTR XMLNode::updateName(XMLCSTR lpszName)
- { return updateName_WOSD(stringDup(lpszName)); }
-XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute)
- { return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),stringDup(newAttribute->lpszName),oldAttribute->lpszName); }
-XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i)
- { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),i); }
-XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName)
- { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),lpszOldName); }
-XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i)
- { return updateText_WOSD(stringDup(lpszNewValue),i); }
-XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue)
- { return updateText_WOSD(stringDup(lpszNewValue),lpszOldValue); }
-XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i)
- { return updateClear_WOSD(stringDup(lpszNewContent),i); }
-XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue)
- { return updateClear_WOSD(stringDup(lpszNewValue),lpszOldValue); }
-XMLClear *XMLNode::updateClear(XMLClear *newP,XMLClear *oldP)
- { return updateClear_WOSD(stringDup(newP->lpszValue),oldP->lpszValue); }
-
-char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, char _guessWideCharChars,
- char _dropWhiteSpace, char _removeCommentsInMiddleOfText)
-{
- guessWideCharChars=_guessWideCharChars; dropWhiteSpace=_dropWhiteSpace; removeCommentsInMiddleOfText=_removeCommentsInMiddleOfText;
+XMLCSTR XMLNode::getName() const {
+ if (!d) return NULL;
+ return d->lpszName;
+}
+int XMLNode::nText() const {
+ if (!d) return 0;
+ return d->nText;
+}
+int XMLNode::nChildNode() const {
+ if (!d) return 0;
+ return d->nChild;
+}
+int XMLNode::nAttribute() const {
+ if (!d) return 0;
+ return d->nAttribute;
+}
+int XMLNode::nClear() const {
+ if (!d) return 0;
+ return d->nClear;
+}
+int XMLNode::nElement() const {
+ if (!d) return 0;
+ return d->nAttribute + d->nChild + d->nText + d->nClear;
+}
+XMLClear XMLNode::getClear (int i) const {
+ if ((!d) || (i >= d->nClear )) return emptyXMLClear;
+ return d->pClear[i];
+}
+XMLAttribute XMLNode::getAttribute (int i) const {
+ if ((!d) || (i >= d->nAttribute)) return emptyXMLAttribute;
+ return d->pAttribute[i];
+}
+XMLCSTR XMLNode::getAttributeName (int i) const {
+ if ((!d) || (i >= d->nAttribute)) return NULL;
+ return d->pAttribute[i].lpszName;
+}
+XMLCSTR XMLNode::getAttributeValue(int i) const {
+ if ((!d) || (i >= d->nAttribute)) return NULL;
+ return d->pAttribute[i].lpszValue;
+}
+XMLCSTR XMLNode::getText (int i) const {
+ if ((!d) || (i >= d->nText )) return NULL;
+ return d->pText[i];
+}
+XMLNode XMLNode::getChildNode (int i) const {
+ if ((!d) || (i >= d->nChild )) return emptyXMLNode;
+ return d->pChild[i];
+}
+XMLNode XMLNode::getParentNode ( ) const {
+ if ((!d) || (!d->pParent )) return emptyXMLNode;
+ return XMLNode(d->pParent);
+}
+char XMLNode::isDeclaration ( ) const {
+ if (!d) return 0;
+ return d->isDeclaration;
+}
+char XMLNode::isEmpty ( ) const {
+ return (d == NULL);
+}
+XMLNode XMLNode::emptyNode ( ) {
+ return XMLNode::emptyXMLNode;
+}
+
+XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration,
+ XMLElementPosition pos) {
+ return addChild_priv(0, stringDup(lpszName), isDeclaration, pos);
+}
+XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration,
+ XMLElementPosition pos) {
+ return addChild_priv(0, lpszName, isDeclaration, pos);
+}
+XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) {
+ return addAttribute_priv(0, stringDup(lpszName), stringDup(lpszValue));
+}
+XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) {
+ return addAttribute_priv(0, lpszName, lpszValuev);
+}
+XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) {
+ return addText_priv(0, stringDup(lpszValue), pos);
+}
+XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) {
+ return addText_priv(0, lpszValue, pos);
+}
+XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen,
+ XMLCSTR lpszClose, XMLElementPosition pos) {
+ return addClear_priv(0, stringDup(lpszValue), lpszOpen, lpszClose, pos);
+}
+XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen,
+ XMLCSTR lpszClose, XMLElementPosition pos) {
+ return addClear_priv(0, lpszValue, lpszOpen, lpszClose, pos);
+}
+XMLCSTR XMLNode::updateName(XMLCSTR lpszName) {
+ return updateName_WOSD(stringDup(lpszName));
+}
+XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute,
+ XMLAttribute *oldAttribute) {
+ return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),
+ stringDup(newAttribute->lpszName),
+ oldAttribute->lpszName);
+}
+XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue,
+ XMLCSTR lpszNewName, int i) {
+ return updateAttribute_WOSD(stringDup(lpszNewValue),
+ stringDup(lpszNewName), i);
+}
+XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue,
+ XMLCSTR lpszNewName,
+ XMLCSTR lpszOldName) {
+ return updateAttribute_WOSD(stringDup(lpszNewValue),
+ stringDup(lpszNewName), lpszOldName);
+}
+XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) {
+ return updateText_WOSD(stringDup(lpszNewValue), i);
+}
+XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) {
+ return updateText_WOSD(stringDup(lpszNewValue), lpszOldValue);
+}
+XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i) {
+ return updateClear_WOSD(stringDup(lpszNewContent), i);
+}
+XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) {
+ return updateClear_WOSD(stringDup(lpszNewValue), lpszOldValue);
+}
+XMLClear *XMLNode::updateClear(XMLClear *newP, XMLClear *oldP) {
+ return updateClear_WOSD(stringDup(newP->lpszValue), oldP->lpszValue);
+}
+
+char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding,
+ char _guessWideCharChars,
+ char _dropWhiteSpace,
+ char _removeCommentsInMiddleOfText) {
+ guessWideCharChars = _guessWideCharChars;
+ dropWhiteSpace = _dropWhiteSpace;
+ removeCommentsInMiddleOfText = _removeCommentsInMiddleOfText;
#ifdef _XMLWIDECHAR
- if (_characterEncoding) characterEncoding=_characterEncoding;
+ if (_characterEncoding) characterEncoding = _characterEncoding;
#else
- switch(_characterEncoding)
- {
- case char_encoding_UTF8: characterEncoding=_characterEncoding; XML_ByteTable=XML_utf8ByteTable; break;
- case char_encoding_legacy: characterEncoding=_characterEncoding; XML_ByteTable=XML_legacyByteTable; break;
- case char_encoding_ShiftJIS: characterEncoding=_characterEncoding; XML_ByteTable=XML_sjisByteTable; break;
- case char_encoding_GB2312: characterEncoding=_characterEncoding; XML_ByteTable=XML_gb2312ByteTable; break;
+ switch (_characterEncoding) {
+ case char_encoding_UTF8:
+ characterEncoding = _characterEncoding;
+ XML_ByteTable = XML_utf8ByteTable;
+ break;
+ case char_encoding_legacy:
+ characterEncoding = _characterEncoding;
+ XML_ByteTable = XML_legacyByteTable;
+ break;
+ case char_encoding_ShiftJIS:
+ characterEncoding = _characterEncoding;
+ XML_ByteTable = XML_sjisByteTable;
+ break;
+ case char_encoding_GB2312:
+ characterEncoding = _characterEncoding;
+ XML_ByteTable = XML_gb2312ByteTable;
+ break;
case char_encoding_Big5:
- case char_encoding_GBK: characterEncoding=_characterEncoding; XML_ByteTable=XML_gbk_big5_ByteTable; break;
- default: return 1;
+ case char_encoding_GBK:
+ characterEncoding = _characterEncoding;
+ XML_ByteTable = XML_gbk_big5_ByteTable;
+ break;
+ default:
+ return 1;
}
#endif
return 0;
}
-XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf,int l, char useXMLEncodingAttribute)
-{
+XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf, int l,
+ char useXMLEncodingAttribute) {
#ifdef _XMLWIDECHAR
return (XMLCharEncoding)0;
#else
- if (l<25) return (XMLCharEncoding)0;
- if (guessWideCharChars&&(myIsTextWideChar(buf,l))) return (XMLCharEncoding)0;
- unsigned char *b=(unsigned char*)buf;
- if ((b[0]==0xef)&&(b[1]==0xbb)&&(b[2]==0xbf)) return char_encoding_UTF8;
+ if (l < 25) return (XMLCharEncoding)0;
+ if (guessWideCharChars && (myIsTextWideChar(buf, l))) {
+ return (XMLCharEncoding)0;
+ }
+ unsigned char *b = (unsigned char*)buf;
+ if ((b[0] == 0xef) && (b[1] == 0xbb) && (b[2] == 0xbf)) {
+ return char_encoding_UTF8;
+ }
// Match utf-8 model ?
- XMLCharEncoding bestGuess=char_encoding_UTF8;
- int i=0;
- while (i<l)
- switch (XML_utf8ByteTable[b[i]])
- {
- case 4: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ?
- case 3: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ?
- case 2: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ?
- case 1: i++; break;
- case 0: i=l;
+ XMLCharEncoding bestGuess = char_encoding_UTF8;
+ int i = 0;
+ while (i < l)
+ switch (XML_utf8ByteTable[b[i]]) {
+ case 4:
+ i++;
+ if ((i < l) && (b[i]& 0xC0) != 0x80) {
+ bestGuess = char_encoding_legacy; // 10bbbbbb ?
+ i = l;
+ }
+ case 3:
+ i++;
+ if ((i < l) && (b[i]& 0xC0) != 0x80) {
+ bestGuess = char_encoding_legacy; // 10bbbbbb ?
+ i = l;
+ }
+ case 2:
+ i++;
+ if ((i < l) && (b[i]& 0xC0) != 0x80) {
+ bestGuess = char_encoding_legacy; // 10bbbbbb ?
+ i = l;
+ }
+ case 1:
+ i++;
+ break;
+ case 0:
+ i = l;
}
if (!useXMLEncodingAttribute) return bestGuess;
// if encoding is specified and different from utf-8 than it's non-utf8
// otherwise it's utf-8
char bb[201];
- l=mmin(l,200);
- memcpy(bb,buf,l); // copy buf into bb to be able to do "bb[l]=0"
- bb[l]=0;
- b=(unsigned char*)strstr(bb,"encoding");
+ l = mmin(l, 200);
+ memcpy(bb, buf, l); // copy buf into bb to be able to do "bb[l]=0"
+ bb[l] = 0;
+ b = (unsigned char*)strstr(bb, "encoding");
if (!b) return bestGuess;
- b+=8; while XML_isSPACECHAR(*b) b++; if (*b!='=') return bestGuess;
- b++; while XML_isSPACECHAR(*b) b++; if ((*b!='\'')&&(*b!='"')) return bestGuess;
- b++; while XML_isSPACECHAR(*b) b++;
-
- if ((xstrnicmp((char*)b,"utf-8",5)==0)||
- (xstrnicmp((char*)b,"utf8",4)==0))
- {
- if (bestGuess==char_encoding_legacy) return char_encoding_error;
+ b += 8;
+ while XML_isSPACECHAR(*b) b++;
+ if (*b != '=') return bestGuess;
+ b++;
+ while XML_isSPACECHAR(*b) b++;
+ if ((*b != '\'') && (*b != '"')) return bestGuess;
+ b++;
+ while XML_isSPACECHAR(*b) b++;
+
+ if ((xstrnicmp((char*)b, "utf-8", 5) == 0) ||
+ (xstrnicmp((char*)b, "utf8", 4) == 0)) {
+ if (bestGuess == char_encoding_legacy) return char_encoding_error;
return char_encoding_UTF8;
}
- if ((xstrnicmp((char*)b,"shiftjis",8)==0)||
- (xstrnicmp((char*)b,"shift-jis",9)==0)||
- (xstrnicmp((char*)b,"sjis",4)==0)) return char_encoding_ShiftJIS;
+ if ((xstrnicmp((char*)b, "shiftjis", 8) == 0) ||
+ (xstrnicmp((char*)b, "shift-jis", 9) == 0) ||
+ (xstrnicmp((char*)b, "sjis", 4) == 0)) return char_encoding_ShiftJIS;
- if (xstrnicmp((char*)b,"GB2312",6)==0) return char_encoding_GB2312;
- if (xstrnicmp((char*)b,"Big5",4)==0) return char_encoding_Big5;
- if (xstrnicmp((char*)b,"GBK",3)==0) return char_encoding_GBK;
+ if (xstrnicmp((char*)b, "GB2312", 6) == 0) return char_encoding_GB2312;
+ if (xstrnicmp((char*)b, "Big5", 4) == 0) return char_encoding_Big5;
+ if (xstrnicmp((char*)b, "GBK", 3) == 0) return char_encoding_GBK;
return char_encoding_legacy;
#endif
@@ -2725,100 +3186,117 @@ XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf,int l, char useXML
// Here starts the base64 conversion functions. //
//////////////////////////////////////////////////////////
-static const char base64Fillchar = _CXML('='); // used to mark partial words at the end
+// used to mark partial words at the end
+static const char base64Fillchar = _CXML('=');
// this lookup table defines the base64 encoding
-XMLCSTR base64EncodeTable=_CXML("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
+XMLCSTR base64EncodeTable = _CXML("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
// Decode Table gives the index of any valid base64 character in the Base64 table
// 96: '=' - 97: space char - 98: illegal char - 99: end of string
const unsigned char base64DecodeTable[] = {
- 99,98,98,98,98,98,98,98,98,97, 97,98,98,97,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //00 -29
- 98,98,97,98,98,98,98,98,98,98, 98,98,98,62,98,98,98,63,52,53, 54,55,56,57,58,59,60,61,98,98, //30 -59
- 98,96,98,98,98, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 15,16,17,18,19,20,21,22,23,24, //60 -89
- 25,98,98,98,98,98,98,26,27,28, 29,30,31,32,33,34,35,36,37,38, 39,40,41,42,43,44,45,46,47,48, //90 -119
- 49,50,51,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //120 -149
- 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //150 -179
- 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //180 -209
- 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //210 -239
- 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98 //240 -255
+ 99, 98, 98, 98, 98, 98, 98, 98, 98, 97, 97, 98, 98, 97, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, //00 -29
+ 98, 98, 97, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 62, 98, 98, 98, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 98, 98, //30 -59
+ 98, 96, 98, 98, 98, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, //60 -89
+ 25, 98, 98, 98, 98, 98, 98, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, //90 -119
+ 49, 50, 51, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, //120 -149
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, //150 -179
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, //180 -209
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, //210 -239
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98 //240 -255
};
-XMLParserBase64Tool::~XMLParserBase64Tool(){ freeBuffer(); }
+XMLParserBase64Tool::~XMLParserBase64Tool() {
+ freeBuffer();
+}
-void XMLParserBase64Tool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; }
+void XMLParserBase64Tool::freeBuffer() {
+ if (buf) free(buf);
+ buf = NULL;
+ buflen = 0;
+}
-int XMLParserBase64Tool::encodeLength(int inlen, char formatted)
-{
- unsigned int i=((inlen-1)/3*4+4+1);
- if (formatted) i+=inlen/54;
+int XMLParserBase64Tool::encodeLength(int inlen, char formatted) {
+ unsigned int i = ((inlen - 1) / 3 * 4 + 4 + 1);
+ if (formatted) i += inlen / 54;
return i;
}
-XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, unsigned int inlen, char formatted)
-{
- int i=encodeLength(inlen,formatted),k=17,eLen=inlen/3,j;
+XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, unsigned int inlen,
+ char formatted) {
+ int i = encodeLength(inlen, formatted), k = 17, eLen = inlen / 3, j;
alloc(i*sizeof(XMLCHAR));
- XMLSTR curr=(XMLSTR)buf;
- for(i=0;i<eLen;i++)
- {
+ XMLSTR curr = (XMLSTR)buf;
+ for (i = 0; i < eLen; i++) {
// Copy next three bytes into lower 24 bits of int, paying attention to sign.
- j=(inbuf[0]<<16)|(inbuf[1]<<8)|inbuf[2]; inbuf+=3;
+ j = (inbuf[0] << 16) | (inbuf[1] << 8) | inbuf[2];
+ inbuf += 3;
// Encode the int into four chars
- *(curr++)=base64EncodeTable[ j>>18 ];
- *(curr++)=base64EncodeTable[(j>>12)&0x3f];
- *(curr++)=base64EncodeTable[(j>> 6)&0x3f];
- *(curr++)=base64EncodeTable[(j )&0x3f];
- if (formatted) { if (!k) { *(curr++)=_CXML('\n'); k=18; } k--; }
- }
- eLen=inlen-eLen*3; // 0 - 2.
- if (eLen==1)
- {
- *(curr++)=base64EncodeTable[ inbuf[0]>>2 ];
- *(curr++)=base64EncodeTable[(inbuf[0]<<4)&0x3F];
- *(curr++)=base64Fillchar;
- *(curr++)=base64Fillchar;
- } else if (eLen==2)
- {
- j=(inbuf[0]<<8)|inbuf[1];
- *(curr++)=base64EncodeTable[ j>>10 ];
- *(curr++)=base64EncodeTable[(j>> 4)&0x3f];
- *(curr++)=base64EncodeTable[(j<< 2)&0x3f];
- *(curr++)=base64Fillchar;
- }
- *(curr++)=0;
+ *(curr++) = base64EncodeTable[ j>>18 ];
+ *(curr++) = base64EncodeTable[(j>>12)&0x3f];
+ *(curr++) = base64EncodeTable[(j>> 6)&0x3f];
+ *(curr++) = base64EncodeTable[(j )&0x3f];
+ if (formatted) {
+ if (!k) {
+ *(curr++) = _CXML('\n');
+ k = 18;
+ }
+ k--;
+ }
+ }
+ eLen = inlen - eLen * 3; // 0 - 2.
+ if (eLen == 1) {
+ *(curr++) = base64EncodeTable[ inbuf[0] >> 2 ];
+ *(curr++) = base64EncodeTable[(inbuf[0] << 4) & 0x3F];
+ *(curr++) = base64Fillchar;
+ *(curr++) = base64Fillchar;
+ } else if (eLen == 2) {
+ j = (inbuf[0] << 8) | inbuf[1];
+ *(curr++) = base64EncodeTable[ j>>10 ];
+ *(curr++) = base64EncodeTable[(j>> 4)&0x3f];
+ *(curr++) = base64EncodeTable[(j<< 2)&0x3f];
+ *(curr++) = base64Fillchar;
+ }
+ *(curr++) = 0;
return (XMLSTR)buf;
}
-unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data,XMLError *xe)
-{
- if (xe) *xe=eXMLErrorNone;
- int size=0;
+unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data, XMLError *xe) {
+ if (xe) *xe = eXMLErrorNone;
+ int size = 0;
unsigned char c;
//skip any extra characters (e.g. newlines or spaces)
- while (*data)
- {
+ while (*data) {
#ifdef _XMLWIDECHAR
- if (*data>255) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
+ if (*data > 255) {
+ if (xe) *xe = eXMLErrorBase64DecodeIllegalCharacter;
+ return 0;
+ }
#endif
- c=base64DecodeTable[(unsigned char)(*data)];
- if (c<97) size++;
- else if (c==98) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
+ c = base64DecodeTable[(unsigned char)(*data)];
+ if (c < 97) size++;
+ else if (c == 98) {
+ if (xe) *xe = eXMLErrorBase64DecodeIllegalCharacter;
+ return 0;
+ }
data++;
}
- if (xe&&(size%4!=0)) *xe=eXMLErrorBase64DataSizeIsNotMultipleOf4;
- if (size==0) return 0;
- do { data--; size--; } while(*data==base64Fillchar); size++;
- return (unsigned int)((size*3)/4);
-}
-
-unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int len, XMLError *xe)
-{
- if (xe) *xe=eXMLErrorNone;
- int i=0,p=0;
- unsigned char d,c;
- for(;;)
- {
+ if (xe && (size % 4 != 0)) *xe = eXMLErrorBase64DataSizeIsNotMultipleOf4;
+ if (size == 0) return 0;
+ do {
+ data--;
+ size--;
+ } while (*data == base64Fillchar);
+ size++;
+ return (unsigned int)((size*3) / 4);
+}
+
+unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf,
+ int len, XMLError *xe) {
+ if (xe) *xe = eXMLErrorNone;
+ int i = 0, p = 0;
+ unsigned char d, c;
+ for (;;) {
#ifdef _XMLWIDECHAR
#define BASE64DECODE_READ_NEXT_CHAR(c) \
@@ -2834,58 +3312,82 @@ unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int
#endif
BASE64DECODE_READ_NEXT_CHAR(c)
- if (c==99) { return 2; }
- if (c==96)
- {
- if (p==(int)len) return 2;
- if (xe) *xe=eXMLErrorBase64DecodeTruncatedData;
+ if (c == 99) {
+ return 2;
+ }
+ if (c == 96) {
+ if (p == (int)len) return 2;
+ if (xe) *xe = eXMLErrorBase64DecodeTruncatedData;
return 1;
}
BASE64DECODE_READ_NEXT_CHAR(d)
- if ((d==99)||(d==96)) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
- if (p==(int)len) { if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; return 0; }
- buf[p++]=(unsigned char)((c<<2)|((d>>4)&0x3));
+ if ((d == 99) || (d == 96)) {
+ if (xe) *xe = eXMLErrorBase64DecodeTruncatedData;
+ return 1;
+ }
+ if (p == (int)len) {
+ if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall;
+ return 0;
+ }
+ buf[p++] = (unsigned char)((c << 2) | ((d >> 4) & 0x3));
BASE64DECODE_READ_NEXT_CHAR(c)
- if (c==99) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
- if (p==(int)len)
- {
- if (c==96) return 2;
- if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall;
+ if (c == 99) {
+ if (xe) *xe = eXMLErrorBase64DecodeTruncatedData;
+ return 1;
+ }
+ if (p == (int)len) {
+ if (c == 96) return 2;
+ if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall;
return 0;
}
- if (c==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
- buf[p++]=(unsigned char)(((d<<4)&0xf0)|((c>>2)&0xf));
+ if (c == 96) {
+ if (xe) *xe = eXMLErrorBase64DecodeTruncatedData;
+ return 1;
+ }
+ buf[p++] = (unsigned char)(((d << 4) & 0xf0) | ((c >> 2) & 0xf));
BASE64DECODE_READ_NEXT_CHAR(d)
- if (d==99 ) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
- if (p==(int)len)
- {
- if (d==96) return 2;
- if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall;
+ if (d == 99 ) {
+ if (xe) *xe = eXMLErrorBase64DecodeTruncatedData;
+ return 1;
+ }
+ if (p == (int)len) {
+ if (d == 96) return 2;
+ if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall;
return 0;
}
- if (d==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
- buf[p++]=(unsigned char)(((c<<6)&0xc0)|d);
+ if (d == 96) {
+ if (xe) *xe = eXMLErrorBase64DecodeTruncatedData;
+ return 1;
+ }
+ buf[p++] = (unsigned char)(((c << 6) & 0xc0) | d);
}
}
#undef BASE64DECODE_READ_NEXT_CHAR
-void XMLParserBase64Tool::alloc(int newsize)
-{
- if ((!buf)&&(newsize)) { buf=malloc(newsize); buflen=newsize; return; }
- if (newsize>buflen) { buf=realloc(buf,newsize); buflen=newsize; }
+void XMLParserBase64Tool::alloc(int newsize) {
+ if ((!buf) && (newsize)) {
+ buf = malloc(newsize);
+ buflen = newsize;
+ return;
+ }
+ if (newsize > buflen) {
+ buf = realloc(buf, newsize);
+ buflen = newsize;
+ }
}
-unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe)
-{
- if (xe) *xe=eXMLErrorNone;
- unsigned int len=decodeSize(data,xe);
- if (outlen) *outlen=len;
+unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) {
+ if (xe) *xe = eXMLErrorNone;
+ unsigned int len = decodeSize(data, xe);
+ if (outlen) *outlen = len;
if (!len) return NULL;
- alloc(len+1);
- if(!decode(data,(unsigned char*)buf,len,xe)){ return NULL; }
+ alloc(len + 1);
+ if (!decode(data, (unsigned char*)buf, len, xe)) {
+ return NULL;
+ }
return (unsigned char*)buf;
}
diff --git a/ext/mcpat/xmlParser.h b/ext/mcpat/xmlParser.h
index e29136cb9..dd43694bb 100644
--- a/ext/mcpat/xmlParser.h
+++ b/ext/mcpat/xmlParser.h
@@ -42,6 +42,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Copyright (c) 2002, Business-Insight
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* <a href="http://www.Business-Insight.com">Business-Insight</a>
* All rights reserved.
*
@@ -160,33 +161,32 @@
#define XMLDLLENTRY
#ifndef XML_NO_WIDE_CHAR
#include <wchar.h> // to have 'wcsrtombs' for ANSI version
- // to have 'mbsrtowcs' for WIDECHAR version
+// to have 'mbsrtowcs' for WIDECHAR version
#endif
#endif
// Some common types for char set portable code
#ifdef _XMLWIDECHAR
- #define _CXML(c) L ## c
- #define XMLCSTR const wchar_t *
- #define XMLSTR wchar_t *
- #define XMLCHAR wchar_t
+#define _CXML(c) L ## c
+#define XMLCSTR const wchar_t *
+#define XMLSTR wchar_t *
+#define XMLCHAR wchar_t
#else
- #define _CXML(c) c
- #define XMLCSTR const char *
- #define XMLSTR char *
- #define XMLCHAR char
+#define _CXML(c) c
+#define XMLCSTR const char *
+#define XMLSTR char *
+#define XMLCHAR char
#endif
#ifndef FALSE
- #define FALSE 0
+#define FALSE 0
#endif /* FALSE */
#ifndef TRUE
- #define TRUE 1
+#define TRUE 1
#endif /* TRUE */
/// Enumeration for XML parse errors.
-typedef enum XMLError
-{
+typedef enum XMLError {
eXMLErrorNone = 0,
eXMLErrorMissingEndTag,
eXMLErrorNoXMLTagFound,
@@ -213,30 +213,32 @@ typedef enum XMLError
/// Enumeration used to manage type of data. Use in conjunction with structure XMLNodeContents
-typedef enum XMLElementType
-{
- eNodeChild=0,
- eNodeAttribute=1,
- eNodeText=2,
- eNodeClear=3,
- eNodeNULL=4
+typedef enum XMLElementType {
+ eNodeChild = 0,
+ eNodeAttribute = 1,
+ eNodeText = 2,
+ eNodeClear = 3,
+ eNodeNULL = 4
} XMLElementType;
/// Structure used to obtain error details if the parse fails.
-typedef struct XMLResults
-{
+typedef struct XMLResults {
enum XMLError error;
- int nLine,nColumn;
+ int nLine;
+ int nColumn;
} XMLResults;
/// Structure for XML clear (unformatted) node (usually comments)
typedef struct XMLClear {
- XMLCSTR lpszValue; XMLCSTR lpszOpenTag; XMLCSTR lpszCloseTag;
+ XMLCSTR lpszValue;
+ XMLCSTR lpszOpenTag;
+ XMLCSTR lpszCloseTag;
} XMLClear;
/// Structure for XML attribute.
typedef struct XMLAttribute {
- XMLCSTR lpszName; XMLCSTR lpszValue;
+ XMLCSTR lpszName;
+ XMLCSTR lpszValue;
} XMLAttribute;
/// XMLElementPosition are not interchangeable with simple indexes
@@ -256,9 +258,8 @@ struct XMLNodeContents;
* <li> XMLNode::openFileHelper </li>
* <li> XMLNode::createXMLTopNode (or XMLNode::createXMLTopNode_WOSD)</li>
* </ul> */
-typedef struct XMLDLLENTRY XMLNode
-{
- private:
+typedef struct XMLDLLENTRY XMLNode {
+private:
struct XMLNodeDataTag;
@@ -267,7 +268,7 @@ typedef struct XMLDLLENTRY XMLNode
/// Constructors are protected, so use instead one of: XMLNode::parseString, XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode
XMLNode(struct XMLNodeDataTag *p);
- public:
+public:
static XMLCSTR getVersion();///< Return the XMLParser library version number
/** @defgroup conversions Parsing XML files/strings to an XMLNode structure and Rendering XMLNode's to files/string.
@@ -275,7 +276,8 @@ typedef struct XMLDLLENTRY XMLNode
* @{ */
/// Parse an XML string and return the root of a XMLNode tree representing the string.
- static XMLNode parseString (XMLCSTR lpXMLString, XMLCSTR tag=NULL, XMLResults *pResults=NULL);
+ static XMLNode parseString(XMLCSTR lpXMLString, XMLCSTR tag = NULL,
+ XMLResults *pResults = NULL);
/**< The "parseString" function parses an XML string and returns the root of a XMLNode tree. The "opposite" of this function is
* the function "createXMLString" that re-creates an XML string from an XMLNode tree. If the XML document is corrupted, the
* "parseString" method will initialize the "pResults" variable with some information that can be used to trace the error.
@@ -288,7 +290,8 @@ typedef struct XMLDLLENTRY XMLNode
*/
/// Parse an XML file and return the root of a XMLNode tree representing the file.
- static XMLNode parseFile (XMLCSTR filename, XMLCSTR tag=NULL, XMLResults *pResults=NULL);
+ static XMLNode parseFile(XMLCSTR filename, XMLCSTR tag = NULL,
+ XMLResults *pResults = NULL);
/**< The "parseFile" function parses an XML file and returns the root of a XMLNode tree. The "opposite" of this function is
* the function "writeToFile" that re-creates an XML file from an XMLNode tree. If the XML document is corrupted, the
* "parseFile" method will initialize the "pResults" variable with some information that can be used to trace the error.
@@ -301,7 +304,7 @@ typedef struct XMLDLLENTRY XMLNode
*/
/// Parse an XML file and return the root of a XMLNode tree representing the file. A very crude error checking is made. An attempt to guess the Char Encoding used in the file is made.
- static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag=NULL);
+ static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag = NULL);
/**< The "openFileHelper" function reports to the screen all the warnings and errors that occurred during parsing of the XML file.
* This function also tries to guess char Encoding (UTF-8, ASCII or SHIFT-JIS) based on the first 200 bytes of the file. Since each
* application has its own way to report and deal with errors, you should rather use the "parseFile" function to parse XML files
@@ -322,7 +325,7 @@ typedef struct XMLDLLENTRY XMLNode
static XMLCSTR getError(XMLError error); ///< this gives you a user-friendly explanation of the parsing error
/// Create an XML string starting from the current XMLNode.
- XMLSTR createXMLString(int nFormat=1, int *pnSize=NULL) const;
+ XMLSTR createXMLString(int nFormat = 1, int *pnSize = NULL) const;
/**< The returned string should be free'd using the "freeXMLString" function.
*
* If nFormat==0, no formatting is required; otherwise this returns a user-friendly XML string from a given element
@@ -330,8 +333,8 @@ typedef struct XMLDLLENTRY XMLNode
/// Save the content of an xmlNode inside a file
XMLError writeToFile(XMLCSTR filename,
- const char *encoding=NULL,
- char nFormat=1) const;
+ const char *encoding = NULL,
+ char nFormat = 1) const;
/**< If nFormat==0, no formatting is required; otherwise this returns a user-friendly XML string from a given element with appropriate white spaces and carriage returns.
* If the global parameter "characterEncoding==encoding_UTF8", then the "encoding" parameter is ignored and always set to "utf-8".
* If the global parameter "characterEncoding==encoding_ShiftJIS", then the "encoding" parameter is ignored and always set to "SHIFT-JIS".
@@ -349,14 +352,15 @@ typedef struct XMLDLLENTRY XMLNode
XMLNode getChildNode(int i=0) const; ///< return ith child node
XMLNode getChildNode(XMLCSTR name, int i) const; ///< return ith child node with specific name (return an empty node if failing). If i==-1, this returns the last XMLNode with the given name.
XMLNode getChildNode(XMLCSTR name, int *i=NULL) const; ///< return next child node with specific name (return an empty node if failing)
+ XMLNode* getChildNodePtr(XMLCSTR name, int *j) const;
XMLNode getChildNodeWithAttribute(XMLCSTR tagName,
XMLCSTR attributeName,
XMLCSTR attributeValue=NULL,
int *i=NULL) const; ///< return child node with specific name/attribute (return an empty node if failing)
XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing=0, XMLCHAR sep='/');
- ///< return the first child node with specific path
+ ///< return the first child node with specific path
XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing=0, XMLCHAR sep='/');
- ///< return the first child node with specific path.
+ ///< return the first child node with specific path.
int nChildNode(XMLCSTR name) const; ///< return the number of child node with specific name
int nChildNode() const; ///< nbr of child node
@@ -418,12 +422,12 @@ typedef struct XMLDLLENTRY XMLNode
*/
XMLCSTR updateName(XMLCSTR lpszName); ///< change node's name
XMLAttribute *updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added
- XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added
- XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
+ XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added
+ XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName, XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
XMLCSTR updateText(XMLCSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added
XMLCSTR updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added
XMLClear *updateClear(XMLCSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added
- XMLClear *updateClear(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
+ XMLClear *updateClear(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
XMLClear *updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added
/** @} */
@@ -482,12 +486,12 @@ typedef struct XMLDLLENTRY XMLNode
XMLCSTR updateName_WOSD(XMLSTR lpszName); ///< change node's name
XMLAttribute *updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added
- XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added
- XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
+ XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added
+ XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName, XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added
XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added
XMLClear *updateClear_WOSD(XMLSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added
- XMLClear *updateClear_WOSD(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
+ XMLClear *updateClear_WOSD(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
XMLClear *updateClear_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added
/** @} */
@@ -508,15 +512,14 @@ typedef struct XMLDLLENTRY XMLNode
/** @} */
/// Enumeration for XML character encoding.
- typedef enum XMLCharEncoding
- {
- char_encoding_error=0,
- char_encoding_UTF8=1,
- char_encoding_legacy=2,
- char_encoding_ShiftJIS=3,
- char_encoding_GB2312=4,
- char_encoding_Big5=5,
- char_encoding_GBK=6 // this is actually the same as Big5
+ typedef enum XMLCharEncoding {
+ char_encoding_error = 0,
+ char_encoding_UTF8 = 1,
+ char_encoding_legacy = 2,
+ char_encoding_ShiftJIS = 3,
+ char_encoding_GB2312 = 4,
+ char_encoding_Big5 = 5,
+ char_encoding_GBK = 6 // this is actually the same as Big5
} XMLCharEncoding;
/** \addtogroup conversions
@@ -589,48 +592,46 @@ typedef struct XMLDLLENTRY XMLNode
* If an inconsistency in the encoding is detected, then the return value is "0". */
/** @} */
- private:
- // these are functions and structures used internally by the XMLNode class (don't bother about them):
-
- typedef struct XMLNodeDataTag // to allow shallow copy and "intelligent/smart" pointers (automatic delete):
- {
- XMLCSTR lpszName; // Element name (=NULL if root)
- int nChild, // Number of child nodes
- nText, // Number of text fields
- nClear, // Number of Clear fields (comments)
- nAttribute; // Number of attributes
- char isDeclaration; // Whether node is an XML declaration - '<?xml ?>'
- struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root)
- XMLNode *pChild; // Array of child nodes
- XMLCSTR *pText; // Array of text fields
- XMLClear *pClear; // Array of clear fields
- XMLAttribute *pAttribute; // Array of attributes
- int *pOrder; // order of the child_nodes,text_fields,clear_fields
- int ref_count; // for garbage collection (smart pointers)
- } XMLNodeData;
- XMLNodeData *d;
-
- char parseClearTag(void *px, void *pa);
- char maybeAddTxT(void *pa, XMLCSTR tokenPStr);
- int ParseXMLElement(void *pXML);
- void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype);
- int indexText(XMLCSTR lpszValue) const;
- int indexClear(XMLCSTR lpszValue) const;
- XMLNode addChild_priv(int,XMLSTR,char,int);
- XMLAttribute *addAttribute_priv(int,XMLSTR,XMLSTR);
- XMLCSTR addText_priv(int,XMLSTR,int);
- XMLClear *addClear_priv(int,XMLSTR,XMLCSTR,XMLCSTR,int);
- void emptyTheNode(char force);
- static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype);
- static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat);
- static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index);
- static void exactMemory(XMLNodeData *d);
- static int detachFromParent(XMLNodeData *d);
+private:
+ // these are functions and structures used internally by the XMLNode class (don't bother about them):
+
+ typedef struct XMLNodeDataTag { // to allow shallow copy and "intelligent/smart" pointers (automatic delete):
+ XMLCSTR lpszName; // Element name (=NULL if root)
+ int nChild, // Number of child nodes
+ nText, // Number of text fields
+ nClear, // Number of Clear fields (comments)
+ nAttribute; // Number of attributes
+ char isDeclaration; // Whether node is an XML declaration - '<?xml ?>'
+ struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root)
+ XMLNode *pChild; // Array of child nodes
+ XMLCSTR *pText; // Array of text fields
+ XMLClear *pClear; // Array of clear fields
+ XMLAttribute *pAttribute; // Array of attributes
+ int *pOrder; // order of the child_nodes,text_fields,clear_fields
+ int ref_count; // for garbage collection (smart pointers)
+ } XMLNodeData;
+ XMLNodeData *d;
+
+ char parseClearTag(void *px, void *pa);
+ char maybeAddTxT(void *pa, XMLCSTR tokenPStr);
+ int ParseXMLElement(void *pXML);
+ void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype);
+ int indexText(XMLCSTR lpszValue) const;
+ int indexClear(XMLCSTR lpszValue) const;
+ XMLNode addChild_priv(int, XMLSTR, char, int);
+ XMLAttribute *addAttribute_priv(int, XMLSTR, XMLSTR);
+ XMLCSTR addText_priv(int, XMLSTR, int);
+ XMLClear *addClear_priv(int, XMLSTR, XMLCSTR, XMLCSTR, int);
+ void emptyTheNode(char force);
+ static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype);
+ static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat);
+ static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index);
+ static void exactMemory(XMLNodeData *d);
+ static int detachFromParent(XMLNodeData *d);
} XMLNode;
/// This structure is given by the function XMLNode::enumContents.
-typedef struct XMLNodeContents
-{
+typedef struct XMLNodeContents {
/// This dictates what's the content of the XMLNodeContent
enum XMLElementType etype;
/**< should be an union to access the appropriate data. Compiler does not allow union of object with constructor... too bad. */
@@ -664,12 +665,12 @@ XMLDLLENTRY void freeXMLString(XMLSTR t); // {free(t);}
* delete them without any trouble.
*
* @{ */
-XMLDLLENTRY char xmltob(XMLCSTR xmlString,char defautValue=0);
-XMLDLLENTRY int xmltoi(XMLCSTR xmlString,int defautValue=0);
-XMLDLLENTRY long xmltol(XMLCSTR xmlString,long defautValue=0);
-XMLDLLENTRY double xmltof(XMLCSTR xmlString,double defautValue=.0);
-XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString,XMLCSTR defautValue=_CXML(""));
-XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0'));
+XMLDLLENTRY char xmltob(XMLCSTR xmlString, char defautValue=0);
+XMLDLLENTRY int xmltoi(XMLCSTR xmlString, int defautValue=0);
+XMLDLLENTRY long xmltol(XMLCSTR xmlString, long defautValue=0);
+XMLDLLENTRY double xmltof(XMLCSTR xmlString, double defautValue=.0);
+XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString, XMLCSTR defautValue=_CXML(""));
+XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString, XMLCHAR defautValue=_CXML('\0'));
/** @} */
/** @defgroup ToXMLStringTool Helper class to create XML files using "printf", "fprintf", "cout",... functions.
@@ -685,10 +686,9 @@ XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0'));
* \note If you are creating from scratch an XML file using the provided XMLNode class
* you must not use the "ToXMLStringTool" class (because the "XMLNode" class does the
* processing job for you during rendering).*/
-typedef struct XMLDLLENTRY ToXMLStringTool
-{
+typedef struct XMLDLLENTRY ToXMLStringTool {
public:
- ToXMLStringTool(): buf(NULL),buflen(0){}
+ ToXMLStringTool(): buf(NULL), buflen(0){}
~ToXMLStringTool();
void freeBuffer();///<call this function when you have finished using this object to release memory used by the internal buffer.
@@ -718,10 +718,9 @@ private:
* b64-encoded text included inside the XML file, use "decode". Alternatively, these
* functions can also be used to "encrypt/decrypt" some critical data contained inside
* the XML (it's not a strong encryption at all, but sometimes it can be useful). */
-typedef struct XMLDLLENTRY XMLParserBase64Tool
-{
+typedef struct XMLDLLENTRY XMLParserBase64Tool {
public:
- XMLParserBase64Tool(): buf(NULL),buflen(0){}
+ XMLParserBase64Tool(): buf(NULL), buflen(0){}
~XMLParserBase64Tool();
void freeBuffer();///< Call this function when you have finished using this object to release memory used by the internal buffer.