summary refs log tree commit diff
path: root/ext/mcpat/cachearray.cc
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mcpat/cachearray.cc')
-rw-r--r-- ext/mcpat/cachearray.cc 321
1 files changed, 321 insertions, 0 deletions
diff --git a/ext/mcpat/cachearray.cc b/ext/mcpat/cachearray.cc
new file mode 100644
index 000000000..cebea289e
--- /dev/null
+++ b/ext/mcpat/cachearray.cc
@@ -0,0 +1,321 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Joel Hestness
+ * Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#include <cmath>
+#include <iostream>
+
+#include "area.h"
+#include "cachearray.h"
+#include "common.h"
+#include "decoder.h"
+#include "parameter.h"
+
+using namespace std;
+
+double CacheArray::area_efficiency_threshold = 20.0;  // compared with data_array2->area_efficiency in the constructor's search loop
+int CacheArray::ed = 0;  // copied into l_ip.ed before the constructor's timing re-search
+// Fixed weights, chosen so that the timing constraints can be satisfied.
+int CacheArray::delay_wt = 100;
+int CacheArray::cycle_time_wt = 1000;
+// Fixed weight, used for the exhaustive search of individual components.
+int CacheArray::area_wt = 10;
+// Fixed weights, used for the exhaustive search of individual components.
+int CacheArray::dynamic_power_wt = 10;
+int CacheArray::leakage_power_wt = 10;
+// Fixed deviations, chosen so that the timing constraints can be satisfied.
+int CacheArray::delay_dev = 1000000;
+int CacheArray::cycle_time_dev = 100;  // starting value; the constructor lowers l_ip.cycle_time_dev from here
+// Fixed deviation, used for the exhaustive search of individual components.
+int CacheArray::area_dev = 1000000;
+// Fixed deviations, used for the exhaustive search of individual components.
+int CacheArray::dynamic_power_dev = 1000000;
+int CacheArray::leakage_power_dev = 1000000;
+int CacheArray::cycle_time_dev_threshold = 10;  // both the per-iteration step and the lower bound for l_ip.cycle_time_dev
+
+CacheArray::CacheArray(XMLNode* _xml_data,  // _xml_data is passed on to the McPATComponent base
+ const InputParameter *configure_interface, string _name,  // the configuration is copied into l_ip; _name becomes name
+ enum Device_ty device_ty_, double _clockRate,  // _clockRate is stored in clockRate
+ bool opt_local_, enum Core_type core_ty_, bool _is_default)  // opt_local_ gates the timing re-search below
+ : McPATComponent(_xml_data), l_ip(*configure_interface),
+ device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
+ is_default(_is_default), sbt_dir_overhead(0) { /*
+  * Builds the model for one array. The steps below are:
+  *  1. clamp l_ip.cache_sz to MIN_BUFFER_SIZE and validate l_ip; the
+  *     process exits with status 1 if l_ip.error_checking() fails;
+  *  2. run cacti_interface() once on those settings;
+  *  3. when opt_for_clk (not declared in this function) and opt_local are
+  *     both set and that first result misses l_ip.throughput or
+  *     l_ip.latency, repeat the search with the class-wide weights while
+  *     lowering l_ip.cycle_time_dev, then keep the collected candidate
+  *     with the lowest read dynamic energy;
+  *  4. scale the area, dynamic energy and leakage of the final result.
+  */
+ name = _name;
+ clockRate = _clockRate;
+ if (l_ip.cache_sz < MIN_BUFFER_SIZE) {  // never model less than MIN_BUFFER_SIZE
+ l_ip.cache_sz = MIN_BUFFER_SIZE;
+ }
+
+ if (!l_ip.error_checking(name)) {  // invalid configuration: terminate the process
+ exit(1);
+ }
+
+ sbt_tdp_stats.reset();
+ sbt_rtp_stats.reset();
+
+ // Compute the initial search point from the settings as given
+ local_result.valid = false;  // NOTE(review): the next call assigns all of local_result, so this value is presumably overwritten -- confirm
+ compute_base_power();
+
+ // Set up the cache by searching the design space with CACTI
+ list<uca_org_t > candidate_solutions(0);
+ list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
+ uca_org_t* temp_res = NULL;
+ double throughput = l_ip.throughput;  // target that cycle_time is compared with
+ double latency = l_ip.latency;  // target that access_time is compared with
+ bool throughput_overflow = true;  // true while the throughput target is unmet
+ bool latency_overflow = true;  // true while the latency target is unmet
+
+ if ((local_result.cycle_time - throughput) <= 1e-10 )  // 1e-10 is the tolerance used in every timing comparison
+ throughput_overflow = false;
+ if ((local_result.access_time - latency) <= 1e-10)
+ latency_overflow = false;
+
+ if (opt_for_clk && opt_local) {
+ if (throughput_overflow || latency_overflow) {  // the initial point missed at least one target
+ l_ip.ed = ed;  // install the class-wide search settings in l_ip
+
+ l_ip.delay_wt = delay_wt;
+ l_ip.cycle_time_wt = cycle_time_wt;
+
+ l_ip.area_wt = area_wt;
+ l_ip.dynamic_power_wt = dynamic_power_wt;
+ l_ip.leakage_power_wt = leakage_power_wt;
+
+ l_ip.delay_dev = delay_dev;
+ l_ip.cycle_time_dev = cycle_time_dev;
+
+ l_ip.area_dev = area_dev;
+ l_ip.dynamic_power_dev = dynamic_power_dev;
+ l_ip.leakage_power_dev = leakage_power_dev;
+
+ // Reset the overflow flags before starting the optimization iterations
+ throughput_overflow = true;
+ latency_overflow = true;
+
+ // Clean up the result that was optimized for ED^2P
+ temp_res = &local_result;
+ temp_res->cleanup();
+ }
+
+
+ while ((throughput_overflow || latency_overflow) &&  // runs only if the branch above was taken; otherwise both flags are already false
+ l_ip.cycle_time_dev > cycle_time_dev_threshold) {
+ compute_base_power();
+
+ // This is the cycle_time_dev to be used for the next iteration
+ l_ip.cycle_time_dev -= cycle_time_dev_threshold;
+
+ // Iterations go from best area to worst area, i.e. worst timing to best timing
+ if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
+ (local_result.access_time - latency) <= 1e-10) ||
+ (local_result.data_array2->area_efficiency <
+ area_efficiency_threshold && l_ip.assoc == 0)) {  // both targets met, or low area efficiency with assoc == 0
+ // If no satisfiable solution is found, the most aggressive one
+ // is left
+ candidate_solutions.push_back(local_result);
+ if (((local_result.cycle_time - throughput) <= 1e-10) &&
+ ((local_result.access_time - latency) <= 1e-10)) {
+ // Only stop optimizing when timing is met, not because the assoc == 0 (CAM) clause matched
+ throughput_overflow = false;
+ latency_overflow = false;
+ }
+
+ } else {
+ if ((local_result.cycle_time - throughput) <= 1e-10)  // each flag is cleared on its own and never set again inside the loop
+ throughput_overflow = false;
+ if ((local_result.access_time - latency) <= 1e-10)
+ latency_overflow = false;  // NOTE(review): the loop can therefore end with both flags clear although no single result met both targets -- confirm this is intended
+
+ // If cycle_time_dev is not above the threshold, local_result is the last
+ // result and must not be cleaned up
+ if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
+ // Only solutions not saved in the list need to be
+ // cleaned up
+ temp_res = &local_result;
+ temp_res->cleanup();  // NOTE(review): if the loop exits right after this because both flags are now clear, the code below still reads local_result -- verify cleanup() leaves it usable
+ }
+ }
+ }
+
+
+ if (l_ip.assoc > 0) {
+ // For array structures other than CAM and FA, give a warning but still
+ // provide the result with the best timing found
+ if (throughput_overflow == true)
+ cout << "Warning: " << name
+ << " array structure cannot satisfy throughput constraint."
+ << endl;
+ if (latency_overflow == true)
+ cout << "Warning: " << name
+ << " array structure cannot satisfy latency constraint."
+ << endl;
+ }
+
+ double min_dynamic_energy = BIGNUM;  // starting bound; a candidate must be strictly below the current bound to be selected
+ if (candidate_solutions.empty() == false) {
+ local_result.valid = true;
+ for (candidate_iter = candidate_solutions.begin();
+ candidate_iter != candidate_solutions.end();
+ ++candidate_iter) {
+ if (min_dynamic_energy >  // strict '>': the earliest of equal candidates wins
+ (candidate_iter)->power.readOp.dynamic) {
+ min_dynamic_energy =
+ (candidate_iter)->power.readOp.dynamic;
+ min_dynamic_energy_iter = candidate_iter;
+ local_result = *(min_dynamic_energy_iter);  // NOTE(review): a previously selected candidate is not cleaned up when it is superseded here -- possible leak, confirm
+
+ } else {
+ candidate_iter->cleanup() ;  // not better than the current best: discard it
+ }
+
+ }
+
+
+ }
+ candidate_solutions.clear();
+ }
+
+ double long_channel_device_reduction =
+ longer_channel_device_reduction(device_ty, core_ty);
+
+ double macro_layout_overhead = g_tp.macro_layout_overhead;
+ double chip_PR_overhead = g_tp.chip_layout_overhead;
+ double total_overhead = macro_layout_overhead * chip_PR_overhead;  // combined layout factor applied to the area
+ local_result.area *= total_overhead;
+
+ // Maintain constant power density
+ double pppm_t[4] = {total_overhead, 1, 1, total_overhead};  // multipliers applied through operator* on the power objects; the meaning of each slot is defined elsewhere
+
+ double sckRation = g_tp.sckt_co_eff;  // factor applied to every dynamic energy below
+ local_result.power.readOp.dynamic *= sckRation;
+ local_result.power.writeOp.dynamic *= sckRation;
+ local_result.power.searchOp.dynamic *= sckRation;
+ local_result.power.readOp.leakage *= l_ip.nbanks;  // leakage is multiplied by the bank count
+ local_result.power.readOp.longer_channel_leakage =
+ local_result.power.readOp.leakage * long_channel_device_reduction;
+ local_result.power = local_result.power * pppm_t;
+
+ local_result.data_array2->power.readOp.dynamic *= sckRation;  // same scaling for the data array, which is dereferenced unconditionally
+ local_result.data_array2->power.writeOp.dynamic *= sckRation;
+ local_result.data_array2->power.searchOp.dynamic *= sckRation;
+ local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.data_array2->power.readOp.longer_channel_leakage =
+ local_result.data_array2->power.readOp.leakage *
+ long_channel_device_reduction;
+ local_result.data_array2->power = local_result.data_array2->power * pppm_t;
+
+
+ if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) {  // tag array is scaled only for caches that are not pure CAM, pure RAM or fully associative
+ local_result.tag_array2->power.readOp.dynamic *= sckRation;
+ local_result.tag_array2->power.writeOp.dynamic *= sckRation;
+ local_result.tag_array2->power.searchOp.dynamic *= sckRation;
+ local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.tag_array2->power.readOp.longer_channel_leakage =
+ local_result.tag_array2->power.readOp.leakage *
+ long_channel_device_reduction;
+ local_result.tag_array2->power =
+ local_result.tag_array2->power * pppm_t;
+ }
+}
+
+void CacheArray::compute_base_power() {  // Runs cacti_interface() on the current l_ip settings
+ local_result = cacti_interface(&l_ip);  // replaces local_result; the previous value is not cleaned up here
+}
+
+/**
+ * Publishes the area of the array.
+ *
+ * The value in local_result.area is stored unchanged in the component's
+ * `area` object and reported, divided by 1e6, in output_data.area
+ * (presumably a um^2 -> mm^2 conversion -- confirm against the units that
+ * CACTI returns).
+ */
+void CacheArray::computeArea() {
+    const double raw_area = local_result.area;
+
+    area.set_area(raw_area);
+    output_data.area = raw_area / 1e6;
+}
+
+/**
+ * Fills output_data with the leakage, peak dynamic power and runtime dynamic
+ * energy of this array, based on the CACTI result held in local_result.
+ *
+ * Leakage values are copied from the whole-array result. The dynamic figures
+ * weight per-access energies by the counts in tdp_stats (peak, which is then
+ * multiplied by clockRate) and rtp_stats (runtime). Arrays with a non-zero
+ * l_ip.assoc and l_ip.is_cache set are costed from their separate tag and
+ * data arrays; every other array uses the combined read/write/search
+ * energies. When sbt_dir_overhead is positive, the contribution returned by
+ * computeSBTDynEnergy() is added to both figures.
+ */
+void CacheArray::computeEnergy() {
+    // Leakage does not depend on the access statistics
+    output_data.subthreshold_leakage_power = local_result.power.readOp.leakage;
+    output_data.gate_leakage_power = local_result.power.readOp.gate_leakage;
+
+    const bool tagged_cache = l_ip.assoc && l_ip.is_cache;
+
+    if (!tagged_cache) {
+        // Single combined array: weight read, write and search energies
+        // by their access counts
+        output_data.peak_dynamic_power =
+            local_result.power.readOp.dynamic * tdp_stats.readAc.access +
+            local_result.power.writeOp.dynamic * tdp_stats.writeAc.access +
+            local_result.power.searchOp.dynamic * tdp_stats.searchAc.access;
+
+        output_data.runtime_dynamic_energy =
+            local_result.power.readOp.dynamic * rtp_stats.readAc.access +
+            local_result.power.writeOp.dynamic * rtp_stats.writeAc.access +
+            local_result.power.searchOp.dynamic * rtp_stats.searchAc.access;
+    } else {
+        // Standard cache array with separate data and tag arrays
+        const double tag_read = local_result.tag_array2->power.readOp.dynamic;
+        const double tag_write =
+            local_result.tag_array2->power.writeOp.dynamic;
+        const double data_read =
+            local_result.data_array2->power.readOp.dynamic;
+        const double data_write =
+            local_result.data_array2->power.writeOp.dynamic;
+
+        // Peak: a hit pays for a tag read plus the data access, a miss
+        // pays for the tag read only
+        output_data.peak_dynamic_power =
+            (tag_read + data_read) * tdp_stats.readAc.hit +
+            tag_read * tdp_stats.readAc.miss +
+            (tag_read + data_write) * tdp_stats.writeAc.hit +
+            tag_read * tdp_stats.writeAc.miss;
+
+        // Runtime: data accesses plus tag accesses, the latter multiplied
+        // by the associativity
+        output_data.runtime_dynamic_energy =
+            data_read * rtp_stats.dataReadAc.access +
+            data_write * rtp_stats.dataWriteAc.access +
+            (tag_read * rtp_stats.tagReadAc.access +
+             tag_write * rtp_stats.tagWriteAc.access) * l_ip.assoc;
+    }
+
+    // Both branches multiply the peak figure by the clock rate
+    output_data.peak_dynamic_power *= clockRate;
+
+    // An SBT directory adds further dynamic power and energy
+    if (sbt_dir_overhead > 0) {
+        output_data.peak_dynamic_power +=
+            (computeSBTDynEnergy(&sbt_tdp_stats) * clockRate);
+        output_data.runtime_dynamic_energy +=
+            computeSBTDynEnergy(&sbt_rtp_stats);
+    }
+}
+
+CacheArray::~CacheArray() {  // Hands the final CACTI result back for cleanup when the component is destroyed
+ local_result.cleanup();  // cleanup() is defined elsewhere; presumably it frees the result's sub-arrays -- confirm
+}