summary | refs | log | tree | commit | diff
path: root/src/mem/ruby/network/fault_model/FaultModel.cc
diff options
context:
space:
mode:
author: Tushar Krishna <tushar@csail.mit.edu>, 2011-11-04 18:40:22 -0400
committer: Tushar Krishna <tushar@csail.mit.edu>, 2011-11-04 18:40:22 -0400
commit: ac993964a93b7d2190aa178dc0d5c1c0ade4a966 (patch)
tree: 292a8fd447690367721ec955ac2b3895d09731df /src/mem/ruby/network/fault_model/FaultModel.cc
parent: 5ff6e05ffdf926523969de471044c9de8b00d593 (diff)
download: gem5-ac993964a93b7d2190aa178dc0d5c1c0ade4a966.tar.xz
GARNET: adding a fault model for resilient on-chip network research.
This patch adds a fault model, which provides the probability of a number of architectural faults in the interconnection network (e.g., data corruption, misrouting). These probabilities can be used to realistically inject faults in GARNET and faithfully evaluate the effectiveness of novel resilient NoC architectures.
Diffstat (limited to 'src/mem/ruby/network/fault_model/FaultModel.cc')
-rw-r--r-- src/mem/ruby/network/fault_model/FaultModel.cc | 278
1 files changed, 278 insertions, 0 deletions
diff --git a/src/mem/ruby/network/fault_model/FaultModel.cc b/src/mem/ruby/network/fault_model/FaultModel.cc
new file mode 100644
index 000000000..195f7c66c
--- /dev/null
+++ b/src/mem/ruby/network/fault_model/FaultModel.cc
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2011 Massachusetts Institute of Technology
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Konstantinos Aisopos
+ */
+
+/*
+ * Official Tool Website: www.mit.edu/~kaisopos/FaultModel
+ *
+ * If you use our tool for academic research, we request that you cite:
+ * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling
+ * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip.
+ * Proceedings of the 48th Design Automation Conference (DAC'11)
+ */
+
+// C includes
+#include <assert.h>
+#include <stdio.h>
+
+// C++ includes
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+// GEM5 includes
+#include "FaultModel.hh"
+#include "base/misc.hh"
+
+// Larger of two values. Each argument is parenthesized so that operands
+// containing low-precedence operators (e.g. '&', '?:') still compare
+// correctly. The selected argument is evaluated twice, so avoid passing
+// expressions with side effects.
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+
+
+/**
+ * Build the fault model from the two flat parameter arrays.
+ *
+ * Both arrays are walked record by record. A negative value found in the
+ * slot where the next record would begin marks the end of the data, so
+ * each array is expected to end with such a terminator (no other bounds
+ * check is performed here).
+ *
+ * @param p parameter object providing baseline_fault_vector_database and
+ *          temperature_weights_database
+ */
+FaultModel::FaultModel(const Params *p) : SimObject(p)
+{
+    // read configurations into "configurations" vector
+    // format: <buff/vc> <vcs> <10 fault types>
+    bool more_records = true;
+    for (int i = 0; more_records; i += (fields_per_conf_record)){
+        system_conf configuration;
+        configuration.buff_per_vc =
+            p->baseline_fault_vector_database[i + conf_record_buff_per_vc];
+        configuration.vcs =
+            p->baseline_fault_vector_database[i + conf_record_vcs];
+        for (int fault_index = 0; fault_index < number_of_fault_types;
+             fault_index++){
+            // each database value is stored divided by 100
+            configuration.fault_type[fault_index] =
+                p->baseline_fault_vector_database[i +
+                    conf_record_first_fault_type + fault_index] / 100;
+        }
+        configurations.push_back(configuration);
+        // a negative value where the next record would start ends the list
+        if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){
+            more_records = false;
+        }
+    }
+
+    // read temperature weights into "temperature_weights" vector
+    // format: <temperature> <weight>
+    //
+    // "first_record" is a plain local on purpose: a function-level static
+    // would remain false once the first FaultModel has been constructed, so
+    // every later instance would skip the zero padding below and fail the
+    // assert on its first record.
+    bool first_record = true;
+    more_records = true;
+    for (int i = 0; more_records; i += (fields_per_temperature_record)){
+        int record_temperature =
+            p->temperature_weights_database[i + temperature_record_temp];
+        int record_weight =
+            p->temperature_weights_database[i + temperature_record_weight];
+        if (first_record){
+            // pad with zero weights up to the first recorded temperature,
+            // so that the vector index equals the temperature
+            for (int temperature = 0; temperature < record_temperature;
+                 temperature++){
+                temperature_weights.push_back(0);
+            }
+            first_record = false;
+        }
+        // records must list consecutive temperatures
+        assert(record_temperature == ((int) temperature_weights.size()));
+        temperature_weights.push_back(record_weight);
+        // a negative value where the next record would start ends the list
+        if (p->temperature_weights_database[i +
+               fields_per_temperature_record] < 0){
+            more_records = false;
+        }
+    }
+}
+
+/**
+ * Map a fault type identifier to its printable name.
+ *
+ * @param ft fault type identifier
+ * @return the name of the matching fault type, or "none" when ft matches
+ *         none of them (this includes number_of_fault_types)
+ */
+string
+FaultModel::fault_type_to_string(int ft)
+{
+    // identifier/name pairs, examined in order; the first match wins
+    const struct {
+        int id;
+        const char *label;
+    } known_faults[] = {
+        { data_corruption__few_bits,
+          "data_corruption__few_bits" },
+        { data_corruption__all_bits,
+          "data_corruption__all_bits" },
+        { flit_conservation__flit_duplication,
+          "flit_conservation__flit_duplication" },
+        { flit_conservation__flit_loss_or_split,
+          "flit_conservation__flit_loss_or_split" },
+        { misrouting,
+          "misrouting" },
+        { credit_conservation__credit_generation,
+          "credit_conservation__credit_generation" },
+        { credit_conservation__credit_loss,
+          "credit_conservation__credit_loss" },
+        { erroneous_allocation__VC,
+          "erroneous_allocation__VC" },
+        { erroneous_allocation__switch,
+          "erroneous_allocation__switch" },
+        { unfair_arbitration,
+          "unfair_arbitration" }
+    };
+    const int known_count = sizeof(known_faults) / sizeof(known_faults[0]);
+
+    for (int k = 0; k < known_count; k++){
+        if (ft == known_faults[k].id){
+            return known_faults[k].label;
+        }
+    }
+
+    // number_of_fault_types and any unrecognized value
+    return "none";
+}
+
+
+/**
+ * Register a router with the fault model and link it to the database
+ * record matching its configuration.
+ *
+ * The lookup key is the larger of the two buffers-per-VC arguments together
+ * with number_of_inputs * number_of_vcs_per_input. number_of_outputs is
+ * only checked for being positive.
+ *
+ * Calls fatal() on non-positive arguments or when MAX_VCs /
+ * MAX_BUFFERS_per_VC is exceeded, and panic() when no database record
+ * matches.
+ *
+ * @param number_of_inputs           number of router inputs
+ * @param number_of_outputs          number of router outputs
+ * @param number_of_vcs_per_input    virtual channels per input
+ * @param number_of_buff_per_data_vc buffers per data VC
+ * @param number_of_buff_per_ctrl_vc buffers per control VC
+ * @return the router ID, i.e. the index of the new entry in "routers"
+ */
+int
+FaultModel::declare_router(int number_of_inputs,
+                           int number_of_outputs,
+                           int number_of_vcs_per_input,
+                           int number_of_buff_per_data_vc,
+                           int number_of_buff_per_ctrl_vc)
+{
+    // check inputs (are they legal?)
+    if (number_of_inputs <= 0 || number_of_outputs <= 0 ||
+        number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 ||
+        number_of_buff_per_ctrl_vc <= 0){
+        fatal("Fault Model: ERROR in argument of FaultModel_declare_router!");
+    }
+    int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc,
+                                       number_of_buff_per_ctrl_vc);
+    int total_vcs = number_of_inputs * number_of_vcs_per_input;
+    if (total_vcs > MAX_VCs){
+        fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported");
+    }
+    if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){
+        fatal("Fault Model: ERROR! buffers/VC (MAX_BUFFERS_per_VC) too high");
+    }
+
+    // link the router to a DB record (if several match, the last one wins)
+    int record_hit = -1;
+    for (int record = 0; record < ((int) configurations.size()); record++){
+        if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&&
+            (configurations[record].vcs == total_vcs)){
+            record_hit = record;
+        }
+    }
+    if (record_hit == -1){
+        panic("Fault Model: ERROR! configuration not found in DB. BUG?");
+    }
+
+    // remember the router and return its ID.
+    // The ID is derived from this object's own "routers" vector rather than
+    // from a function-level static counter: a static would be shared by
+    // every FaultModel instance and would hand out IDs that do not index
+    // this instance's routers (the range check done by fault_vector() and
+    // fault_prob() is against routers.size()).
+    routers.push_back(configurations[record_hit]);
+    return ((int) routers.size()) - 1;
+}
+
+/**
+ * Fill in the per-fault-type values of a declared router at a given
+ * temperature.
+ *
+ * Each entry is the router's stored fault value multiplied by the weight
+ * of the temperature. A temperature outside the range covered by
+ * "temperature_weights" is clamped to the nearest covered one and a
+ * warning is issued. An unknown routerID ends in fatal().
+ *
+ * @param routerID          ID of the router
+ * @param temperature_input temperature used to select the weight
+ * @param fault_vector      output array, number_of_fault_types entries
+ * @return true if the temperature was within range, false if it was clamped
+ */
+bool
+FaultModel::fault_vector(int routerID,
+                         int temperature_input,
+                         float fault_vector[])
+{
+    bool in_range = true;
+
+    // reject router IDs that were never handed out
+    const int known_routers = (int) routers.size();
+    if (!(routerID >= 0 && routerID < known_routers)){
+        warn("Fault Model: ERROR! unknown router ID argument.");
+        fatal("Fault Model: Did you enable the fault model flag)?");
+    }
+
+    // clamp the temperature into the range covered by the weights
+    int temperature = temperature_input;
+    if (temperature_input < 0){
+        in_range = false;
+        warn_once("Fault Model: Temperature exceeded simulated lower bound.");
+        warn_once("Fault Model: The fault model is not accurate any more.");
+        temperature = 0;
+    } else if (temperature_input >= ((int) temperature_weights.size())){
+        in_range = false;
+        warn_once("Fault Model: Temperature exceeded simulated upper bound.");
+        warn_once("Fault Model: The fault model is not accurate any more.");
+        temperature = (temperature_weights.size() - 1);
+    }
+
+    // scale every fault type of the router record by the temperature weight
+    const float weight = (float) temperature_weights[temperature];
+    for (int type = 0; type < number_of_fault_types; type++){
+        fault_vector[type] = routers[routerID].fault_type[type] * weight;
+    }
+    return in_range;
+}
+
+/**
+ * Compute the aggregate fault value of a declared router at a given
+ * temperature.
+ *
+ * The result is 1 minus the product, over all fault types, of
+ * (1 - fault value * temperature weight). A temperature outside the range
+ * covered by "temperature_weights" is clamped to the nearest covered one
+ * and a warning is issued. An unknown routerID ends in fatal().
+ *
+ * @param routerID             ID of the router
+ * @param temperature_input    temperature used to select the weight
+ * @param aggregate_fault_prob output, receives the aggregate value
+ * @return true if the temperature was within range, false if it was clamped
+ */
+bool
+FaultModel::fault_prob(int routerID,
+                       int temperature_input,
+                       float *aggregate_fault_prob)
+{
+    // accumulate directly in the caller's variable
+    float &aggregate = *aggregate_fault_prob;
+    aggregate = 1.0;
+    bool in_range = true;
+
+    // reject router IDs that were never handed out
+    const int known_routers = (int) routers.size();
+    if (!(routerID >= 0 && routerID < known_routers)){
+        warn("Fault Model: ERROR! unknown router ID argument.");
+        fatal("Fault Model: Did you enable the fault model flag)?");
+    }
+
+    // clamp the temperature into the range covered by the weights
+    int temperature = temperature_input;
+    if (temperature_input < 0){
+        in_range = false;
+        warn_once("Fault Model: Temperature exceeded simulated lower bound.");
+        warn_once("Fault Model: The fault model is not accurate any more.");
+        temperature = 0;
+    } else if (temperature_input >= ((int) temperature_weights.size())){
+        in_range = false;
+        warn_once("Fault Model: Temperature exceeded simulated upper bound.");
+        warn_once("Fault Model: The fault model is not accurate any more.");
+        temperature = (temperature_weights.size()-1);
+    }
+
+    // first build the product of the (1 - weighted fault value) terms ...
+    for (int type = 0; type < number_of_fault_types; type++){
+        aggregate = aggregate *
+            ( 1.0 - (routers[routerID].fault_type[type] *
+                     ((float)temperature_weights[temperature])) );
+    }
+    // ... then take its complement
+    aggregate = 1.0 - aggregate;
+    return in_range;
+}
+
+// Debugging aid: dumps every configuration record and every temperature
+// weight held by the model to cout.
+void
+FaultModel::print(void)
+{
+    cout << "--- PRINTING configurations ---\n";
+    const int conf_count = (int) configurations.size();
+    for (int c = 0; c < conf_count; c++){
+        const system_conf &conf = configurations[c];
+        cout << "(" << c << ") "
+             << "VCs=" << conf.vcs << " "
+             << "Buff/VC=" << conf.buff_per_vc << " [";
+        // stored values are scaled back by 100 for display
+        for (int type = 0; type < number_of_fault_types; type++){
+            cout << (100 * conf.fault_type[type]) << "% ";
+        }
+        cout << "]\n";
+    }
+
+    cout << "--- PRINTING temperature weights ---\n";
+    const int weight_count = (int) temperature_weights.size();
+    for (int t = 0; t < weight_count; t++){
+        cout << "temperature=" << t << " => "
+             << "weight=" << temperature_weights[t]
+             << "\n";
+    }
+}
+
+// Factory hook of the parameter class: allocates a FaultModel configured
+// from these parameters and returns it.
+FaultModel *
+FaultModelParams::create()
+{
+    FaultModel *model = new FaultModel(this);
+    return model;
+}