diff options
Diffstat (limited to 'src/mem/ruby/network/fault_model/FaultModel.cc')
-rw-r--r-- | src/mem/ruby/network/fault_model/FaultModel.cc | 278 |
1 files changed, 278 insertions, 0 deletions
diff --git a/src/mem/ruby/network/fault_model/FaultModel.cc b/src/mem/ruby/network/fault_model/FaultModel.cc new file mode 100644 index 000000000..195f7c66c --- /dev/null +++ b/src/mem/ruby/network/fault_model/FaultModel.cc @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2011 Massachusetts Institute of Technology + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Konstantinos Aisopos + */ + +/* + * Official Tool Website: www.mit.edu/~kaisopos/FaultModel + * + * If you use our tool for academic research, we request that you cite: + * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling + * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip. + * Proceedings of the 48th Design Automation Conference (DAC'11) + */ + +// C includes +#include <assert.h> +#include <stdio.h> + +// C++ includes +#include <fstream> +#include <iostream> +#include <vector> + +// GEM5 includes +#include "FaultModel.hh" +#include "base/misc.hh" + +#define MAX(a,b) ((a > b) ? (a) : (b)) + + +FaultModel::FaultModel(const Params *p) : SimObject(p) +{ + // read configurations into "configurations" vector + // format: <buff/vc> <vcs> <10 fault types> + bool more_records = true; + for (int i = 0; more_records; i += (fields_per_conf_record)){ + system_conf configuration; + configuration.buff_per_vc = + p->baseline_fault_vector_database[i + conf_record_buff_per_vc]; + configuration.vcs = + p->baseline_fault_vector_database[i + conf_record_vcs]; + for (int fault_index = 0; fault_index < number_of_fault_types; + fault_index++){ + configuration.fault_type[fault_index] = + p->baseline_fault_vector_database[i + + conf_record_first_fault_type + fault_index] / 100; + } + configurations.push_back(configuration); + if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){ + more_records = false; + } + } + + // read temperature weights into "temperature_weights" vector + // format: <temperature> <weight> + more_records = true; + for (int i = 0; more_records; i += (fields_per_temperature_record)){ + int record_temperature = + p->temperature_weights_database[i + temperature_record_temp]; + int record_weight = + p->temperature_weights_database[i + temperature_record_weight]; + static int first_record = true; + if (first_record){ + for (int temperature = 0; temperature < record_temperature; + temperature++){ + temperature_weights.push_back(0); + } + first_record = false; + } + assert(record_temperature == temperature_weights.size()); + temperature_weights.push_back(record_weight); + if (p->temperature_weights_database[i + + fields_per_temperature_record] < 0){ + more_records = false; + } + } +} + +string +FaultModel::fault_type_to_string(int ft) +{ + if (ft == data_corruption__few_bits){ + return "data_corruption__few_bits"; + } else if (ft == data_corruption__all_bits){ + return "data_corruption__all_bits"; + } else if (ft == flit_conservation__flit_duplication){ + return "flit_conservation__flit_duplication"; + } else if (ft == flit_conservation__flit_loss_or_split){ + return "flit_conservation__flit_loss_or_split"; + } else if (ft == misrouting){ + return "misrouting"; + } else if (ft == credit_conservation__credit_generation){ + return "credit_conservation__credit_generation"; + } else if (ft == credit_conservation__credit_loss){ + return "credit_conservation__credit_loss"; + } else if (ft == erroneous_allocation__VC){ + return "erroneous_allocation__VC"; + } else if (ft == erroneous_allocation__switch){ + return "erroneous_allocation__switch"; + } else if (ft == unfair_arbitration){ + return "unfair_arbitration"; + } else if (ft == number_of_fault_types){ + return "none"; + } else { + return "none"; + } +} + + +int +FaultModel::declare_router(int number_of_inputs, + int number_of_outputs, + int number_of_vcs_per_input, + int number_of_buff_per_data_vc, + int number_of_buff_per_ctrl_vc) +{ + // check inputs (are they legal?) + if (number_of_inputs <= 0 || number_of_outputs <= 0 || + number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 || + number_of_buff_per_ctrl_vc <= 0){ + fatal("Fault Model: ERROR in argument of FaultModel_declare_router!"); + } + int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc, + number_of_buff_per_ctrl_vc); + int total_vcs = number_of_inputs * number_of_vcs_per_input; + if (total_vcs > MAX_VCs){ + fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported"); + } + if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){ + fatal("Fault Model: ERROR! buffers/VC (MAX_BUFFERS_per_VC) too high"); + } + + // link the router to a DB record + int record_hit = -1; + for (int record = 0; record < configurations.size(); record++){ + if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&& + (configurations[record].vcs == total_vcs)){ + record_hit = record; + } + } + if (record_hit == -1){ + panic("Fault Model: ERROR! configuration not found in DB. BUG?"); + } + + // remember the router and return its ID + routers.push_back(configurations[record_hit]); + static int router_index = 0; + return router_index++; +} + +bool +FaultModel::fault_vector(int routerID, + int temperature_input, + float fault_vector[]) +{ + bool ok = true; + + // is the routerID recorded? + if (routerID < 0 || routerID >= ((int) routers.size())){ + warn("Fault Model: ERROR! unknown router ID argument."); + fatal("Fault Model: Did you enable the fault model flag)?"); + } + + // is the temperature too high/too low? + int temperature = temperature_input; + if (temperature_input >= ((int) temperature_weights.size())){ + ok = false; + warn_once("Fault Model: Temperature exceeded simulated upper bound."); + warn_once("Fault Model: The fault model is not accurate any more."); + temperature = (temperature_weights.size() - 1); + } else if (temperature_input < 0){ + ok = false; + warn_once("Fault Model: Temperature exceeded simulated lower bound."); + warn_once("Fault Model: The fault model is not accurate any more."); + temperature = 0; + } + + // recover the router record and return its fault vector + for (int i = 0; i < number_of_fault_types; i++){ + fault_vector[i] = routers[routerID].fault_type[i] * + ((float)temperature_weights[temperature]); + } + return ok; +} + +bool +FaultModel::fault_prob(int routerID, + int temperature_input, + float *aggregate_fault_prob) +{ + *aggregate_fault_prob = 1.0; + bool ok = true; + + // is the routerID recorded? + if (routerID < 0 || routerID >= ((int) routers.size())){ + warn("Fault Model: ERROR! unknown router ID argument."); + fatal("Fault Model: Did you enable the fault model flag)?"); + } + + // is the temperature too high/too low? + int temperature = temperature_input; + if (temperature_input >= ((int) temperature_weights.size()) ){ + ok = false; + warn_once("Fault Model: Temperature exceeded simulated upper bound."); + warn_once("Fault Model: The fault model is not accurate any more."); + temperature = (temperature_weights.size()-1); + } else if (temperature_input < 0){ + ok = false; + warn_once("Fault Model: Temperature exceeded simulated lower bound."); + warn_once("Fault Model: The fault model is not accurate any more."); + temperature = 0; + } + + // recover the router record and return its aggregate fault probability + for (int i = 0; i < number_of_fault_types; i++){ + *aggregate_fault_prob= *aggregate_fault_prob * + ( 1.0 - (routers[routerID].fault_type[i] * + ((float)temperature_weights[temperature])) ); + } + *aggregate_fault_prob = 1.0 - *aggregate_fault_prob; + return ok; +} + +// this function is used only for debugging purposes +void +FaultModel::print(void) +{ + cout << "--- PRINTING configurations ---\n"; + for (int record = 0; record < configurations.size(); record++){ + cout << "(" << record << ") "; + cout << "VCs=" << configurations[record].vcs << " "; + cout << "Buff/VC=" << configurations[record].buff_per_vc << " ["; + for (int fault_type_num = 0; + fault_type_num < number_of_fault_types; + fault_type_num++){ + cout << (100 * configurations[record].fault_type[fault_type_num]); + cout << "% "; + } + cout << "]\n"; + } + cout << "--- PRINTING temperature weights ---\n"; + for (int record = 0; record < temperature_weights.size(); record++){ + cout << "temperature=" << record << " => "; + cout << "weight=" << temperature_weights[record]; + cout << "\n"; + } +} + +FaultModel * +FaultModelParams::create() +{ + return new FaultModel(this); +} |