From 2f30950143cc70bc42a3c8a4111d7cf8198ec881 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Mon, 11 May 2009 10:38:43 -0700 Subject: ruby: Import ruby and slicc from GEMS We eventually plan to replace the m5 cache hierarchy with the GEMS hierarchy, but for now we will make both live alongside eachother. --- src/mem/ruby/network/simple/Topology.cc | 801 ++++++++++++++++++++++++++++++++ 1 file changed, 801 insertions(+) create mode 100644 src/mem/ruby/network/simple/Topology.cc (limited to 'src/mem/ruby/network/simple/Topology.cc') diff --git a/src/mem/ruby/network/simple/Topology.cc b/src/mem/ruby/network/simple/Topology.cc new file mode 100644 index 000000000..db886052f --- /dev/null +++ b/src/mem/ruby/network/simple/Topology.cc @@ -0,0 +1,801 @@ + +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Topology.C + * + * Description: See Topology.h + * + * $Id$ + * + * */ + +#include "Topology.hh" +#include "NetDest.hh" +#include "Network.hh" +#include "TopologyType.hh" +#include "RubyConfig.hh" +#include "util.hh" +#include "MachineType.hh" +#include "Protocol.hh" +#include + +static const int INFINITE_LATENCY = 10000; // Yes, this is a big hack +static const int DEFAULT_BW_MULTIPLIER = 1; // Just to be consistent with above :) + +// Note: In this file, we use the first 2*m_nodes SwitchIDs to +// represent the input and output endpoint links. These really are +// not 'switches', as they will not have a Switch object allocated for +// them. The first m_nodes SwitchIDs are the links into the network, +// the second m_nodes set of SwitchIDs represent the the output queues +// of the network. + +// Helper functions based on chapter 29 of Cormen et al. +static Matrix extend_shortest_path(const Matrix& current_dist, Matrix& latencies, Matrix& inter_switches); +static Matrix shortest_path(const Matrix& weights, Matrix& latencies, Matrix& inter_switches); +static bool link_is_shortest_path_to_node(SwitchID src, SwitchID next, SwitchID final, const Matrix& weights, const Matrix& dist); +static NetDest shortest_path_to_node(SwitchID src, SwitchID next, const Matrix& weights, const Matrix& dist); + + +Topology::Topology(Network* network_ptr, int number_of_nodes) +{ + m_network_ptr = network_ptr; + m_nodes = number_of_nodes; + m_number_of_switches = 0; + init(); +} + +void Topology::init() +{ + if (m_nodes == 1) { + SwitchID id = newSwitchID(); + addLink(0, id, NETWORK_LINK_LATENCY); + addLink(id, 1, NETWORK_LINK_LATENCY); + return; + } + + // topology-specific set-up + TopologyType topology = string_to_TopologyType(g_NETWORK_TOPOLOGY); + switch (topology) { + case TopologyType_TORUS_2D: + make2DTorus(); + break; + case TopologyType_HIERARCHICAL_SWITCH: + makeHierarchicalSwitch(FAN_OUT_DEGREE); + break; + case TopologyType_CROSSBAR: + makeHierarchicalSwitch(1024); + break; + case TopologyType_PT_TO_PT: + makePtToPt(); + break; + case TopologyType_FILE_SPECIFIED: + makeFileSpecified(); + break; + default: + ERROR_MSG("Unexpected typology type") + } + + // initialize component latencies record + m_component_latencies.setSize(0); + m_component_inter_switches.setSize(0); +} + +void Topology::makeSwitchesPerChip(Vector< Vector < SwitchID > > &nodePairs, Vector &latencies, Vector &bw_multis, int numberOfChipSwitches) +{ + + Vector < SwitchID > nodes; // temporary buffer + nodes.setSize(2); + + Vector endpointConnectionExist; // used to ensure all endpoints are connected to the network + endpointConnectionExist.setSize(m_nodes); + // initialize endpoint check vector + for (int k = 0; k < endpointConnectionExist.size(); k++) { + endpointConnectionExist[k] = false; + } + + Vector componentCount; + componentCount.setSize(MachineType_NUM); + for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { + componentCount[mType] = 0; + } + + // components to/from network links + for (int chip = 0; chip < RubyConfig::numberOfChips(); chip++) { + for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { + for (int component = 0; component < MachineType_chip_count(mType, chip); component++) { + + int latency = -1; + int bw_multiplier = -1; // internal link bw multiplier of the global bandwidth + if (mType != MachineType_Directory) { + latency = ON_CHIP_LINK_LATENCY; // internal link latency + bw_multiplier = 10; // internal link bw multiplier of the global bandwidth + } else { + latency = NETWORK_LINK_LATENCY; // local memory latency + bw_multiplier = 1; // local memory link bw multiplier of the global bandwidth + } + nodes[0] = MachineType_base_number(mType)+componentCount[mType]; + nodes[1] = chip+m_nodes*2; // this is the chip's internal switch id # + + // insert link + nodePairs.insertAtBottom(nodes); + latencies.insertAtBottom(latency); + //bw_multis.insertAtBottom(bw_multiplier); + bw_multis.insertAtBottom(componentCount[mType]+MachineType_base_number((MachineType)mType)); + + // opposite direction link + Vector < SwitchID > otherDirectionNodes; + otherDirectionNodes.setSize(2); + otherDirectionNodes[0] = nodes[1]; + otherDirectionNodes[1] = nodes[0]+m_nodes; + nodePairs.insertAtBottom(otherDirectionNodes); + latencies.insertAtBottom(latency); + bw_multis.insertAtBottom(bw_multiplier); + + assert(!endpointConnectionExist[nodes[0]]); + endpointConnectionExist[nodes[0]] = true; + componentCount[mType]++; + } + } + } + + // make sure all enpoints are connected in the soon to be created network + for (int k = 0; k < endpointConnectionExist.size(); k++) { + if (endpointConnectionExist[k] == false) { + cerr << "Error: Unconnected Endpoint: " << k << endl; + exit(1); + } + } + + // secondary check to ensure we saw the correct machine counts + for (MachineType mType = MachineType_FIRST; mType < MachineType_NUM; ++mType) { + assert(componentCount[mType] == MachineType_base_count((MachineType)mType)); + } + +} + +// 2D torus topology + +void Topology::make2DTorus() +{ + Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file + Vector latencies; // link latencies for each link extracted + Vector bw_multis; // bw multipliers for each link extracted + + Vector < SwitchID > nodes; // temporary buffer + nodes.setSize(2); + + // number of inter-chip switches + int numberOfTorusSwitches = m_nodes/MachineType_base_level(MachineType_NUM); + // one switch per machine node grouping + Vector torusSwitches; + for(int i=0; i= 2*m_nodes+numberOfTorusSwitches){ // determine if node is on the bottom + // sorin: bad bug if this is a > instead of a >= + nodes[1] = nodes[0] + lengthOfSide - (lengthOfSide*lengthOfSide); + } else { + nodes[1] = nodes[0] + lengthOfSide; + } + nodePairs.insertAtBottom(nodes); + latencies.insertAtBottom(latency); + bw_multis.insertAtBottom(bw_multiplier); + + } + + // add links + ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size()) + for (int k = 0; k < nodePairs.size(); k++) { + ASSERT(nodePairs[k].size() == 2); + addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k]); + } + +} + +// hierarchical switch topology +void Topology::makeHierarchicalSwitch(int fan_out_degree) +{ + // Make a row of switches with only one input. This extra row makes + // sure the links out of the nodes have latency and limited + // bandwidth. + + // number of inter-chip switches, i.e. the last row of switches + Vector last_level; + for (int i=0; i next_level; + while(last_level.size() > 1) { + for (int i=0; i out_level; + for (int i=0; i fan_out_degree) { + next_level.insertAtBottom(newSwitchID()); + } else { + next_level.insertAtBottom(root_switch); + } + } + // Add this link to the last switch we created + addLink(next_level[next_level.size()-1], out_level[i], NETWORK_LINK_LATENCY); + } + + // Make the current level the last level to get ready for next + // iteration + out_level = next_level; + next_level.clear(); + } +} + +// one internal node per chip, point to point links between chips +void Topology::makePtToPt() +{ + Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file + Vector latencies; // link latencies for each link extracted + Vector bw_multis; // bw multipliers for each link extracted + + Vector < SwitchID > nodes; + nodes.setSize(2); + + // number of inter-chip switches + int numberOfChipSwitches = m_nodes/MachineType_base_level(MachineType_NUM); + // two switches per machine node grouping + // one intra-chip switch and one inter-chip switch per chip + for(int i=0; i otherDirectionNodes; + otherDirectionNodes.setSize(2); + otherDirectionNodes[0] = nodes[1]; + otherDirectionNodes[1] = nodes[0]; + nodePairs.insertAtBottom(otherDirectionNodes); + latencies.insertAtBottom(latency); + bw_multis.insertAtBottom(bw_multiplier); + } + + // point-to-point network between chips + for (int chip = 0; chip < RubyConfig::numberOfChips(); chip++) { + for (int other_chip = chip+1; other_chip < RubyConfig::numberOfChips(); other_chip++) { + + int latency = NETWORK_LINK_LATENCY; // external link latency + int bw_multiplier = 1; // external link bw multiplier of the global bandwidth + + nodes[0] = chip+m_nodes*2+RubyConfig::numberOfChips(); + nodes[1] = other_chip+m_nodes*2+RubyConfig::numberOfChips(); + + // insert link + nodePairs.insertAtBottom(nodes); + latencies.insertAtBottom(latency); + bw_multis.insertAtBottom(bw_multiplier); + + // opposite direction link + Vector < SwitchID > otherDirectionNodes; + otherDirectionNodes.setSize(2); + otherDirectionNodes[0] = nodes[1]; + otherDirectionNodes[1] = nodes[0]; + nodePairs.insertAtBottom(otherDirectionNodes); + latencies.insertAtBottom(latency); + bw_multis.insertAtBottom(bw_multiplier); + } + } + + // add links + ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size()) + for (int k = 0; k < nodePairs.size(); k++) { + ASSERT(nodePairs[k].size() == 2); + addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k]); + } +} + +// make a network as described by the networkFile +void Topology::makeFileSpecified() +{ + + Vector< Vector < SwitchID > > nodePairs; // node pairs extracted from the file + Vector latencies; // link latencies for each link extracted + Vector bw_multis; // bw multipliers for each link extracted + Vector weights; // link weights used to enfore e-cube deadlock free routing + Vector< SwitchID > int_network_switches; // internal switches extracted from the file + Vector endpointConnectionExist; // used to ensure all endpoints are connected to the network + + endpointConnectionExist.setSize(m_nodes); + + // initialize endpoint check vector + for (int k = 0; k < endpointConnectionExist.size(); k++) { + endpointConnectionExist[k] = false; + } + + string filename = "network/simple/Network_Files/"; + filename = filename+g_CACHE_DESIGN + +"_Procs-"+int_to_string(RubyConfig::numberOfProcessors()) + +"_ProcsPerChip-"+int_to_string(RubyConfig::numberOfProcsPerChip()) + +"_L2Banks-"+int_to_string(RubyConfig::numberOfL2Cache()) + +"_Memories-"+int_to_string(RubyConfig::numberOfMemories()) + +".txt"; + + if (g_SIMICS) { + filename = "../../../ruby/"+filename; + } + ifstream networkFile( filename.c_str() , ios::in); + if (!networkFile.is_open()) { + cerr << "Error: Could not open network file: " << filename << endl; + cerr << "Probably no network file exists for " << RubyConfig::numberOfProcessors() + << " processors and " << RubyConfig::numberOfProcsPerChip() << " procs per chip " << endl; + exit(1); + } + + string line = ""; + + while (!networkFile.eof()) { + + Vector < SwitchID > nodes; + nodes.setSize(2); + int latency = -1; // null latency + int weight = -1; // null weight + int bw_multiplier = DEFAULT_BW_MULTIPLIER; // default multiplier incase the network file doesn't define it + int i = 0; // node pair index + int varsFound = 0; // number of varsFound on the line + int internalNodes = 0; // used to determine if the link is between 2 internal nodes + std::getline(networkFile, line, '\n'); + string varStr = string_split(line, ' '); + + // parse the current line in the file + while (varStr != "") { + string label = string_split(varStr, ':'); + + // valid node labels + if (label == "ext_node" || label == "int_node") { + ASSERT(i < 2); // one link between 2 switches per line + varsFound++; + bool isNewIntSwitch = true; + if (label == "ext_node") { // input link to node + MachineType machine = string_to_MachineType(string_split(varStr, ':')); + string nodeStr = string_split(varStr, ':'); + if (string_split(varStr, ':') == "bank") { + nodes[i] = MachineType_base_number(machine) + + atoi(nodeStr.c_str()) + + atoi((string_split(varStr, ':')).c_str())*RubyConfig::numberOfChips(); + } else { + nodes[i] = MachineType_base_number(machine) + + atoi(nodeStr.c_str()); + } + // in nodes should be numbered 0 to m_nodes-1 + ASSERT(nodes[i] >= 0 && nodes[i] < m_nodes); + isNewIntSwitch = false; + endpointConnectionExist[nodes[i]] = true; + } + if (label == "int_node") { // interior node + nodes[i] = atoi((string_split(varStr, ':')).c_str())+m_nodes*2; + // in nodes should be numbered >= m_nodes*2 + ASSERT(nodes[i] >= m_nodes*2); + for (int k = 0; k < int_network_switches.size(); k++) { + if (int_network_switches[k] == nodes[i]) { + isNewIntSwitch = false; + } + } + if (isNewIntSwitch) { // if internal switch + m_number_of_switches++; + int_network_switches.insertAtBottom(nodes[i]); + } + internalNodes++; + } + i++; + } else if (label == "link_latency") { + latency = atoi((string_split(varStr, ':')).c_str()); + varsFound++; + } else if (label == "bw_multiplier") { // not necessary, defaults to DEFAULT_BW_MULTIPLIER + bw_multiplier = atoi((string_split(varStr, ':')).c_str()); + } else if (label == "link_weight") { // not necessary, defaults to link_latency + weight = atoi((string_split(varStr, ':')).c_str()); + } else if (label == "processors") { + ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfProcessors()); + } else if (label == "bw_unit") { + ASSERT(atoi((string_split(varStr, ':')).c_str()) == g_endpoint_bandwidth); + } else if (label == "procs_per_chip") { + ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfProcsPerChip()); + } else if (label == "L2banks") { + ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfL2Cache()); + } else if (label == "memories") { + ASSERT(atoi((string_split(varStr, ':')).c_str()) == RubyConfig::numberOfMemories()); + } else { + cerr << "Error: Unexpected Identifier: " << label << endl; + exit(1); + } + varStr = string_split(line, ' '); + } + if (varsFound == 3) { // all three necessary link variables where found so add the link + nodePairs.insertAtBottom(nodes); + latencies.insertAtBottom(latency); + if (weight != -1) { + weights.insertAtBottom(weight); + } else { + weights.insertAtBottom(latency); + } + bw_multis.insertAtBottom(bw_multiplier); + Vector < SwitchID > otherDirectionNodes; + otherDirectionNodes.setSize(2); + otherDirectionNodes[0] = nodes[1]; + if (internalNodes == 2) { // this is an internal link + otherDirectionNodes[1] = nodes[0]; + } else { + otherDirectionNodes[1] = nodes[0]+m_nodes; + } + nodePairs.insertAtBottom(otherDirectionNodes); + latencies.insertAtBottom(latency); + if (weight != -1) { + weights.insertAtBottom(weight); + } else { + weights.insertAtBottom(latency); + } + bw_multis.insertAtBottom(bw_multiplier); + } else { + if (varsFound != 0) { // if this is not a valid link, then no vars should have been found + cerr << "Error in line: " << line << endl; + exit(1); + } + } + } // end of file + + // makes sure all enpoints are connected in the soon to be created network + for (int k = 0; k < endpointConnectionExist.size(); k++) { + if (endpointConnectionExist[k] == false) { + cerr << "Error: Unconnected Endpoint: " << k << endl; + exit(1); + } + } + + ASSERT(nodePairs.size() == latencies.size() && latencies.size() == bw_multis.size() && latencies.size() == weights.size()) + for (int k = 0; k < nodePairs.size(); k++) { + ASSERT(nodePairs[k].size() == 2); + addLink(nodePairs[k][0], nodePairs[k][1], latencies[k], bw_multis[k], weights[k]); + } + + networkFile.close(); +} + +void Topology::createLinks(bool isReconfiguration) +{ + // Find maximum switchID + + SwitchID max_switch_id = 0; + for (int i=0; i 0 && weight != INFINITE_LATENCY) { + NetDest destination_set = shortest_path_to_node(i, j, topology_weights, dist); + assert(latency != -1); + makeLink(i, j, destination_set, latency, weight, bw_multiplier, isReconfiguration); + } + } + } +} + +SwitchID Topology::newSwitchID() +{ + m_number_of_switches++; + return m_number_of_switches-1+m_nodes+m_nodes; +} + +void Topology::addLink(SwitchID src, SwitchID dest, int link_latency) +{ + addLink(src, dest, link_latency, DEFAULT_BW_MULTIPLIER, link_latency); +} + +void Topology::addLink(SwitchID src, SwitchID dest, int link_latency, int bw_multiplier) +{ + addLink(src, dest, link_latency, bw_multiplier, link_latency); +} + +void Topology::addLink(SwitchID src, SwitchID dest, int link_latency, int bw_multiplier, int link_weight) +{ + ASSERT(src <= m_number_of_switches+m_nodes+m_nodes); + ASSERT(dest <= m_number_of_switches+m_nodes+m_nodes); + m_links_src_vector.insertAtBottom(src); + m_links_dest_vector.insertAtBottom(dest); + m_links_latency_vector.insertAtBottom(link_latency); + m_links_weight_vector.insertAtBottom(link_weight); + m_bw_multiplier_vector.insertAtBottom(bw_multiplier); +} + +void Topology::makeLink(SwitchID src, SwitchID dest, const NetDest& routing_table_entry, int link_latency, int link_weight, int bw_multiplier, bool isReconfiguration) +{ + // Make sure we're not trying to connect two end-point nodes directly together + assert((src >= 2*m_nodes) || (dest >= 2*m_nodes)); + + if (src < m_nodes) { + m_network_ptr->makeInLink(src, dest-(2*m_nodes), routing_table_entry, link_latency, bw_multiplier, isReconfiguration); + } else if (dest < 2*m_nodes) { + assert(dest >= m_nodes); + NodeID node = dest-m_nodes; + m_network_ptr->makeOutLink(src-(2*m_nodes), node, routing_table_entry, link_latency, link_weight, bw_multiplier, isReconfiguration); + } else { + assert((src >= 2*m_nodes) && (dest >= 2*m_nodes)); + m_network_ptr->makeInternalLink(src-(2*m_nodes), dest-(2*m_nodes), routing_table_entry, link_latency, link_weight, bw_multiplier, isReconfiguration); + } +} + +void Topology::printConfig(ostream& out) const +{ + assert(m_component_latencies.size() > 0); + + out << "--- Begin Topology Print ---" << endl; + out << endl; + out << "Topology print ONLY indicates the _NETWORK_ latency between two machines" << endl; + out << "It does NOT include the latency within the machines" << endl; + out << endl; + for (int m=0; m " << dest_mach << " net_lat: " + << link_latency+intermediate_switches << endl; // NOTE switches are assumed to have single cycle latency + } + } + } + out << endl; + } + } + + out << "--- End Topology Print ---" << endl; +} + +/**************************************************************************/ + +// The following all-pairs shortest path algorithm is based on the +// discussion from Cormen et al., Chapter 26.1. + +static void extend_shortest_path(Matrix& current_dist, Matrix& latencies, Matrix& inter_switches) +{ + bool change = true; + int nodes = current_dist.size(); + + while (change) { + change = false; + for (int i=0; i= 0); + assert(intermediate_switch < latencies[i].size()); + latencies[i][j] = latencies[i][intermediate_switch] + latencies[intermediate_switch][j]; + } + } + } + } +} + +static Matrix shortest_path(const Matrix& weights, Matrix& latencies, Matrix& inter_switches) +{ + Matrix dist = weights; + extend_shortest_path(dist, latencies, inter_switches); + return dist; +} + +static bool link_is_shortest_path_to_node(SwitchID src, SwitchID next, SwitchID final, + const Matrix& weights, const Matrix& dist) +{ + return (weights[src][next] + dist[next][final] == dist[src][final]); +} + +static NetDest shortest_path_to_node(SwitchID src, SwitchID next, + const Matrix& weights, const Matrix& dist) +{ + NetDest result; + int d = 0; + int machines; + int max_machines; + + machines = MachineType_NUM; + max_machines = MachineType_base_number(MachineType_NUM); + + for (int m=0; m