From dbe8892b761067c5c1e828f889a513ea085b044f Mon Sep 17 00:00:00 2001 From: Tushar Krishna Date: Thu, 6 Oct 2016 14:35:22 -0400 Subject: ruby: garnet2.0 Revamped version of garnet with more optimized single-cycle routers, more configurability, and cleaner code. --- src/mem/ruby/network/garnet2.0/SwitchAllocator.cc | 389 ++++++++++++++++++++++ 1 file changed, 389 insertions(+) create mode 100644 src/mem/ruby/network/garnet2.0/SwitchAllocator.cc (limited to 'src/mem/ruby/network/garnet2.0/SwitchAllocator.cc') diff --git a/src/mem/ruby/network/garnet2.0/SwitchAllocator.cc b/src/mem/ruby/network/garnet2.0/SwitchAllocator.cc new file mode 100644 index 000000000..7916802a5 --- /dev/null +++ b/src/mem/ruby/network/garnet2.0/SwitchAllocator.cc @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2008 Princeton University + * Copyright (c) 2016 Georgia Institute of Technology + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Niket Agarwal + * Tushar Krishna + */ + + +#include "mem/ruby/network/garnet2.0/SwitchAllocator.hh" + +#include "debug/RubyNetwork.hh" +#include "mem/ruby/network/garnet2.0/GarnetNetwork.hh" +#include "mem/ruby/network/garnet2.0/InputUnit.hh" +#include "mem/ruby/network/garnet2.0/OutputUnit.hh" +#include "mem/ruby/network/garnet2.0/Router.hh" + +SwitchAllocator::SwitchAllocator(Router *router) + : Consumer(router) +{ + m_router = router; + m_num_vcs = m_router->get_num_vcs(); + m_vc_per_vnet = m_router->get_vc_per_vnet(); + + m_input_arbiter_activity = 0; + m_output_arbiter_activity = 0; +} + +void +SwitchAllocator::init() +{ + m_input_unit = m_router->get_inputUnit_ref(); + m_output_unit = m_router->get_outputUnit_ref(); + + m_num_inports = m_router->get_num_inports(); + m_num_outports = m_router->get_num_outports(); + m_round_robin_inport.resize(m_num_outports); + m_round_robin_invc.resize(m_num_inports); + m_port_requests.resize(m_num_outports); + m_vc_winners.resize(m_num_outports); + + for (int i = 0; i < m_num_inports; i++) { + m_round_robin_invc[i] = 0; + } + + for (int i = 0; i < m_num_outports; i++) { + m_port_requests[i].resize(m_num_inports); + m_vc_winners[i].resize(m_num_inports); + + m_round_robin_inport[i] = 0; + + for (int j = 0; j < m_num_inports; j++) { + m_port_requests[i][j] = false; // [outport][inport] + } + } +} + +/* + * The wakeup function of the SwitchAllocator performs a 2-stage + * seperable switch allocation. At the end of the 2nd stage, a free + * output VC is assigned to the winning flits of each output port. + * There is no separate VCAllocator stage like the one in garnet1.0. + * At the end of this function, the router is rescheduled to wakeup + * next cycle for peforming SA for any flits ready next cycle. + */ + +void +SwitchAllocator::wakeup() +{ + arbitrate_inports(); // First stage of allocation + arbitrate_outports(); // Second stage of allocation + + clear_request_vector(); + check_for_wakeup(); +} + +/* + * SA-I (or SA-i) loops through all input VCs at every input port, + * and selects one in a round robin manner. + * - For HEAD/HEAD_TAIL flits only selects an input VC whose output port + * has at least one free output VC. + * - For BODY/TAIL flits, only selects an input VC that has credits + * in its output VC. + * Places a request for the output port from this input VC. + */ + +void +SwitchAllocator::arbitrate_inports() +{ + // Select a VC from each input in a round robin manner + // Independent arbiter at each input port + for (int inport = 0; inport < m_num_inports; inport++) { + int invc = m_round_robin_invc[inport]; + + // Select next round robin vc candidate within valid vnet + int next_round_robin_invc = invc; + next_round_robin_invc++; + if (next_round_robin_invc >= m_num_vcs) + next_round_robin_invc = 0; + m_round_robin_invc[inport] = next_round_robin_invc; + + for (int invc_iter = 0; invc_iter < m_num_vcs; invc_iter++) { + + if (m_input_unit[inport]->need_stage(invc, SA_, + m_router->curCycle())) { + + // This flit is in SA stage + + int outport = m_input_unit[inport]->get_outport(invc); + int outvc = m_input_unit[inport]->get_outvc(invc); + + // check if the flit in this InputVC is allowed to be sent + // send_allowed conditions described in that function. + bool make_request = + send_allowed(inport, invc, outport, outvc); + + if (make_request) { + m_input_arbiter_activity++; + m_port_requests[outport][inport] = true; + m_vc_winners[outport][inport]= invc; + break; // got one vc winner for this port + } + } + + invc++; + if (invc >= m_num_vcs) + invc = 0; + } + } +} + +/* + * SA-II (or SA-o) loops through all output ports, + * and selects one input VC (that placed a request during SA-I) + * as the winner for this output port in a round robin manner. + * - For HEAD/HEAD_TAIL flits, performs simplified outvc allocation. + * (i.e., select a free VC from the output port). + * - For BODY/TAIL flits, decrement a credit in the output vc. + * The winning flit is read out from the input VC and sent to the + * CrossbarSwitch. + * An increment_credit signal is sent from the InputUnit + * to the upstream router. For HEAD_TAIL/TAIL flits, is_free_signal in the + * credit is set to true. + */ + +void +SwitchAllocator::arbitrate_outports() +{ + // Now there are a set of input vc requests for output vcs. + // Again do round robin arbitration on these requests + // Independent arbiter at each output port + for (int outport = 0; outport < m_num_outports; outport++) { + int inport = m_round_robin_inport[outport]; + m_round_robin_inport[outport]++; + + if (m_round_robin_inport[outport] >= m_num_inports) + m_round_robin_inport[outport] = 0; + + for (int inport_iter = 0; inport_iter < m_num_inports; + inport_iter++) { + + // inport has a request this cycle for outport + if (m_port_requests[outport][inport]) { + + // grant this outport to this inport + int invc = m_vc_winners[outport][inport]; + + int outvc = m_input_unit[inport]->get_outvc(invc); + if (outvc == -1) { + // VC Allocation - select any free VC from outport + outvc = vc_allocate(outport, inport, invc); + } + + // remove flit from Input VC + flit *t_flit = m_input_unit[inport]->getTopFlit(invc); + + DPRINTF(RubyNetwork, "SwitchAllocator at Router %d " + "granted outvc %d at outport %d " + "to invc %d at inport %d to flit %s at " + "time: %lld\n", + m_router->get_id(), outvc, + m_router->getPortDirectionName( + m_output_unit[outport]->get_direction()), + invc, + m_router->getPortDirectionName( + m_input_unit[inport]->get_direction()), + *t_flit, + m_router->curCycle()); + + + // Update outport field in the flit since this is + // used by CrossbarSwitch code to send it out of + // correct outport. + // Note: post route compute in InputUnit, + // outport is updated in VC, but not in flit + t_flit->set_outport(outport); + + // set outvc (i.e., invc for next hop) in flit + // (This was updated in VC by vc_allocate, but not in flit) + t_flit->set_vc(outvc); + + // decrement credit in outvc + m_output_unit[outport]->decrement_credit(outvc); + + // flit ready for Switch Traversal + t_flit->advance_stage(ST_, m_router->curCycle()); + m_router->grant_switch(inport, t_flit); + m_output_arbiter_activity++; + + if ((t_flit->get_type() == TAIL_) || + t_flit->get_type() == HEAD_TAIL_) { + + // This Input VC should now be empty + assert(!(m_input_unit[inport]->isReady(invc, + m_router->curCycle()))); + + // Free this VC + m_input_unit[inport]->set_vc_idle(invc, + m_router->curCycle()); + + // Send a credit back + // along with the information that this VC is now idle + m_input_unit[inport]->increment_credit(invc, true, + m_router->curCycle()); + } else { + // Send a credit back + // but do not indicate that the VC is idle + m_input_unit[inport]->increment_credit(invc, false, + m_router->curCycle()); + } + + // remove this request + m_port_requests[outport][inport] = false; + + break; // got a input winner for this outport + } + + inport++; + if (inport >= m_num_inports) + inport = 0; + } + } +} + +/* + * A flit can be sent only if + * (1) there is at least one free output VC at the + * output port (for HEAD/HEAD_TAIL), + * or + * (2) if there is at least one credit (i.e., buffer slot) + * within the VC for BODY/TAIL flits of multi-flit packets. + * and + * (3) pt-to-pt ordering is not violated in ordered vnets, i.e., + * there should be no other flit in this input port + * within an ordered vnet + * that arrived before this flit and is requesting the same output port. + */ + +bool +SwitchAllocator::send_allowed(int inport, int invc, int outport, int outvc) +{ + // Check if outvc needed + // Check if credit needed (for multi-flit packet) + // Check if ordering violated (in ordered vnet) + + int vnet = get_vnet(invc); + bool has_outvc = (outvc != -1); + bool has_credit = false; + + if (!has_outvc) { + + // needs outvc + // this is only true for HEAD and HEAD_TAIL flits. + + if (m_output_unit[outport]->has_free_vc(vnet)) { + + has_outvc = true; + + // each VC has at least one buffer, + // so no need for additional credit check + has_credit = true; + } + } else { + has_credit = m_output_unit[outport]->has_credit(outvc); + } + + // cannot send if no outvc or no credit. + if (!has_outvc || !has_credit) + return false; + + + // protocol ordering check + if ((m_router->get_net_ptr())->isVNetOrdered(vnet)) { + + // enqueue time of this flit + Cycles t_enqueue_time = m_input_unit[inport]->get_enqueue_time(invc); + + // check if any other flit is ready for SA and for same output port + // and was enqueued before this flit + int vc_base = vnet*m_vc_per_vnet; + for (int vc_offset = 0; vc_offset < m_vc_per_vnet; vc_offset++) { + int temp_vc = vc_base + vc_offset; + if (m_input_unit[inport]->need_stage(temp_vc, SA_, + m_router->curCycle()) && + (m_input_unit[inport]->get_outport(temp_vc) == outport) && + (m_input_unit[inport]->get_enqueue_time(temp_vc) < + t_enqueue_time)) { + return false; + } + } + } + + return true; +} + +// Assign a free VC to the winner of the output port. +int +SwitchAllocator::vc_allocate(int outport, int inport, int invc) +{ + // Select a free VC from the output port + int outvc = m_output_unit[outport]->select_free_vc(get_vnet(invc)); + + // has to get a valid VC since it checked before performing SA + assert(outvc != -1); + m_input_unit[inport]->grant_outvc(invc, outvc); + return outvc; +} + +// Wakeup the router next cycle to perform SA again +// if there are flits ready. +void +SwitchAllocator::check_for_wakeup() +{ + Cycles nextCycle = m_router->curCycle() + Cycles(1); + + for (int i = 0; i < m_num_inports; i++) { + for (int j = 0; j < m_num_vcs; j++) { + if (m_input_unit[i]->need_stage(j, SA_, nextCycle)) { + m_router->schedule_wakeup(Cycles(1)); + return; + } + } + } +} + +int +SwitchAllocator::get_vnet(int invc) +{ + int vnet = invc/m_vc_per_vnet; + assert(vnet < m_router->get_num_vnets()); + return vnet; +} + + +// Clear the request vector within the allocator at end of SA-II. +// Was populated by SA-I. +void +SwitchAllocator::clear_request_vector() +{ + for (int i = 0; i < m_num_outports; i++) { + for (int j = 0; j < m_num_inports; j++) { + m_port_requests[i][j] = false; + } + } +} -- cgit v1.2.3