diff options
author | Gabor Dozsa <gabor.dozsa@arm.com> | 2016-01-07 16:33:47 -0600 |
---|---|---|
committer | Gabor Dozsa <gabor.dozsa@arm.com> | 2016-01-07 16:33:47 -0600 |
commit | 64ca31976fe91eedd91b2d703c6e3e62328f8e1d (patch) | |
tree | aaadab6dfd5d45a7d8794d2033d8ec1b9f845b37 | |
parent | 5dec4e07b89786aa67ce64aadeeb14c81b3977b3 (diff) | |
download | gem5-64ca31976fe91eedd91b2d703c6e3e62328f8e1d.tar.xz |
config: Updates for distributed gem5 simulations
-rw-r--r-- | configs/common/FSConfig.py | 36 | ||||
-rw-r--r-- | configs/common/Options.py | 31 | ||||
-rw-r--r-- | configs/example/fs.py | 12 | ||||
-rwxr-xr-x | util/dist/gem5-dist.sh | 385 | ||||
-rw-r--r-- | util/dist/test/simple_bootscript.rcS (renamed from util/multi/bootscript.rcS) | 73 | ||||
-rw-r--r-- | util/dist/test/test-2nodes-AArch64.sh | 82 | ||||
-rwxr-xr-x | util/multi/gem5-multi.sh | 275 |
7 files changed, 575 insertions, 319 deletions
diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py index 0f63ec9e7..004d06d55 100644 --- a/configs/common/FSConfig.py +++ b/configs/common/FSConfig.py @@ -654,3 +654,39 @@ def makeDualRoot(full_system, testSystem, driveSystem, dumpfile): self.etherlink.dump = Parent.etherdump return self + + +def makeDistRoot(testSystem, + rank, + size, + server_name, + server_port, + sync_repeat, + sync_start, + linkspeed, + linkdelay, + dumpfile): + self = Root(full_system = True) + self.testsys = testSystem + + self.etherlink = DistEtherLink(speed = linkspeed, + delay = linkdelay, + dist_rank = rank, + dist_size = size, + server_name = server_name, + server_port = server_port, + sync_start = sync_start, + sync_repeat = sync_repeat) + + if hasattr(testSystem, 'realview'): + self.etherlink.int0 = Parent.testsys.realview.ethernet.interface + elif hasattr(testSystem, 'tsunami'): + self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface + else: + fatal("Don't know how to connect DistEtherLink to this system") + + if dumpfile: + self.etherdump = EtherDump(file=dumpfile) + self.etherlink.dump = Parent.etherdump + + return self diff --git a/configs/common/Options.py b/configs/common/Options.py index 45be8e2f8..d5671f311 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -297,10 +297,41 @@ def addFSOptions(parser): # Benchmark options parser.add_option("--dual", action="store_true", help="Simulate two systems attached with an ethernet link") + parser.add_option("--dist", action="store_true", + help="Parallel distributed gem5 simulation.") + parser.add_option("--is-switch", action="store_true", + help="Select the network switch simulator process for a"\ + "distributed gem5 run") + parser.add_option("--dist-rank", default=0, action="store", type="int", + help="Rank of this system within the dist gem5 run.") + parser.add_option("--dist-size", default=0, action="store", type="int", + help="Number of gem5 processes within the dist gem5 run.") + 
parser.add_option("--dist-server-name", + default="127.0.0.1", + action="store", type="string", + help="Name of the message server host\nDEFAULT: localhost") + parser.add_option("--dist-server-port", + default=2200, + action="store", type="int", + help="Message server listen port\nDEFAULT: 2200") + parser.add_option("--dist-sync-repeat", + default="0us", + action="store", type="string", + help="Repeat interval for synchronisation barriers among dist-gem5 processes\nDEFAULT: --ethernet-linkdelay") + parser.add_option("--dist-sync-start", + default="5200000000000t", + action="store", type="string", + help="Time to schedule the first dist synchronisation barrier\nDEFAULT:5200000000000t") parser.add_option("-b", "--benchmark", action="store", type="string", dest="benchmark", help="Specify the benchmark to run. Available benchmarks: %s"\ % DefinedBenchmarks) + parser.add_option("--ethernet-linkspeed", default="10Gbps", + action="store", type="string", + help="Link speed in bps\nDEFAULT: 10Gbps") + parser.add_option("--ethernet-linkdelay", default="10us", + action="store", type="string", + help="Link delay in seconds\nDEFAULT: 10us") # Metafile options parser.add_option("--etherdump", action="store", type="string", dest="etherdump", diff --git a/configs/example/fs.py b/configs/example/fs.py index dddb2ea3c..6ee969a6e 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -340,6 +340,18 @@ test_sys = build_test_system(np) if len(bm) == 2: drive_sys = build_drive_system(np) root = makeDualRoot(True, test_sys, drive_sys, options.etherdump) +elif len(bm) == 1 and options.dist: + # This system is part of a dist-gem5 simulation + root = makeDistRoot(test_sys, + options.dist_rank, + options.dist_size, + options.dist_server_name, + options.dist_server_port, + options.dist_sync_repeat, + options.dist_sync_start, + options.ethernet_linkspeed, + options.ethernet_linkdelay, + options.etherdump); elif len(bm) == 1: root = Root(full_system=True, system=test_sys) else: diff 
--git a/util/dist/gem5-dist.sh b/util/dist/gem5-dist.sh new file mode 100755 index 000000000..8fa799acc --- /dev/null +++ b/util/dist/gem5-dist.sh @@ -0,0 +1,385 @@ +#! /bin/bash + +# +# Copyright (c) 2015 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2015 University of Illinois Urbana Champaign +# All rights reserved +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Gabor Dozsa +# Mohammad Alian + + +# This is a wrapper script to run dist-gem5 simulations. +# See the usage_func() below for hints on how to use it. Also, +# there are some examples in the util/dist/test directory (e.g. +# see util/dist/test/test-2nodes-AArch64.sh) +# +# +# Allocated hosts/cores are assumed to be listed in the LSB_MCPU_HOSTS +# environment variable (which is what LSF does by default). +# E.g. LSB_MCPU_HOSTS=\"hname1 2 hname2 4\" means we have altogether 6 slots +# allocated to launch the gem5 processes, 2 of them are on host hname1 +# and 4 of them are on host hname2. +# If LSB_MCPU_HOSTS environment variable is not defined then we launch all +# processes on the localhost. +# +# Each gem5 process is passed a unique rank ID [0..N-1] via the --dist-rank +# option. The total number of gem5 processes is also passed in (--dist-size). +# These values can be used in the boot script to configure the MAC/IP +# addresses - among other things (see util/dist/test/simple_bootscript.rcS). +# +# Each gem5 process will create an m5out.$GEM5_RANK directory for +# the usual output files. Furthermore, there will be a separate log file +# for each ssh session (we use ssh to start gem5 processes) and one for +# the switch. These are called log.$GEM5_RANK and log.switch. 
+# + + +# print help +usage_func () +{ + echo "Usage:$0 [-debug] [-n nnodes] [-r rundir] [-c ckptdir] [-p port] [-sw switch] [--sw-args sw_args] [-fs fullsystem] [--fs-args fs_args] [--cf-args conf_args] [--m5-args m5_args] -x gem5_exe " + echo " -debug : debug mode (start gem5 in gdb)" + echo " nnodes : number of gem5 processes" + echo " rundir : run simulation under this path. If not specified, current dir will be used" + echo " ckptdir : dump/restore checkpoints to/from this path. If not specified, current dir will be used" + + echo " fullsystem: fullsystem config file" + echo " fs_args : fullsystem config specific argument list: arg1 arg2 ..." + echo " port : switch listen port" + echo " switch : switch config file" + echo " sw_args : switch config specific argument list: arg1 arg2 ..." + echo " conf_args : common (for both fullsystem and switch) config argument list: arg1 arg2 ..." + echo " gem5_exe : gem5 executable (full path required)" + echo " m5_args : common m5 argument list (e.g. debug flags): arg1 arg2 ..." + echo "Note: if no LSF slots allocation is found all proceses are launched on the localhost." 
+} + +# Process (optional) command line options +FS_ARGS=" " +SW_ARGS=" " +CF_ARGS=" " +M5_ARGS=" " +while (($# > 0)) +do + case "x$1" in + x-debug) + GEM5_DEBUG="-debug" + shift 1 + ;; + x-n|x-nodes) + NNODES=$2 + shift 2 + ;; + x-r|x-rundir) + RUN_DIR=$2 + shift 2 + ;; + x-c|x-ckptdir) + CKPT_DIR=$2 + shift 2 + ;; + x-p|x-port) + SW_PORT=$2 + shift 2 + ;; + x-s|x-switch) + SW_CONFIG=$2 + shift 2 + ;; + x--sw-args) + CUR_ARGS="SW_ARGS" + shift 1 + ;; + x-f|x-fullsystem) + FS_CONFIG=$2 + shift 2 + ;; + x--fs-args) + CUR_ARGS="FS_ARGS" + shift 1 + ;; + x--cf-args) + CUR_ARGS="CF_ARGS" + shift 1 + ;; + x--m5-args) + CUR_ARGS="M5_ARGS" + shift 1 + ;; + x-x) + GEM5_EXE=$2 + shift 2 + ;; + x-*) + [ -n "$CUR_ARGS" ] || { echo "Unexpected arg: $1"; usage_func; exit -1; } + case "x$2" in + x-*|x) + eval $CUR_ARGS=\"${!CUR_ARGS} $1\" + shift 1 + ;; + *) + eval $CUR_ARGS=\"${!CUR_ARGS} $1 $2\" + shift 2 + ;; + esac + ;; + *) + echo "Unknown arg: $1" + usage_func + exit 1 + ;; + esac +done + +# Default values to use (in case they are not defined as command line options) +DEFAULT_FS_CONFIG=$M5_PATH/configs/example/fs.py +DEFAULT_SW_CONFIG=$M5_PATH/configs/example/sw.py +DEFAULT_SW_PORT=2200 + +[ -z "$FS_CONFIG" ] && FS_CONFIG=$DEFAULT_FS_CONFIG +[ -z "$SW_CONFIG" ] && SW_CONFIG=$DEFAULT_SW_CONFIG +[ -z "$SW_PORT" ] && SW_PORT=$DEFAULT_SW_PORT +[ -z "$NNODES" ] && NNODES=2 +[ -z "$RUN_DIR" ] && RUN_DIR=$(pwd) +[ -z "$CKPT_DIR" ] && CKPT_DIR=$(pwd) + +# Check if all the executables we need exist +[ -f "$FS_CONFIG" ] || { echo "FS config ${FS_CONFIG} not found"; exit 1; } +[ -f "$SW_CONFIG" ] || { echo "Switch config ${SW_CONFIG} not found"; exit 1; } +[ -x "$GEM5_EXE" ] || { echo "Executable ${GEM5_EXE} not found"; exit 1; } +# make sure that RUN_DIR exists +mkdir -p $RUN_DIR > /dev/null 2>&1 + +declare -a SSH_PIDS +declare -a HOSTS +declare -a NCORES + +# Find out which cluster hosts/slots are allocated or +# use localhost if there is no LSF allocation. 
+# We assume that allocated slots are listed in the LSB_MCPU_HOSTS +# environment variable in the form: +# host1 nslots1 host2 nslots2 ... +# (This is what LSF does by default.) +NH=0 +[ "x$LSB_MCPU_HOSTS" != "x" ] || LSB_MCPU_HOSTS="127.0.0.1 $NNODES" +host="" +for hc in $LSB_MCPU_HOSTS +do + if [ "x$host" == "x" ] + then + host=$hc + HOSTS+=($hc) + else + NCORES+=($hc) + ((NH+=hc)) + host="" + fi +done +((NNODES==NH)) || { echo "(E) Number of cluster slots ($NH) and gem5 instances ($N) differ"; exit -1; } + +# function to clean up and abort if something goes wrong +abort_func () +{ + echo + echo "KILLED $(date)" + # Try to Kill the server first. That should trigger an exit for all connected + # gem5 processes. + [ "x$SW_PID" != "x" ] && kill $SW_PID 2>/dev/null + sleep 20 + # (try to) kill gem5 processes - just in case something went wrong with the + # server triggered exit + bname=$(basename $GEM5_EXE) + killall -q -s SIGKILL $bname + for h in ${HOSTS[@]} + do + ssh $h killall -q -s SIGKILL $bname + done + sleep 5 + # kill the watchdog + [ "x$WATCHDOG_PID" != "x" ] && kill $WATCHDOG_PID 2>/dev/null + exit -1 +} + +# We need a watchdog to trigger full clean up if a gem5 process dies +watchdog_func () +{ + while true + do + sleep 30 + ((NDEAD=0)) + for p in ${SSH_PIDS[*]} + do + kill -0 $p 2>/dev/null || ((NDEAD+=1)) + done + kill -0 $SW_PID || ((NDEAD+=1)) + if ((NDEAD>0)) + then + # we may be in the middle of an orderly termination, + # give it some time to complete before reporting abort + sleep 60 + echo -n "(I) (some) gem5 process(es) exited" + abort_func + fi + done +} + +# This function launches the gem5 processes. The only purpose is to enable +# launching gem5 processes under gdb control for debugging +start_func () +{ + local N=$1 + local HOST=$2 + local ENV_ARGS=$3 + shift 3 + if [ "x$GEM5_DEBUG" != "x" ] + then + echo "DEBUG starting terminal..." 
+ MY_ARGS="$@" + xterm -e "gdb --args $MY_ARGS" & + else + ssh $HOST $ENV_ARGS "$@" &> $RUN_DIR/log.$N & + fi +} + +# block till the gem5 process starts +connected () +{ + FILE=$1 + STRING=$2 + echo -n "waiting for $3 to start " + while : ; + do + kill -0 $4 || { echo "Failed to start $3"; exit -1; } + [[ -f "$FILE" ]] && \ + grep -q "$STRING" "$FILE" && \ + echo -e "\nnode #$3 started" && \ + break + + sleep 2 + echo -n "." + done +} + +# Trigger full clean up in case we are being killed by external signal +trap 'abort_func' INT TERM + +# env args to be passed explicitly to gem5 processes started via ssh +ENV_ARGS="LD_LIBRARY_PATH=$LD_LIBRARY_PATH M5_PATH=$M5_PATH" + +#cleanup log files before starting gem5 processes +rm $RUN_DIR/log.switch > /dev/null 2>&1 + +# make sure that CKPT_DIR exists +mkdir -p $CKPT_DIR/m5out.switch > /dev/null 2>&1 +# launch switch gem5 +SW_HOST=${HOSTS[0]} +echo "launch switch gem5 process on $SW_HOST ..." +start_func "switch" $SW_HOST "$ENV_ARGS" $GEM5_EXE -d $RUN_DIR/m5out.switch \ + $M5_ARGS \ + $SW_CONFIG \ + $SW_ARGS \ + $CF_ARGS \ + --checkpoint-dir=$CKPT_DIR/m5out.switch \ + --is-switch \ + --dist-size=$NNODES \ + --dist-server-port=$SW_PORT +SW_PID=$! + +# block here till switch process starts +connected $RUN_DIR/log.switch "tcp_iface listening on port" "switch" $SW_PID +LINE=$(grep -r "tcp_iface listening on port" $RUN_DIR/log.switch) + +IFS=' ' read -ra ADDR <<< "$LINE" +# actual port that switch is listening on may be different +# from what we specified if the port was busy +SW_PORT=${ADDR[5]} + +# Now launch all the gem5 processes with ssh. +echo "START $(date)" +n=0 +for ((i=0; i < ${#HOSTS[@]}; i++)) +do + h=${HOSTS[$i]} + for ((j=0; j < ${NCORES[i]}; j++)) + do + #cleanup log files before starting gem5 processes + rm $RUN_DIR/log.$n > /dev/null 2>&1 + # make sure that CKPT_DIR exists + mkdir -p $CKPT_DIR/m5out.$n > /dev/null 2>&1 + echo "starting gem5 on $h ..." 
+ start_func $n $h "$ENV_ARGS" $GEM5_EXE -d $RUN_DIR/m5out.$n \ + $M5_ARGS \ + $FS_CONFIG \ + $FS_ARGS \ + $CF_ARGS \ + --checkpoint-dir=$CKPT_DIR/m5out.$n \ + --dist \ + --dist-rank=$n \ + --dist-size=$NNODES \ + --dist-server-name=${HOSTS[0]} \ + --dist-server-port=$SW_PORT + SSH_PIDS[$n]=$! + ((n+=1)) + done +done + +# Wait here if it is a debug session +[ "x$GEM5_DEBUG" == "x" ] || { echo "DEBUG session"; wait $SW_PID; exit -1; } + +# start watchdog to trigger complete abort (after a grace period) if any +# gem5 process dies +watchdog_func & +WATCHDOG_PID=$! + +# wait for exit statuses +((NFAIL=0)) +for p in ${SSH_PIDS[*]} +do + wait $p || ((NFAIL+=1)) +done +wait $SW_PID || ((NFAIL+=1)) + +# all done, let's terminate the watchdog +kill $WATCHDOG_PID 2>/dev/null + +if ((NFAIL==0)) +then + echo "EXIT $(date)" +else + echo "ABORT $(date)" +fi diff --git a/util/multi/bootscript.rcS b/util/dist/test/simple_bootscript.rcS index 95736f4b7..7c9b75538 100644 --- a/util/multi/bootscript.rcS +++ b/util/dist/test/simple_bootscript.rcS @@ -40,33 +40,28 @@ # Authors: Gabor Dozsa # # -# This is an example boot script to use for muti gem5 runs. The important -# task here is to extract the rank and size information from the kernel -# boot args and use those to configure MAC/IP addresses and hostname. -# Then we can kick off our (parallel) workload ... +# This is an example boot script to use for dist-gem5 runs. The important +# task here is to extract the rank and size information through the m5 +# initparam utility and use those to configure MAC/IP addresses and hostname. # -# You are expected to costumize this scipt for your needs (e.g. change +# You are expected to customize this scipt for your needs (e.g. change # the command at the end of the scipt to run your tests/workloads. 
source /root/.bashrc echo "bootscript.rcS is running" -m='GEM5\_RANK=([0-9]+) GEM5\_SIZE=([0-9]+)' -if [[ $(cat /proc/cmdline) =~ $m ]] -then - MY_RANK=${BASH_REMATCH[1]} - MY_SIZE=${BASH_REMATCH[2]} -else - echo "(E) GEM5_RANK/GEM5_SIZE was not defined in bootargs, exiting ..." - /sbin/m5 abort -fi +# Retrieve dist-gem5 rank and size parameters using the 'm5' utility +MY_RANK=$(/sbin/m5 initparam dist-rank) +[ $? = 0 ] || { echo "m5 initparam failed"; exit -1; } +MY_SIZE=$(/sbin/m5 initparam dist-size) +[ $? = 0 ] || { echo "m5 initparam failed"; exit -1; } /bin/hostname node${MY_RANK} # Keep MAC address assignment simple for now ... -(($MY_RANK>97)) && { echo "(E) Rank must be less than 98"; /sbin/m5 abort; } -((MY_ADDR=MY_RANK+2)) -if (($MY_ADDR<10)) +(($MY_RANK > 97)) && { echo "(E) Rank must be less than 98"; /sbin/m5 abort; } +((MY_ADDR = MY_RANK + 2)) +if (($MY_ADDR < 10)) then MY_ADDR_PADDED=0${MY_ADDR} else @@ -78,45 +73,35 @@ fi /sbin/ifconfig -a -# Prepare host lists for mpirun -MY_MPI_HOSTS="192.168.0.2" -for ((i=1; i<MY_SIZE; i++)) -do - MY_MPI_HOSTS+=",192.168.0.$((i+2))" -done +echo "Hello from $MY_RANK of $MY_SIZE" -# Check that Ethernet links work, then take a checkpoint -if [ "$MY_RANK" == "0" ] +# Now that our network interface is configured we can use the usual commands to +# contact the other systems, e.g. let's try to ping a "neighbour" system +if ((MY_RANK < MY_SIZE - 1)) then - OLDIFS=$IFS - IFS="," - for i in $MY_MPI_HOSTS - do - ping -c 1 $i || { echo "ping $i failed, exiting ..."; exit -1; } - ssh $i hostname || { echo "ssh $i failed, exiting ..."; exit -1; } - done - IFS=$OLDIFS - /sbin/m5 checkpoint + ping -c 1 192.168.0.$((MY_ADDR + 1)) +else + ping -c 1 192.168.0.2 fi -# -------------------------------------------- -# ------ Start your tests below ... 
--------- -# -------------------------------------------- if [ "$MY_RANK" == "0" ] then - echo "MPI test" - #mpirun -H 192.168.0.3,192.168.0.2 hostname - cd /benchmarks - mpirun -H $MY_MPI_HOSTS lulesh/lulesh2.0-mpi -s 5 + # Trigger an immediate checkpoint at the next sync (by passing a non-zero + # delay param to m5 ckpt) + /sbin/m5 checkpoint 1 + echo "A real multi node workload might start here ..." + # Trigger an immediate exit at the next sync (by passing a non-zero delay + # param to m5 exit) + /sbin/m5 exit 1 else # This is to avoid other (rank!=0) gem5 processes exiting # before the test (started by rank 0) completes. When rank 0 completes the - # test it will exit and that will trigger a notification to all the peer - # gem5 peocesses to stop the simulation. + # test it will exit and that will trigger a notification to all the peer + # gem5 processes to stop the simulation. echo "sleep forever..." while /bin/true do - sleep 5 + sleep 5 done fi diff --git a/util/dist/test/test-2nodes-AArch64.sh b/util/dist/test/test-2nodes-AArch64.sh new file mode 100644 index 000000000..ebdf07110 --- /dev/null +++ b/util/dist/test/test-2nodes-AArch64.sh @@ -0,0 +1,82 @@ +#! /bin/bash + +# +# Copyright (c) 2015 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Gabor Dozsa +# +# +# This is an example script to start a dist gem5 simulation using +# two AArch64 systems. It also uses the example +# dist gem5 bootscript util/dist/test/simple_bootscript.rcS that will +# run the linux ping command to check if we can see the peer system +# connected via the simulated Ethernet link. + +GEM5_DIR=$(pwd)/$(dirname $0)/../../.. 
+ +IMG=$M5_PATH/disks/aarch64-ubuntu-trusty-headless.img +VMLINUX=$M5_PATH/binaries/vmlinux.aarch64.20140821 +DTB=$M5_PATH/binaries/vexpress.aarch64.20140821.dtb + +FS_CONFIG=$GEM5_DIR/configs/example/fs.py +SW_CONFIG=$GEM5_DIR/configs/example/sw.py +GEM5_EXE=$GEM5_DIR/build/ARM/gem5.opt + +BOOT_SCRIPT=$GEM5_DIR/util/dist/test/simple_bootscript.rcS +GEM5_DIST_SH=$GEM5_DIR/util/dist/gem5-dist.sh + +DEBUG_FLAGS="--debug-flags=DistEthernet" +#CHKPT_RESTORE="-r1" + +NNODES=2 + +$GEM5_DIST_SH -n $NNODES \ + -x $GEM5_EXE \ + -s $SW_CONFIG \ + -f $FS_CONFIG \ + --m5-args \ + $DEBUG_FLAGS \ + --fs-args \ + --cpu-type=atomic \ + --num-cpus=1 \ + --machine-type=VExpress_EMM64 \ + --disk-image=$IMG \ + --kernel=$VMLINUX \ + --dtb-filename=$DTB \ + --script=$BOOT_SCRIPT \ + --cf-args \ + $CHKPT_RESTORE + diff --git a/util/multi/gem5-multi.sh b/util/multi/gem5-multi.sh deleted file mode 100755 index 4b4937c90..000000000 --- a/util/multi/gem5-multi.sh +++ /dev/null @@ -1,275 +0,0 @@ -#! /bin/bash - -# -# Copyright (c) 2015 ARM Limited -# All rights reserved -# -# The license below extends only to copyright in the software and shall -# not be construed as granting a license to any other intellectual -# property including but not limited to intellectual property relating -# to a hardware implementation of the functionality of the software -# licensed hereunder. You may use the software subject to the license -# terms below provided that you ensure that this notice is replicated -# unmodified and in its entirety in all distributions of the software, -# modified or unmodified, in source code or in binary form. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Authors: Gabor Dozsa - - -# This is a wrapper script to run a multi gem5 simulations. -# See the usage_func() below for hints on how to use it. Also, -# there are some examples in the util/multi directory (e.g. -# see util/multi/test-2nodes-AArch64.sh) -# -# -# Allocated hosts/cores are assumed to be listed in the LSB_MCPU_HOSTS -# environment variable (which is what LSF does by default). -# E.g. 
LSB_MCPU_HOSTS=\"hname1 2 hname2 4\" means we have altogether 6 slots -# allocated to launch the gem5 processes, 2 of them are on host hname1 -# and 4 of them are on host hname2. -# If LSB_MCPU_HOSTS environment variable is not defined then we launch all -# processes on the localhost. -# -# Each gem5 process are passed in a unique rank ID [0..N-1] via the kernel -# boot params. The total number of gem5 processes is also passed in. -# These values can be used in the boot script to configure the MAC/IP -# addresses - among other things (see util/multi/bootscript.rcS). -# -# Each gem5 process will create an m5out.$GEM5_RANK directory for -# the usual output files. Furthermore, there will be a separate log file -# for each ssh session (we use ssh to start gem5 processes) and one for -# the server. These are called log.$GEM5_RANK and log.server. -# - - -# print help -usage_func () -{ - echo "Usage:$0 [-debug] [-n nnodes] [-s server] [-p port] gem5_exe gem5_args" - echo " -debug : debug mode (start gem5 in gdb)" - echo " nnodes : number of gem5 processes" - echo " server : message server executable" - echo " port : message server listen port" - echo " gem5_exe : gem5 executable (full path required)" - echo " gem5_args: usual gem5 arguments ( m5 options, config script options)" - echo "Note: if no LSF slots allocation is found all proceses are launched on the localhost." 
-} - - -# Process (optional) command line options - -while true -do - case "x$1" in - x-n|x-nodes) - NNODES=$2 - shift 2 - ;; - x-s|x-server) - TCP_SERVER=$2 - shift 2 - ;; - x-p|x-port) - SERVER_PORT=$2 - shift 2 - ;; - x-debug) - GEM5_DEBUG="-debug" - shift 1 - ;; - *) - break - ;; - esac -done - -# The remaining command line args must be the usual gem5 command -(($# < 2)) && { usage_func; exit -1; } -GEM5_EXE=$1 -shift -GEM5_ARGS="$*" - -# Default values to use (in case they are not defined as command line options) -DEFAULT_TCP_SERVER=$(dirname $0)/../../util/multi/tcp_server -DEFAULT_SERVER_PORT=2200 - -[ -z "$TCP_SERVER" ] && TCP_SERVER=$DEFAULT_TCP_SERVER -[ -z "$SERVER_PORT" ] && SERVER_PORT=$DEFAULT_SERVER_PORT -[ -z "$NNODES" ] && NNODES=2 - - -# Check if all the executables we need exist -[ -x "$TCP_SERVER" ] || { echo "Executable ${TCP_SERVER} not found"; exit 1; } -[ -x "$GEM5_EXE" ] || { echo "Executable ${GEM5_EXE} not found"; exit 1; } - - -declare -a SSH_PIDS -declare -a HOSTS -declare -a NCORES - -# Find out which cluster hosts/slots are allocated or -# use localhost if there is no LSF allocation. -# We assume that allocated slots are listed in the LSB_MCPU_HOSTS -# environment variable in the form: -# host1 nslots1 host2 nslots2 ... -# (This is what LSF does by default.) 
-NH=0 -[ "x$LSB_MCPU_HOSTS" != "x" ] || LSB_MCPU_HOSTS="localhost $NNODES" -host="" -for hc in $LSB_MCPU_HOSTS -do - if [ "x$host" == "x" ] - then - host=$hc - HOSTS+=($hc) - else - NCORES+=($hc) - ((NH+=hc)) - host="" - fi -done -((NNODES==NH)) || { echo "(E) Number of cluster slots ($NH) and gem5 instances ($N) differ"; exit -1; } -#echo "hosts: ${HOSTS[@]}" -#echo "hosts: ${NCORES[@]}" -#echo ${#HOSTS[@]} - - -# function to clean up and abort if something goes wrong -abort_func () -{ - echo - echo "KILLED $(date)" - # (try to) kill all gem5 processes on all hosts - bname=$(basename $GEM5_EXE) - killall -q $bname - for h in ${HOSTS[@]} - do - ssh $h killall -q $bname - done - sleep 3 - # kill the message server and the watchdog - [ "x$SERVER_PID" != "x" ] && kill $SERVER_PID 2>/dev/null - [ "x$WATCHDOG_PID" != "x" ] && kill $WATCHDOG_PID 2>/dev/null - exit -1 -} - - -# We need a watchdog to trigger full clean up if a gem5 process dies -watchdog_func () -{ - while true - do - sleep 30 - ((NDEAD=0)) - for p in ${SSH_PIDS[*]} - do - kill -0 $p 2>/dev/null || ((NDEAD+=1)) - done - kill -0 $SERVER_PID || ((NDEAD+=1)) - if ((NDEAD>0)) - then - # we may be in the middle of an orderly termination, - # give it some time to complete before reporting abort - sleep 60 - echo -n "(I) (some) gem5 process(es) exited" - abort_func - fi - done -} - -# This function launches the gem5 processes. 
We use it only to allow launching -# gem5 processes under gdb control (in the foreground) for debugging -start_func () -{ - local N=$1 - local HOST=$2 - local ENV_ARGS=$3 - shift 3 - if [ "x$GEM5_DEBUG" != "x" ] - then - gdb --args "$@" - else - ssh $HOST $ENV_ARGS "$@" &>log.$N & - fi -} - - -# Trigger full clean up in case we are being killed by external signal -trap 'abort_func' INT TERM - -# env args to be passed explicitly to gem5 processes started via ssh -ENV_ARGS="LD_LIBRARY_PATH=$LD_LIBRARY_PATH M5_PATH=$M5_PATH" - -# launch the mesage server and check if it has started okay -$TCP_SERVER $GEM5_DEBUG $NNODES $SERVER_PORT &>log.server & -SERVER_PID=$! -sleep 2 -kill -0 $SERVER_PID || { echo "Failed to start message server"; exit -1; } - -# Now launch all the gem5 processes with ssh. -echo "START $(date)" -n=0 -for ((i=0; i < ${#HOSTS[@]}; i++)) -do - h=${HOSTS[$i]} - for ((j=0; j < ${NCORES[i]}; j++)) - do - echo "starting gem5 on $h ..." - start_func $n $h "$ENV_ARGS" $GEM5_EXE -d $(pwd)/m5out.$n $GEM5_ARGS \ - --multi \ - --multi-rank=$n \ - --multi-server-name=${HOSTS[0]} \ - --multi-server-port=$SERVER_PORT \ - --testsys-toplevel-LinuxArmSystem.boot_osflags="\"GEM5_RANK=$n GEM5_SIZE=$NNODES\"" - SSH_PIDS[$n]=$! - ((n+=1)) - done -done - -[ "x$GEM5_DEBUG" == "x" ] || { kill $SERVER_PID; echo "DEBUG exit"; exit -1; } - -# start watchdog to trigger complete abort (after a grace period) if any -# gem5 process dies -watchdog_func & -WATCHDOG_PID=$! - -# wait for exit statuses -((NFAIL=0)) -for p in ${SSH_PIDS[*]} -do - wait $p || ((NFAIL+=1)) -done -wait $SERVER_PID || ((NFAIL+=1)) - -# all done, let's terminate the watchdog -kill $WATCHDOG_PID 2>/dev/null - -if ((NFAIL==0)) -then - echo "EXIT $(date)" -else - echo "ABORT $(date)" -fi |