summaryrefslogtreecommitdiff
path: root/src/dev/net/dist_iface.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/dev/net/dist_iface.cc')
-rw-r--r--src/dev/net/dist_iface.cc145
1 files changed, 120 insertions, 25 deletions
diff --git a/src/dev/net/dist_iface.cc b/src/dev/net/dist_iface.cc
index 26fe45317..79408c304 100644
--- a/src/dev/net/dist_iface.cc
+++ b/src/dev/net/dist_iface.cc
@@ -48,24 +48,28 @@
#include "base/random.hh"
#include "base/trace.hh"
+#include "cpu/thread_context.hh"
#include "debug/DistEthernet.hh"
#include "debug/DistEthernetPkt.hh"
#include "dev/net/etherpkt.hh"
#include "sim/sim_exit.hh"
#include "sim/sim_object.hh"
+#include "sim/system.hh"
using namespace std;
DistIface::Sync *DistIface::sync = nullptr;
+System *DistIface::sys = nullptr;
DistIface::SyncEvent *DistIface::syncEvent = nullptr;
unsigned DistIface::distIfaceNum = 0;
unsigned DistIface::recvThreadsNum = 0;
DistIface *DistIface::master = nullptr;
+bool DistIface::isSwitch = false;
void
DistIface::Sync::init(Tick start_tick, Tick repeat_tick)
{
- if (start_tick < firstAt) {
- firstAt = start_tick;
+ if (start_tick < nextAt) {
+ nextAt = start_tick;
inform("Next dist synchronisation tick is changed to %lu.\n", nextAt);
}
@@ -85,10 +89,11 @@ DistIface::SyncSwitch::SyncSwitch(int num_nodes)
waitNum = num_nodes;
numExitReq = 0;
numCkptReq = 0;
+ numStopSyncReq = 0;
doExit = false;
doCkpt = false;
- firstAt = std::numeric_limits<Tick>::max();
- nextAt = 0;
+ doStopSync = false;
+ nextAt = std::numeric_limits<Tick>::max();
nextRepeat = std::numeric_limits<Tick>::max();
}
@@ -97,10 +102,11 @@ DistIface::SyncNode::SyncNode()
waitNum = 0;
needExit = ReqType::none;
needCkpt = ReqType::none;
+ needStopSync = ReqType::none;
doExit = false;
doCkpt = false;
- firstAt = std::numeric_limits<Tick>::max();
- nextAt = 0;
+ doStopSync = false;
+ nextAt = std::numeric_limits<Tick>::max();
nextRepeat = std::numeric_limits<Tick>::max();
}
@@ -117,11 +123,14 @@ DistIface::SyncNode::run(bool same_tick)
header.sendTick = curTick();
header.syncRepeat = nextRepeat;
header.needCkpt = needCkpt;
+ header.needStopSync = needStopSync;
if (needCkpt != ReqType::none)
needCkpt = ReqType::pending;
header.needExit = needExit;
if (needExit != ReqType::none)
needExit = ReqType::pending;
+ if (needStopSync != ReqType::none)
+ needStopSync = ReqType::pending;
DistIface::master->sendCmd(header);
// now wait until all receiver threads complete the synchronisation
auto lf = [this]{ return waitNum == 0; };
@@ -161,6 +170,13 @@ DistIface::SyncSwitch::run(bool same_tick)
} else {
header.needExit = ReqType::none;
}
+ if (doStopSync || numStopSyncReq == numNodes) {
+ doStopSync = true;
+ numStopSyncReq = 0;
+ header.needStopSync = ReqType::immediate;
+ } else {
+ header.needStopSync = ReqType::none;
+ }
DistIface::master->sendCmd(header);
}
@@ -168,7 +184,8 @@ void
DistIface::SyncSwitch::progress(Tick send_tick,
Tick sync_repeat,
ReqType need_ckpt,
- ReqType need_exit)
+ ReqType need_exit,
+ ReqType need_stop_sync)
{
std::unique_lock<std::mutex> sync_lock(lock);
assert(waitNum > 0);
@@ -186,6 +203,10 @@ DistIface::SyncSwitch::progress(Tick send_tick,
numExitReq++;
else if (need_exit == ReqType::immediate)
doExit = true;
+ if (need_stop_sync == ReqType::collective)
+ numStopSyncReq++;
+ else if (need_stop_sync == ReqType::immediate)
+ doStopSync = true;
waitNum--;
// Notify the simulation thread if the on-going sync is complete
@@ -199,7 +220,8 @@ void
DistIface::SyncNode::progress(Tick max_send_tick,
Tick next_repeat,
ReqType do_ckpt,
- ReqType do_exit)
+ ReqType do_exit,
+ ReqType do_stop_sync)
{
std::unique_lock<std::mutex> sync_lock(lock);
assert(waitNum > 0);
@@ -208,6 +230,7 @@ DistIface::SyncNode::progress(Tick max_send_tick,
nextRepeat = next_repeat;
doCkpt = (do_ckpt != ReqType::none);
doExit = (do_exit != ReqType::none);
+ doStopSync = (do_stop_sync != ReqType::none);
waitNum--;
// Notify the simulation thread if the on-going sync is complete
@@ -288,29 +311,27 @@ DistIface::SyncEvent::start()
// At this point, all DistIface objects has already called Sync::init() so
// we have a local minimum of the start tick and repeat for the periodic
// sync.
- Tick firstAt = DistIface::sync->firstAt;
repeat = DistIface::sync->nextRepeat;
// Do a global barrier to agree on a common repeat value (the smallest
- // one from all participating nodes
- DistIface::sync->run(curTick() == 0);
+ // one from all participating nodes.
+ DistIface::sync->run(false);
assert(!DistIface::sync->doCkpt);
assert(!DistIface::sync->doExit);
+ assert(!DistIface::sync->doStopSync);
assert(DistIface::sync->nextAt >= curTick());
assert(DistIface::sync->nextRepeat <= repeat);
- // if this is called at tick 0 then we use the config start param otherwise
- // the maximum of the current tick of all participating nodes
- if (curTick() == 0) {
+ if (curTick() == 0)
assert(!scheduled());
- assert(DistIface::sync->nextAt == 0);
- schedule(firstAt);
- } else {
- if (scheduled())
- reschedule(DistIface::sync->nextAt);
- else
- schedule(DistIface::sync->nextAt);
- }
+
+ // Use the maximum of the current tick for all participating nodes or a
+ // user provided starting tick.
+ if (scheduled())
+ reschedule(DistIface::sync->nextAt);
+ else
+ schedule(DistIface::sync->nextAt);
+
inform("Dist sync scheduled at %lu and repeats %lu\n", when(),
DistIface::sync->nextRepeat);
}
@@ -352,7 +373,27 @@ DistIface::SyncEvent::process()
exitSimLoop("checkpoint");
if (DistIface::sync->doExit)
exitSimLoop("exit request from gem5 peers");
+ if (DistIface::sync->doStopSync) {
+ DistIface::sync->doStopSync = false;
+ inform("synchronization disabled at %lu\n", curTick());
+ // The switch node needs to wait for the next sync immediately.
+ if (DistIface::isSwitch) {
+ start();
+ } else {
+ // Wake up thread contexts on non-switch nodes.
+ for (int i = 0; i < DistIface::master->sys->numContexts(); i++) {
+ ThreadContext *tc =
+ DistIface::master->sys->getThreadContext(i);
+ if (tc->status() == ThreadContext::Suspended)
+ tc->activate();
+ else
+ warn_once("Tried to wake up thread in dist-gem5, but it "
+ "was already awake!\n");
+ }
+ }
+ return;
+ }
// schedule the next periodic sync
repeat = DistIface::sync->nextRepeat;
schedule(curTick() + repeat);
@@ -537,9 +578,10 @@ DistIface::DistIface(unsigned dist_rank,
Tick sync_start,
Tick sync_repeat,
EventManager *em,
+ bool use_pseudo_op,
bool is_switch, int num_nodes) :
syncStart(sync_start), syncRepeat(sync_repeat),
- recvThread(nullptr), recvScheduler(em),
+ recvThread(nullptr), recvScheduler(em), syncStartOnPseudoOp(use_pseudo_op),
rank(dist_rank), size(dist_size)
{
DPRINTF(DistEthernet, "DistIface() ctor rank:%d\n",dist_rank);
@@ -547,6 +589,7 @@ DistIface::DistIface(unsigned dist_rank,
if (master == nullptr) {
assert(sync == nullptr);
assert(syncEvent == nullptr);
+ isSwitch = is_switch;
if (is_switch)
sync = new SyncSwitch(num_nodes);
else
@@ -628,7 +671,8 @@ DistIface::recvThreadFunc(Event *recv_done, Tick link_delay)
sync->progress(header.sendTick,
header.syncRepeat,
header.needCkpt,
- header.needExit);
+ header.needExit,
+ header.needStopSync);
}
}
}
@@ -732,7 +776,8 @@ void
DistIface::startup()
{
DPRINTF(DistEthernet, "DistIface::startup() started\n");
- if (this == master)
+ // Schedule synchronization unless we are not a switch in pseudo_op mode.
+ if (this == master && (!syncStartOnPseudoOp || isSwitch))
syncEvent->start();
DPRINTF(DistEthernet, "DistIface::startup() done\n");
}
@@ -761,6 +806,52 @@ DistIface::readyToCkpt(Tick delay, Tick period)
return ret;
}
+void
+DistIface::SyncNode::requestStopSync(ReqType req)
+{
+ std::lock_guard<std::mutex> sync_lock(lock);
+ needStopSync = req;
+}
+
+void
+DistIface::toggleSync(ThreadContext *tc)
+{
+ // Unforunate that we have to populate the system pointer member this way.
+ master->sys = tc->getSystemPtr();
+
+ // The invariant for both syncing and "unsyncing" is that all threads will
+ // stop executing intructions until the desired sync state has been reached
+ // for all nodes. This is the easiest way to prevent deadlock (in the case
+ // of "unsyncing") and causality errors (in the case of syncing).
+ if (master->syncEvent->scheduled()) {
+ inform("Request toggling syncronization off\n");
+ master->sync->requestStopSync(ReqType::collective);
+
+ // At this point, we have no clue when everyone will reach the sync
+ // stop point. Suspend execution of all local thread contexts.
+ // Dist-gem5 will reactivate all thread contexts when everyone has
+ // reached the sync stop point.
+ for (int i = 0; i < master->sys->numContexts(); i++) {
+ ThreadContext *tc = master->sys->getThreadContext(i);
+ if (tc->status() == ThreadContext::Active)
+ tc->quiesce();
+ }
+ } else {
+ inform("Request toggling syncronization on\n");
+ master->syncEvent->start();
+
+ // We need to suspend all CPUs until the sync point is reached by all
+ // nodes to prevent causality errors. We can also schedule CPU
+ // activation here, since we know exactly when the next sync will
+ // occur.
+ for (int i = 0; i < master->sys->numContexts(); i++) {
+ ThreadContext *tc = master->sys->getThreadContext(i);
+ if (tc->status() == ThreadContext::Active)
+ tc->quiesceTick(master->syncEvent->when() + 1);
+ }
+ }
+}
+
bool
DistIface::readyToExit(Tick delay)
{
@@ -768,6 +859,10 @@ DistIface::readyToExit(Tick delay)
DPRINTF(DistEthernet, "DistIface::readyToExit() called, delay:%lu\n",
delay);
if (master) {
+ // To successfully coordinate an exit, all nodes must be synchronising
+ if (!master->syncEvent->scheduled())
+ master->syncEvent->start();
+
if (delay == 0) {
inform("m5 exit called with zero delay => triggering collaborative "
"exit\n");