diff options
Diffstat (limited to 'src/dev/net/dist_iface.cc')
-rw-r--r-- | src/dev/net/dist_iface.cc | 145 |
1 files changed, 120 insertions, 25 deletions
diff --git a/src/dev/net/dist_iface.cc b/src/dev/net/dist_iface.cc index 26fe45317..79408c304 100644 --- a/src/dev/net/dist_iface.cc +++ b/src/dev/net/dist_iface.cc @@ -48,24 +48,28 @@ #include "base/random.hh" #include "base/trace.hh" +#include "cpu/thread_context.hh" #include "debug/DistEthernet.hh" #include "debug/DistEthernetPkt.hh" #include "dev/net/etherpkt.hh" #include "sim/sim_exit.hh" #include "sim/sim_object.hh" +#include "sim/system.hh" using namespace std; DistIface::Sync *DistIface::sync = nullptr; +System *DistIface::sys = nullptr; DistIface::SyncEvent *DistIface::syncEvent = nullptr; unsigned DistIface::distIfaceNum = 0; unsigned DistIface::recvThreadsNum = 0; DistIface *DistIface::master = nullptr; +bool DistIface::isSwitch = false; void DistIface::Sync::init(Tick start_tick, Tick repeat_tick) { - if (start_tick < firstAt) { - firstAt = start_tick; + if (start_tick < nextAt) { + nextAt = start_tick; inform("Next dist synchronisation tick is changed to %lu.\n", nextAt); } @@ -85,10 +89,11 @@ DistIface::SyncSwitch::SyncSwitch(int num_nodes) waitNum = num_nodes; numExitReq = 0; numCkptReq = 0; + numStopSyncReq = 0; doExit = false; doCkpt = false; - firstAt = std::numeric_limits<Tick>::max(); - nextAt = 0; + doStopSync = false; + nextAt = std::numeric_limits<Tick>::max(); nextRepeat = std::numeric_limits<Tick>::max(); } @@ -97,10 +102,11 @@ DistIface::SyncNode::SyncNode() waitNum = 0; needExit = ReqType::none; needCkpt = ReqType::none; + needStopSync = ReqType::none; doExit = false; doCkpt = false; - firstAt = std::numeric_limits<Tick>::max(); - nextAt = 0; + doStopSync = false; + nextAt = std::numeric_limits<Tick>::max(); nextRepeat = std::numeric_limits<Tick>::max(); } @@ -117,11 +123,14 @@ DistIface::SyncNode::run(bool same_tick) header.sendTick = curTick(); header.syncRepeat = nextRepeat; header.needCkpt = needCkpt; + header.needStopSync = needStopSync; if (needCkpt != ReqType::none) needCkpt = ReqType::pending; header.needExit = needExit; if (needExit != ReqType::none) needExit = ReqType::pending; + if (needStopSync != ReqType::none) + needStopSync = ReqType::pending; DistIface::master->sendCmd(header); // now wait until all receiver threads complete the synchronisation auto lf = [this]{ return waitNum == 0; }; @@ -161,6 +170,13 @@ DistIface::SyncSwitch::run(bool same_tick) } else { header.needExit = ReqType::none; } + if (doStopSync || numStopSyncReq == numNodes) { + doStopSync = true; + numStopSyncReq = 0; + header.needStopSync = ReqType::immediate; + } else { + header.needStopSync = ReqType::none; + } DistIface::master->sendCmd(header); } @@ -168,7 +184,8 @@ void DistIface::SyncSwitch::progress(Tick send_tick, Tick sync_repeat, ReqType need_ckpt, - ReqType need_exit) + ReqType need_exit, + ReqType need_stop_sync) { std::unique_lock<std::mutex> sync_lock(lock); assert(waitNum > 0); @@ -186,6 +203,10 @@ DistIface::SyncSwitch::progress(Tick send_tick, numExitReq++; else if (need_exit == ReqType::immediate) doExit = true; + if (need_stop_sync == ReqType::collective) + numStopSyncReq++; + else if (need_stop_sync == ReqType::immediate) + doStopSync = true; waitNum--; // Notify the simulation thread if the on-going sync is complete @@ -199,7 +220,8 @@ void DistIface::SyncNode::progress(Tick max_send_tick, Tick next_repeat, ReqType do_ckpt, - ReqType do_exit) + ReqType do_exit, + ReqType do_stop_sync) { std::unique_lock<std::mutex> sync_lock(lock); assert(waitNum > 0); @@ -208,6 +230,7 @@ DistIface::SyncNode::progress(Tick max_send_tick, nextRepeat = next_repeat; doCkpt = (do_ckpt != ReqType::none); doExit = (do_exit != ReqType::none); + doStopSync = (do_stop_sync != ReqType::none); waitNum--; // Notify the simulation thread if the on-going sync is complete @@ -288,29 +311,27 @@ DistIface::SyncEvent::start() // At this point, all DistIface objects has already called Sync::init() so // we have a local minimum of the start tick and repeat for the periodic // sync. - Tick firstAt = DistIface::sync->firstAt; repeat = DistIface::sync->nextRepeat; // Do a global barrier to agree on a common repeat value (the smallest - // one from all participating nodes - DistIface::sync->run(curTick() == 0); + // one from all participating nodes. + DistIface::sync->run(false); assert(!DistIface::sync->doCkpt); assert(!DistIface::sync->doExit); + assert(!DistIface::sync->doStopSync); assert(DistIface::sync->nextAt >= curTick()); assert(DistIface::sync->nextRepeat <= repeat); - // if this is called at tick 0 then we use the config start param otherwise - // the maximum of the current tick of all participating nodes - if (curTick() == 0) { + if (curTick() == 0) assert(!scheduled()); - assert(DistIface::sync->nextAt == 0); - schedule(firstAt); - } else { - if (scheduled()) - reschedule(DistIface::sync->nextAt); - else - schedule(DistIface::sync->nextAt); - } + + // Use the maximum of the current tick for all participating nodes or a + // user provided starting tick. + if (scheduled()) + reschedule(DistIface::sync->nextAt); + else + schedule(DistIface::sync->nextAt); + inform("Dist sync scheduled at %lu and repeats %lu\n", when(), DistIface::sync->nextRepeat); } @@ -352,7 +373,27 @@ DistIface::SyncEvent::process() exitSimLoop("checkpoint"); if (DistIface::sync->doExit) exitSimLoop("exit request from gem5 peers"); + if (DistIface::sync->doStopSync) { + DistIface::sync->doStopSync = false; + inform("synchronization disabled at %lu\n", curTick()); + // The switch node needs to wait for the next sync immediately. + if (DistIface::isSwitch) { + start(); + } else { + // Wake up thread contexts on non-switch nodes. + for (int i = 0; i < DistIface::master->sys->numContexts(); i++) { + ThreadContext *tc = + DistIface::master->sys->getThreadContext(i); + if (tc->status() == ThreadContext::Suspended) + tc->activate(); + else + warn_once("Tried to wake up thread in dist-gem5, but it " + "was already awake!\n"); + } + } + return; + } // schedule the next periodic sync repeat = DistIface::sync->nextRepeat; schedule(curTick() + repeat); @@ -537,9 +578,10 @@ DistIface::DistIface(unsigned dist_rank, Tick sync_start, Tick sync_repeat, EventManager *em, + bool use_pseudo_op, bool is_switch, int num_nodes) : syncStart(sync_start), syncRepeat(sync_repeat), - recvThread(nullptr), recvScheduler(em), + recvThread(nullptr), recvScheduler(em), syncStartOnPseudoOp(use_pseudo_op), rank(dist_rank), size(dist_size) { DPRINTF(DistEthernet, "DistIface() ctor rank:%d\n",dist_rank); @@ -547,6 +589,7 @@ DistIface::DistIface(unsigned dist_rank, if (master == nullptr) { assert(sync == nullptr); assert(syncEvent == nullptr); + isSwitch = is_switch; if (is_switch) sync = new SyncSwitch(num_nodes); else @@ -628,7 +671,8 @@ DistIface::recvThreadFunc(Event *recv_done, Tick link_delay) sync->progress(header.sendTick, header.syncRepeat, header.needCkpt, - header.needExit); + header.needExit, + header.needStopSync); } } } @@ -732,7 +776,8 @@ void DistIface::startup() { DPRINTF(DistEthernet, "DistIface::startup() started\n"); - if (this == master) + // Schedule synchronization unless we are not a switch in pseudo_op mode. + if (this == master && (!syncStartOnPseudoOp || isSwitch)) syncEvent->start(); DPRINTF(DistEthernet, "DistIface::startup() done\n"); } @@ -761,6 +806,52 @@ DistIface::readyToCkpt(Tick delay, Tick period) return ret; } +void +DistIface::SyncNode::requestStopSync(ReqType req) +{ + std::lock_guard<std::mutex> sync_lock(lock); + needStopSync = req; +} + +void +DistIface::toggleSync(ThreadContext *tc) +{ + // Unforunate that we have to populate the system pointer member this way. + master->sys = tc->getSystemPtr(); + + // The invariant for both syncing and "unsyncing" is that all threads will + // stop executing intructions until the desired sync state has been reached + // for all nodes. This is the easiest way to prevent deadlock (in the case + // of "unsyncing") and causality errors (in the case of syncing). + if (master->syncEvent->scheduled()) { + inform("Request toggling syncronization off\n"); + master->sync->requestStopSync(ReqType::collective); + + // At this point, we have no clue when everyone will reach the sync + // stop point. Suspend execution of all local thread contexts. + // Dist-gem5 will reactivate all thread contexts when everyone has + // reached the sync stop point. + for (int i = 0; i < master->sys->numContexts(); i++) { + ThreadContext *tc = master->sys->getThreadContext(i); + if (tc->status() == ThreadContext::Active) + tc->quiesce(); + } + } else { + inform("Request toggling syncronization on\n"); + master->syncEvent->start(); + + // We need to suspend all CPUs until the sync point is reached by all + // nodes to prevent causality errors. We can also schedule CPU + // activation here, since we know exactly when the next sync will + // occur. + for (int i = 0; i < master->sys->numContexts(); i++) { + ThreadContext *tc = master->sys->getThreadContext(i); + if (tc->status() == ThreadContext::Active) + tc->quiesceTick(master->syncEvent->when() + 1); + } + } +} + bool DistIface::readyToExit(Tick delay) { @@ -768,6 +859,10 @@ DistIface::readyToExit(Tick delay) DPRINTF(DistEthernet, "DistIface::readyToExit() called, delay:%lu\n", delay); if (master) { + // To successfully coordinate an exit, all nodes must be synchronising + if (!master->syncEvent->scheduled()) + master->syncEvent->start(); + if (delay == 0) { inform("m5 exit called with zero delay => triggering collaborative " "exit\n"); |