Diffstat (limited to 'src')
-rw-r--r--  src/gpu-compute/compute_unit.cc    63
-rw-r--r--  src/gpu-compute/compute_unit.hh    36
-rw-r--r--  src/gpu-compute/dispatcher.cc      21
-rw-r--r--  src/gpu-compute/dispatcher.hh      14
-rw-r--r--  src/gpu-compute/shader.cc          38
-rw-r--r--  src/gpu-compute/shader.hh          14
-rw-r--r--  src/gpu-compute/tlb_coalescer.cc   73
-rw-r--r--  src/gpu-compute/tlb_coalescer.hh   37
8 files changed, 96 insertions, 200 deletions
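Note: the diffs below all apply the same pattern — each bespoke nested Event subclass (with its own process() and description()) is replaced by an EventFunctionWrapper member whose lambda captures the enclosing object and calls a plain member function. As a rough, standalone sketch of that idea (plain C++ with std::function; ToyEvent and ToyShader are invented names for illustration, not gem5 classes or the gem5 API):

    #include <functional>
    #include <iostream>
    #include <string>

    // Toy stand-in for an event-driven simulator's event type: it carries a
    // callback plus a name used for debugging, instead of requiring a
    // dedicated subclass with a virtual process()/description().
    struct ToyEvent {
        std::function<void()> callback;
        std::string name;

        ToyEvent(std::function<void()> cb, std::string n)
            : callback(std::move(cb)), name(std::move(n)) {}

        void process() { callback(); }
    };

    // An object that previously defined a nested "TickEvent : public Event"
    // can instead keep a ToyEvent member whose lambda captures `this`.
    class ToyShader {
      public:
        ToyShader() : tickEvent([this] { processTick(); }, "Shader tick") {}

        void processTick() { std::cout << "tick: " << tickEvent.name << "\n"; }

        ToyEvent tickEvent;
    };

    int main() {
        ToyShader shader;
        shader.tickEvent.process();  // a scheduler would normally invoke this
        return 0;
    }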
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index ffa5243d2..87f29eb68 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -669,9 +669,8 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
         return true;
     }
 
-    ComputeUnit::DataPort::MemRespEvent *mem_resp_event =
-        new ComputeUnit::DataPort::MemRespEvent(computeUnit->memPort[index],
-                                                pkt);
+    EventFunctionWrapper *mem_resp_event =
+        computeUnit->memPort[index]->createMemRespEvent(pkt);
 
     DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x received!\n",
             computeUnit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
@@ -845,8 +844,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
         // translation is done. Schedule the mem_req_event at the
         // appropriate cycle to send the timing memory request to ruby
 
-        ComputeUnit::DataPort::MemReqEvent *mem_req_event =
-            new ComputeUnit::DataPort::MemReqEvent(memPort[index], pkt);
+        EventFunctionWrapper *mem_req_event =
+            memPort[index]->createMemReqEvent(pkt);
 
         DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data "
                 "scheduled\n", cu_id, gpuDynInst->simdId,
@@ -923,8 +922,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
 void
 ComputeUnit::sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
 {
-    ComputeUnit::DataPort::MemReqEvent *mem_req_event =
-        new ComputeUnit::DataPort::MemReqEvent(memPort[index], pkt);
+    EventFunctionWrapper *mem_req_event =
+        memPort[index]->createMemReqEvent(pkt);
 
 
     // New SenderState for the memory access
@@ -972,26 +971,20 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
     sendSyncRequest(gpuDynInst, 0, pkt);
 }
 
-const char*
-ComputeUnit::DataPort::MemRespEvent::description() const
-{
-    return "ComputeUnit memory response event";
-}
-
 void
-ComputeUnit::DataPort::MemRespEvent::process()
+ComputeUnit::DataPort::processMemRespEvent(PacketPtr pkt)
 {
     DataPort::SenderState *sender_state =
         safe_cast<DataPort::SenderState*>(pkt->senderState);
 
     GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
-    ComputeUnit *compute_unit = dataPort->computeUnit;
+    ComputeUnit *compute_unit = computeUnit;
 
     assert(gpuDynInst);
 
     DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Response for addr %#x, index %d\n",
             compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
-            pkt->req->getPaddr(), dataPort->index);
+            pkt->req->getPaddr(), index);
 
     Addr paddr = pkt->req->getPaddr();
@@ -1045,8 +1038,9 @@ ComputeUnit::DataPort::MemRespEvent::process()
                 // this memory request
                 if (gpuDynInst->useContinuation) {
                     assert(!gpuDynInst->isNoScope());
-                    gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
-                                                 gpuDynInst);
+                    gpuDynInst->execContinuation(
+                        gpuDynInst->staticInstruction(),
+                        gpuDynInst);
                 }
             }
         }
@@ -1230,9 +1224,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
     // translation is done. Schedule the mem_req_event at the appropriate
     // cycle to send the timing memory request to ruby
 
-    ComputeUnit::DataPort::MemReqEvent *mem_req_event =
-        new ComputeUnit::DataPort::MemReqEvent(computeUnit->memPort[mp_index],
-                                               new_pkt);
+    EventFunctionWrapper *mem_req_event =
+        computeUnit->memPort[mp_index]->createMemReqEvent(new_pkt);
 
     DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data scheduled\n",
             computeUnit->cu_id, gpuDynInst->simdId,
@@ -1244,32 +1237,42 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
     return true;
 }
 
-const char*
-ComputeUnit::DataPort::MemReqEvent::description() const
+EventFunctionWrapper*
+ComputeUnit::DataPort::createMemReqEvent(PacketPtr pkt)
+{
+    return new EventFunctionWrapper(
+        [this, pkt]{ processMemReqEvent(pkt); },
+        "ComputeUnit memory request event", true);
+}
+
+EventFunctionWrapper*
+ComputeUnit::DataPort::createMemRespEvent(PacketPtr pkt)
 {
-    return "ComputeUnit memory request event";
+    return new EventFunctionWrapper(
+        [this, pkt]{ processMemRespEvent(pkt); },
+        "ComputeUnit memory response event", true);
 }
 
 void
-ComputeUnit::DataPort::MemReqEvent::process()
+ComputeUnit::DataPort::processMemReqEvent(PacketPtr pkt)
 {
     SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
     GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
-    ComputeUnit *compute_unit M5_VAR_USED = dataPort->computeUnit;
+    ComputeUnit *compute_unit M5_VAR_USED = computeUnit;
 
-    if (!(dataPort->sendTimingReq(pkt))) {
-        dataPort->retries.push_back(std::make_pair(pkt, gpuDynInst));
+    if (!(sendTimingReq(pkt))) {
+        retries.push_back(std::make_pair(pkt, gpuDynInst));
 
         DPRINTF(GPUPort,
                 "CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
                 compute_unit->cu_id, gpuDynInst->simdId,
-                gpuDynInst->wfSlotId, dataPort->index,
+                gpuDynInst->wfSlotId, index,
                 pkt->req->getPaddr());
     } else {
         DPRINTF(GPUPort,
                 "CU%d: WF[%d][%d]: index %d, addr %#x data req sent!\n",
                 compute_unit->cu_id, gpuDynInst->simdId,
-                gpuDynInst->wfSlotId, dataPort->index,
+                gpuDynInst->wfSlotId, index,
                 pkt->req->getPaddr());
     }
 }
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index 4a1c09c27..150228694 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -440,39 +440,11 @@ class ComputeUnit : public MemObject
               saved(sender_state) { }
         };
 
-        class MemReqEvent : public Event
-        {
-          private:
-            DataPort *dataPort;
-            PacketPtr pkt;
-
-          public:
-            MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
-                : Event(), dataPort(_data_port), pkt(_pkt)
-            {
-                setFlags(Event::AutoDelete);
-            }
-
-            void process();
-            const char *description() const;
-        };
+        void processMemReqEvent(PacketPtr pkt);
+        EventFunctionWrapper *createMemReqEvent(PacketPtr pkt);
 
-        class MemRespEvent : public Event
-        {
-          private:
-            DataPort *dataPort;
-            PacketPtr pkt;
-
-          public:
-            MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
-                : Event(), dataPort(_data_port), pkt(_pkt)
-            {
-                setFlags(Event::AutoDelete);
-            }
-
-            void process();
-            const char *description() const;
-        };
+        void processMemRespEvent(PacketPtr pkt);
+        EventFunctionWrapper *createMemRespEvent(PacketPtr pkt);
 
         std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;
diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc
index 2ce96ec34..7fd1101b1 100644
--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -50,7 +50,9 @@ GpuDispatcher::GpuDispatcher(const Params *p)
     : DmaDevice(p), _masterId(p->system->getMasterId(name() + ".disp")),
       pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency),
       dispatchCount(0), dispatchActive(false), cpu(p->cpu),
-      shader(p->shader_pointer), driver(p->cl_driver), tickEvent(this)
+      shader(p->shader_pointer), driver(p->cl_driver),
+      tickEvent([this]{ exec(); }, "GPU Dispatcher tick",
+                false, Event::CPU_Tick_Pri)
 {
     shader->handshake(this);
     driver->handshake(this);
@@ -363,23 +365,6 @@ GpuDispatcher::accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off)
     }
 }
 
-GpuDispatcher::TickEvent::TickEvent(GpuDispatcher *_dispatcher)
-    : Event(CPU_Tick_Pri), dispatcher(_dispatcher)
-{
-}
-
-void
-GpuDispatcher::TickEvent::process()
-{
-    dispatcher->exec();
-}
-
-const char*
-GpuDispatcher::TickEvent::description() const
-{
-    return "GPU Dispatcher tick";
-}
-
 // helper functions for driver to retrieve GPU attributes
 int
 GpuDispatcher::getNumCUs()
diff --git a/src/gpu-compute/dispatcher.hh b/src/gpu-compute/dispatcher.hh
index f5e89e8aa..50a1d800e 100644
--- a/src/gpu-compute/dispatcher.hh
+++ b/src/gpu-compute/dispatcher.hh
@@ -55,17 +55,6 @@ class GpuDispatcher : public DmaDevice
   public:
     typedef GpuDispatcherParams Params;
 
-    class TickEvent : public Event
-    {
-      private:
-        GpuDispatcher *dispatcher;
-
-      public:
-        TickEvent(GpuDispatcher *);
-        void process();
-        const char *description() const;
-    };
-
     MasterID masterId() { return _masterId; }
 
   protected:
@@ -93,7 +82,8 @@ class GpuDispatcher : public DmaDevice
     BaseCPU *cpu;
     Shader *shader;
     ClDriver *driver;
-    TickEvent tickEvent;
+    EventFunctionWrapper tickEvent;
+
     static GpuDispatcher *instance;
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 6d6154503..41671f85b 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -50,14 +50,17 @@
 #include "mem/ruby/system/RubySystem.hh"
 #include "sim/sim_exit.hh"
 
-Shader::Shader(const Params *p) : ClockedObject(p),
-    clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr),
-    cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing),
-    hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync),
-    separate_acquire_release(p->separate_acquire_release), coissue_return(1),
-    trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
-    globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
-    box_tick_cnt(0), start_tick_cnt(0)
+Shader::Shader(const Params *p)
+    : ClockedObject(p), clock(p->clk_domain->clockPeriod()),
+      cpuThread(nullptr), gpuTc(nullptr), cpuPointer(p->cpu_pointer),
+      tickEvent([this]{ processTick(); }, "Shader tick",
+                false, Event::CPU_Tick_Pri),
+      timingSim(p->timing), hsail_mode(SIMT),
+      impl_kern_boundary_sync(p->impl_kern_boundary_sync),
+      separate_acquire_release(p->separate_acquire_release), coissue_return(1),
+      trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
+      globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
+      box_tick_cnt(0), start_tick_cnt(0)
 {
 
     cuList.resize(n_cu);
@@ -317,27 +320,16 @@ Shader::ScheduleAdd(uint32_t *val,Tick when,int x)
     ++sa_n;
 }
 
-Shader::TickEvent::TickEvent(Shader *_shader)
-    : Event(CPU_Tick_Pri), shader(_shader)
-{
-}
-
 void
-Shader::TickEvent::process()
+Shader::processTick()
 {
-    if (shader->busy()) {
-        shader->exec();
-        shader->schedule(this, curTick() + shader->ticks(1));
+    if (busy()) {
+        exec();
+        schedule(tickEvent, curTick() + ticks(1));
     }
 }
 
-const char*
-Shader::TickEvent::description() const
-{
-    return "Shader tick";
-}
-
 void
 Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
                   MemCmd cmd, bool suppress_func_errors)
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index 55c3feef9..f9c1ad4b2 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -99,18 +99,8 @@ class Shader : public ClockedObject
     ThreadContext *gpuTc;
     BaseCPU *cpuPointer;
 
-    class TickEvent : public Event
-    {
-      private:
-        Shader *shader;
-
-      public:
-        TickEvent(Shader*);
-        void process();
-        const char* description() const;
-    };
-
-    TickEvent tickEvent;
+    void processTick();
+    EventFunctionWrapper tickEvent;
 
     // is this simulation going to be timing mode in the memory?
     bool timingSim;
diff --git a/src/gpu-compute/tlb_coalescer.cc b/src/gpu-compute/tlb_coalescer.cc
index c9b888d5f..9b6c9e941 100644
--- a/src/gpu-compute/tlb_coalescer.cc
+++ b/src/gpu-compute/tlb_coalescer.cc
@@ -39,11 +39,18 @@
 #include "debug/GPUTLB.hh"
 
-TLBCoalescer::TLBCoalescer(const Params *p) : MemObject(p),
-    clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle),
-    coalescingWindow(p->coalescingWindow),
-    disableCoalescing(p->disableCoalescing), probeTLBEvent(this),
-    cleanupEvent(this)
+TLBCoalescer::TLBCoalescer(const Params *p)
+    : MemObject(p),
+      clock(p->clk_domain->clockPeriod()),
+      TLBProbesPerCycle(p->probesPerCycle),
+      coalescingWindow(p->coalescingWindow),
+      disableCoalescing(p->disableCoalescing),
+      probeTLBEvent([this]{ processProbeTLBEvent(); },
+                    "Probe the TLB below",
+                    false, Event::CPU_Tick_Pri),
+      cleanupEvent([this]{ processCleanupEvent(); },
+                   "Cleanup issuedTranslationsTable hashmap",
+                   false, Event::Maximum_Pri)
 {
     // create the slave ports based on the number of connected ports
     for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
@@ -390,17 +397,6 @@ TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
     fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
 }
 
-TLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer)
-    : Event(CPU_Tick_Pri), coalescer(_coalescer)
-{
-}
-
-const char*
-TLBCoalescer::IssueProbeEvent::description() const
-{
-    return "Probe the TLB below";
-}
-
 /*
  * Here we scan the coalescer FIFO and issue the max
  * number of permitted probes to the TLB below. We
@@ -414,7 +410,7 @@ TLBCoalescer::IssueProbeEvent::description() const
  * track of the outstanding reqs)
  */
 void
-TLBCoalescer::IssueProbeEvent::process()
+TLBCoalescer::processProbeTLBEvent()
 {
     // number of TLB probes sent so far
     int sent_probes = 0;
@@ -425,10 +421,10 @@ TLBCoalescer::IssueProbeEvent::process()
     // returns false or when there is another outstanding request for the
     // same virt. page.
 
-    DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n");
+    DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);
 
-    for (auto iter = coalescer->coalescerFIFO.begin();
-         iter != coalescer->coalescerFIFO.end() && !rejected; ) {
+    for (auto iter = coalescerFIFO.begin();
+         iter != coalescerFIFO.end() && !rejected; ) {
         int coalescedReq_cnt = iter->second.size();
         int i = 0;
         int vector_index = 0;
@@ -446,7 +442,7 @@ TLBCoalescer::IssueProbeEvent::process()
             // is there another outstanding request for the same page addr?
             int pending_reqs =
-                coalescer->issuedTranslationsTable.count(virt_page_addr);
+                issuedTranslationsTable.count(virt_page_addr);
 
             if (pending_reqs) {
                 DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
@@ -459,7 +455,7 @@ TLBCoalescer::IssueProbeEvent::process()
             }
 
             // send the coalesced request for virt_page_addr
-            if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
+            if (!memSidePort[0]->sendTimingReq(first_packet)) {
                 DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
                         virt_page_addr);
@@ -479,22 +475,22 @@ TLBCoalescer::IssueProbeEvent::process()
                 // by the one we just sent counting all the way from
                 // the top of TLB hiearchy (i.e., from the CU)
                 int req_cnt = tmp_sender_state->reqCnt.back();
-                coalescer->queuingCycles += (curTick() * req_cnt);
+                queuingCycles += (curTick() * req_cnt);
 
                 DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
-                        coalescer->name(), req_cnt);
+                        name(), req_cnt);
 
                 // pkt_cnt is number of packets we coalesced into the one
                 // we just sent but only at this coalescer level
                 int pkt_cnt = iter->second[vector_index].size();
-                coalescer->localqueuingCycles += (curTick() * pkt_cnt);
+                localqueuingCycles += (curTick() * pkt_cnt);
             }
 
             DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
                     virt_page_addr);
 
             //copy coalescedReq to issuedTranslationsTable
-            coalescer->issuedTranslationsTable[virt_page_addr]
+            issuedTranslationsTable[virt_page_addr]
                 = iter->second[vector_index];
 
             //erase the entry of this coalesced req
@@ -504,7 +500,7 @@
             assert(i == coalescedReq_cnt);
 
             sent_probes++;
-            if (sent_probes == coalescer->TLBProbesPerCycle)
+            if (sent_probes == TLBProbesPerCycle)
                 return;
         }
     }
@@ -512,31 +508,20 @@
         //if there are no more coalesced reqs for this tick_index
        //erase the hash_map with the first iterator
        if (iter->second.empty()) {
-            coalescer->coalescerFIFO.erase(iter++);
+            coalescerFIFO.erase(iter++);
        } else {
            ++iter;
        }
     }
 }
 
-TLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer)
-    : Event(Maximum_Pri), coalescer(_coalescer)
-{
-}
-
-const char*
-TLBCoalescer::CleanupEvent::description() const
-{
-    return "Cleanup issuedTranslationsTable hashmap";
-}
-
 void
-TLBCoalescer::CleanupEvent::process()
+TLBCoalescer::processCleanupEvent()
 {
-    while (!coalescer->cleanupQueue.empty()) {
-        Addr cleanup_addr = coalescer->cleanupQueue.front();
-        coalescer->cleanupQueue.pop();
-        coalescer->issuedTranslationsTable.erase(cleanup_addr);
+    while (!cleanupQueue.empty()) {
+        Addr cleanup_addr = cleanupQueue.front();
+        cleanupQueue.pop();
+        issuedTranslationsTable.erase(cleanup_addr);
 
         DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
                 cleanup_addr);
diff --git a/src/gpu-compute/tlb_coalescer.hh b/src/gpu-compute/tlb_coalescer.hh
index 09210148b..b03e77150 100644
--- a/src/gpu-compute/tlb_coalescer.hh
+++ b/src/gpu-compute/tlb_coalescer.hh
@@ -214,35 +214,14 @@ class TLBCoalescer : public MemObject
     BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx);
     BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx);
 
-    class IssueProbeEvent : public Event
-    {
-      private:
-        TLBCoalescer *coalescer;
-
-      public:
-        IssueProbeEvent(TLBCoalescer *_coalescer);
-        void process();
-        const char *description() const;
-    };
-
-    // this event issues the TLB probes
-    IssueProbeEvent probeTLBEvent;
-
-    // the cleanupEvent is scheduled after a TLBEvent triggers
-    // in order to free memory and do the required clean-up
-    class CleanupEvent : public Event
-    {
-      private:
-        TLBCoalescer *coalescer;
-
-      public:
-        CleanupEvent(TLBCoalescer *_coalescer);
-        void process();
-        const char* description() const;
-    };
-
-    // schedule cleanup
-    CleanupEvent cleanupEvent;
+    void processProbeTLBEvent();
+    /// This event issues the TLB probes
+    EventFunctionWrapper probeTLBEvent;
+
+    void processCleanupEvent();
+    /// The cleanupEvent is scheduled after a TLBEvent triggers
+    /// in order to free memory and do the required clean-up
+    EventFunctionWrapper cleanupEvent;
 
     // this FIFO queue keeps track of the virt. page
     // addresses that are pending cleanup