summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sean Wilson <spwilson2@wisc.edu> 2017-06-27 14:18:10 -0500
committer Sean Wilson <spwilson2@wisc.edu> 2017-07-12 20:07:05 +0000
commit 741261f10bb308cdc200c5dfd8eb68567349cf19 (patch)
tree b44581cb4b93d64c079a80ab9e510205ffe93d0a
parent 55f70760de9cf9d16905372d885b7925722721a8 (diff)
download gem5-741261f10bb308cdc200c5dfd8eb68567349cf19.tar.xz
gpu-compute: Refactor some Event subclasses to lambdas
Change-Id: Ic1332b8e8ba0afacbe591c80f4d06afbf5f04bd9
Signed-off-by: Sean Wilson <spwilson2@wisc.edu>
Reviewed-on: https://gem5-review.googlesource.com/3922
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
-rw-r--r-- src/gpu-compute/compute_unit.cc 63
-rw-r--r-- src/gpu-compute/compute_unit.hh 36
-rw-r--r-- src/gpu-compute/dispatcher.cc 21
-rw-r--r-- src/gpu-compute/dispatcher.hh 14
-rw-r--r-- src/gpu-compute/shader.cc 38
-rw-r--r-- src/gpu-compute/shader.hh 14
-rw-r--r-- src/gpu-compute/tlb_coalescer.cc 73
-rw-r--r-- src/gpu-compute/tlb_coalescer.hh 37
8 files changed, 96 insertions, 200 deletions
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index ffa5243d2..87f29eb68 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -669,9 +669,8 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
return true;
}
- ComputeUnit::DataPort::MemRespEvent *mem_resp_event =
- new ComputeUnit::DataPort::MemRespEvent(computeUnit->memPort[index],
- pkt);
+ EventFunctionWrapper *mem_resp_event =
+ computeUnit->memPort[index]->createMemRespEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x received!\n",
computeUnit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
@@ -845,8 +844,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
// translation is done. Schedule the mem_req_event at the
// appropriate cycle to send the timing memory request to ruby
- ComputeUnit::DataPort::MemReqEvent *mem_req_event =
- new ComputeUnit::DataPort::MemReqEvent(memPort[index], pkt);
+ EventFunctionWrapper *mem_req_event =
+ memPort[index]->createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data "
"scheduled\n", cu_id, gpuDynInst->simdId,
@@ -923,8 +922,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
void
ComputeUnit::sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
{
- ComputeUnit::DataPort::MemReqEvent *mem_req_event =
- new ComputeUnit::DataPort::MemReqEvent(memPort[index], pkt);
+ EventFunctionWrapper *mem_req_event =
+ memPort[index]->createMemReqEvent(pkt);
// New SenderState for the memory access
@@ -972,26 +971,20 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
sendSyncRequest(gpuDynInst, 0, pkt);
}
-const char*
-ComputeUnit::DataPort::MemRespEvent::description() const
-{
- return "ComputeUnit memory response event";
-}
-
void
-ComputeUnit::DataPort::MemRespEvent::process()
+ComputeUnit::DataPort::processMemRespEvent(PacketPtr pkt)
{
DataPort::SenderState *sender_state =
safe_cast<DataPort::SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
- ComputeUnit *compute_unit = dataPort->computeUnit;
+ ComputeUnit *compute_unit = computeUnit;
assert(gpuDynInst);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Response for addr %#x, index %d\n",
compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
- pkt->req->getPaddr(), dataPort->index);
+ pkt->req->getPaddr(), index);
Addr paddr = pkt->req->getPaddr();
@@ -1045,8 +1038,9 @@ ComputeUnit::DataPort::MemRespEvent::process()
// this memory request
if (gpuDynInst->useContinuation) {
assert(!gpuDynInst->isNoScope());
- gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
- gpuDynInst);
+ gpuDynInst->execContinuation(
+ gpuDynInst->staticInstruction(),
+ gpuDynInst);
}
}
}
@@ -1230,9 +1224,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
// translation is done. Schedule the mem_req_event at the appropriate
// cycle to send the timing memory request to ruby
- ComputeUnit::DataPort::MemReqEvent *mem_req_event =
- new ComputeUnit::DataPort::MemReqEvent(computeUnit->memPort[mp_index],
- new_pkt);
+ EventFunctionWrapper *mem_req_event =
+ computeUnit->memPort[mp_index]->createMemReqEvent(new_pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data scheduled\n",
computeUnit->cu_id, gpuDynInst->simdId,
@@ -1244,32 +1237,42 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
return true;
}
-const char*
-ComputeUnit::DataPort::MemReqEvent::description() const
+EventFunctionWrapper*
+ComputeUnit::DataPort::createMemReqEvent(PacketPtr pkt)
+{
+ return new EventFunctionWrapper(
+ [this, pkt]{ processMemReqEvent(pkt); },
+ "ComputeUnit memory request event", true);
+}
+
+EventFunctionWrapper*
+ComputeUnit::DataPort::createMemRespEvent(PacketPtr pkt)
{
- return "ComputeUnit memory request event";
+ return new EventFunctionWrapper(
+ [this, pkt]{ processMemRespEvent(pkt); },
+ "ComputeUnit memory response event", true);
}
void
-ComputeUnit::DataPort::MemReqEvent::process()
+ComputeUnit::DataPort::processMemReqEvent(PacketPtr pkt)
{
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
- ComputeUnit *compute_unit M5_VAR_USED = dataPort->computeUnit;
+ ComputeUnit *compute_unit M5_VAR_USED = computeUnit;
- if (!(dataPort->sendTimingReq(pkt))) {
- dataPort->retries.push_back(std::make_pair(pkt, gpuDynInst));
+ if (!(sendTimingReq(pkt))) {
+ retries.push_back(std::make_pair(pkt, gpuDynInst));
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, dataPort->index,
+ gpuDynInst->wfSlotId, index,
pkt->req->getPaddr());
} else {
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x data req sent!\n",
compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, dataPort->index,
+ gpuDynInst->wfSlotId, index,
pkt->req->getPaddr());
}
}
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index 4a1c09c27..150228694 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -440,39 +440,11 @@ class ComputeUnit : public MemObject
saved(sender_state) { }
};
- class MemReqEvent : public Event
- {
- private:
- DataPort *dataPort;
- PacketPtr pkt;
-
- public:
- MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
- : Event(), dataPort(_data_port), pkt(_pkt)
- {
- setFlags(Event::AutoDelete);
- }
-
- void process();
- const char *description() const;
- };
+ void processMemReqEvent(PacketPtr pkt);
+ EventFunctionWrapper *createMemReqEvent(PacketPtr pkt);
- class MemRespEvent : public Event
- {
- private:
- DataPort *dataPort;
- PacketPtr pkt;
-
- public:
- MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
- : Event(), dataPort(_data_port), pkt(_pkt)
- {
- setFlags(Event::AutoDelete);
- }
-
- void process();
- const char *description() const;
- };
+ void processMemRespEvent(PacketPtr pkt);
+ EventFunctionWrapper *createMemRespEvent(PacketPtr pkt);
std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;
diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc
index 2ce96ec34..7fd1101b1 100644
--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -50,7 +50,9 @@ GpuDispatcher::GpuDispatcher(const Params *p)
: DmaDevice(p), _masterId(p->system->getMasterId(name() + ".disp")),
pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency),
dispatchCount(0), dispatchActive(false), cpu(p->cpu),
- shader(p->shader_pointer), driver(p->cl_driver), tickEvent(this)
+ shader(p->shader_pointer), driver(p->cl_driver),
+ tickEvent([this]{ exec(); }, "GPU Dispatcher tick",
+ false, Event::CPU_Tick_Pri)
{
shader->handshake(this);
driver->handshake(this);
@@ -363,23 +365,6 @@ GpuDispatcher::accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off)
}
}
-GpuDispatcher::TickEvent::TickEvent(GpuDispatcher *_dispatcher)
- : Event(CPU_Tick_Pri), dispatcher(_dispatcher)
-{
-}
-
-void
-GpuDispatcher::TickEvent::process()
-{
- dispatcher->exec();
-}
-
-const char*
-GpuDispatcher::TickEvent::description() const
-{
- return "GPU Dispatcher tick";
-}
-
// helper functions for driver to retrieve GPU attributes
int
GpuDispatcher::getNumCUs()
diff --git a/src/gpu-compute/dispatcher.hh b/src/gpu-compute/dispatcher.hh
index f5e89e8aa..50a1d800e 100644
--- a/src/gpu-compute/dispatcher.hh
+++ b/src/gpu-compute/dispatcher.hh
@@ -55,17 +55,6 @@ class GpuDispatcher : public DmaDevice
public:
typedef GpuDispatcherParams Params;
- class TickEvent : public Event
- {
- private:
- GpuDispatcher *dispatcher;
-
- public:
- TickEvent(GpuDispatcher *);
- void process();
- const char *description() const;
- };
-
MasterID masterId() { return _masterId; }
protected:
@@ -93,7 +82,8 @@ class GpuDispatcher : public DmaDevice
BaseCPU *cpu;
Shader *shader;
ClDriver *driver;
- TickEvent tickEvent;
+ EventFunctionWrapper tickEvent;
+
static GpuDispatcher *instance;
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 6d6154503..41671f85b 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -50,14 +50,17 @@
#include "mem/ruby/system/RubySystem.hh"
#include "sim/sim_exit.hh"
-Shader::Shader(const Params *p) : ClockedObject(p),
- clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr),
- cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing),
- hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync),
- separate_acquire_release(p->separate_acquire_release), coissue_return(1),
- trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
- globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
- box_tick_cnt(0), start_tick_cnt(0)
+Shader::Shader(const Params *p)
+ : ClockedObject(p), clock(p->clk_domain->clockPeriod()),
+ cpuThread(nullptr), gpuTc(nullptr), cpuPointer(p->cpu_pointer),
+ tickEvent([this]{ processTick(); }, "Shader tick",
+ false, Event::CPU_Tick_Pri),
+ timingSim(p->timing), hsail_mode(SIMT),
+ impl_kern_boundary_sync(p->impl_kern_boundary_sync),
+ separate_acquire_release(p->separate_acquire_release), coissue_return(1),
+ trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
+ globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
+ box_tick_cnt(0), start_tick_cnt(0)
{
cuList.resize(n_cu);
@@ -317,27 +320,16 @@ Shader::ScheduleAdd(uint32_t *val,Tick when,int x)
++sa_n;
}
-Shader::TickEvent::TickEvent(Shader *_shader)
- : Event(CPU_Tick_Pri), shader(_shader)
-{
-}
-
void
-Shader::TickEvent::process()
+Shader::processTick()
{
- if (shader->busy()) {
- shader->exec();
- shader->schedule(this, curTick() + shader->ticks(1));
+ if (busy()) {
+ exec();
+ schedule(tickEvent, curTick() + ticks(1));
}
}
-const char*
-Shader::TickEvent::description() const
-{
- return "Shader tick";
-}
-
void
Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
MemCmd cmd, bool suppress_func_errors)
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index 55c3feef9..f9c1ad4b2 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -99,18 +99,8 @@ class Shader : public ClockedObject
ThreadContext *gpuTc;
BaseCPU *cpuPointer;
- class TickEvent : public Event
- {
- private:
- Shader *shader;
-
- public:
- TickEvent(Shader*);
- void process();
- const char* description() const;
- };
-
- TickEvent tickEvent;
+ void processTick();
+ EventFunctionWrapper tickEvent;
// is this simulation going to be timing mode in the memory?
bool timingSim;
diff --git a/src/gpu-compute/tlb_coalescer.cc b/src/gpu-compute/tlb_coalescer.cc
index c9b888d5f..9b6c9e941 100644
--- a/src/gpu-compute/tlb_coalescer.cc
+++ b/src/gpu-compute/tlb_coalescer.cc
@@ -39,11 +39,18 @@
#include "debug/GPUTLB.hh"
-TLBCoalescer::TLBCoalescer(const Params *p) : MemObject(p),
- clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle),
- coalescingWindow(p->coalescingWindow),
- disableCoalescing(p->disableCoalescing), probeTLBEvent(this),
- cleanupEvent(this)
+TLBCoalescer::TLBCoalescer(const Params *p)
+ : MemObject(p),
+ clock(p->clk_domain->clockPeriod()),
+ TLBProbesPerCycle(p->probesPerCycle),
+ coalescingWindow(p->coalescingWindow),
+ disableCoalescing(p->disableCoalescing),
+ probeTLBEvent([this]{ processProbeTLBEvent(); },
+ "Probe the TLB below",
+ false, Event::CPU_Tick_Pri),
+ cleanupEvent([this]{ processCleanupEvent(); },
+ "Cleanup issuedTranslationsTable hashmap",
+ false, Event::Maximum_Pri)
{
// create the slave ports based on the number of connected ports
for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
@@ -390,17 +397,6 @@ TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
}
-TLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer)
- : Event(CPU_Tick_Pri), coalescer(_coalescer)
-{
-}
-
-const char*
-TLBCoalescer::IssueProbeEvent::description() const
-{
- return "Probe the TLB below";
-}
-
/*
* Here we scan the coalescer FIFO and issue the max
* number of permitted probes to the TLB below. We
@@ -414,7 +410,7 @@ TLBCoalescer::IssueProbeEvent::description() const
* track of the outstanding reqs)
*/
void
-TLBCoalescer::IssueProbeEvent::process()
+TLBCoalescer::processProbeTLBEvent()
{
// number of TLB probes sent so far
int sent_probes = 0;
@@ -425,10 +421,10 @@ TLBCoalescer::IssueProbeEvent::process()
// returns false or when there is another outstanding request for the
// same virt. page.
- DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n");
+ DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);
- for (auto iter = coalescer->coalescerFIFO.begin();
- iter != coalescer->coalescerFIFO.end() && !rejected; ) {
+ for (auto iter = coalescerFIFO.begin();
+ iter != coalescerFIFO.end() && !rejected; ) {
int coalescedReq_cnt = iter->second.size();
int i = 0;
int vector_index = 0;
@@ -446,7 +442,7 @@ TLBCoalescer::IssueProbeEvent::process()
// is there another outstanding request for the same page addr?
int pending_reqs =
- coalescer->issuedTranslationsTable.count(virt_page_addr);
+ issuedTranslationsTable.count(virt_page_addr);
if (pending_reqs) {
DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
@@ -459,7 +455,7 @@ TLBCoalescer::IssueProbeEvent::process()
}
// send the coalesced request for virt_page_addr
- if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
+ if (!memSidePort[0]->sendTimingReq(first_packet)) {
DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
virt_page_addr);
@@ -479,22 +475,22 @@ TLBCoalescer::IssueProbeEvent::process()
// by the one we just sent counting all the way from
// the top of TLB hiearchy (i.e., from the CU)
int req_cnt = tmp_sender_state->reqCnt.back();
- coalescer->queuingCycles += (curTick() * req_cnt);
+ queuingCycles += (curTick() * req_cnt);
DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
- coalescer->name(), req_cnt);
+ name(), req_cnt);
// pkt_cnt is number of packets we coalesced into the one
// we just sent but only at this coalescer level
int pkt_cnt = iter->second[vector_index].size();
- coalescer->localqueuingCycles += (curTick() * pkt_cnt);
+ localqueuingCycles += (curTick() * pkt_cnt);
}
DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
virt_page_addr);
//copy coalescedReq to issuedTranslationsTable
- coalescer->issuedTranslationsTable[virt_page_addr]
+ issuedTranslationsTable[virt_page_addr]
= iter->second[vector_index];
//erase the entry of this coalesced req
@@ -504,7 +500,7 @@ TLBCoalescer::IssueProbeEvent::process()
assert(i == coalescedReq_cnt);
sent_probes++;
- if (sent_probes == coalescer->TLBProbesPerCycle)
+ if (sent_probes == TLBProbesPerCycle)
return;
}
}
@@ -512,31 +508,20 @@ TLBCoalescer::IssueProbeEvent::process()
//if there are no more coalesced reqs for this tick_index
//erase the hash_map with the first iterator
if (iter->second.empty()) {
- coalescer->coalescerFIFO.erase(iter++);
+ coalescerFIFO.erase(iter++);
} else {
++iter;
}
}
}
-TLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer)
- : Event(Maximum_Pri), coalescer(_coalescer)
-{
-}
-
-const char*
-TLBCoalescer::CleanupEvent::description() const
-{
- return "Cleanup issuedTranslationsTable hashmap";
-}
-
void
-TLBCoalescer::CleanupEvent::process()
+TLBCoalescer::processCleanupEvent()
{
- while (!coalescer->cleanupQueue.empty()) {
- Addr cleanup_addr = coalescer->cleanupQueue.front();
- coalescer->cleanupQueue.pop();
- coalescer->issuedTranslationsTable.erase(cleanup_addr);
+ while (!cleanupQueue.empty()) {
+ Addr cleanup_addr = cleanupQueue.front();
+ cleanupQueue.pop();
+ issuedTranslationsTable.erase(cleanup_addr);
DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
cleanup_addr);
diff --git a/src/gpu-compute/tlb_coalescer.hh b/src/gpu-compute/tlb_coalescer.hh
index 09210148b..b03e77150 100644
--- a/src/gpu-compute/tlb_coalescer.hh
+++ b/src/gpu-compute/tlb_coalescer.hh
@@ -214,35 +214,14 @@ class TLBCoalescer : public MemObject
BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx);
BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx);
- class IssueProbeEvent : public Event
- {
- private:
- TLBCoalescer *coalescer;
-
- public:
- IssueProbeEvent(TLBCoalescer *_coalescer);
- void process();
- const char *description() const;
- };
-
- // this event issues the TLB probes
- IssueProbeEvent probeTLBEvent;
-
- // the cleanupEvent is scheduled after a TLBEvent triggers
- // in order to free memory and do the required clean-up
- class CleanupEvent : public Event
- {
- private:
- TLBCoalescer *coalescer;
-
- public:
- CleanupEvent(TLBCoalescer *_coalescer);
- void process();
- const char* description() const;
- };
-
- // schedule cleanup
- CleanupEvent cleanupEvent;
+ void processProbeTLBEvent();
+ /// This event issues the TLB probes
+ EventFunctionWrapper probeTLBEvent;
+
+ void processCleanupEvent();
+ /// The cleanupEvent is scheduled after a TLBEvent triggers
+ /// in order to free memory and do the required clean-up
+ EventFunctionWrapper cleanupEvent;
// this FIFO queue keeps track of the virt. page
// addresses that are pending cleanup