summaryrefslogtreecommitdiff
path: root/src/gpu-compute
diff options
context:
space:
mode:
authorGiacomo Travaglini <giacomo.travaglini@arm.com>2018-06-04 09:40:19 +0100
committerGiacomo Travaglini <giacomo.travaglini@arm.com>2018-06-11 16:55:30 +0000
commitf54020eb8155371725ab75b0fc5c419287eca084 (patch)
tree65d379f7603e689e083e9a58ff4c2e90abd19fbf /src/gpu-compute
parent2113b21996d086dab32b9fd388efe3df241bfbd2 (diff)
downloadgem5-f54020eb8155371725ab75b0fc5c419287eca084.tar.xz
misc: Using smart pointers for memory Requests
This patch changes the underlying type for RequestPtr from Request* to shared_ptr<Request>. Having memory requests managed by smart pointers will simplify the code; it will also prevent memory leaks and dangling pointers. Change-Id: I7749af38a11ac8eb4d53d8df1252951e0890fde3 Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> Reviewed-on: https://gem5-review.googlesource.com/10996 Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com> Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Diffstat (limited to 'src/gpu-compute')
-rw-r--r--src/gpu-compute/compute_unit.cc23
-rw-r--r--src/gpu-compute/fetch_unit.cc6
-rw-r--r--src/gpu-compute/gpu_tlb.cc15
-rw-r--r--src/gpu-compute/gpu_tlb.hh17
-rw-r--r--src/gpu-compute/shader.cc9
-rw-r--r--src/gpu-compute/shader.hh2
6 files changed, 35 insertions, 37 deletions
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 042347cf2..1a174c98b 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -655,7 +655,6 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
}
delete pkt->senderState;
- delete pkt->req;
delete pkt;
return true;
} else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
@@ -666,7 +665,6 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
}
delete pkt->senderState;
- delete pkt->req;
delete pkt;
return true;
}
@@ -916,7 +914,6 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
delete sender_state->tlbEntry;
delete new_pkt;
delete pkt->senderState;
- delete pkt->req;
delete pkt;
}
}
@@ -941,12 +938,13 @@ ComputeUnit::sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
void
ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
- Request* req)
+ RequestPtr req)
{
assert(gpuDynInst->isGlobalSeg());
if (!req) {
- req = new Request(0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
+ req = std::make_shared<Request>(
+ 0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
}
req->setPaddr(0);
if (kernelLaunch) {
@@ -1057,7 +1055,6 @@ ComputeUnit::DataPort::processMemRespEvent(PacketPtr pkt)
}
delete pkt->senderState;
- delete pkt->req;
delete pkt;
}
@@ -1178,11 +1175,11 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
if (!stride)
break;
- RequestPtr prefetch_req = new Request(0, vaddr + stride * pf *
- TheISA::PageBytes,
- sizeof(uint8_t), 0,
- computeUnit->masterId(),
- 0, 0, 0);
+ RequestPtr prefetch_req = std::make_shared<Request>(
+ 0, vaddr + stride * pf * TheISA::PageBytes,
+ sizeof(uint8_t), 0,
+ computeUnit->masterId(),
+ 0, 0, nullptr);
PacketPtr prefetch_pkt = new Packet(prefetch_req, requestCmd);
uint8_t foo = 0;
@@ -1205,7 +1202,6 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
delete tlb_state->tlbEntry;
delete tlb_state;
- delete prefetch_pkt->req;
delete prefetch_pkt;
}
}
@@ -1801,7 +1797,7 @@ ComputeUnit::sendToLds(GPUDynInstPtr gpuDynInst)
{
// this is just a request to carry the GPUDynInstPtr
// back and forth
- RequestPtr newRequest = new Request();
+ RequestPtr newRequest = std::make_shared<Request>();
newRequest->setPaddr(0x0);
// ReadReq is not evaluated by the LDS but the Packet ctor requires this
@@ -1827,7 +1823,6 @@ ComputeUnit::LDSPort::recvTimingResp(PacketPtr packet)
GPUDynInstPtr gpuDynInst = senderState->getMemInst();
delete packet->senderState;
- delete packet->req;
delete packet;
computeUnit->localMemoryPipe.getLMRespFIFO().push(gpuDynInst);
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index 36ef1e1e8..a9cea9bf6 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -145,8 +145,9 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
}
// set up virtual request
- RequestPtr req = new Request(0, vaddr, size, Request::INST_FETCH,
- computeUnit->masterId(), 0, 0, 0);
+ RequestPtr req = std::make_shared<Request>(
+ 0, vaddr, size, Request::INST_FETCH,
+ computeUnit->masterId(), 0, 0, nullptr);
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
// This fetchBlock is kind of faux right now - because the translations so
@@ -306,7 +307,6 @@ FetchUnit::processFetchReturn(PacketPtr pkt)
wavefront->pendingFetch = false;
delete pkt->senderState;
- delete pkt->req;
delete pkt;
}
diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc
index 8b9bd4320..fea6183ed 100644
--- a/src/gpu-compute/gpu_tlb.cc
+++ b/src/gpu-compute/gpu_tlb.cc
@@ -287,7 +287,7 @@ namespace X86ISA
}
Fault
- GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
+ GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
{
DPRINTF(GPUTLB, "Addresses references internal memory.\n");
Addr vaddr = req->getVaddr();
@@ -662,7 +662,8 @@ namespace X86ISA
* On a hit it will update the LRU stack.
*/
bool
- GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
+ GpuTLB::tlbLookup(const RequestPtr &req,
+ ThreadContext *tc, bool update_stats)
{
bool tlb_hit = false;
#ifndef NDEBUG
@@ -710,7 +711,7 @@ namespace X86ISA
}
Fault
- GpuTLB::translate(RequestPtr req, ThreadContext *tc,
+ GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
Translation *translation, Mode mode,
bool &delayedResponse, bool timing, int &latency)
{
@@ -913,8 +914,8 @@ namespace X86ISA
};
Fault
- GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
- int &latency)
+ GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
+ Mode mode, int &latency)
{
bool delayedResponse;
@@ -923,7 +924,7 @@ namespace X86ISA
}
void
- GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
+ GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
Translation *translation, Mode mode, int &latency)
{
bool delayedResponse;
@@ -1069,7 +1070,7 @@ namespace X86ISA
}
tlbOutcome lookup_outcome = TLB_MISS;
- RequestPtr tmp_req = pkt->req;
+ const RequestPtr &tmp_req = pkt->req;
// Access the TLB and figure out if it's a hit or a miss.
bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
diff --git a/src/gpu-compute/gpu_tlb.hh b/src/gpu-compute/gpu_tlb.hh
index f479eb6ce..04d9bfce8 100644
--- a/src/gpu-compute/gpu_tlb.hh
+++ b/src/gpu-compute/gpu_tlb.hh
@@ -113,8 +113,8 @@ namespace X86ISA
* may be responsible for cleaning itself up which will happen in
* this function. Once it's called the object is no longer valid.
*/
- virtual void finish(Fault fault, RequestPtr req, ThreadContext *tc,
- Mode mode) = 0;
+ virtual void finish(Fault fault, const RequestPtr &req,
+ ThreadContext *tc, Mode mode) = 0;
};
void dumpAll();
@@ -177,9 +177,9 @@ namespace X86ISA
*/
std::vector<EntryList> entryList;
- Fault translateInt(RequestPtr req, ThreadContext *tc);
+ Fault translateInt(const RequestPtr &req, ThreadContext *tc);
- Fault translate(RequestPtr req, ThreadContext *tc,
+ Fault translate(const RequestPtr &req, ThreadContext *tc,
Translation *translation, Mode mode, bool &delayedResponse,
bool timing, int &latency);
@@ -222,10 +222,10 @@ namespace X86ISA
void printAccessPattern();
- Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
- int &latency);
+ Fault translateAtomic(const RequestPtr &req, ThreadContext *tc,
+ Mode mode, int &latency);
- void translateTiming(RequestPtr req, ThreadContext *tc,
+ void translateTiming(const RequestPtr &req, ThreadContext *tc,
Translation *translation, Mode mode,
int &latency);
@@ -239,7 +239,8 @@ namespace X86ISA
virtual void unserialize(CheckpointIn& cp);
void issueTranslation();
enum tlbOutcome {TLB_HIT, TLB_MISS, PAGE_WALK, MISS_RETURN};
- bool tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats);
+ bool tlbLookup(const RequestPtr &req,
+ ThreadContext *tc, bool update_stats);
void handleTranslationReturn(Addr addr, tlbOutcome outcome,
PacketPtr pkt);
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 8e7ba9ad5..d0bd41a0d 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -226,7 +226,7 @@ Shader::handshake(GpuDispatcher *_dispatcher)
}
void
-Shader::doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
+Shader::doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
bool suppress_func_errors, int cu_id)
{
int block_size = cuList.at(cu_id)->cacheLineSize();
@@ -338,12 +338,13 @@ Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
for (ChunkGenerator gen(address, size, cuList.at(cu_id)->cacheLineSize());
!gen.done(); gen.next()) {
- RequestPtr req = new Request(0, gen.addr(), gen.size(), 0,
- cuList[0]->masterId(), 0, 0, 0);
+
+ RequestPtr req = std::make_shared<Request>(
+ 0, gen.addr(), gen.size(), 0,
+ cuList[0]->masterId(), 0, 0, nullptr);
doFunctionalAccess(req, cmd, data_buf, suppress_func_errors, cu_id);
data_buf += gen.size();
- delete req;
}
}
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index f9c1ad4b2..5c14d9898 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -181,7 +181,7 @@ class Shader : public ClockedObject
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
bool suppress_func_errors);
- void doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
+ void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
bool suppress_func_errors, int cu_id);
void