summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tony Gutierrez <anthony.gutierrez@amd.com> 2016-10-26 22:47:47 -0400
committer: Tony Gutierrez <anthony.gutierrez@amd.com> 2016-10-26 22:47:47 -0400
commit: aa7364276f16bbe6aa300b43bc57ff1b73be42a7 (patch)
tree: bd384607585d98dd996b88c74864aa105b6843a7
parent: 844fb845a51b15f13c7c744e0d5fdf5567c3da98 (diff)
download: gem5-aa7364276f16bbe6aa300b43bc57ff1b73be42a7.tar.xz
gpu-compute: use System cache line size in the GPU
-rw-r--r-- src/gpu-compute/compute_unit.cc 3
-rw-r--r-- src/gpu-compute/compute_unit.hh 3
-rw-r--r-- src/gpu-compute/fetch_unit.cc 4
-rw-r--r-- src/gpu-compute/shader.cc 4
4 files changed, 9 insertions, 5 deletions
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index f05ecc1b2..93cffbe1e 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -75,7 +75,8 @@ ComputeUnit::ComputeUnit(const Params *p) : MemObject(p), fetchStage(p),
req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(name() + ".ComputeUnit")),
- lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize),
+ lds(*p->localDataStore), _cacheLineSize(p->system->cacheLineSize()),
+ globalSeqNum(0), wavefrontSize(p->wfSize),
kernelLaunchInst(new KernelLaunchStaticInst())
{
/**
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index 2187bec38..4a1c09c27 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -390,6 +390,8 @@ class ComputeUnit : public MemObject
int32_t
getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
+ int cacheLineSize() const { return _cacheLineSize; }
+
bool
sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));
@@ -767,6 +769,7 @@ class ComputeUnit : public MemObject
uint64_t getAndIncSeqNum() { return globalSeqNum++; }
private:
+ const int _cacheLineSize;
uint64_t globalSeqNum;
int wavefrontSize;
GPUStaticInst *kernelLaunchInst;
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index 1b19a3223..9a059f7fc 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -132,10 +132,10 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
// Since this is an instruction prefetch, if you're split then just finish
// out the current line.
- unsigned block_size = RubySystem::getBlockSizeBytes();
+ int block_size = computeUnit->cacheLineSize();
// check for split accesses
Addr split_addr = roundDown(vaddr + block_size - 1, block_size);
- unsigned size = block_size;
+ int size = block_size;
if (split_addr > vaddr) {
// misaligned access, just grab the rest of the line
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index d02f95d29..e47edce2c 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -224,7 +224,7 @@ void
Shader::doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
bool suppress_func_errors, int cu_id)
{
- unsigned block_size = RubySystem::getBlockSizeBytes();
+ int block_size = cuList.at(cu_id)->cacheLineSize();
unsigned size = req->getSize();
Addr tmp_addr;
@@ -342,7 +342,7 @@ Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
{
uint8_t *data_buf = (uint8_t*)ptr;
- for (ChunkGenerator gen(address, size, RubySystem::getBlockSizeBytes());
+ for (ChunkGenerator gen(address, size, cuList.at(cu_id)->cacheLineSize());
!gen.done(); gen.next()) {
Request *req = new Request(0, gen.addr(), gen.size(), 0,
cuList[0]->masterId(), 0, 0, 0);