diff options
-rw-r--r-- | src/gpu-compute/code_enums.hh | 4 | ||||
-rw-r--r-- | src/gpu-compute/compute_unit.cc | 12 | ||||
-rw-r--r-- | src/gpu-compute/wavefront.cc | 4 |
3 files changed, 9 insertions, 11 deletions
diff --git a/src/gpu-compute/code_enums.hh b/src/gpu-compute/code_enums.hh index 126cf6c50..6cd9bfe26 100644 --- a/src/gpu-compute/code_enums.hh +++ b/src/gpu-compute/code_enums.hh @@ -84,6 +84,7 @@ ||(a)==Enums::OT_PRIVATE_ATOMIC \ ||(a)==Enums::OT_SPILL_ATOMIC \ ||(a)==Enums::OT_READONLY_ATOMIC \ + ||(a)==Enums::OT_BOTH_MEMFENCE \ ||(a)==Enums::OT_FLAT_ATOMIC) #define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \ @@ -93,8 +94,7 @@ ||(a)==Enums::OT_BOTH_MEMFENCE) #define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \ - ||(a)==Enums::OT_SHARED_MEMFENCE \ - ||(a)==Enums::OT_BOTH_MEMFENCE) + ||(a)==Enums::OT_SHARED_MEMFENCE) #define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC) diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index d3622007a..63f3e8fb5 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -587,8 +587,8 @@ void ComputeUnit::init() { // Initialize CU Bus models - glbMemToVrfBus.init(&shader->tick_cnt, 1); - locMemToVrfBus.init(&shader->tick_cnt, 1); + glbMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1)); + locMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1)); nextGlbMemBus = 0; nextLocMemBus = 0; fatal_if(numGlbMemUnits > 1, @@ -596,7 +596,7 @@ ComputeUnit::init() vrfToGlobalMemPipeBus.resize(numGlbMemUnits); for (int j = 0; j < numGlbMemUnits; ++j) { vrfToGlobalMemPipeBus[j] = WaitClass(); - vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, 1); + vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1)); } fatal_if(numLocMemUnits > 1, @@ -604,7 +604,7 @@ ComputeUnit::init() vrfToLocalMemPipeBus.resize(numLocMemUnits); for (int j = 0; j < numLocMemUnits; ++j) { vrfToLocalMemPipeBus[j] = WaitClass(); - vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, 1); + vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1)); } vectorRegsReserved.resize(numSIMDs, 0); aluPipe.resize(numSIMDs); @@ -612,12 +612,12 @@ ComputeUnit::init() for (int i = 0; i < numSIMDs + numLocMemUnits + numGlbMemUnits; ++i) { wfWait[i] = WaitClass(); - wfWait[i].init(&shader->tick_cnt, 1); + wfWait[i].init(&shader->tick_cnt, shader->ticks(1)); } for (int i = 0; i < numSIMDs; ++i) { aluPipe[i] = WaitClass(); - aluPipe[i].init(&shader->tick_cnt, 1); + aluPipe[i].init(&shader->tick_cnt, shader->ticks(1)); } // Setup space for call args diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index 0aa033db1..ed13b22c7 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -162,7 +162,6 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii) if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) { - return true; } @@ -349,7 +348,7 @@ Wavefront::ready(itype_e type) } bool locMemBusRdy = false; bool locMemIssueRdy = false; - if (type == I_SHARED) { + if (type == I_SHARED || type == I_FLAT) { for (int j=0; j < computeUnit->numLocMemUnits; ++j) { if (computeUnit->vrfToLocalMemPipeBus[j].prerdy()) locMemBusRdy = true; @@ -598,7 +597,6 @@ Wavefront::ready(itype_e type) DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit->cu_id, simdId, wfSlotId, ii->disassemble()); - return 1; } |