From 98d8a7051d8caa9b5aebebe5bf16f9d731c34c0e Mon Sep 17 00:00:00 2001
From: Tony Gutierrez <anthony.gutierrez@amd.com>
Date: Wed, 26 Oct 2016 22:47:30 -0400
Subject: gpu-compute: add instruction mix stats for the gpu

---
 src/gpu-compute/compute_unit.cc | 141 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)

(limited to 'src/gpu-compute/compute_unit.cc')

diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index abf8ff2c5..f05ecc1b2 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -1408,6 +1408,114 @@ ComputeUnit::regStats()
 {
     MemObject::regStats();
 
+    vALUInsts
+        .name(name() + ".valu_insts")
+        .desc("Number of vector ALU insts issued.")
+        ;
+    vALUInstsPerWF
+        .name(name() + ".valu_insts_per_wf")
+        .desc("The avg. number of vector ALU insts issued per-wavefront.")
+        ;
+    sALUInsts
+        .name(name() + ".salu_insts")
+        .desc("Number of scalar ALU insts issued.")
+        ;
+    sALUInstsPerWF
+        .name(name() + ".salu_insts_per_wf")
+        .desc("The avg. number of scalar ALU insts issued per-wavefront.")
+        ;
+    instCyclesVALU
+        .name(name() + ".inst_cycles_valu")
+        .desc("Number of cycles needed to execute VALU insts.")
+        ;
+    instCyclesSALU
+        .name(name() + ".inst_cycles_salu")
+        .desc("Number of cycles needed to execute SALU insts.")
+        ;
+    threadCyclesVALU
+        .name(name() + ".thread_cycles_valu")
+        .desc("Number of thread cycles used to execute vector ALU ops. "
+              "Similar to instCyclesVALU but multiplied by the number of "
+              "active threads.")
+        ;
+    vALUUtilization
+        .name(name() + ".valu_utilization")
+        .desc("Percentage of active vector ALU threads in a wave.")
+        ;
+    ldsNoFlatInsts
+        .name(name() + ".lds_no_flat_insts")
+        .desc("Number of LDS insts issued, not including FLAT "
+              "accesses that resolve to LDS.")
+        ;
+    ldsNoFlatInstsPerWF
+        .name(name() + ".lds_no_flat_insts_per_wf")
+        .desc("The avg. number of LDS insts (not including FLAT "
+              "accesses that resolve to LDS) per-wavefront.")
+        ;
+    flatVMemInsts
+        .name(name() + ".flat_vmem_insts")
+        .desc("The number of FLAT insts that resolve to vmem issued.")
+        ;
+    flatVMemInstsPerWF
+        .name(name() + ".flat_vmem_insts_per_wf")
+        .desc("The average number of FLAT insts that resolve to vmem "
+              "issued per-wavefront.")
+        ;
+    flatLDSInsts
+        .name(name() + ".flat_lds_insts")
+        .desc("The number of FLAT insts that resolve to LDS issued.")
+        ;
+    flatLDSInstsPerWF
+        .name(name() + ".flat_lds_insts_per_wf")
+        .desc("The average number of FLAT insts that resolve to LDS "
+              "issued per-wavefront.")
+        ;
+    vectorMemWrites
+        .name(name() + ".vector_mem_writes")
+        .desc("Number of vector mem write insts (excluding FLAT insts).")
+        ;
+    vectorMemWritesPerWF
+        .name(name() + ".vector_mem_writes_per_wf")
+        .desc("The average number of vector mem write insts "
+              "(excluding FLAT insts) per-wavefront.")
+        ;
+    vectorMemReads
+        .name(name() + ".vector_mem_reads")
+        .desc("Number of vector mem read insts (excluding FLAT insts).")
+        ;
+    vectorMemReadsPerWF
+        .name(name() + ".vector_mem_reads_per_wf")
+        .desc("The avg. number of vector mem read insts (excluding "
+              "FLAT insts) per-wavefront.")
+        ;
+    scalarMemWrites
+        .name(name() + ".scalar_mem_writes")
+        .desc("Number of scalar mem write insts.")
+        ;
+    scalarMemWritesPerWF
+        .name(name() + ".scalar_mem_writes_per_wf")
+        .desc("The average number of scalar mem write insts per-wavefront.")
+        ;
+    scalarMemReads
+        .name(name() + ".scalar_mem_reads")
+        .desc("Number of scalar mem read insts.")
+        ;
+    scalarMemReadsPerWF
+        .name(name() + ".scalar_mem_reads_per_wf")
+        .desc("The average number of scalar mem read insts per-wavefront.")
+        ;
+
+    vALUInstsPerWF = vALUInsts / completedWfs;
+    sALUInstsPerWF = sALUInsts / completedWfs;
+    vALUUtilization = (threadCyclesVALU / (64 * instCyclesVALU)) * 100;
+    ldsNoFlatInstsPerWF = ldsNoFlatInsts / completedWfs;
+    flatVMemInstsPerWF = flatVMemInsts / completedWfs;
+    flatLDSInstsPerWF = flatLDSInsts / completedWfs;
+    vectorMemWritesPerWF = vectorMemWrites / completedWfs;
+    vectorMemReadsPerWF = vectorMemReads / completedWfs;
+    scalarMemWritesPerWF = scalarMemWrites / completedWfs;
+    scalarMemReadsPerWF = scalarMemReads / completedWfs;
+
     tlbCycles
         .name(name() + ".tlb_cycles")
         .desc("total number of cycles for all uncoalesced requests")
@@ -1566,6 +1674,39 @@ ComputeUnit::regStats()
     localMemoryPipe.regStats();
 }
 
+void
+ComputeUnit::updateInstStats(GPUDynInstPtr gpuDynInst)
+{
+    if (gpuDynInst->isScalar()) {
+        if (gpuDynInst->isALU() && !gpuDynInst->isWaitcnt()) {
+            sALUInsts++;
+            instCyclesSALU++;
+        } else if (gpuDynInst->isLoad()) {
+            scalarMemReads++;
+        } else if (gpuDynInst->isStore()) {
+            scalarMemWrites++;
+        }
+    } else {
+        if (gpuDynInst->isALU()) {
+            vALUInsts++;
+            instCyclesVALU++;
+            threadCyclesVALU += gpuDynInst->wavefront()->execMask().count();
+        } else if (gpuDynInst->isFlat()) {
+            if (gpuDynInst->isLocalMem()) {
+                flatLDSInsts++;
+            } else {
+                flatVMemInsts++;
+            }
+        } else if (gpuDynInst->isLocalMem()) {
+            ldsNoFlatInsts++;
+        } else if (gpuDynInst->isLoad()) {
+            vectorMemReads++;
+        } else if (gpuDynInst->isStore()) {
+            vectorMemWrites++;
+        }
+    }
+}
+
 void
 ComputeUnit::updatePageDivergenceDist(Addr addr)
 {
-- 
cgit v1.2.3