1 files changed, 341 insertions, 0 deletions
diff --git a/src/gpu-compute/lds_state.cc b/src/gpu-compute/lds_state.cc
new file mode 100644
index 000000000..91ee8009a
--- /dev/null
+++ b/src/gpu-compute/lds_state.cc
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: John Kalamatianos, Joe Gross
+ */
+
+#include "gpu-compute/lds_state.hh"
+
+#include <array>
+#include <cstdio>
+#include <cstdlib>
+
+#include "gpu-compute/compute_unit.hh"
+#include "gpu-compute/gpu_dyn_inst.hh"
+#include "gpu-compute/shader.hh"
+
+/** Build the LDS from its parameter object (the constructor SWIG uses).
+ * Copies size, range, bank count and conflict penalty out of params, then
+ * rejects configurations whose bank count or size is unusable. */
+LdsState::LdsState(const Params *params) :
+    MemObject(params),
+    tickEvent(this),
+    cuPort(name() + ".port", this),
+    maximumSize(params->size),
+    range(params->range),
+    bankConflictPenalty(params->bankConflictPenalty),
+    banks(params->banks)
+{
+    fatal_if(params->banks <= 0,
+             "Number of LDS banks should be positive number");
+    // a power of two has exactly one bit set, so x & (x - 1) must be zero
+    fatal_if((params->banks & (params->banks - 1)) != 0,
+             "Number of LDS banks should be a power of 2");
+    fatal_if(params->size <= 0,
+             "cannot allocate an LDS with a size less than 1");
+    fatal_if(params->size % 2, "the LDS size should be an even number");
+}
+
+/** Factory hook needed by the SWIG compiler: heap-allocates the LdsState
+ * described by this parameter object and returns it as a raw pointer;
+ * nothing in this function keeps or frees that pointer. */
+LdsState *
+LdsStateParams::create()
+{
+    return new LdsState(this);
+}
+
+/** Attach this LDS to its owning compute unit and name it after that CU.
+ * x_parent must be non-null and must differ from the current parent.
+ */
+void
+LdsState::setParent(ComputeUnit *x_parent)
+{
+    // a null compute unit is never acceptable
+    fatal_if(x_parent == nullptr, "x_parent should not be nullptr");
+    // NOTE(review): only re-setting the *same* parent is rejected here
+    fatal_if(parent == x_parent, "should not be setting the parent twice");
+
+    _name = x_parent->name() + ".LdsState";
+    parent = x_parent;
+}
+
+/** Recover the GPU memory instruction from the last sender state reached by
+ * following the packet's predecessor links, then count its bank conflicts;
+ * bankAccesses is passed through to the instruction-based overload. */
+unsigned
+LdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
+{
+    Packet::SenderState *baseSenderState = packet->senderState;
+    // walk to the end of the chain; the null test keeps a packet with no
+    // sender state at all from being dereferenced before the check below
+    while (baseSenderState && baseSenderState->predecessor) {
+        baseSenderState = baseSenderState->predecessor;
+    }
+    // dynamic_cast yields nullptr for a null or mismatched sender state
+    const ComputeUnit::LDSPort::SenderState *senderState =
+            dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState);
+
+    fatal_if(!senderState, "did not get the right sort of sender state");
+
+    return countBankConflicts(senderState->getMemInst(), bankAccesses);
+}
+
+// Sum, over the wavefront's lane groups, of the busiest bank's access
+// count; each counted access also increments *numBankAccesses.
+unsigned
+LdsState::countBankConflicts(GPUDynInstPtr gpuDynInst,
+                             unsigned *numBankAccesses)
+{
+    // lanes examined per pass; wider wavefronts need several passes
+    int lanesPerPass = std::min(parent->wfSize(), banks);
+    int passes = (parent->wfSize() > lanesPerPass) ?
+        (parent->wfSize() / lanesPerPass) : 1;
+    // marker for lanes that must not be counted (inactive or duplicate)
+    const Addr skipLane = std::numeric_limits<Addr>::max();
+    // only plain loads/stores have same-address lanes folded together
+    const bool mergeSameAddr = gpuDynInst->m_op == Enums::MO_LD ||
+                               gpuDynInst->m_op == Enums::MO_ST;
+
+    int conflictTotal = 0;
+    std::vector<int> hitsPerBank;
+    for (int pass = 0; pass < passes; ++pass) {
+        const int firstLane = pass * lanesPerPass;
+        // fresh per-pass lane addresses and per-bank hit counters
+        std::vector<Addr> laneAddr;
+        laneAddr.resize(lanesPerPass, 0);
+        hitsPerBank.assign(banks, 0);
+        int worstBank = 0;
+
+        // gather this pass's addresses, hiding inactive lanes
+        for (int lane = 0; lane < lanesPerPass; ++lane) {
+            laneAddr[lane] = gpuDynInst->exec_mask[firstLane + lane] ?
+                gpuDynInst->addr[firstLane + lane] : skipLane;
+        }
+
+        if (mergeSameAddr) {
+            // hide each lane repeating the address of an earlier lane
+            for (int lane = 1; lane < lanesPerPass; ++lane) {
+                if (laneAddr[lane] == skipLane) {
+                    continue;
+                }
+                for (int prev = 0; prev < lane; ++prev) {
+                    if (laneAddr[prev] == laneAddr[lane]) {
+                        laneAddr[lane] = skipLane;
+                        break;
+                    }
+                }
+            }
+        }
+
+        // tally the surviving accesses bank by bank
+        for (int lane = 0; lane < lanesPerPass; ++lane) {
+            if (laneAddr[lane] == skipLane) {
+                continue;
+            }
+            int bankId = laneAddr[lane] % banks;
+            worstBank = std::max(worstBank, ++hitsPerBank[bankId]);
+            (*numBankAccesses)++;
+        }
+        conflictTotal += worstBank;
+    }
+    panic_if(conflictTotal > parent->wfSize(),
+             "Max bank conflicts should match num of work items per instr");
+    return conflictTotal;
+}
+
+/** Timing request arriving from the CU: hand the packet to the owning
+ * LdsState and report whatever processPacket() returns.
+ */
+bool
+LdsState::CuSidePort::recvTimingReq(PacketPtr packet)
+{
+    return ownerLds->processPacket(packet);
+}
+
+GPUDynInstPtr
+LdsState::getDynInstr(PacketPtr packet)
+{
+    auto *ss = dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
+        packet->senderState); // top-most state only, no predecessor walk
+    fatal_if(!ss, "did not get the right sort of sender state");
+    return ss->getMemInst(); // memory instruction carried by that state
+}
+
+/** Account for one incoming LDS packet and queue it for return: updates the
+ * parent CU's bank statistics, derives the access delay, and pushes the
+ * packet with its completion tick onto the return queue. */
+bool
+LdsState::processPacket(PacketPtr packet)
+{
+    unsigned bankAccesses = 0;
+    // the number of conflicts this packet will have when accessing the LDS
+    unsigned bankConflicts = countBankConflicts(packet, &bankAccesses);
+    // count the total number of physical LDS bank accessed
+    parent->ldsBankAccesses += bankAccesses;
+    // count the LDS bank conflicts. A count of 1 means at most one access
+    // per bank (no conflict) and is sampled as 0; a count of 0 is clamped
+    // to 0 too, since subtracting from unsigned zero would wrap around
+    unsigned conflictSample = bankConflicts ? bankConflicts - 1 : 0;
+    parent->ldsBankConflictDist.sample(conflictSample);
+
+    GPUDynInstPtr dynInst = getDynInstr(packet);
+    // only stores use the store bus length; every other op uses the load one
+    int busLength = (dynInst->m_op == Enums::MO_ST) ?
+        parent->storeBusLength() : parent->loadBusLength();
+    // delay for accessing the LDS: bank conflict penalty plus bus transfer
+    Tick processingTime =
+        parent->shader->ticks(bankConflicts * bankConflictPenalty) +
+        parent->shader->ticks(busLength);
+    // the completion tick is counted from earliestReturnTime()
+    Tick doneAt = earliestReturnTime() + processingTime;
+    // then store it for processing
+    return returnQueuePush(std::make_pair(doneAt, packet));
+}
+
+/** Append a (return tick, packet) pair to the return queue; the packet is
+ * always accepted, so the result is always true.
+ */
+bool
+LdsState::returnQueuePush(std::pair<Tick, PacketPtr> thePair)
+{
+    // TODO add time limits (e.g. one packet per cycle) and queue size limits
+    // and implement flow control
+    returnQueue.push(thePair);
+
+    // a pending wakeup will scan the queue itself; otherwise scan it now
+    if (tickEvent.scheduled()) {
+        return true;
+    }
+    process();
+    return true;
+}
+
+/** Functional-mode access entry point: not supported by this port, so any
+ * call raises fatal(); pkt is never examined.
+ */
+void
+LdsState::CuSidePort::recvFunctional(PacketPtr pkt)
+{
+    fatal("not implemented");
+}
+
+/** Response retry handler: asserts that the owning LDS reports a pending
+ * retry, calls setRetryResp(false), then re-runs process() on the queue.
+ */
+void
+LdsState::CuSidePort::recvRespRetry()
+{
+    // TODO verify that this is the right way to do this
+    assert(ownerLds->isRetryResp());
+    ownerLds->setRetryResp(false);
+    ownerLds->process();
+}
+
+/** Request retry entry point: not supported by this port, so any call
+ * raises fatal().
+ */
+void
+LdsState::CuSidePort::recvRetry()
+{
+    fatal("not implemented");
+}
+
+/** Return every queued packet whose return tick has been reached, then make
+ * sure the tick event fires no later than the next pending return tick.
+ * Always returns true. */
+bool
+LdsState::process()
+{
+    Tick now = clockEdge();
+
+    // send back completed packets
+    while (!returnQueue.empty() && returnQueue.front().first <= now) {
+        PacketPtr packet = returnQueue.front().second;
+
+        ComputeUnit::LDSPort::SenderState *ss =
+            dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
+                            packet->senderState);
+        // a failed cast yields nullptr; report it instead of dereferencing
+        fatal_if(!ss, "did not get the right sort of sender state");
+
+        GPUDynInstPtr gpuDynInst = ss->getMemInst();
+        gpuDynInst->initiateAcc(gpuDynInst);
+
+        packet->makeTimingResponse();
+
+        // dequeue first: the entry is gone whether or not the send succeeds
+        returnQueue.pop();
+
+        bool success = cuPort.sendTimingResp(packet);
+        // a rejected response is flagged for retry but is not yet supported
+        if (!success) {
+            retryResp = true;
+            panic("have not handled timing responses being NACK'd when sent"
+                            " back");
+        }
+    }
+
+    // determine the next wakeup time
+    if (!returnQueue.empty()) {
+        Tick next = returnQueue.front().first;
+
+        if (tickEvent.scheduled()) {
+            // only ever pull an existing wakeup earlier, never push it later
+            if (next < tickEvent.when()) {
+                tickEvent.deschedule();
+                tickEvent.schedule(next);
+            }
+        } else {
+            tickEvent.schedule(next);
+        }
+    }
+
+    return true;
+}
+
+/** Tick event callback: forwards to the owning LdsState's process(); the
+ * value process() returns is ignored.
+ */
+void
+LdsState::TickEvent::process()
+{
+    ldsState->process();
+}
+
+/** Statistics registration hook. The body is empty: no statistics are
+ * registered here (the bank statistics updated by this file live on the
+ * parent compute unit). */
+void
+LdsState::regStats()
+{
+}