author     Tony Gutierrez <anthony.gutierrez@amd.com>   2016-01-19 14:28:22 -0500
committer  Tony Gutierrez <anthony.gutierrez@amd.com>   2016-01-19 14:28:22 -0500
commit     1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree       867510a147cd095f19499d26b7c02d27de4cae9d /src/gpu-compute/shader.hh
parent     28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
download   gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/gpu-compute/shader.hh')
-rw-r--r--  src/gpu-compute/shader.hh  212
1 file changed, 212 insertions, 0 deletions
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
new file mode 100644
index 000000000..91ea8aae0
--- /dev/null
+++ b/src/gpu-compute/shader.hh
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Steve Reinhardt
+ */
+
+#ifndef __SHADER_HH__
+#define __SHADER_HH__
+
+#include <functional>
+#include <string>
+
+#include "arch/isa.hh"
+#include "arch/isa_traits.hh"
+#include "base/types.hh"
+#include "cpu/simple/atomic.hh"
+#include "cpu/simple/timing.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/thread_state.hh"
+#include "enums/MemOpType.hh"
+#include "enums/MemType.hh"
+#include "gpu-compute/compute_unit.hh"
+#include "gpu-compute/gpu_tlb.hh"
+#include "gpu-compute/lds_state.hh"
+#include "gpu-compute/qstruct.hh"
+#include "mem/page_table.hh"
+#include "mem/port.hh"
+#include "mem/request.hh"
+#include "params/Shader.hh"
+#include "sim/faults.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+class BaseTLB;
+class GpuDispatcher;
+
+namespace TheISA
+{
+ class GpuTLB;
+}
+
+static const int LDS_SIZE = 65536;
+
+// Class Shader: This describes a single shader instance. Most
+// configurations will only have a single shader.
+
+class Shader : public SimObject
+{
+ protected:
+ // Shader's clock period, expressed as the number of ticks of the global
+ // simulation clock (curTick()) per shader cycle
+ Tick clock;
+
+ public:
+ typedef ShaderParams Params;
+ enum hsail_mode_e {SIMT,VECTOR_SCALAR};
+
+ // Clock-related functions: they map to and from simulation ticks and
+ // shader clock cycles.
+ Tick frequency() const { return SimClock::Frequency / clock; }
+
+ Tick ticks(int numCycles) const { return (Tick)clock * numCycles; }
+
+ Tick getClock() const { return clock; }
+ Tick curCycle() const { return curTick() / clock; }
+ Tick tickToCycles(Tick val) const { return val / clock;}
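+ // A worked example with illustrative numbers (not taken from any real
+ // config): at a 1 GHz shader clock and gem5's default 1 ps tick
+ // resolution, clock == 1000 ticks per cycle, so frequency() returns
+ // 1e9, ticks(4) returns 4000 ticks, and tickToCycles(4000) returns 4.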
+
+
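+ // Host-CPU-side state associated with this shader: a SimpleThread and
+ // its ThreadContext (presumably used for process and translation
+ // state), plus a pointer back to the owning CPU (cf. hostWakeUp()).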
+ SimpleThread *cpuThread;
+ ThreadContext *gpuTc;
+ BaseCPU *cpuPointer;
+
+ class TickEvent : public Event
+ {
+ private:
+ Shader *shader;
+
+ public:
+ TickEvent(Shader*);
+ void process();
+ const char* description() const;
+ };
+
+ TickEvent tickEvent;
+
+ // Is the memory system simulated in timing mode?
+ bool timingSim;
+ hsail_mode_e hsail_mode;
+
+ // If set, issue an acquire packet at kernel launch
+ int impl_kern_boundary_sync;
+ // If set, generate a separate packet for acquire/release on
+ // ld_acquire/st_release/atomic operations
+ int separate_acquire_release;
+ // If set, fetch returns may be coissued with instructions
+ int coissue_return;
+ // If set, always dump all 64 gprs to trace
+ int trace_vgpr_all;
+ // Number of compute units (CUs) in the shader
+ int n_cu;
+ // Number of wavefront slots per CU
+ int n_wf;
+ // The size of global memory
+ int globalMemSize;
+
+ /*
+ * Bytes/work-item for call instruction
+ * The number of arguments for an hsail function will
+ * vary. We simply determine the maximum # of arguments
+ * required by any hsail function up front before the
+ * simulation (during parsing of the Brig) and record
+ * that number here.
+ */
+ int funcargs_size;
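+ // For example (hypothetical figures): if the largest HSAIL function in
+ // the Brig took two 64-bit arguments and one 32-bit argument,
+ // funcargs_size would be on the order of 8 + 8 + 4 = 20 bytes per
+ // work-item, ignoring any alignment padding.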
+
+ // Tracks which CU the round-robin dispatcher should try to schedule next
+ int nextSchedCu;
+
+ // Size of scheduled add queue
+ uint32_t sa_n;
+
+ // Pointers to the values to be incremented
+ std::vector<uint32_t*> sa_val;
+ // When to do the increment
+ std::vector<uint64_t> sa_when;
+ // Amount to increment by
+ std::vector<int32_t> sa_x;
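+ // Entry i of sa_val, sa_when and sa_x together describe one pending
+ // operation: once the shader's tick count reaches sa_when[i],
+ // *sa_val[i] is presumably bumped by sa_x[i] and the entry retired
+ // (see ScheduleAdd() below).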
+
+ // List of compute units (CUs)
+ std::vector<ComputeUnit*> cuList;
+
+ uint64_t tick_cnt;
+ uint64_t box_tick_cnt;
+ uint64_t start_tick_cnt;
+
+ GpuDispatcher *dispatcher;
+
+ Shader(const Params *p);
+ ~Shader();
+ virtual void init();
+
+ // Run shader
+ void exec();
+
+ // Check to see if shader is busy
+ bool busy();
+
+ // Schedule a 32-bit value to be incremented some time in the future
+ void ScheduleAdd(uint32_t *val, Tick when, int x);
+ bool processTimingPacket(PacketPtr pkt);
+
+ void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
+ MemCmd cmd, bool suppress_func_errors);
+
+ void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
+
+ void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
+ bool suppress_func_errors);
+
+ void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
+
+ void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
+ bool suppress_func_errors);
+
+ void doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
+ bool suppress_func_errors, int cu_id);
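+ // Illustrative (hypothetical caller code, not part of this header): to
+ // functionally read four bytes on behalf of CU 0 one would write
+ //     uint32_t val;
+ //     shader->ReadMem(vaddr, &val, sizeof(val), 0);
+ // The suppress_func_errors overloads presumably silence functional
+ // access errors rather than treating them as fatal.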
+
+ void
+ registerCU(int cu_id, ComputeUnit *compute_unit)
+ {
+ cuList[cu_id] = compute_unit;
+ }
+
+ void handshake(GpuDispatcher *dispatcher);
+ bool dispatch_workgroups(NDRange *ndr);
+ Addr mmap(int length);
+ void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode);
+ void updateThreadContext(int tid);
+ void hostWakeUp(BaseCPU *cpu);
+};
+
+#endif // __SHADER_HH__
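
The scheduled-add members above (sa_n, sa_val, sa_when, sa_x and the ScheduleAdd() entry point) amount to a small deferred-increment queue. The standalone sketch below is not gem5 code, and none of its names (ScheduledAddQueue, schedule, tick) appear in the model; it only illustrates, under the assumption that an entry fires once the current time reaches its timestamp, the behaviour those members imply.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Toy version of the scheduled-add idea: each entry pairs a target
// counter with a firing time and an increment amount.
struct ScheduledAddQueue
{
    std::vector<uint32_t*> vals;    // counters to bump  (cf. sa_val)
    std::vector<uint64_t>  whens;   // firing times      (cf. sa_when)
    std::vector<int32_t>   amounts; // increment amounts (cf. sa_x)

    void schedule(uint32_t *val, uint64_t when, int32_t x)
    {
        vals.push_back(val);
        whens.push_back(when);
        amounts.push_back(x);
    }

    // Apply and retire every entry whose firing time has been reached.
    void tick(uint64_t now)
    {
        for (std::size_t i = 0; i < vals.size();) {
            if (now >= whens[i]) {
                *vals[i] += amounts[i];
                vals.erase(vals.begin() + i);
                whens.erase(whens.begin() + i);
                amounts.erase(amounts.begin() + i);
            } else {
                ++i;
            }
        }
    }
};

int main()
{
    uint32_t counter = 0;
    ScheduledAddQueue q;
    q.schedule(&counter, 100, 1);
    q.schedule(&counter, 200, 5);
    q.tick(150);                  // only the first entry has fired
    std::cout << counter << '\n'; // prints 1
    q.tick(250);                  // now the second entry fires too
    std::cout << counter << '\n'; // prints 6
    return 0;
}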