From 1a7d3f9fcb76a68540dd948f91413533a383bfde Mon Sep 17 00:00:00 2001
From: Tony Gutierrez
Date: Tue, 19 Jan 2016 14:28:22 -0500
Subject: gpu-compute: AMD's baseline GPU model

---
 src/gpu-compute/qstruct.hh | 201 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100644 src/gpu-compute/qstruct.hh

(limited to 'src/gpu-compute/qstruct.hh')

diff --git a/src/gpu-compute/qstruct.hh b/src/gpu-compute/qstruct.hh
new file mode 100644
index 000000000..092303c00
--- /dev/null
+++ b/src/gpu-compute/qstruct.hh
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Brad Beckmann, Marc Orr
+ */
+
+#ifndef __Q_STRUCT_HH__
+#define __Q_STRUCT_HH__
+
+#include <bitset>
+#include <cstdint>
+
+// Maximum number of arguments
+static const int KER_NUM_ARGS = 32;
+// Kernel argument buffer size
+static const int KER_ARGS_LENGTH = 512;
+
+class LdsChunk;
+struct NDRange;
+
+// Be very careful of alignment in this structure. The structure
+// must compile to the same layout in both 32-bit and 64-bit mode.
+struct HsaQueueEntry
+{
+    // Base pointer for array of instruction pointers
+    uint64_t code_ptr;
+    // Grid Size (3 dimensions)
+    uint32_t gdSize[3];
+    // Workgroup Size (3 dimensions)
+    uint32_t wgSize[3];
+    uint16_t sRegCount;
+    uint16_t dRegCount;
+    uint16_t cRegCount;
+    uint64_t privMemStart;
+    uint32_t privMemPerItem;
+    uint32_t privMemTotal;
+    uint64_t spillMemStart;
+    uint32_t spillMemPerItem;
+    uint32_t spillMemTotal;
+    uint64_t roMemStart;
+    uint32_t roMemTotal;
+    // Size (in bytes) of LDS
+    uint32_t ldsSize;
+    // Virtual Memory Id (unused right now)
+    uint32_t vmId;
+    // NOTE(review): vmId ends at offset 92; 'depends' lands at 96 only if uint64_t is 8-byte aligned -- confirm for 32-bit
+    // Pointer to dependency chain (unused now)
+    uint64_t depends;
+
+    // pointer to bool
+    uint64_t addrToNotify;
+    // pointer to uint32_t
+    uint64_t numDispLeft;
+
+    // variables to pass arguments when running in standalone mode,
+    // will be removed when run.py and sh.cpp have been updated to
+    // use args and offset arrays
+    uint64_t arg1;
+    uint64_t arg2;
+    uint64_t arg3;
+    uint64_t arg4;
+
+    // variables to pass arguments when running in cpu+gpu mode
+    uint8_t args[KER_ARGS_LENGTH];
+    uint16_t offsets[KER_NUM_ARGS];
+    uint16_t num_args;
+};
+
+// State used to start (or restart) a WF
+struct WFContext
+{
+    // 32 bit values
+    // barrier state
+    int bar_cnt[VSZ];
+
+    // id (which WF in the WG)
+    int cnt;
+
+    // more barrier state
+    int max_bar_cnt;
+    int old_barrier_cnt;
+    int barrier_cnt;
+
+    // More Program Counter Stuff
+    uint32_t pc;
+
+    // Program counter of the immediate post-dominator instruction
+    uint32_t rpc;
+
+    // WG wide state (I don't see how to avoid redundancy here)
+    int cu_id;
+    uint32_t wg_id;
+    uint32_t barrier_id;
+
+    // 64 bit values (these values depend on the wavefront size)
+    // masks
+    uint64_t init_mask;
+    uint64_t exec_mask;
+
+    // private memory;
+    Addr privBase;
+    Addr spillBase;
+
+    LdsChunk *ldsChunk;
+
+    /*
+     * Kernel wide state
+     * This is a hack. This state should be moved through simulated memory
+     * during a yield. Though not much is being used here, so it's probably
+     * not a big deal.
+     *
+     * Just to add to this comment... The ndr is derived from simulated
+     * memory when the cl-runtime allocates an HsaQueueEntry and populates it
+     * for a kernel launch. So in theory the runtime should be able to keep
+     * that state around. Then a WF can reference it upon restart to derive
+     * kernel wide state. The runtime can deallocate the state when the
+     * kernel completes.
+     */
+    NDRange *ndr;
+};
+
+// State that needs to be passed between the simulation and simulated app, a
+// pointer to this struct can be passed through the depends field in the
+// HsaQueueEntry struct
+struct HostState
+{
+    // cl_event* has original HsaQueueEntry for init
+    uint64_t event;
+};
+
+// Total number of HSA queues
+static const int HSAQ_NQUEUES = 8;
+
+// These values will eventually live in memory mapped registers
+// and be settable by the kernel mode driver.
+
+// Number of entries in each HSA queue
+static const int HSAQ_SIZE = 64;
+// Address of first HSA queue index
+static const int HSAQ_INDX_BASE = 0x10000ll;
+// Address of first HSA queue
+static const int HSAQ_BASE = 0x11000ll;
+// Suggested start of HSA code
+static const int HSA_CODE_BASE = 0x18000ll;
+
+// Shortcuts for deriving addresses: HSAQ(n) is the start of queue n, HSAQE(n,i) is
+// entry i of queue n, and HSAQ_RI/WI/CI(n) are queue n's three consecutive int index slots
+#define HSAQ(n) (HSAQ_BASE + HSAQ_SIZE * sizeof(struct HsaQueueEntry) * (n))
+#define HSAQE(n,i) (HSAQ_BASE + (HSAQ_SIZE * (n) + (i)) * sizeof(struct HsaQueueEntry))
+#define HSAQ_RI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 0))
+#define HSAQ_WI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 1))
+#define HSAQ_CI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 2))
+
+/*
+ * Example code for writing to a queue
+ *
+ * void
+ * ToQueue(int n, struct HsaQueueEntry *val)
+ * {
+ *     int wi = *(int*)HSAQ_WI(n);
+ *     int ri = *(int*)HSAQ_RI(n);
+ *     int ci = *(int*)HSAQ_CI(n);
+ *
+ *     if (ci - ri < HSAQ_SIZE) {
+ *         (*(int*)HSAQ_CI(n))++;
+ *         *(HsaQueueEntry*)(HSAQE(n, (wi % HSAQ_SIZE))) = *val;
+ *         (*(int*)HSAQ_WI(n))++;
+ *     }
+ * }
+ */
+
+#endif // __Q_STRUCT_HH__
--
cgit v1.2.3