summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTony Gutierrez <anthony.gutierrez@amd.com>2016-01-19 13:57:50 -0500
committerTony Gutierrez <anthony.gutierrez@amd.com>2016-01-19 13:57:50 -0500
commitd658b6e1cc22de852fef611e28f448257acc298a (patch)
tree9d4ab8f7531647eb7df619c77e8ddb2ae1022bcf
parent34fb6b5e35db751f310aee824046107e57a0ba03 (diff)
downloadgem5-d658b6e1cc22de852fef611e28f448257acc298a.tar.xz
* * *
mem: support for gpu-style RMWs in ruby This patch adds support for GPU-style read-modify-write (RMW) operations in ruby. Such atomic operations are traditionally executed at the memory controller (instead of through an L1 cache using cache-line locking). Currently, this patch works by propagating operation functors through the memory system.
-rw-r--r--src/base/types.hh13
-rw-r--r--src/mem/abstract_mem.cc69
-rw-r--r--src/mem/packet.hh6
-rw-r--r--src/mem/protocol/RubySlicc_Exports.sm7
-rw-r--r--src/mem/protocol/RubySlicc_Types.sm1
-rw-r--r--src/mem/request.hh69
6 files changed, 124 insertions, 41 deletions
diff --git a/src/base/types.hh b/src/base/types.hh
index bc5c715ce..7b115901a 100644
--- a/src/base/types.hh
+++ b/src/base/types.hh
@@ -200,6 +200,19 @@ typedef std::shared_ptr<FaultBase> Fault;
constexpr decltype(nullptr) NoFault = nullptr;
#endif
+struct AtomicOpFunctor
+{
+ virtual void operator()(uint8_t *p) = 0;
+ virtual ~AtomicOpFunctor() {}
+};
+
+template <class T>
+struct TypedAtomicOpFunctor : public AtomicOpFunctor
+{
+ void operator()(uint8_t *p) { execute((T *)p); }
+ virtual void execute(T * p) = 0;
+};
+
enum ByteOrder {
BigEndianByteOrder,
LittleEndianByteOrder
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
index 0835d3fdf..cbe360779 100644
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -341,39 +341,46 @@ AbstractMemory::access(PacketPtr pkt)
uint8_t *hostAddr = pmemAddr + pkt->getAddr() - range.start();
if (pkt->cmd == MemCmd::SwapReq) {
- std::vector<uint8_t> overwrite_val(pkt->getSize());
- uint64_t condition_val64;
- uint32_t condition_val32;
-
- if (!pmemAddr)
- panic("Swap only works if there is real memory (i.e. null=False)");
-
- bool overwrite_mem = true;
- // keep a copy of our possible write value, and copy what is at the
- // memory address into the packet
- std::memcpy(&overwrite_val[0], pkt->getConstPtr<uint8_t>(),
- pkt->getSize());
- std::memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
-
- if (pkt->req->isCondSwap()) {
- if (pkt->getSize() == sizeof(uint64_t)) {
- condition_val64 = pkt->req->getExtraData();
- overwrite_mem = !std::memcmp(&condition_val64, hostAddr,
- sizeof(uint64_t));
- } else if (pkt->getSize() == sizeof(uint32_t)) {
- condition_val32 = (uint32_t)pkt->req->getExtraData();
- overwrite_mem = !std::memcmp(&condition_val32, hostAddr,
- sizeof(uint32_t));
- } else
- panic("Invalid size for conditional read/write\n");
- }
+ if (pkt->isAtomicOp()) {
+ if (pmemAddr) {
+ memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
+ (*(pkt->getAtomicOp()))(hostAddr);
+ }
+ } else {
+ std::vector<uint8_t> overwrite_val(pkt->getSize());
+ uint64_t condition_val64;
+ uint32_t condition_val32;
+
+ if (!pmemAddr)
+ panic("Swap only works if there is real memory (i.e. null=False)");
+
+ bool overwrite_mem = true;
+ // keep a copy of our possible write value, and copy what is at the
+ // memory address into the packet
+ std::memcpy(&overwrite_val[0], pkt->getConstPtr<uint8_t>(),
+ pkt->getSize());
+ std::memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
+
+ if (pkt->req->isCondSwap()) {
+ if (pkt->getSize() == sizeof(uint64_t)) {
+ condition_val64 = pkt->req->getExtraData();
+ overwrite_mem = !std::memcmp(&condition_val64, hostAddr,
+ sizeof(uint64_t));
+ } else if (pkt->getSize() == sizeof(uint32_t)) {
+ condition_val32 = (uint32_t)pkt->req->getExtraData();
+ overwrite_mem = !std::memcmp(&condition_val32, hostAddr,
+ sizeof(uint32_t));
+ } else
+ panic("Invalid size for conditional read/write\n");
+ }
- if (overwrite_mem)
- std::memcpy(hostAddr, &overwrite_val[0], pkt->getSize());
+ if (overwrite_mem)
+ std::memcpy(hostAddr, &overwrite_val[0], pkt->getSize());
- assert(!pkt->req->isInstFetch());
- TRACE_PACKET("Read/Write");
- numOther[pkt->req->masterId()]++;
+ assert(!pkt->req->isInstFetch());
+ TRACE_PACKET("Read/Write");
+ numOther[pkt->req->masterId()]++;
+ }
} else if (pkt->isRead()) {
assert(!pkt->isWrite());
if (pkt->isLLSC()) {
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 0e7135d73..19c7e6397 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -661,6 +661,12 @@ class Packet : public Printable
}
/**
+ * Accessor function to atomic op.
+ */
+ AtomicOpFunctor *getAtomicOp() const { return req->getAtomicOpFunctor(); }
+ bool isAtomicOp() const { return req->isAtomic(); }
+
+ /**
* It has been determined that the SC packet should successfully update
* memory. Therefore, convert this SC packet to a normal write.
*/
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 6fedfeb2d..882102923 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -56,6 +56,7 @@ bool testAndWrite(Addr addr, DataBlock datablk, Packet *pkt);
enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent") {
// Valid data
Read_Only, desc="block is Read Only (modulo functional writes)";
+ Write_Only, desc="block is Write Only";
Read_Write, desc="block is Read/Write";
// Possibly Invalid data
@@ -144,7 +145,9 @@ enumeration(TransitionResult, desc="...") {
enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
LD, desc="Load";
ST, desc="Store";
- ATOMIC, desc="Atomic Load/Store";
+ ATOMIC, desc="Atomic Load/Store -- depricated. use ATOMIC_RETURN or ATOMIC_NO_RETURN";
+ ATOMIC_RETURN, desc="Atomic Load/Store, return data";
+ ATOMIC_NO_RETURN, desc="Atomic Load/Store, do not return data";
IFETCH, desc="Instruction fetch";
IO, desc="I/O";
REPLACEMENT, desc="Replacement";
@@ -166,6 +169,8 @@ enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL
Default, desc="Replace this with access_types passed to the DMA Ruby object";
LD, desc="Load";
ST, desc="Store";
+ ATOMIC, desc="Atomic Load/Store";
+ REPLACEMENT, desc="Replacement";
FLUSH, desc="Flush request type";
NULL, desc="Invalid request type";
}
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index c7479089b..95fa1db17 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -126,6 +126,7 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
int Size, desc="size in bytes of access";
PrefetchBit Prefetch, desc="Is this a prefetch request";
int contextId, desc="this goes away but must be replace with Nilay";
+ int wfid, desc="Writethrough wavefront";
HSAScope scope, desc="HSA scope";
HSASegment segment, desc="HSA segment";
}
diff --git a/src/mem/request.hh b/src/mem/request.hh
index bb5e5d59c..0d2750a16 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -160,6 +160,11 @@ class Request
/** The request should be marked with RELEASE. */
RELEASE = 0x00040000,
+ /** The request is an atomic that returns data. */
+ ATOMIC_RETURN_OP = 0x40000000,
+ /** The request is an atomic that does not return data. */
+ ATOMIC_NO_RETURN_OP = 0x80000000,
+
/** The request should be marked with KERNEL.
* Used to indicate the synchronization associated with a GPU kernel
* launch or completion.
@@ -345,6 +350,9 @@ class Request
/** Sequence number of the instruction that creates the request */
InstSeqNum _reqInstSeqNum;
+ /** A pointer to an atomic operation */
+ AtomicOpFunctor *atomicOpFunctor;
+
public:
/**
@@ -356,7 +364,8 @@ class Request
: _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
_taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
_extraData(0), _contextId(0), _threadId(0), _pc(0),
- _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+ _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+ accessDelta(0), depth(0)
{}
Request(Addr paddr, unsigned size, Flags flags, MasterID mid,
@@ -364,7 +373,8 @@ class Request
: _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
_taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
_extraData(0), _contextId(0), _threadId(0), _pc(0),
- _reqInstSeqNum(seq_num), translateDelta(0), accessDelta(0), depth(0)
+ _reqInstSeqNum(seq_num), atomicOpFunctor(nullptr), translateDelta(0),
+ accessDelta(0), depth(0)
{
setPhys(paddr, size, flags, mid, curTick());
setThreadContext(cid, tid);
@@ -380,7 +390,8 @@ class Request
: _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
_taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
_extraData(0), _contextId(0), _threadId(0), _pc(0),
- _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+ _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+ accessDelta(0), depth(0)
{
setPhys(paddr, size, flags, mid, curTick());
}
@@ -389,7 +400,8 @@ class Request
: _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
_taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
_extraData(0), _contextId(0), _threadId(0), _pc(0),
- _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+ _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+ accessDelta(0), depth(0)
{
setPhys(paddr, size, flags, mid, time);
}
@@ -398,12 +410,12 @@ class Request
Addr pc)
: _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
_taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
- _extraData(0), _contextId(0), _threadId(0), _pc(0),
- _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+ _extraData(0), _contextId(0), _threadId(0), _pc(pc),
+ _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+ accessDelta(0), depth(0)
{
setPhys(paddr, size, flags, mid, time);
privateFlags.set(VALID_PC);
- _pc = pc;
}
Request(int asid, Addr vaddr, unsigned size, Flags flags, MasterID mid,
@@ -411,13 +423,27 @@ class Request
: _paddr(0), _size(0), _masterId(invldMasterId), _time(0),
_taskId(ContextSwitchTaskId::Unknown), _asid(0), _vaddr(0),
_extraData(0), _contextId(0), _threadId(0), _pc(0),
- _reqInstSeqNum(0), translateDelta(0), accessDelta(0), depth(0)
+ _reqInstSeqNum(0), atomicOpFunctor(nullptr), translateDelta(0),
+ accessDelta(0), depth(0)
+ {
+ setVirt(asid, vaddr, size, flags, mid, pc);
+ setThreadContext(cid, tid);
+ }
+
+ Request(int asid, Addr vaddr, int size, Flags flags, MasterID mid, Addr pc,
+ int cid, ThreadID tid, AtomicOpFunctor *atomic_op)
+ : atomicOpFunctor(atomic_op)
{
setVirt(asid, vaddr, size, flags, mid, pc);
setThreadContext(cid, tid);
}
- ~Request() {}
+ ~Request()
+ {
+ if (hasAtomicOpFunctor()) {
+ delete atomicOpFunctor;
+ }
+ }
/**
* Set up CPU and thread numbers.
@@ -541,6 +567,22 @@ class Request
return _time;
}
+ /**
+ * Accessor for atomic-op functor.
+ */
+ bool
+ hasAtomicOpFunctor()
+ {
+ return atomicOpFunctor != NULL;
+ }
+
+ AtomicOpFunctor *
+ getAtomicOpFunctor()
+ {
+ assert(atomicOpFunctor != NULL);
+ return atomicOpFunctor;
+ }
+
/** Accessor for flags. */
Flags
getFlags()
@@ -749,6 +791,15 @@ class Request
bool isAcquire() const { return _flags.isSet(ACQUIRE); }
bool isRelease() const { return _flags.isSet(RELEASE); }
bool isKernel() const { return _flags.isSet(KERNEL); }
+ bool isAtomicReturn() const { return _flags.isSet(ATOMIC_RETURN_OP); }
+ bool isAtomicNoReturn() const { return _flags.isSet(ATOMIC_NO_RETURN_OP); }
+
+ bool
+ isAtomic() const
+ {
+ return _flags.isSet(ATOMIC_RETURN_OP) ||
+ _flags.isSet(ATOMIC_NO_RETURN_OP);
+ }
/**
* Accessor functions for the memory space configuration flags and used by