summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBlake Hechtman <blake.hechtman@amd.com>2015-07-20 09:15:18 -0500
committerBlake Hechtman <blake.hechtman@amd.com>2015-07-20 09:15:18 -0500
commit34fb6b5e35db751f310aee824046107e57a0ba03 (patch)
tree4f07b86c4d50f0431a8451406026a693ccbb1e39
parentb7ea2bc705bfae2e7719d6259cc14de95f4f991d (diff)
downloadgem5-34fb6b5e35db751f310aee824046107e57a0ba03.tar.xz
mem: misc flags for AMD gpu model
This patch adds support for marking memory requests/packets with attributes defined in HSA, such as memory order and scope.
-rw-r--r--src/mem/protocol/RubySlicc_Exports.sm25
-rw-r--r--src/mem/protocol/RubySlicc_Types.sm2
-rw-r--r--src/mem/request.hh131
-rw-r--r--src/mem/ruby/common/DataBlock.hh1
-rw-r--r--src/mem/ruby/slicc_interface/RubyRequest.hh70
-rw-r--r--src/mem/ruby/system/RubyPort.cc51
6 files changed, 252 insertions, 28 deletions
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 617989d15..6fedfeb2d 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -41,7 +41,7 @@ external_type(Tick, primitive="yes", default="0");
structure(DataBlock, external = "yes", desc="..."){
void clear();
- void copyPartial(DataBlock, int, int);
+ void atomicPartial(DataBlock, WriteMask);
}
bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
@@ -78,6 +78,26 @@ enumeration(AccessPermission, desc="...", default="AccessPermission_NotPresent")
NotPresent, desc="block is NotPresent";
Busy, desc="block is in a transient state, currently invalid";
}
+//HSA scopes
+enumeration(HSAScope, desc="...", default="HSAScope_UNSPECIFIED") {
+ UNSPECIFIED, desc="Unspecified scope";
+ NOSCOPE, desc="Explictly unscoped";
+ WAVEFRONT, desc="Wavefront scope";
+ WORKGROUP, desc="Workgroup scope";
+ DEVICE, desc="Device scope";
+ SYSTEM, desc="System scope";
+}
+
+// HSA segment types
+enumeration(HSASegment, desc="...", default="HSASegment_GLOBAL") {
+ GLOBAL, desc="Global segment";
+ GROUP, desc="Group segment";
+ PRIVATE, desc="Private segment";
+ KERNARG, desc="Kernarg segment";
+ READONLY, desc="Readonly segment";
+ SPILL, desc="Spill segment";
+ ARG, desc="Arg segment";
+}
// TesterStatus
enumeration(TesterStatus, desc="...") {
@@ -143,9 +163,10 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
}
enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
- Default, desc="Replace this with access_types passed to the DMA Ruby object";
+ Default, desc="Replace this with access_types passed to the DMA Ruby object";
LD, desc="Load";
ST, desc="Store";
+ FLUSH, desc="Flush request type";
NULL, desc="Invalid request type";
}
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index 8e846098c..c7479089b 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -126,6 +126,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
int Size, desc="size in bytes of access";
PrefetchBit Prefetch, desc="Is this a prefetch request";
int contextId, desc="this goes away but must be replace with Nilay";
+ HSAScope scope, desc="HSA scope";
+ HSASegment segment, desc="HSA segment";
}
structure(AbstractEntry, primitive="yes", external = "yes") {
diff --git a/src/mem/request.hh b/src/mem/request.hh
index de781f5d6..bb5e5d59c 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -160,6 +160,12 @@ class Request
/** The request should be marked with RELEASE. */
RELEASE = 0x00040000,
+ /** The request should be marked with KERNEL.
+ * Used to indicate the synchronization associated with a GPU kernel
+ * launch or completion.
+ */
+ KERNEL = 0x00001000,
+
/**
* The request should be handled by the generic IPR code (only
* valid together with MMAPPED_IPR)
@@ -198,6 +204,37 @@ class Request
};
/** @} */
+ typedef uint32_t MemSpaceConfigFlagsType;
+ typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
+
+ enum : MemSpaceConfigFlagsType {
+ /** Has a synchronization scope been set? */
+ SCOPE_VALID = 0x00000001,
+ /** Access has Wavefront scope visibility */
+ WAVEFRONT_SCOPE = 0x00000002,
+ /** Access has Workgroup scope visibility */
+ WORKGROUP_SCOPE = 0x00000004,
+ /** Access has Device (e.g., GPU) scope visibility */
+ DEVICE_SCOPE = 0x00000008,
+ /** Access has System (e.g., CPU + GPU) scope visibility */
+ SYSTEM_SCOPE = 0x00000010,
+
+ /** Global Segment */
+ GLOBAL_SEGMENT = 0x00000020,
+ /** Group Segment */
+ GROUP_SEGMENT = 0x00000040,
+ /** Private Segment */
+ PRIVATE_SEGMENT = 0x00000080,
+ /** Kernarg Segment */
+ KERNARG_SEGMENT = 0x00000100,
+ /** Readonly Segment */
+ READONLY_SEGMENT = 0x00000200,
+ /** Spill Segment */
+ SPILL_SEGMENT = 0x00000400,
+ /** Arg Segment */
+ ARG_SEGMENT = 0x00000800,
+ };
+
private:
typedef uint8_t PrivateFlagsType;
typedef ::Flags<PrivateFlagsType> PrivateFlags;
@@ -268,6 +305,9 @@ class Request
/** Flag structure for the request. */
Flags _flags;
+ /** Memory space configuration flag structure for the request. */
+ MemSpaceConfigFlags _memSpaceConfigFlags;
+
/** Private flags for field validity checking. */
PrivateFlags privateFlags;
@@ -520,6 +560,13 @@ class Request
_flags.set(flags);
}
+ void
+ setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+ {
+ assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
+ _memSpaceConfigFlags.set(extraFlags);
+ }
+
/** Accessor function for vaddr.*/
bool
hasVaddr() const
@@ -685,7 +732,7 @@ class Request
_reqInstSeqNum = seq_num;
}
- /** Accessor functions for flags. Note that these are for testing
+ /** Accessor functions for flags. Note that these are for testing
only; setting flags should be done via setFlags(). */
bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); }
bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
@@ -701,6 +748,88 @@ class Request
bool isPTWalk() const { return _flags.isSet(PT_WALK); }
bool isAcquire() const { return _flags.isSet(ACQUIRE); }
bool isRelease() const { return _flags.isSet(RELEASE); }
+ bool isKernel() const { return _flags.isSet(KERNEL); }
+
+ /**
+ * Accessor functions for the memory space configuration flags, which are
+ * used by GPU ISAs such as the Heterogeneous System Architecture (HSA).
+ * Note that these are for testing only; setting the flags should be done
+ * via setMemSpaceConfigFlags().
+ */
+ bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
+
+ bool
+ isWavefrontScope() const
+ {
+ assert(isScoped());
+ return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
+ }
+
+ bool
+ isWorkgroupScope() const
+ {
+ assert(isScoped());
+ return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
+ }
+
+ bool
+ isDeviceScope() const
+ {
+ assert(isScoped());
+ return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
+ }
+
+ bool
+ isSystemScope() const
+ {
+ assert(isScoped());
+ return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
+ }
+
+ bool
+ isGlobalSegment() const
+ {
+ return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
+ (!isGroupSegment() && !isPrivateSegment() &&
+ !isKernargSegment() && !isReadonlySegment() &&
+ !isSpillSegment() && !isArgSegment());
+ }
+
+ bool
+ isGroupSegment() const
+ {
+ return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
+ }
+
+ bool
+ isPrivateSegment() const
+ {
+ return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
+ }
+
+ bool
+ isKernargSegment() const
+ {
+ return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
+ }
+
+ bool
+ isReadonlySegment() const
+ {
+ return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
+ }
+
+ bool
+ isSpillSegment() const
+ {
+ return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
+ }
+
+ bool
+ isArgSegment() const
+ {
+ return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
+ }
};
#endif // __MEM_REQUEST_HH__
diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh
index ac08fac82..49ce3624a 100644
--- a/src/mem/ruby/common/DataBlock.hh
+++ b/src/mem/ruby/common/DataBlock.hh
@@ -60,7 +60,6 @@ class DataBlock
const uint8_t *getData(int offset, int len) const;
void setByte(int whichByte, uint8_t data);
void setData(const uint8_t *data, int offset, int len);
- void copyPartial(const DataBlock & dblk, int offset, int len);
bool equal(const DataBlock& obj) const;
void print(std::ostream& out) const;
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index b17269a78..73f214a20 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -30,12 +30,16 @@
#define __MEM_RUBY_SLICC_INTERFACE_RUBY_REQUEST_HH__
#include <ostream>
+#include <vector>
+#include "mem/protocol/HSAScope.hh"
+#include "mem/protocol/HSASegment.hh"
#include "mem/protocol/Message.hh"
#include "mem/protocol/PrefetchBit.hh"
#include "mem/protocol/RubyAccessMode.hh"
#include "mem/protocol/RubyRequestType.hh"
#include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/DataBlock.hh"
class RubyRequest : public Message
{
@@ -50,11 +54,41 @@ class RubyRequest : public Message
uint8_t* data;
PacketPtr pkt;
ContextID m_contextId;
+ int m_wfid;
+ HSAScope m_scope;
+ HSASegment m_segment;
+
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
- ContextID _proc_id = 100)
+ ContextID _proc_id = 100, ContextID _core_id = 99,
+ HSAScope _scope = HSAScope_UNSPECIFIED,
+ HSASegment _segment = HSASegment_GLOBAL)
+ : Message(curTime),
+ m_PhysicalAddress(_paddr),
+ m_Type(_type),
+ m_ProgramCounter(_pc),
+ m_AccessMode(_access_mode),
+ m_Size(_len),
+ m_Prefetch(_pb),
+ data(_data),
+ pkt(_pkt),
+ m_contextId(_core_id),
+ m_scope(_scope),
+ m_segment(_segment)
+ {
+ m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ }
+
+ RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+ uint64_t _pc, RubyRequestType _type,
+ RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+ unsigned _proc_id, unsigned _core_id,
+ int _wm_size, std::vector<bool> & _wm_mask,
+ DataBlock & _Data,
+ HSAScope _scope = HSAScope_UNSPECIFIED,
+ HSASegment _segment = HSASegment_GLOBAL)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -64,11 +98,41 @@ class RubyRequest : public Message
m_Prefetch(_pb),
data(_data),
pkt(_pkt),
- m_contextId(_proc_id)
+ m_contextId(_core_id),
+ m_wfid(_proc_id),
+ m_scope(_scope),
+ m_segment(_segment)
{
- m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
+ RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
+ uint64_t _pc, RubyRequestType _type,
+ RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
+ unsigned _proc_id, unsigned _core_id,
+ int _wm_size, std::vector<bool> & _wm_mask,
+ DataBlock & _Data,
+ std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
+ HSAScope _scope = HSAScope_UNSPECIFIED,
+ HSASegment _segment = HSASegment_GLOBAL)
+ : Message(curTime),
+ m_PhysicalAddress(_paddr),
+ m_Type(_type),
+ m_ProgramCounter(_pc),
+ m_AccessMode(_access_mode),
+ m_Size(_len),
+ m_Prefetch(_pb),
+ data(_data),
+ pkt(_pkt),
+ m_contextId(_core_id),
+ m_wfid(_proc_id),
+ m_scope(_scope),
+ m_segment(_segment)
+ {
+ m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ }
+
+
RubyRequest(Tick curTime) : Message(curTime) {}
MsgPtr clone() const
{ return std::shared_ptr<Message>(new RubyRequest(*this)); }
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 52acaf8c3..5a5f528bb 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -237,25 +237,27 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
// Check for pio requests and directly send them to the dedicated
// pio port.
- if (!isPhysMemAddress(pkt->getAddr())) {
- assert(ruby_port->memMasterPort.isConnected());
- DPRINTF(RubyPort, "Request address %#x assumed to be a pio address\n",
- pkt->getAddr());
-
- // Save the port in the sender state object to be used later to
- // route the response
- pkt->pushSenderState(new SenderState(this));
+ if (pkt->cmd != MemCmd::MemFenceReq) {
+ if (!isPhysMemAddress(pkt->getAddr())) {
+ assert(ruby_port->memMasterPort.isConnected());
+ DPRINTF(RubyPort, "Request address %#x assumed to be a "
+ "pio address\n", pkt->getAddr());
+
+ // Save the port in the sender state object to be used later to
+ // route the response
+ pkt->pushSenderState(new SenderState(this));
+
+ // send next cycle
+ RubySystem *rs = ruby_port->m_ruby_system;
+ ruby_port->memMasterPort.schedTimingReq(pkt,
+ curTick() + rs->clockPeriod());
+ return true;
+ }
- // send next cycle
- RubySystem *rs = ruby_port->m_ruby_system;
- ruby_port->memMasterPort.schedTimingReq(pkt,
- curTick() + rs->clockPeriod());
- return true;
+ assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
+ RubySystem::getBlockSizeBytes());
}
- assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
- RubySystem::getBlockSizeBytes());
-
// Submit the ruby request
RequestStatus requestStatus = ruby_port->makeRequest(pkt);
@@ -272,9 +274,11 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
return true;
}
-
- DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n",
- pkt->getAddr(), RequestStatus_to_string(requestStatus));
+ if (pkt->cmd != MemCmd::MemFenceReq) {
+ DPRINTF(RubyPort,
+ "Request for address %#x did not issued because %s\n",
+ pkt->getAddr(), RequestStatus_to_string(requestStatus));
+ }
addToRetryList();
@@ -466,9 +470,14 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
}
}
- // Flush requests don't access physical memory
- if (pkt->isFlush()) {
+ // Flush, acquire, release requests don't access physical memory
+ if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
+ accessPhysMem = false;
+ }
+
+ if (pkt->req->isKernel()) {
accessPhysMem = false;
+ needsResponse = true;
}
DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);