From 48e43c9ad1cd292b494f3d05f9d13845dd1a6d1e Mon Sep 17 00:00:00 2001 From: Michael LeBeane Date: Wed, 26 Oct 2016 22:48:37 -0400 Subject: ruby: Allow multiple outstanding DMA requests DMA sequencers and protocols can currently only issue one DMA access at a time. This patch implements the necessary functionality to support multiple outstanding DMA requests in Ruby. --- src/mem/ruby/system/DMASequencer.cc | 84 +++++++++++++++++++++++++------------ src/mem/ruby/system/DMASequencer.hh | 23 ++++++---- src/mem/ruby/system/Sequencer.py | 1 + 3 files changed, 74 insertions(+), 34 deletions(-) (limited to 'src/mem/ruby/system') diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index 3b0304158..4bee19b52 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -35,8 +35,18 @@ #include "mem/ruby/system/DMASequencer.hh" #include "mem/ruby/system/RubySystem.hh" +DMARequest::DMARequest(uint64_t start_paddr, int len, bool write, + int bytes_completed, int bytes_issued, uint8_t *data, + PacketPtr pkt) + : start_paddr(start_paddr), len(len), write(write), + bytes_completed(bytes_completed), bytes_issued(bytes_issued), data(data), + pkt(pkt) +{ +} + DMASequencer::DMASequencer(const Params *p) - : RubyPort(p) + : RubyPort(p), m_outstanding_count(0), + m_max_outstanding_requests(p->max_outstanding_requests) { } @@ -44,7 +54,6 @@ void DMASequencer::init() { RubyPort::init(); - m_is_busy = false; m_data_block_mask = mask(RubySystem::getBlockSizeBits()); for (const auto &s_port : slave_ports) @@ -54,7 +63,7 @@ DMASequencer::init() RequestStatus DMASequencer::makeRequest(PacketPtr pkt) { - if (m_is_busy) { + if (m_outstanding_count == m_max_outstanding_requests) { return RequestStatus_BufferFull; } @@ -63,21 +72,29 @@ DMASequencer::makeRequest(PacketPtr pkt) int len = pkt->getSize(); bool write = pkt->isWrite(); - assert(!m_is_busy); // only support one outstanding DMA request - m_is_busy = true; + 
assert(m_outstanding_count < m_max_outstanding_requests); + Addr line_addr = makeLineAddress(paddr); + auto emplace_pair = + m_RequestTable.emplace(std::piecewise_construct, + std::forward_as_tuple(line_addr), + std::forward_as_tuple(paddr, len, write, 0, + 0, data, pkt)); + DMARequest& active_request = emplace_pair.first->second; + + // This is pretty conservative. A regular Sequencer with a more beefy + // request table that can track multiple requests for a cache line should + // be used if a more aggressive policy is needed. + if (!emplace_pair.second) { + DPRINTF(RubyDma, "DMA aliased: addr %p, len %d\n", line_addr, len); + return RequestStatus_Aliased; + } - active_request.start_paddr = paddr; - active_request.write = write; - active_request.data = data; - active_request.len = len; - active_request.bytes_completed = 0; - active_request.bytes_issued = 0; - active_request.pkt = pkt; + DPRINTF(RubyDma, "DMA req created: addr %p, len %d\n", line_addr, len); std::shared_ptr<SequencerMsg> msg = std::make_shared<SequencerMsg>(clockEdge()); msg->getPhysicalAddress() = paddr; - msg->getLineAddress() = makeLineAddress(msg->getPhysicalAddress()); + msg->getLineAddress() = line_addr; msg->getType() = write ? 
SequencerRequestType_ST : SequencerRequestType_LD; int offset = paddr & m_data_block_mask; @@ -90,6 +107,8 @@ DMASequencer::makeRequest(PacketPtr pkt) } } + m_outstanding_count++; + assert(m_mandatory_q_ptr != NULL); m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); active_request.bytes_issued += msg->getLen(); @@ -98,18 +117,22 @@ DMASequencer::makeRequest(PacketPtr pkt) } void -DMASequencer::issueNext() +DMASequencer::issueNext(const Addr& address) { - assert(m_is_busy); + RequestTable::iterator i = m_RequestTable.find(address); + assert(i != m_RequestTable.end()); + + DMARequest &active_request = i->second; + + assert(m_outstanding_count <= m_max_outstanding_requests); active_request.bytes_completed = active_request.bytes_issued; if (active_request.len == active_request.bytes_completed) { - // - // Must unset the busy flag before calling back the dma port because - // the callback may cause a previously nacked request to be reissued - // - DPRINTF(RubyDma, "DMA request completed\n"); - m_is_busy = false; - ruby_hit_callback(active_request.pkt); + DPRINTF(RubyDma, "DMA request completed: addr %p, size %d\n", + address, active_request.len); + m_outstanding_count--; + PacketPtr pkt = active_request.pkt; + m_RequestTable.erase(i); + ruby_hit_callback(pkt); return; } @@ -146,9 +169,13 @@ DMASequencer::issueNext() } void -DMASequencer::dataCallback(const DataBlock & dblk) +DMASequencer::dataCallback(const DataBlock & dblk, const Addr& address) { - assert(m_is_busy); + + RequestTable::iterator i = m_RequestTable.find(address); + assert(i != m_RequestTable.end()); + + DMARequest &active_request = i->second; int len = active_request.bytes_issued - active_request.bytes_completed; int offset = 0; if (active_request.bytes_completed == 0) @@ -158,13 +185,16 @@ DMASequencer::dataCallback(const DataBlock & dblk) memcpy(&active_request.data[active_request.bytes_completed], dblk.getData(offset, len), len); } - issueNext(); + issueNext(address); } void 
-DMASequencer::ackCallback() +DMASequencer::ackCallback(const Addr& address) { - issueNext(); + RequestTable::iterator i = m_RequestTable.find(address); + assert(i != m_RequestTable.end()); + + issueNext(address); } void diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh index 3b408e5ac..9f1f4e503 100644 --- a/src/mem/ruby/system/DMASequencer.hh +++ b/src/mem/ruby/system/DMASequencer.hh @@ -31,14 +31,19 @@ #include <memory> #include <ostream> +#include <unordered_map> #include "mem/protocol/DMASequencerRequestType.hh" +#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/DataBlock.hh" #include "mem/ruby/system/RubyPort.hh" #include "params/DMASequencer.hh" struct DMARequest { + DMARequest(uint64_t start_paddr, int len, bool write, int bytes_completed, + int bytes_issued, uint8_t *data, PacketPtr pkt); + uint64_t start_paddr; int len; bool write; @@ -57,23 +62,27 @@ class DMASequencer : public RubyPort /* external interface */ RequestStatus makeRequest(PacketPtr pkt) override; - bool busy() { return m_is_busy;} - int outstandingCount() const override { return (m_is_busy ? 
1 : 0); } + bool busy() { return m_outstanding_count > 0; } + int outstandingCount() const override { return m_outstanding_count; } bool isDeadlockEventScheduled() const override { return false; } void descheduleDeadlockEvent() override {} /* SLICC callback */ - void dataCallback(const DataBlock & dblk); - void ackCallback(); + void dataCallback(const DataBlock &dblk, const Addr &addr); + void ackCallback(const Addr &addr); void recordRequestType(DMASequencerRequestType requestType); private: - void issueNext(); + void issueNext(const Addr &addr); - bool m_is_busy; uint64_t m_data_block_mask; - DMARequest active_request; + + typedef std::unordered_map<Addr, DMARequest> RequestTable; + RequestTable m_RequestTable; + + int m_outstanding_count; + int m_max_outstanding_requests; }; #endif // __MEM_RUBY_SYSTEM_DMASEQUENCER_HH__ diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index ed142e914..22d545d30 100644 --- a/src/mem/ruby/system/Sequencer.py +++ b/src/mem/ruby/system/Sequencer.py @@ -81,3 +81,4 @@ class RubySequencer(RubyPort): class DMASequencer(RubyPort): type = 'DMASequencer' cxx_header = "mem/ruby/system/DMASequencer.hh" + max_outstanding_requests = Param.Int(64, "max outstanding requests") -- cgit v1.2.3