/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Sooraj Puthoor
 */

#include "base/logging.hh"
#include "base/str.hh"
#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"
#endif // X86_ISA

#include "mem/ruby/system/VIPERCoalescer.hh"

#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"

using namespace std;

VIPERCoalescer *
VIPERCoalescerParams::create()
{
    return new VIPERCoalescer(this);
}

VIPERCoalescer::VIPERCoalescer(const Params *p)
    : GPUCoalescer(p)
{
    m_max_wb_per_cycle = p->max_wb_per_cycle;
    m_max_inv_per_cycle = p->max_inv_per_cycle;
    m_outstanding_inv = 0;
    m_outstanding_wb = 0;
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cacheline, the request is added to both
// the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cacheline and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
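//
// Illustrative sketch (not part of the original file): one way a caller could
// act on these return values. The coalescer pointer and the
// stallAndRetryLater() helper below are hypothetical and shown only to make
// the makeRequest() contract above concrete; the real issuing path lives in
// the GPU core / RubyPort code, not here.
//
//     RequestStatus status = coalescer->makeRequest(pkt);
//     if (status == RequestStatus_Aliased) {
//         // A request to the same cache line is already pending and this
//         // packet could not be coalesced with it; hold on to the packet
//         // and reissue it later, once the coalescer signals a retry
//         // (e.g. via trySendRetries()).
//         stallAndRetryLater(pkt);
//     } else {
//         assert(status == RequestStatus_Issued);
//     }
//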
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb | m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidations\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine.
            // Barriers and Kernel End can coalesce.
            // If it is a Kernel Begin, flush the cache.
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
//        return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on Kernel End
        // isKernel + isRelease
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        if (m_outstanding_wb == 0) {
            for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus != RequestStatus_Issued) {
        // Request not issued
        // enqueue Retry
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on Kernel Begin
        // isKernel + isAcquire
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache
        invwbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq
        // Invalidate the L1 cache
        invL1();
    }
    // Request was successful
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}

void
VIPERCoalescer::wbCallback(Addr addr)
{
    m_outstanding_wb--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent.
    assert(((int) m_outstanding_wb) >= 0);
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

void
VIPERCoalescer::invCallback(Addr addr)
{
    m_outstanding_inv--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent.
    // This probably won't happen, since
    // we don't wait on cache invalidations.
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

/**
 * Invalidate L1 cache (Acquire)
 */
void
VIPERCoalescer::invL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_outstanding_inv);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict Read-only data
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_inv++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_outstanding_inv);
}
/**
 * Writeback L1 cache (Release)
 */
void
VIPERCoalescer::wbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding before Cache Walk\n",
            m_outstanding_wb);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_wb++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding after Cache Walk\n",
            m_outstanding_wb);
}

/**
 * Invalidate and Writeback L1 cache (Acquire&Release)
 */
void
VIPERCoalescer::invwbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict Read-only data
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_inv++;
    }
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_wb++;
    }
}