diff options
author | Andrew Bardsley <Andrew.Bardsley@arm.com> | 2014-07-23 16:09:04 -0500 |
---|---|---|
committer | Andrew Bardsley <Andrew.Bardsley@arm.com> | 2014-07-23 16:09:04 -0500 |
commit | 0e8a90f06bd3db00f700891a33458353478cce76 (patch) | |
tree | 50742efcc18254a36e80029b522139e8bd601dc2 /src/cpu/minor/lsq.cc | |
parent | 040fa23d01109c68d194d2517df777844e4e2f13 (diff) | |
download | gem5-0e8a90f06bd3db00f700891a33458353478cce76.tar.xz |
cpu: `Minor' in-order CPU model
This patch contains a new CPU model named `Minor'. Minor models a four
stage in-order execution pipeline (fetch lines, decompose into
macroops, decompose macroops into microops, execute).
The model was developed to support the ARM ISA but should be fixable
to support all the remaining gem5 ISAs. It currently also works for
Alpha, and regressions are included for ARM and Alpha (including Linux
boot).
Documentation for the model can be found in src/doc/inside-minor.doxygen and
its internal operations can be visualised using the Minorview tool
utils/minorview.py.
Minor was designed to be fairly simple and not to engage in a lot of
instruction annotation. As such, it currently has very few gathered
stats and may lack other gem5 features.
Minor is faster than the o3 model. Sample results:
Benchmark | Stat host_seconds (s)
---------------+--------v--------v--------
(on ARM, opt) | simple | o3 | minor
| timing | timing | timing
---------------+--------+--------+--------
10.linux-boot | 169 | 1883 | 1075
10.mcf | 117 | 967 | 491
20.parser | 668 | 6315 | 3146
30.eon | 542 | 3413 | 2414
40.perlbmk | 2339 | 20905 | 11532
50.vortex | 122 | 1094 | 588
60.bzip2 | 2045 | 18061 | 9662
70.twolf | 207 | 2736 | 1036
Diffstat (limited to 'src/cpu/minor/lsq.cc')
-rw-r--r-- | src/cpu/minor/lsq.cc | 1614 |
1 files changed, 1614 insertions, 0 deletions
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc new file mode 100644 index 000000000..c5e38c78d --- /dev/null +++ b/src/cpu/minor/lsq.cc @@ -0,0 +1,1614 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include <iomanip> +#include <sstream> + +#include "arch/locked_mem.hh" +#include "arch/mmapped_ipr.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/exec_context.hh" +#include "cpu/minor/execute.hh" +#include "cpu/minor/lsq.hh" +#include "cpu/minor/pipeline.hh" +#include "debug/Activity.hh" +#include "debug/MinorMem.hh" + +namespace Minor +{ + +/** Returns the offset of addr into an aligned a block of size block_size */ +static Addr +addrBlockOffset(Addr addr, unsigned int block_size) +{ + return addr & (block_size - 1); +} + +/** Returns true if the given [addr .. addr+size-1] transfer needs to be + * fragmented across a block size of block_size */ +static bool +transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size) +{ + return (addrBlockOffset(addr, block_size) + size) > block_size; +} + +LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, + PacketDataPtr data_, uint64_t *res_) : + SenderState(), + port(port_), + inst(inst_), + isLoad(isLoad_), + data(data_), + packet(NULL), + request(), + fault(NoFault), + res(res_), + skipped(false), + issuedToMemory(false), + state(NotIssued) +{ } + +LSQ::AddrRangeCoverage +LSQ::LSQRequest::containsAddrRangeOf( + Addr req1_addr, unsigned int req1_size, + Addr req2_addr, unsigned int req2_size) +{ + /* 'end' here means the address of the byte just past the request + * blocks */ + Addr req2_end_addr = req2_addr + req2_size; + Addr req1_end_addr = req1_addr + req1_size; + + AddrRangeCoverage ret; + + if (req1_addr > req2_end_addr || req1_end_addr < req2_addr) + ret = NoAddrRangeCoverage; + else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr) + ret = FullAddrRangeCoverage; + else + ret = PartialAddrRangeCoverage; + + return ret; +} + +LSQ::AddrRangeCoverage +LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request) +{ + return containsAddrRangeOf(request.getPaddr(), request.getSize(), + other_request->request.getPaddr(), other_request->request.getSize()); +} + +bool +LSQ::LSQRequest::isBarrier() +{ + return inst->isInst() && inst->staticInst->isMemBarrier(); +} + +bool +LSQ::LSQRequest::needsToBeSentToStoreBuffer() +{ + return state == StoreToStoreBuffer; +} + +void +LSQ::LSQRequest::setState(LSQRequestState new_state) +{ + DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:" + " %s\n", state, new_state, *inst); + state = new_state; +} + +bool +LSQ::LSQRequest::isComplete() const +{ + /* @todo, There is currently only one 'completed' state. This + * may not be a good choice */ + return state == Complete; +} + +void +LSQ::LSQRequest::reportData(std::ostream &os) const +{ + os << (isLoad ? 'R' : 'W') << ';'; + inst->reportData(os); + os << ';' << state; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage) +{ + switch (coverage) { + case LSQ::PartialAddrRangeCoverage: + os << "PartialAddrRangeCoverage"; + break; + case LSQ::FullAddrRangeCoverage: + os << "FullAddrRangeCoverage"; + break; + case LSQ::NoAddrRangeCoverage: + os << "NoAddrRangeCoverage"; + break; + default: + os << "AddrRangeCoverage-" << static_cast<int>(coverage); + break; + } + return os; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state) +{ + switch (state) { + case LSQ::LSQRequest::NotIssued: + os << "NotIssued"; + break; + case LSQ::LSQRequest::InTranslation: + os << "InTranslation"; + break; + case LSQ::LSQRequest::Translated: + os << "Translated"; + break; + case LSQ::LSQRequest::Failed: + os << "Failed"; + break; + case LSQ::LSQRequest::RequestIssuing: + os << "RequestIssuing"; + break; + case LSQ::LSQRequest::StoreToStoreBuffer: + os << "StoreToStoreBuffer"; + break; + case LSQ::LSQRequest::StoreInStoreBuffer: + os << "StoreInStoreBuffer"; + break; + case LSQ::LSQRequest::StoreBufferIssuing: + os << "StoreBufferIssuing"; + break; + case LSQ::LSQRequest::RequestNeedsRetry: + os << "RequestNeedsRetry"; + break; + case LSQ::LSQRequest::StoreBufferNeedsRetry: + os << "StoreBufferNeedsRetry"; + break; + case LSQ::LSQRequest::Complete: + os << "Complete"; + break; + default: + os << "LSQRequestState-" << static_cast<int>(state); + break; + } + return os; +} + +void +LSQ::clearMemBarrier(MinorDynInstPtr inst) +{ + bool is_last_barrier = inst->id.execSeqNum >= lastMemBarrier; + + DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n", + (is_last_barrier ? "last" : "a"), *inst); + + if (is_last_barrier) + lastMemBarrier = 0; +} + +void +LSQ::SingleDataRequest::finish(Fault fault_, RequestPtr request_, + ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = fault_; + + port.numAccessesInDTLB--; + + DPRINTFS(MinorMem, (&port), "Received translation response for" + " request: %s\n", *inst); + + makePacket(); + + setState(Translated); + port.tryToSendToTransfers(this); + + /* Let's try and wake up the processor for the next cycle */ + port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); +} + +void +LSQ::SingleDataRequest::startAddrTranslation() +{ + ThreadContext *thread = port.cpu.getContext( + inst->id.threadId); + + port.numAccessesInDTLB++; + + setState(LSQ::LSQRequest::InTranslation); + + DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n"); + /* Submit the translation request. The response will come through + * finish/markDelayed on the LSQRequest as it bears the Translation + * interface */ + thread->getDTBPtr()->translateTiming( + &request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write)); +} + +void +LSQ::SingleDataRequest::retireResponse(PacketPtr packet_) +{ + DPRINTFS(MinorMem, (&port), "Retiring packet\n"); + packet = packet_; + packetInFlight = false; + setState(Complete); +} + +void +LSQ::SplitDataRequest::finish(Fault fault_, RequestPtr request_, + ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = fault_; + + port.numAccessesInDTLB--; + + unsigned int M5_VAR_USED expected_fragment_index = + numTranslatedFragments; + + numInTranslationFragments--; + numTranslatedFragments++; + + DPRINTFS(MinorMem, (&port), "Received translation response for fragment" + " %d of request: %s\n", expected_fragment_index, *inst); + + assert(request_ == fragmentRequests[expected_fragment_index]); + + /* Wake up next cycle to get things going again in case the + * tryToSendToTransfers does take */ + port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + + if (fault != NoFault) { + /* tryToSendToTransfers will handle the fault */ + + DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:" + " %d of request: %s\n", + expected_fragment_index, *inst); + + setState(Translated); + port.tryToSendToTransfers(this); + } else if (numTranslatedFragments == numFragments) { + makeFragmentPackets(); + + setState(Translated); + port.tryToSendToTransfers(this); + } else { + /* Avoid calling translateTiming from within ::finish */ + assert(!translationEvent.scheduled()); + port.cpu.schedule(translationEvent, curTick()); + } +} + +LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, + bool isLoad_, PacketDataPtr data_, uint64_t *res_) : + LSQRequest(port_, inst_, isLoad_, data_, res_), + translationEvent(*this), + numFragments(0), + numInTranslationFragments(0), + numTranslatedFragments(0), + numIssuedFragments(0), + numRetiredFragments(0), + fragmentRequests(), + fragmentPackets() +{ + /* Don't know how many elements are needed until the request is + * populated by the caller. */ +} + +LSQ::SplitDataRequest::~SplitDataRequest() +{ + for (auto i = fragmentRequests.begin(); + i != fragmentRequests.end(); i++) + { + delete *i; + } + + for (auto i = fragmentPackets.begin(); + i != fragmentPackets.end(); i++) + { + delete *i; + } +} + +void +LSQ::SplitDataRequest::makeFragmentRequests() +{ + Addr base_addr = request.getVaddr(); + unsigned int whole_size = request.getSize(); + unsigned int line_width = port.lineWidth; + + unsigned int fragment_size; + Addr fragment_addr; + + /* Assume that this transfer is across potentially many block snap + * boundaries: + * + * | _|________|________|________|___ | + * | |0| 1 | 2 | 3 | 4 | | + * | |_|________|________|________|___| | + * | | | | | | + * + * The first transfer (0) can be up to lineWidth in size. + * All the middle transfers (1-3) are lineWidth in size + * The last transfer (4) can be from zero to lineWidth - 1 in size + */ + unsigned int first_fragment_offset = + addrBlockOffset(base_addr, line_width); + unsigned int last_fragment_size = + addrBlockOffset(base_addr + whole_size, line_width); + unsigned int first_fragment_size = + line_width - first_fragment_offset; + + unsigned int middle_fragments_total_size = + whole_size - (first_fragment_size + last_fragment_size); + + assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0); + + unsigned int middle_fragment_count = + middle_fragments_total_size / line_width; + + numFragments = 1 /* first */ + middle_fragment_count + + (last_fragment_size == 0 ? 0 : 1); + + DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests." + " First fragment size: %d Last fragment size: %d\n", + numFragments, first_fragment_size, + (last_fragment_size == 0 ? line_width : last_fragment_size)); + + assert(((middle_fragment_count * line_width) + + first_fragment_size + last_fragment_size) == whole_size); + + fragment_addr = base_addr; + fragment_size = first_fragment_size; + + /* Just past the last address in the request */ + Addr end_addr = base_addr + whole_size; + + for (unsigned int fragment_index = 0; fragment_index < numFragments; + fragment_index++) + { + bool M5_VAR_USED is_last_fragment = false; + + if (fragment_addr == base_addr) { + /* First fragment */ + fragment_size = first_fragment_size; + } else { + if ((fragment_addr + line_width) > end_addr) { + /* Adjust size of last fragment */ + fragment_size = end_addr - fragment_addr; + is_last_fragment = true; + } else { + /* Middle fragments */ + fragment_size = line_width; + } + } + + Request *fragment = new Request(); + + fragment->setThreadContext(request.contextId(), /* thread id */ 0); + fragment->setVirt(0 /* asid */, + fragment_addr, fragment_size, request.getFlags(), + request.masterId(), + request.getPC()); + + DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d" + " (whole request addr: 0x%x size: %d) %s\n", + fragment_addr, fragment_size, base_addr, whole_size, + (is_last_fragment ? "last fragment" : "")); + + fragment_addr += fragment_size; + + fragmentRequests.push_back(fragment); + } +} + +void +LSQ::SplitDataRequest::makeFragmentPackets() +{ + Addr base_addr = request.getVaddr(); + + DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst); + + for (unsigned int fragment_index = 0; fragment_index < numFragments; + fragment_index++) + { + Request *fragment = fragmentRequests[fragment_index]; + + DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s" + " (%d, 0x%x)\n", + fragment_index, *inst, + (fragment->hasPaddr() ? "has paddr" : "no paddr"), + (fragment->hasPaddr() ? fragment->getPaddr() : 0)); + + Addr fragment_addr = fragment->getVaddr(); + unsigned int fragment_size = fragment->getSize(); + + uint8_t *request_data = NULL; + + if (!isLoad) { + /* Split data for Packets. Will become the property of the + * outgoing Packets */ + request_data = new uint8_t[fragment_size]; + std::memcpy(request_data, data + (fragment_addr - base_addr), + fragment_size); + } + + assert(fragment->hasPaddr()); + + PacketPtr fragment_packet = + makePacketForRequest(*fragment, isLoad, this, request_data); + + fragmentPackets.push_back(fragment_packet); + } + + /* Might as well make the overall/response packet here */ + /* Get the physical address for the whole request/packet from the first + * fragment */ + request.setPaddr(fragmentRequests[0]->getPaddr()); + makePacket(); +} + +void +LSQ::SplitDataRequest::startAddrTranslation() +{ + setState(LSQ::LSQRequest::InTranslation); + + makeFragmentRequests(); + + numInTranslationFragments = 0; + numTranslatedFragments = 0; + + /* @todo, just do these in sequence for now with + * a loop of: + * do { + * sendNextFragmentToTranslation ; translateTiming ; finish + * } while (numTranslatedFragments != numFragments); + */ + + /* Do first translation */ + sendNextFragmentToTranslation(); +} + +PacketPtr +LSQ::SplitDataRequest::getHeadPacket() +{ + assert(numIssuedFragments < numFragments); + + return fragmentPackets[numIssuedFragments]; +} + +void +LSQ::SplitDataRequest::stepToNextPacket() +{ + assert(numIssuedFragments < numFragments); + + numIssuedFragments++; +} + +void +LSQ::SplitDataRequest::retireResponse(PacketPtr response) +{ + assert(numRetiredFragments < numFragments); + + DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d" + " offset: 0x%x (retired fragment num: %d) %s\n", + response->req->getVaddr(), response->req->getSize(), + request.getVaddr() - response->req->getVaddr(), + numRetiredFragments, + (fault == NoFault ? "" : fault->name())); + + numRetiredFragments++; + + if (skipped) { + /* Skip because we already knew the request had faulted or been + * skipped */ + DPRINTFS(MinorMem, (&port), "Skipping this fragment\n"); + } else if (response->isError()) { + /* Mark up the error and leave to execute to handle it */ + DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n"); + setSkipped(); + packet->copyError(response); + } else { + if (isLoad) { + if (!data) { + /* For a split transfer, a Packet must be constructed + * to contain all returning data. This is that packet's + * data */ + data = new uint8_t[request.getSize()]; + } + + /* Populate the portion of the overall response data represented + * by the response fragment */ + std::memcpy( + data + (response->req->getVaddr() - request.getVaddr()), + response->getPtr<uint8_t>(), + response->req->getSize()); + } + } + + /* Complete early if we're skipping are no more in-flight accesses */ + if (skipped && !hasPacketsInMemSystem()) { + DPRINTFS(MinorMem, (&port), "Completed skipped burst\n"); + setState(Complete); + if (packet->needsResponse()) + packet->makeResponse(); + } + + if (numRetiredFragments == numFragments) + setState(Complete); + + if (!skipped && isComplete()) { + DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL); + + DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d" + " needsResponse: %d packetSize: %s requestSize: %s responseSize:" + " %s\n", packet->isRead(), packet->isWrite(), + packet->needsResponse(), packet->getSize(), request.getSize(), + response->getSize()); + + /* A request can become complete by several paths, this is a sanity + * check to make sure the packet's data is created */ + if (!data) { + data = new uint8_t[request.getSize()]; + } + + if (isLoad) { + DPRINTFS(MinorMem, (&port), "Copying read data\n"); + std::memcpy(packet->getPtr<uint8_t>(), data, request.getSize()); + } + packet->makeResponse(); + } + + /* Packets are all deallocated together in ~SplitLSQRequest */ +} + +void +LSQ::SplitDataRequest::sendNextFragmentToTranslation() +{ + unsigned int fragment_index = numTranslatedFragments; + + ThreadContext *thread = port.cpu.getContext( + inst->id.threadId); + + DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n", + fragment_index); + + port.numAccessesInDTLB++; + numInTranslationFragments++; + + thread->getDTBPtr()->translateTiming( + fragmentRequests[fragment_index], thread, this, (isLoad ? + BaseTLB::Read : BaseTLB::Write)); +} + +bool +LSQ::StoreBuffer::canInsert() const +{ + /* @todo, support store amalgamation */ + return slots.size() < numSlots; +} + +void +LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request) +{ + auto found = std::find(slots.begin(), slots.end(), request); + + if (found != slots.end()) { + DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n", + request, *found, *(request->inst)); + slots.erase(found); + + delete request; + } +} + +void +LSQ::StoreBuffer::insert(LSQRequestPtr request) +{ + if (!canInsert()) { + warn("%s: store buffer insertion without space to insert from" + " inst: %s\n", name(), *(request->inst)); + } + + DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request); + + numUnissuedAccesses++; + + if (request->state != LSQRequest::Complete) + request->setState(LSQRequest::StoreInStoreBuffer); + + slots.push_back(request); + + /* Let's try and wake up the processor for the next cycle to step + * the store buffer */ + lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); +} + +LSQ::AddrRangeCoverage +LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request, + unsigned int &found_slot) +{ + unsigned int slot_index = slots.size() - 1; + auto i = slots.rbegin(); + AddrRangeCoverage ret = NoAddrRangeCoverage; + + /* Traverse the store buffer in reverse order (most to least recent) + * and try to find a slot whose address range overlaps this request */ + while (ret == NoAddrRangeCoverage && i != slots.rend()) { + LSQRequestPtr slot = *i; + + if (slot->packet) { + AddrRangeCoverage coverage = slot->containsAddrRangeOf(request); + + if (coverage != NoAddrRangeCoverage) { + DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:" + " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n", + slot_index, coverage, + request->request.getPaddr(), request->request.getSize(), + slot->request.getPaddr(), slot->request.getSize()); + + found_slot = slot_index; + ret = coverage; + } + } + + i++; + slot_index--; + } + + return ret; +} + +/** Fill the given packet with appropriate date from slot slot_number */ +void +LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load, + unsigned int slot_number) +{ + assert(slot_number < slots.size()); + assert(load->packet); + assert(load->isLoad); + + LSQRequestPtr store = slots[slot_number]; + + assert(store->packet); + assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage); + + Addr load_addr = load->request.getPaddr(); + Addr store_addr = store->request.getPaddr(); + Addr addr_offset = load_addr - store_addr; + + unsigned int load_size = load->request.getSize(); + + DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer" + " slot: %d addr: 0x%x addressOffset: 0x%x\n", + load_size, load_addr, slot_number, + store_addr, addr_offset); + + void *load_packet_data = load->packet->getPtr<void>(); + void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset; + + std::memcpy(load_packet_data, store_packet_data, load_size); +} + +void +LSQ::StoreBuffer::step() +{ + DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n", + numUnissuedAccesses); + + if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) { + /* Clear all the leading barriers */ + while (!slots.empty() && + slots.front()->isComplete() && slots.front()->isBarrier()) + { + LSQRequestPtr barrier = slots.front(); + + DPRINTF(MinorMem, "Clearing barrier for inst: %s\n", + *(barrier->inst)); + + numUnissuedAccesses--; + lsq.clearMemBarrier(barrier->inst); + slots.pop_front(); + + delete barrier; + } + + auto i = slots.begin(); + bool issued = true; + unsigned int issue_count = 0; + + /* Skip trying if the memory system is busy */ + if (lsq.state == LSQ::MemoryNeedsRetry) + issued = false; + + /* Try to issue all stores in order starting from the head + * of the queue. Responses are allowed to be retired + * out of order */ + while (issued && + issue_count < storeLimitPerCycle && + lsq.canSendToMemorySystem() && + i != slots.end()) + { + LSQRequestPtr request = *i; + + DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d" + " state: %s\n", + *(request->inst), request->sentAllPackets(), + request->state); + + if (request->isBarrier() && request->isComplete()) { + /* Give up at barriers */ + issued = false; + } else if (!(request->state == LSQRequest::StoreBufferIssuing && + request->sentAllPackets())) + { + DPRINTF(MinorMem, "Trying to send request: %s to memory" + " system\n", *(request->inst)); + + if (lsq.tryToSend(request)) { + /* Barrier are accounted for as they are cleared from + * the queue, not after their transfers are complete */ + if (!request->isBarrier()) + numUnissuedAccesses--; + issue_count++; + } else { + /* Don't step on to the next store buffer entry if this + * one hasn't issued all its packets as the store + * buffer must still enforce ordering */ + issued = false; + } + } + i++; + } + } +} + +void +LSQ::completeMemBarrierInst(MinorDynInstPtr inst, + bool committed) +{ + if (committed) { + /* Not already sent to the store buffer as a store request? */ + if (!inst->inStoreBuffer) { + /* Insert an entry into the store buffer to tick off barriers + * until there are none in flight */ + storeBuffer.insert(new BarrierDataRequest(*this, inst)); + } + } else { + /* Clear the barrier anyway if it wasn't actually committed */ + clearMemBarrier(inst); + } +} + +void +LSQ::StoreBuffer::minorTrace() const +{ + unsigned int size = slots.size(); + unsigned int i = 0; + std::ostringstream os; + + while (i < size) { + LSQRequestPtr request = slots[i]; + + request->reportData(os); + + i++; + if (i < numSlots) + os << ','; + } + + while (i < numSlots) { + os << '-'; + + i++; + if (i < numSlots) + os << ','; + } + + MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(), + numUnissuedAccesses); +} + +void +LSQ::tryToSendToTransfers(LSQRequestPtr request) +{ + if (state == MemoryNeedsRetry) { + DPRINTF(MinorMem, "Request needs retry, not issuing to" + " memory until retry arrives\n"); + return; + } + + if (request->state == LSQRequest::InTranslation) { + DPRINTF(MinorMem, "Request still in translation, not issuing to" + " memory\n"); + return; + } + + assert(request->state == LSQRequest::Translated || + request->state == LSQRequest::RequestIssuing || + request->state == LSQRequest::Failed || + request->state == LSQRequest::Complete); + + if (requests.empty() || requests.front() != request) { + DPRINTF(MinorMem, "Request not at front of requests queue, can't" + " issue to memory\n"); + return; + } + + if (transfers.unreservedRemainingSpace() == 0) { + DPRINTF(MinorMem, "No space to insert request into transfers" + " queue\n"); + return; + } + + if (request->isComplete() || request->state == LSQRequest::Failed) { + DPRINTF(MinorMem, "Passing a %s transfer on to transfers" + " queue\n", (request->isComplete() ? "completed" : "failed")); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + return; + } + + if (!execute.instIsRightStream(request->inst)) { + /* Wrong stream, try to abort the transfer but only do so if + * there are no packets in flight */ + if (request->hasPacketsInMemSystem()) { + DPRINTF(MinorMem, "Request's inst. is from the wrong stream," + " waiting for responses before aborting request\n"); + } else { + DPRINTF(MinorMem, "Request's inst. is from the wrong stream," + " aborting request\n"); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + } + return; + } + + if (request->fault != NoFault) { + if (request->inst->staticInst->isPrefetch()) { + DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n"); + } + DPRINTF(MinorMem, "Moving faulting request into the transfers" + " queue\n"); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + return; + } + + bool is_load = request->isLoad; + bool is_llsc = request->request.isLLSC(); + bool is_swap = request->request.isSwap(); + bool bufferable = !(request->request.isUncacheable() || + is_llsc || is_swap); + + if (is_load) { + if (numStoresInTransfers != 0) { + DPRINTF(MinorMem, "Load request with stores still in transfers" + " queue, stalling\n"); + return; + } + } else { + /* Store. Can it be sent to the store buffer? */ + if (bufferable && !request->request.isMmappedIpr()) { + request->setState(LSQRequest::StoreToStoreBuffer); + moveFromRequestsToTransfers(request); + DPRINTF(MinorMem, "Moving store into transfers queue\n"); + return; + } + } + + /* Check if this is the head instruction (and so must be executable as + * its stream sequence number was checked above) for loads which must + * not be speculatively issued and stores which must be issued here */ + if (!bufferable) { + if (!execute.instIsHeadInst(request->inst)) { + DPRINTF(MinorMem, "Memory access not the head inst., can't be" + " sure it can be performed, not issuing\n"); + return; + } + + unsigned int forwarding_slot = 0; + + if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) != + NoAddrRangeCoverage) + { + DPRINTF(MinorMem, "Memory access can receive forwarded data" + " from the store buffer, need to wait for store buffer to" + " drain\n"); + return; + } + } + + /* True: submit this packet to the transfers queue to be sent to the + * memory system. + * False: skip the memory and push a packet for this request onto + * requests */ + bool do_access = true; + + if (!is_llsc) { + /* Check for match in the store buffer */ + if (is_load) { + unsigned int forwarding_slot = 0; + AddrRangeCoverage forwarding_result = + storeBuffer.canForwardDataToLoad(request, + forwarding_slot); + + switch (forwarding_result) { + case FullAddrRangeCoverage: + /* Forward data from the store buffer into this request and + * repurpose this request's packet into a response packet */ + storeBuffer.forwardStoreData(request, forwarding_slot); + request->packet->makeResponse(); + + /* Just move between queues, no access */ + do_access = false; + break; + case PartialAddrRangeCoverage: + DPRINTF(MinorMem, "Load partly satisfied by store buffer" + " data. Must wait for the store to complete\n"); + return; + break; + case NoAddrRangeCoverage: + DPRINTF(MinorMem, "No forwardable data from store buffer\n"); + /* Fall through to try access */ + break; + } + } + } else { + if (!canSendToMemorySystem()) { + DPRINTF(MinorMem, "Can't send request to memory system yet\n"); + return; + } + + SimpleThread &thread = *cpu.threads[request->inst->id.threadId]; + + TheISA::PCState old_pc = thread.pcState(); + ExecContext context(cpu, thread, execute, request->inst); + + /* Handle LLSC requests and tests */ + if (is_load) { + TheISA::handleLockedRead(&context, &request->request); + } else { + do_access = TheISA::handleLockedWrite(&context, + &request->request, cacheBlockMask); + + if (!do_access) { + DPRINTF(MinorMem, "Not perfoming a memory " + "access for store conditional\n"); + } + } + thread.pcState(old_pc); + } + + /* See the do_access comment above */ + if (do_access) { + if (!canSendToMemorySystem()) { + DPRINTF(MinorMem, "Can't send request to memory system yet\n"); + return; + } + + /* Remember if this is an access which can't be idly + * discarded by an interrupt */ + if (!bufferable) { + numAccessesIssuedToMemory++; + request->issuedToMemory = true; + } + + if (tryToSend(request)) + moveFromRequestsToTransfers(request); + } else { + request->setState(LSQRequest::Complete); + moveFromRequestsToTransfers(request); + } +} + +bool +LSQ::tryToSend(LSQRequestPtr request) +{ + bool ret = false; + + if (!canSendToMemorySystem()) { + DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n", + *(request->inst)); + } else { + PacketPtr packet = request->getHeadPacket(); + + DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n", + *(request->inst), packet->req->getVaddr()); + + /* The sender state of the packet *must* be an LSQRequest + * so the response can be correctly handled */ + assert(packet->findNextSenderState<LSQRequest>()); + + if (request->request.isMmappedIpr()) { + ThreadContext *thread = + cpu.getContext(request->request.threadId()); + + if (request->isLoad) { + DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst)); + TheISA::handleIprRead(thread, packet); + } else { + DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst)); + TheISA::handleIprWrite(thread, packet); + } + + request->stepToNextPacket(); + ret = request->sentAllPackets(); + + if (!ret) { + DPRINTF(MinorMem, "IPR access has another packet: %s\n", + *(request->inst)); + } + + if (ret) + request->setState(LSQRequest::Complete); + else + request->setState(LSQRequest::RequestIssuing); + } else if (dcachePort.sendTimingReq(packet)) { + DPRINTF(MinorMem, "Sent data memory request\n"); + + numAccessesInMemorySystem++; + + request->stepToNextPacket(); + + ret = request->sentAllPackets(); + + switch (request->state) { + case LSQRequest::Translated: + case LSQRequest::RequestIssuing: + /* Fully or partially issued a request in the transfers + * queue */ + request->setState(LSQRequest::RequestIssuing); + break; + case LSQRequest::StoreInStoreBuffer: + case LSQRequest::StoreBufferIssuing: + /* Fully or partially issued a request in the store + * buffer */ + request->setState(LSQRequest::StoreBufferIssuing); + break; + default: + assert(false); + break; + } + + state = MemoryRunning; + } else { + DPRINTF(MinorMem, + "Sending data memory request - needs retry\n"); + + /* Needs to be resent, wait for that */ + state = MemoryNeedsRetry; + retryRequest = request; + + switch (request->state) { + case LSQRequest::Translated: + case LSQRequest::RequestIssuing: + request->setState(LSQRequest::RequestNeedsRetry); + break; + case LSQRequest::StoreInStoreBuffer: + case LSQRequest::StoreBufferIssuing: + request->setState(LSQRequest::StoreBufferNeedsRetry); + break; + default: + assert(false); + break; + } + } + } + + return ret; +} + +void +LSQ::moveFromRequestsToTransfers(LSQRequestPtr request) +{ + assert(!requests.empty() && requests.front() == request); + assert(transfers.unreservedRemainingSpace() != 0); + + /* Need to count the number of stores in the transfers + * queue so that loads know when their store buffer forwarding + * results will be correct (only when all those stores + * have reached the store buffer) */ + if (!request->isLoad) + numStoresInTransfers++; + + requests.pop(); + transfers.push(request); +} + +bool +LSQ::canSendToMemorySystem() +{ + return state == MemoryRunning && + numAccessesInMemorySystem < inMemorySystemLimit; +} + +bool +LSQ::recvTimingResp(PacketPtr response) +{ + LSQRequestPtr request = + safe_cast<LSQRequestPtr>(response->popSenderState()); + + DPRINTF(MinorMem, "Received response packet inst: %s" + " addr: 0x%x cmd: %s\n", + *(request->inst), response->getAddr(), + response->cmd.toString()); + + numAccessesInMemorySystem--; + + if (response->isError()) { + DPRINTF(MinorMem, "Received error response packet: %s\n", + *request->inst); + } + + switch (request->state) { + case LSQRequest::RequestIssuing: + case LSQRequest::RequestNeedsRetry: + /* Response to a request from the transfers queue */ + request->retireResponse(response); + + DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n", + request->hasPacketsInMemSystem(), request->isComplete()); + + break; + case LSQRequest::StoreBufferIssuing: + case LSQRequest::StoreBufferNeedsRetry: + /* Response to a request from the store buffer */ + request->retireResponse(response); + + /* Remove completed requests unless they are barrier (which will + * need to be removed in order */ + if (request->isComplete()) { + if (!request->isBarrier()) { + storeBuffer.deleteRequest(request); + } else { + DPRINTF(MinorMem, "Completed transfer for barrier: %s" + " leaving the request as it is also a barrier\n", + *(request->inst)); + } + } + break; + default: + /* Shouldn't be allowed to receive a response from another + * state */ + assert(false); + break; + } + + /* We go to idle even if there are more things in the requests queue + * as it's the job of step to actually step us on to the next + * transaction */ + + /* Let's try and wake up the processor for the next cycle */ + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + + /* Never busy */ + return true; +} + +void +LSQ::recvRetry() +{ + DPRINTF(MinorMem, "Received retry request\n"); + + assert(state == MemoryNeedsRetry); + + switch (retryRequest->state) { + case LSQRequest::RequestNeedsRetry: + /* Retry in the requests queue */ + retryRequest->setState(LSQRequest::Translated); + break; + case LSQRequest::StoreBufferNeedsRetry: + /* Retry in the store buffer */ + retryRequest->setState(LSQRequest::StoreInStoreBuffer); + break; + default: + assert(false); + } + + /* Set state back to MemoryRunning so that the following + * tryToSend can actually send. Note that this won't + * allow another transfer in as tryToSend should + * issue a memory request and either succeed for this + * request or return the LSQ back to MemoryNeedsRetry */ + state = MemoryRunning; + + /* Try to resend the request */ + if (tryToSend(retryRequest)) { + /* Successfully sent, need to move the request */ + switch (retryRequest->state) { + case LSQRequest::RequestIssuing: + /* In the requests queue */ + moveFromRequestsToTransfers(retryRequest); + break; + case LSQRequest::StoreBufferIssuing: + /* In the store buffer */ + storeBuffer.numUnissuedAccesses--; + break; + default: + assert(false); + break; + } + } + + retryRequest = NULL; +} + +LSQ::LSQ(std::string name_, std::string dcache_port_name_, + MinorCPU &cpu_, Execute &execute_, + unsigned int in_memory_system_limit, unsigned int line_width, + unsigned int requests_queue_size, unsigned int transfers_queue_size, + unsigned int store_buffer_size, + unsigned int store_buffer_cycle_store_limit) : + Named(name_), + cpu(cpu_), + execute(execute_), + dcachePort(dcache_port_name_, *this, cpu_), + lastMemBarrier(0), + state(MemoryRunning), + inMemorySystemLimit(in_memory_system_limit), + lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)), + requests(name_ + ".requests", "addr", requests_queue_size), + transfers(name_ + ".transfers", "addr", transfers_queue_size), + storeBuffer(name_ + ".storeBuffer", + *this, store_buffer_size, store_buffer_cycle_store_limit), + numAccessesInMemorySystem(0), + numAccessesInDTLB(0), + numStoresInTransfers(0), + numAccessesIssuedToMemory(0), + retryRequest(NULL), + cacheBlockMask(~(cpu_.cacheLineSize() - 1)) +{ + if (in_memory_system_limit < 1) { + fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_, + in_memory_system_limit); + } + + if (store_buffer_cycle_store_limit < 1) { + fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be" + " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit); + } + + if (requests_queue_size < 1) { + fatal("%s: executeLSQRequestsQueueSize must be" + " >= 1 (%d)\n", name_, requests_queue_size); + } + + if (transfers_queue_size < 1) { + fatal("%s: executeLSQTransfersQueueSize must be" + " >= 1 (%d)\n", name_, transfers_queue_size); + } + + if (store_buffer_size < 1) { + fatal("%s: executeLSQStoreBufferSize must be" + " >= 1 (%d)\n", name_, store_buffer_size); + } + + if ((lineWidth & (lineWidth - 1)) != 0) { + fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth); + } +} + +LSQ::~LSQ() +{ } + +LSQ::LSQRequest::~LSQRequest() +{ + if (packet) + delete packet; + if (data) + delete [] data; +} + +/** + * Step the memory access mechanism on to its next state. In reality, most + * of the stepping is done by the callbacks on the LSQ but this + * function is responsible for issuing memory requests lodged in the + * requests queue. + */ +void +LSQ::step() +{ + /* Try to move address-translated requests between queues and issue + * them */ + if (!requests.empty()) + tryToSendToTransfers(requests.front()); + + storeBuffer.step(); +} + +LSQ::LSQRequestPtr +LSQ::findResponse(MinorDynInstPtr inst) +{ + LSQ::LSQRequestPtr ret = NULL; + + if (!transfers.empty()) { + LSQRequestPtr request = transfers.front(); + + /* Same instruction and complete access or a store that's + * capable of being moved to the store buffer */ + if (request->inst->id == inst->id) { + if (request->isComplete() || + (request->state == LSQRequest::StoreToStoreBuffer && + storeBuffer.canInsert())) + { + ret = request; + } + } + } + + if (ret) { + DPRINTF(MinorMem, "Found matching memory response for inst: %s\n", + *inst); + } else { + DPRINTF(MinorMem, "No matching memory response for inst: %s\n", + *inst); + } + + return ret; +} + +void +LSQ::popResponse(LSQ::LSQRequestPtr response) +{ + assert(!transfers.empty() && transfers.front() == response); + + transfers.pop(); + + if (!response->isLoad) + numStoresInTransfers--; + + if (response->issuedToMemory) + numAccessesIssuedToMemory--; + + if (response->state != LSQRequest::StoreInStoreBuffer) { + DPRINTF(MinorMem, "Deleting %s request: %s\n", + (response->isLoad ? "load" : "store"), + *(response->inst)); + + delete response; + } +} + +void +LSQ::sendStoreToStoreBuffer(LSQRequestPtr request) +{ + assert(request->state == LSQRequest::StoreToStoreBuffer); + + DPRINTF(MinorMem, "Sending store: %s to store buffer\n", + *(request->inst)); + + request->inst->inStoreBuffer = true; + + storeBuffer.insert(request); +} + +bool +LSQ::isDrained() +{ + return requests.empty() && transfers.empty() && + storeBuffer.isDrained(); +} + +bool +LSQ::needsToTick() +{ + bool ret = false; + + if (canSendToMemorySystem()) { + bool have_translated_requests = !requests.empty() && + requests.front()->state != LSQRequest::InTranslation && + transfers.unreservedRemainingSpace() != 0; + + ret = have_translated_requests || + storeBuffer.numUnissuedStores() != 0; + } + + if (ret) + DPRINTF(Activity, "Need to tick\n"); + + return ret; +} + +void +LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, + unsigned int size, Addr addr, unsigned int flags, uint64_t *res) +{ + bool needs_burst = transferNeedsBurst(addr, size, lineWidth); + LSQRequestPtr request; + + /* Copy given data into the request. The request will pass this to the + * packet and then it will own the data */ + uint8_t *request_data = NULL; + + DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:" + " 0x%x%s lineWidth : 0x%x\n", + (isLoad ? "load" : "store"), addr, size, flags, + (needs_burst ? " (needs burst)" : ""), lineWidth); + + if (!isLoad) { + /* request_data becomes the property of a ...DataRequest (see below) + * and destroyed by its destructor */ + request_data = new uint8_t[size]; + if (flags & Request::CACHE_BLOCK_ZERO) { + /* For cache zeroing, just use zeroed data */ + std::memset(request_data, 0, size); + } else { + std::memcpy(request_data, data, size); + } + } + + if (needs_burst) { + request = new SplitDataRequest( + *this, inst, isLoad, request_data, res); + } else { + request = new SingleDataRequest( + *this, inst, isLoad, request_data, res); + } + + if (inst->traceData) + inst->traceData->setAddr(addr); + + request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0); + request->request.setVirt(0 /* asid */, + addr, size, flags, cpu.instMasterId(), + /* I've no idea why we need the PC, but give it */ + inst->pc.instAddr()); + + requests.push(request); + request->startAddrTranslation(); +} + +void +LSQ::pushFailedRequest(MinorDynInstPtr inst) +{ + LSQRequestPtr request = new FailedDataRequest(*this, inst); + requests.push(request); +} + +void +LSQ::minorTrace() const +{ + MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d" + " lastMemBarrier=%d\n", + state, numAccessesInDTLB, numAccessesInMemorySystem, + numStoresInTransfers, lastMemBarrier); + requests.minorTrace(); + transfers.minorTrace(); + storeBuffer.minorTrace(); +} + +LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_, + unsigned int store_buffer_size, + unsigned int store_limit_per_cycle) : + Named(name_), lsq(lsq_), + numSlots(store_buffer_size), + storeLimitPerCycle(store_limit_per_cycle), + slots(), + numUnissuedAccesses(0) +{ +} + +PacketPtr +makePacketForRequest(Request &request, bool isLoad, + Packet::SenderState *sender_state, PacketDataPtr data) +{ + MemCmd command; + + /* Make a ret with the right command type to match the request */ + if (request.isLLSC()) { + command = (isLoad ? MemCmd::LoadLockedReq : MemCmd::StoreCondReq); + } else if (request.isSwap()) { + command = MemCmd::SwapReq; + } else { + command = (isLoad ? MemCmd::ReadReq : MemCmd::WriteReq); + } + + PacketPtr ret = new Packet(&request, command); + + if (sender_state) + ret->pushSenderState(sender_state); + + if (isLoad) + ret->allocate(); + else + ret->dataDynamicArray(data); + + return ret; +} + +void +LSQ::issuedMemBarrierInst(MinorDynInstPtr inst) +{ + assert(inst->isInst() && inst->staticInst->isMemBarrier()); + assert(inst->id.execSeqNum > lastMemBarrier); + + /* Remember the barrier. We only have a notion of one + * barrier so this may result in some mem refs being + * delayed if they are between barriers */ + lastMemBarrier = inst->id.execSeqNum; +} + +void +LSQ::LSQRequest::makePacket() +{ + /* Make the function idempotent */ + if (packet) + return; + + packet = makePacketForRequest(request, isLoad, this, data); + /* Null the ret data so we know not to deallocate it when the + * ret is destroyed. The data now belongs to the ret and + * the ret is responsible for its destruction */ + data = NULL; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::MemoryState state) +{ + switch (state) { + case LSQ::MemoryRunning: + os << "MemoryRunning"; + break; + case LSQ::MemoryNeedsRetry: + os << "MemoryNeedsRetry"; + break; + default: + os << "MemoryState-" << static_cast<int>(state); + break; + } + return os; +} + +void +LSQ::recvTimingSnoopReq(PacketPtr pkt) +{ + /* LLSC operations in Minor can't be speculative and are executed from + * the head of the requests queue. We shouldn't need to do more than + * this action on snoops. */ + + /* THREAD */ + TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask); +} + +} |