diff options
Diffstat (limited to 'src/cpu/minor/lsq.hh')
-rw-r--r-- | src/cpu/minor/lsq.hh | 722 |
1 files changed, 722 insertions, 0 deletions
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh new file mode 100644 index 000000000..0998395e0 --- /dev/null +++ b/src/cpu/minor/lsq.hh @@ -0,0 +1,722 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * A load/store queue that allows outstanding reads and writes. + * + */ + +#ifndef __CPU_MINOR_NEW_LSQ_HH__ +#define __CPU_MINOR_NEW_LSQ_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/minor/trace.hh" + +namespace Minor +{ + +/* Forward declaration */ +class Execute; + +class LSQ : public Named +{ + protected: + /** My owner(s) */ + MinorCPU &cpu; + Execute &execute; + + protected: + /** State of memory access for head access. */ + enum MemoryState + { + MemoryRunning, /* Default. Step dcache queues when possible. */ + MemoryNeedsRetry /* Request rejected, will be asked to retry */ + }; + + /** Print MemoryState values as shown in the enum definition */ + friend std::ostream &operator <<(std::ostream &os, + MemoryState state); + + /** Coverage of one address range with another */ + enum AddrRangeCoverage + { + PartialAddrRangeCoverage, /* Two ranges partly overlap */ + FullAddrRangeCoverage, /* One range fully covers another */ + NoAddrRangeCoverage /* Two ranges are disjoint */ + }; + + /** Exposable data port */ + class DcachePort : public MinorCPU::MinorCPUPort + { + protected: + /** My owner */ + LSQ &lsq; + + public: + DcachePort(std::string name, LSQ &lsq_, MinorCPU &cpu) : + MinorCPU::MinorCPUPort(name, cpu), lsq(lsq_) + { } + + protected: + bool recvTimingResp(PacketPtr pkt) + { return lsq.recvTimingResp(pkt); } + + void recvRetry() { lsq.recvRetry(); } + + void recvTimingSnoopReq(PacketPtr pkt) + { return lsq.recvTimingSnoopReq(pkt); } + }; + + DcachePort dcachePort; + + public: + /** Derived SenderState to carry data access info. through address + * translation, the queues in this port and back from the memory + * system. */ + class LSQRequest : + public BaseTLB::Translation, /* For TLB lookups */ + public Packet::SenderState /* For packing into a Packet */ + { + public: + /** Owning port */ + LSQ &port; + + /** Instruction which made this request */ + MinorDynInstPtr inst; + + /** Load/store indication used for building packet. This isn't + * carried by Request so we need to keep it here */ + bool isLoad; + + /** Dynamically allocated and populated data carried for + * building write packets */ + PacketDataPtr data; + + /* Requests carry packets on their way to the memory system. + * When a Packet returns from the memory system, its + * request needs to have its packet updated as this + * may have changed in flight */ + PacketPtr packet; + + /** The underlying request of this LSQRequest */ + Request request; + + /** Fault generated performing this request */ + Fault fault; + + /** Res from pushRequest */ + uint64_t *res; + + /** Was skipped. Set to indicate any reason (faulted, bad + * stream sequence number, in a fault shadow) that this + * request did not perform a memory transfer */ + bool skipped; + + /** This in an access other than a normal cacheable load + * that's visited the memory system */ + bool issuedToMemory; + + enum LSQRequestState + { + NotIssued, /* Newly created */ + InTranslation, /* TLB accessed, no reply yet */ + Translated, /* Finished address translation */ + Failed, /* The starting start of FailedDataRequests */ + RequestIssuing, /* Load/store issued to memory in the requests + queue */ + StoreToStoreBuffer, /* Store in transfers on its way to the + store buffer */ + RequestNeedsRetry, /* Retry needed for load */ + StoreInStoreBuffer, /* Store in the store buffer, before issuing + a memory transfer */ + StoreBufferIssuing, /* Store in store buffer and has been + issued */ + StoreBufferNeedsRetry, /* Retry needed for store */ + /* All completed states. Includes + completed loads, TLB faults and skipped requests whose + seqNum's no longer match */ + Complete + }; + + LSQRequestState state; + + protected: + /** BaseTLB::Translation interface */ + void markDelayed() { } + + public: + LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, + PacketDataPtr data_ = NULL, uint64_t *res_ = NULL); + + virtual ~LSQRequest(); + + public: + /** Make a packet to use with the memory transaction */ + void makePacket(); + + /** Was no memory access attempted for this request? */ + bool skippedMemAccess() { return skipped; } + + /** Set this request as having been skipped before a memory + * transfer was attempt */ + void setSkipped() { skipped = true; } + + /** Does address range req1 (req1_addr to req1_addr + req1_size - 1) + * fully cover, partially cover or not cover at all the range req2 */ + static AddrRangeCoverage containsAddrRangeOf( + Addr req1_addr, unsigned int req1_size, + Addr req2_addr, unsigned int req2_size); + + /** Does this request's address range fully cover the range + * of other_request? */ + AddrRangeCoverage containsAddrRangeOf(LSQRequest *other_request); + + /** Start the address translation process for this request. This + * will issue a translation request to the TLB. */ + virtual void startAddrTranslation() = 0; + + /** Get the next packet to issue for this request. For split + * transfers, it will be necessary to step through the available + * packets by calling do { getHeadPacket ; stepToNextPacket } while + * (!sentAllPackets) and by retiring response using retireResponse */ + virtual PacketPtr getHeadPacket() = 0; + + /** Step to the next packet for the next call to getHeadPacket */ + virtual void stepToNextPacket() = 0; + + /** Have all packets been sent? */ + virtual bool sentAllPackets() = 0; + + /** True if this request has any issued packets in the memory + * system and so can't be interrupted until it gets responses */ + virtual bool hasPacketsInMemSystem() = 0; + + /** Retire a response packet into the LSQRequest packet possibly + * completing this transfer */ + virtual void retireResponse(PacketPtr packet_) = 0; + + /** Is this a request a barrier? */ + virtual bool isBarrier(); + + /** This request, once processed by the requests/transfers + * queues, will need to go to the store buffer */ + bool needsToBeSentToStoreBuffer(); + + /** Set state and output trace output */ + void setState(LSQRequestState new_state); + + /** Has this request been completed. This includes *all* reasons + * for completion: successful transfers, faults, skipped because + * of preceding faults */ + bool isComplete() const; + + /** MinorTrace report interface */ + void reportData(std::ostream &os) const; + }; + + typedef LSQRequest *LSQRequestPtr; + + friend std::ostream & operator <<(std::ostream &os, + AddrRangeCoverage state); + + friend std::ostream & operator <<(std::ostream &os, + LSQRequest::LSQRequestState state); + + protected: + /** Special request types that don't actually issue memory requests */ + class SpecialDataRequest : public LSQRequest + { + protected: + /** TLB interace */ + void finish(Fault fault_, RequestPtr request_, ThreadContext *tc, + BaseTLB::Mode mode) + { } + + public: + /** Send single translation request */ + void startAddrTranslation() { } + + /** Get the head packet as counted by numIssuedFragments */ + PacketPtr getHeadPacket() + { fatal("No packets in a SpecialDataRequest"); } + + /** Step on numIssuedFragments */ + void stepToNextPacket() { } + + /** Has no packets to send */ + bool sentAllPackets() { return true; } + + /** Never sends any requests */ + bool hasPacketsInMemSystem() { return false; } + + /** Keep the given packet as the response packet + * LSQRequest::packet */ + void retireResponse(PacketPtr packet_) { } + + public: + SpecialDataRequest(LSQ &port_, MinorDynInstPtr inst_) : + /* Say this is a load, not actually relevant */ + LSQRequest(port_, inst_, true, NULL, 0) + { } + }; + + /** FailedDataRequest represents requests from instructions that + * failed their predicates but need to ride the requests/transfers + * queues to maintain trace ordering */ + class FailedDataRequest : public SpecialDataRequest + { + public: + FailedDataRequest(LSQ &port_, MinorDynInstPtr inst_) : + SpecialDataRequest(port_, inst_) + { state = Failed; } + }; + + /** Request for doing barrier accounting in the store buffer. Not + * for use outside that unit */ + class BarrierDataRequest : public SpecialDataRequest + { + public: + bool isBarrier() { return true; } + + public: + BarrierDataRequest(LSQ &port_, MinorDynInstPtr inst_) : + SpecialDataRequest(port_, inst_) + { state = Complete; } + }; + + /** SingleDataRequest is used for requests that don't fragment */ + class SingleDataRequest : public LSQRequest + { + protected: + /** TLB interace */ + void finish(Fault fault_, RequestPtr request_, ThreadContext *tc, + BaseTLB::Mode mode); + + /** Has my only packet been sent to the memory system but has not + * yet been responded to */ + bool packetInFlight; + + /** Has the packet been at least sent to the memory system? */ + bool packetSent; + + public: + /** Send single translation request */ + void startAddrTranslation(); + + /** Get the head packet as counted by numIssuedFragments */ + PacketPtr getHeadPacket() { return packet; } + + /** Remember that the packet has been sent */ + void stepToNextPacket() { packetInFlight = true; packetSent = true; } + + /** Has packet been sent */ + bool hasPacketsInMemSystem() { return packetInFlight; } + + /** packetInFlight can become false again, so need to check + * packetSent */ + bool sentAllPackets() { return packetSent; } + + /** Keep the given packet as the response packet + * LSQRequest::packet */ + void retireResponse(PacketPtr packet_); + + public: + SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_, + bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL) : + LSQRequest(port_, inst_, isLoad_, data_, res_), + packetInFlight(false), + packetSent(false) + { } + }; + + class SplitDataRequest : public LSQRequest + { + protected: + /** Event to step between translations */ + class TranslationEvent : public Event + { + protected: + SplitDataRequest &owner; + + public: + TranslationEvent(SplitDataRequest &owner_) + : owner(owner_) { } + + void process() + { owner.sendNextFragmentToTranslation(); } + }; + + TranslationEvent translationEvent; + protected: + /** Number of fragments this request is split into */ + unsigned int numFragments; + + /** Number of fragments in the address translation mechanism */ + unsigned int numInTranslationFragments; + + /** Number of fragments that have completed address translation, + * (numTranslatedFragments + numInTranslationFragments) <= + * numFragments. When numTranslatedFramgents == numFragments, + * translation is complete */ + unsigned int numTranslatedFragments; + + /** Number of fragments already issued (<= numFragments) */ + unsigned int numIssuedFragments; + + /** Number of fragments retired back to this request */ + unsigned int numRetiredFragments; + + /** Fragment Requests corresponding to the address ranges of + * each fragment */ + std::vector<Request *> fragmentRequests; + + /** Packets matching fragmentRequests to issue fragments to memory */ + std::vector<Packet *> fragmentPackets; + + protected: + /** TLB response interface */ + void finish(Fault fault_, RequestPtr request_, ThreadContext *tc, + BaseTLB::Mode mode); + + public: + SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, + bool isLoad_, PacketDataPtr data_ = NULL, + uint64_t *res_ = NULL); + + ~SplitDataRequest(); + + public: + /** Make all the Requests for this transfer's fragments so that those + * requests can be sent for address translation */ + void makeFragmentRequests(); + + /** Make the packets to go with the requests so they can be sent to + * the memory system */ + void makeFragmentPackets(); + + /** Start a loop of do { sendNextFragmentToTranslation ; + * translateTiming ; finish } while (numTranslatedFragments != + * numFragments) to complete all this requests' fragments' address + * translations */ + void startAddrTranslation(); + + /** Get the head packet as counted by numIssuedFragments */ + PacketPtr getHeadPacket(); + + /** Step on numIssuedFragments */ + void stepToNextPacket(); + + bool hasPacketsInMemSystem() + { return numIssuedFragments != numRetiredFragments; } + + /** Have we stepped past the end of fragmentPackets? */ + bool sentAllPackets() { return numIssuedFragments == numFragments; } + + /** For loads, paste the response data into the main + * response packet */ + void retireResponse(PacketPtr packet_); + + /** Part of the address translation loop, see startAddTranslation */ + void sendNextFragmentToTranslation(); + }; + + /** Store buffer. This contains stores which have been committed + * but whose memory transfers have not yet been issued. Load data + * can be forwarded out of the store buffer */ + class StoreBuffer : public Named + { + public: + /** My owner */ + LSQ &lsq; + + /** Number of slots, this is a bound on the size of slots */ + const unsigned int numSlots; + + /** Maximum number of stores that can be issued per cycle */ + const unsigned int storeLimitPerCycle; + + public: + /** Queue of store requests on their way to memory */ + std::deque<LSQRequestPtr> slots; + + /** Number of occupied slots which have not yet issued a + * memory access */ + unsigned int numUnissuedAccesses; + + public: + StoreBuffer(std::string name_, LSQ &lsq_, + unsigned int store_buffer_size, + unsigned int store_limit_per_cycle); + + public: + /** Can a new request be inserted into the queue? */ + bool canInsert() const; + + /** Delete the given request and free the slot it occupied */ + void deleteRequest(LSQRequestPtr request); + + /** Insert a request at the back of the queue */ + void insert(LSQRequestPtr request); + + /** Look for a store which satisfies the given load. Returns an + * indication whether the forwarding request can be wholly, + * partly or not all all satisfied. If the request can be + * wholly satisfied, the store buffer slot number which can be used + * is returned in found_slot */ + AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request, + unsigned int &found_slot); + + /** Fill the given packet with appropriate date from slot + * slot_number */ + void forwardStoreData(LSQRequestPtr load, unsigned int slot_number); + + /** Number of stores in the store buffer which have not been + * completely issued to the memory system */ + unsigned int numUnissuedStores() { return numUnissuedAccesses; } + + /** Drained if there is absolutely nothing left in the buffer */ + bool isDrained() const { return slots.empty(); } + + /** Try to issue more stores to memory */ + void step(); + + /** Report queue contents for MinorTrace */ + void minorTrace() const; + }; + + protected: + /** Most recent execSeqNum of a memory barrier instruction or + * 0 if there are no in-flight barriers. Useful as a + * dependency for early-issued memory operations */ + InstSeqNum lastMemBarrier; + + public: + /** Retry state of last issued memory transfer */ + MemoryState state; + + /** Maximum number of in-flight accesses issued to the memory system */ + const unsigned int inMemorySystemLimit; + + /** Memory system access width (and snap) in bytes */ + const unsigned int lineWidth; + + public: + /** The LSQ consists of three queues: requests, transfers and the + * store buffer storeBuffer. */ + + typedef Queue<LSQRequestPtr, + ReportTraitsPtrAdaptor<LSQRequestPtr>, + NoBubbleTraits<LSQRequestPtr> > + LSQQueue; + + /** requests contains LSQRequests which have been issued to the TLB by + * calling ExecContext::readMem/writeMem (which in turn calls + * LSQ::pushRequest and LSQRequest::startAddrTranslation). Once they + * have a physical address, requests at the head of requests can be + * issued to the memory system. At this stage, it cannot be clear that + * memory accesses *must* happen (that there are no preceding faults or + * changes of flow of control) and so only cacheable reads are issued + * to memory. + * Cacheable stores are not issued at all (and just pass through + * 'transfers' in order) and all other transfers are stalled in requests + * until their corresponding instructions are at the head of the + * inMemInsts instruction queue and have the right streamSeqNum. */ + LSQQueue requests; + + /** Once issued to memory (or, for stores, just had their + * state changed to StoreToStoreBuffer) LSQRequests pass through + * transfers waiting for memory responses. At the head of transfers, + * Execute::commitInst can pick up the memory response for a request + * using LSQ::findResponse. Responses to be committed can then + * have ExecContext::completeAcc on them. Stores can then be pushed + * into the store buffer. All other transfers will then be complete. */ + LSQQueue transfers; + + /* The store buffer contains committed cacheable stores on + * their way to memory decoupled from subsequence instruction execution. + * Before trying to issue a cacheable read from 'requests' to memory, + * the store buffer is checked to see if a previous store contains the + * needed data (StoreBuffer::canForwardDataToLoad) which can be + * forwarded in lieu of a memory access. If there are outstanding + * stores in the transfers queue, they must be promoted to the store + * buffer (and so be commited) before they can be correctly checked + * for forwarding. */ + StoreBuffer storeBuffer; + + protected: + /** Count of the number of mem. accesses which have left the + * requests queue and are in the 'wild' in the memory system. */ + unsigned int numAccessesInMemorySystem; + + /** Number of requests in the DTLB in the requests queue */ + unsigned int numAccessesInDTLB; + + /** The number of stores in the transfers queue. Useful when + * testing if the store buffer contains all the forwardable stores */ + unsigned int numStoresInTransfers; + + /** The number of accesses which have been issued to the memory + * system but have not been committed/discarded *excluding* + * cacheable normal loads which don't need to be tracked */ + unsigned int numAccessesIssuedToMemory; + + /** The request (from either requests or the store buffer) which is + * currently waiting have its memory access retried */ + LSQRequestPtr retryRequest; + + /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */ + Addr cacheBlockMask; + + protected: + /** Try and issue a memory access for a translated request at the + * head of the requests queue. Also tries to move the request + * between queues */ + void tryToSendToTransfers(LSQRequestPtr request); + + /** Try to send (or resend) a memory request's next/only packet to + * the memory system. Returns true if the request was successfully + * sent to memory (and was also the last packet in a transfer) */ + bool tryToSend(LSQRequestPtr request); + + /** Clear a barrier (if it's the last one marked up in lastMemBarrier) */ + void clearMemBarrier(MinorDynInstPtr inst); + + /** Move a request between queues */ + void moveFromRequestsToTransfers(LSQRequestPtr request); + + /** Can a request be sent to the memory system */ + bool canSendToMemorySystem(); + + public: + LSQ(std::string name_, std::string dcache_port_name_, + MinorCPU &cpu_, Execute &execute_, + unsigned int max_accesses_in_memory_system, unsigned int line_width, + unsigned int requests_queue_size, unsigned int transfers_queue_size, + unsigned int store_buffer_size, + unsigned int store_buffer_cycle_store_limit); + + virtual ~LSQ(); + + public: + /** Step checks the queues to see if their are issuable transfers + * which were not otherwise picked up by tests at the end of other + * events. + * + * Steppable actions include deferred actions which couldn't be + * cascaded on the end of a memory response/TLB response event + * because of resource congestion. */ + void step(); + + /** Is their space in the request queue to be able to push a request by + * issuing an isMemRef instruction */ + bool canRequest() { return requests.unreservedRemainingSpace() != 0; } + + /** Returns a response if it's at the head of the transfers queue and + * it's either complete or can be sent on to the store buffer. After + * calling, the request still remains on the transfer queue until + * popResponse is called */ + LSQRequestPtr findResponse(MinorDynInstPtr inst); + + /** Sanity check and pop the head response */ + void popResponse(LSQRequestPtr response); + + /** Must check this before trying to insert into the store buffer */ + bool canPushIntoStoreBuffer() const { return storeBuffer.canInsert(); } + + /** A store has been committed, please move it to the store buffer */ + void sendStoreToStoreBuffer(LSQRequestPtr request); + + /** Are there any accesses other than normal cached loads in the + * memory system or having received responses which need to be + * handled for their instruction's to be completed */ + bool accessesInFlight() const + { return numAccessesIssuedToMemory != 0; } + + /** A memory barrier instruction has been issued, remember its + * execSeqNum that we can avoid issuing memory ops until it is + * committed */ + void issuedMemBarrierInst(MinorDynInstPtr inst); + + /** Get the execSeqNum of the last issued memory barrier */ + InstSeqNum getLastMemBarrier() const { return lastMemBarrier; } + + /** Is there nothing left in the LSQ */ + bool isDrained(); + + /** May need to be ticked next cycle as one of the queues contains + * an actionable transfers or address translation */ + bool needsToTick(); + + /** Complete a barrier instruction. Where committed, makes a + * BarrierDataRequest and pushed it into the store buffer */ + void completeMemBarrierInst(MinorDynInstPtr inst, + bool committed); + + /** Single interface for readMem/writeMem to issue requests into + * the LSQ */ + void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, + unsigned int size, Addr addr, unsigned int flags, uint64_t *res); + + /** Push a predicate failed-representing request into the queues just + * to maintain commit order */ + void pushFailedRequest(MinorDynInstPtr inst); + + /** Memory interface */ + bool recvTimingResp(PacketPtr pkt); + void recvRetry(); + void recvTimingSnoopReq(PacketPtr pkt); + + /** Return the raw-bindable port */ + MinorCPU::MinorCPUPort &getDcachePort() { return dcachePort; } + + void minorTrace() const; +}; + +/** Make a suitable packet for the given request. If the request is a store, + * data will be the payload data. If sender_state is NULL, it won't be + * pushed into the packet as senderState */ +PacketPtr makePacketForRequest(Request &request, bool isLoad, + Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL); +} + +#endif /* __CPU_MINOR_NEW_LSQ_HH__ */ |