/*
 * Copyright (c) 2013-2014 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andrew Bardsley
 */

/**
 * @file
 *
 *  A load/store queue that allows outstanding reads and writes.
 *
 */

#ifndef __CPU_MINOR_NEW_LSQ_HH__
#define __CPU_MINOR_NEW_LSQ_HH__

#include "cpu/minor/buffers.hh"
#include "cpu/minor/cpu.hh"
#include "cpu/minor/pipe_data.hh"
#include "cpu/minor/trace.hh"

namespace Minor
{

/* Forward declaration */
class Execute;

class LSQ : public Named
{
  protected:
    /** My owner(s) */
    MinorCPU &cpu;
    Execute &execute;

  protected:
    /** State of memory access for head access. */
    enum MemoryState
    {
        MemoryRunning, /* Default. Step dcache queues when possible. */
        MemoryNeedsRetry /* Request rejected, will be asked to retry */
    };

    /** Print MemoryState values as shown in the enum definition */
    friend std::ostream &operator <<(std::ostream &os,
        MemoryState state);

    /** Coverage of one address range with another */
    enum AddrRangeCoverage
    {
        PartialAddrRangeCoverage, /* Two ranges partly overlap */
        FullAddrRangeCoverage, /* One range fully covers another */
        NoAddrRangeCoverage /* Two ranges are disjoint */
    };

    /** Exposable data port */
    class DcachePort : public MinorCPU::MinorCPUPort
    {
      protected:
        /** My owner */
        LSQ &lsq;

      public:
        DcachePort(std::string name, LSQ &lsq_, MinorCPU &cpu) :
            MinorCPU::MinorCPUPort(name, cpu), lsq(lsq_)
        { }

      protected:
        bool recvTimingResp(PacketPtr pkt) override
        { return lsq.recvTimingResp(pkt); }

        void recvReqRetry() override { lsq.recvReqRetry(); }

        bool isSnooping() const override { return true; }

        void recvTimingSnoopReq(PacketPtr pkt) override
        { return lsq.recvTimingSnoopReq(pkt); }

        void recvFunctionalSnoop(PacketPtr pkt) override { }
    };

    DcachePort dcachePort;

  public:
    /** Derived SenderState to carry data access info. through address
     *  translation, the queues in this port and back from the memory
     *  system. */
    class LSQRequest :
        public BaseTLB::Translation, /* For TLB lookups */
        public Packet::SenderState /* For packing into a Packet */
    {
      public:
        /** Owning port */
        LSQ &port;

        /** Instruction which made this request */
        MinorDynInstPtr inst;

        /** Load/store indication used for building packet.  This isn't
         *  carried by Request so we need to keep it here */
        bool isLoad;

        /** Dynamically allocated and populated data carried for
         *  building write packets */
        PacketDataPtr data;

        /* Requests carry packets on their way to the memory system.
         *  When a Packet returns from the memory system, its
         *  request needs to have its packet updated as this
         *  may have changed in flight */
        PacketPtr packet;

        /** The underlying request of this LSQRequest */
        RequestPtr request;

        /** Fault generated performing this request */
        Fault fault;

        /** Res from pushRequest */
        uint64_t *res;

        /** Was skipped.  Set to indicate any reason (faulted, bad
         *  stream sequence number, in a fault shadow) that this
         *  request did not perform a memory transfer */
        bool skipped;

        /** This in an access other than a normal cacheable load
         *  that's visited the memory system */
        bool issuedToMemory;

        enum LSQRequestState
        {
            NotIssued, /* Newly created */
            InTranslation, /* TLB accessed, no reply yet */
            Translated, /* Finished address translation */
            Failed, /* The starting start of FailedDataRequests */
            RequestIssuing, /* Load/store issued to memory in the requests
                queue */
            StoreToStoreBuffer, /* Store in transfers on its way to the
                store buffer */
            RequestNeedsRetry, /* Retry needed for load */
            StoreInStoreBuffer, /* Store in the store buffer, before issuing
                a memory transfer */
            StoreBufferIssuing, /* Store in store buffer and has been
                issued */
            StoreBufferNeedsRetry, /* Retry needed for store */
            /* All completed states.  Includes
                completed loads, TLB faults and skipped requests whose
                seqNum's no longer match */
            Complete
        };

        LSQRequestState state;

      protected:
        /** BaseTLB::Translation interface */
        void markDelayed() { }

      public:
        LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
            PacketDataPtr data_ = NULL, uint64_t *res_ = NULL);

        virtual ~LSQRequest();

      public:
        /** Make a packet to use with the memory transaction */
        void makePacket();

        /** Was no memory access attempted for this request? */
        bool skippedMemAccess() { return skipped; }

        /** Set this request as having been skipped before a memory
         *  transfer was attempt */
        void setSkipped() { skipped = true; }

        /** Does address range req1 (req1_addr to req1_addr + req1_size - 1)
         *  fully cover, partially cover or not cover at all the range req2 */
        static AddrRangeCoverage containsAddrRangeOf(
            Addr req1_addr, unsigned int req1_size,
            Addr req2_addr, unsigned int req2_size);

        /** Does this request's address range fully cover the range
         *  of other_request? */
        AddrRangeCoverage containsAddrRangeOf(LSQRequest *other_request);

        /** Start the address translation process for this request.  This
         *  will issue a translation request to the TLB. */
        virtual void startAddrTranslation() = 0;

        /** Get the next packet to issue for this request.  For split
         *  transfers, it will be necessary to step through the available
         *  packets by calling do { getHeadPacket ; stepToNextPacket } while
         *  (!sentAllPackets) and by retiring response using retireResponse */
        virtual PacketPtr getHeadPacket() = 0;

        /** Step to the next packet for the next call to getHeadPacket */
        virtual void stepToNextPacket() = 0;

        /** Have all packets been sent? */
        virtual bool sentAllPackets() = 0;

        /** True if this request has any issued packets in the memory
         *  system and so can't be interrupted until it gets responses */
        virtual bool hasPacketsInMemSystem() = 0;

        /** Retire a response packet into the LSQRequest packet possibly
         *  completing this transfer */
        virtual void retireResponse(PacketPtr packet_) = 0;

        /** Is this a request a barrier? */
        virtual bool isBarrier();

        /** This request, once processed by the requests/transfers
         *  queues, will need to go to the store buffer */
        bool needsToBeSentToStoreBuffer();

        /** Set state and output trace output */
        void setState(LSQRequestState new_state);

        /** Has this request been completed.  This includes *all* reasons
         *  for completion: successful transfers, faults, skipped because
         *  of preceding faults */
        bool isComplete() const;

        /** MinorTrace report interface */
        void reportData(std::ostream &os) const;
    };

    typedef LSQRequest *LSQRequestPtr;

    friend std::ostream & operator <<(std::ostream &os,
        AddrRangeCoverage state);

    friend std::ostream & operator <<(std::ostream &os,
        LSQRequest::LSQRequestState state);

  protected:
    /** Special request types that don't actually issue memory requests */
    class SpecialDataRequest : public LSQRequest
    {
      protected:
        /** TLB interace */
        void finish(const Fault &fault_, const RequestPtr &request_,
                    ThreadContext *tc, BaseTLB::Mode mode)
        { }

      public:
        /** Send single translation request */
        void startAddrTranslation() { }

        /** Get the head packet as counted by numIssuedFragments */
        PacketPtr getHeadPacket()
        { fatal("No packets in a SpecialDataRequest"); }

        /** Step on numIssuedFragments */
        void stepToNextPacket() { }

        /** Has no packets to send */
        bool sentAllPackets() { return true; }

        /** Never sends any requests */
        bool hasPacketsInMemSystem() { return false; }

        /** Keep the given packet as the response packet
         *  LSQRequest::packet */
        void retireResponse(PacketPtr packet_) { }

      public:
        SpecialDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
            /* Say this is a load, not actually relevant */
            LSQRequest(port_, inst_, true, NULL, 0)
        { }
    };

    /** FailedDataRequest represents requests from instructions that
     *  failed their predicates but need to ride the requests/transfers
     *  queues to maintain trace ordering */
    class FailedDataRequest : public SpecialDataRequest
    {
      public:
        FailedDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
            SpecialDataRequest(port_, inst_)
        { state = Failed; }
    };

    /** Request for doing barrier accounting in the store buffer.  Not
     *  for use outside that unit */
    class BarrierDataRequest : public SpecialDataRequest
    {
      public:
        bool isBarrier() { return true; }

      public:
        BarrierDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
            SpecialDataRequest(port_, inst_)
        { state = Complete; }
    };

    /** SingleDataRequest is used for requests that don't fragment */
    class SingleDataRequest : public LSQRequest
    {
      protected:
        /** TLB interace */
        void finish(const Fault &fault_, const RequestPtr &request_,
                    ThreadContext *tc, BaseTLB::Mode mode);

        /** Has my only packet been sent to the memory system but has not
         *  yet been responded to */
        bool packetInFlight;

        /** Has the packet been at least sent to the memory system? */
        bool packetSent;

      public:
        /** Send single translation request */
        void startAddrTranslation();

        /** Get the head packet as counted by numIssuedFragments */
        PacketPtr getHeadPacket() { return packet; }

        /** Remember that the packet has been sent */
        void stepToNextPacket() { packetInFlight = true; packetSent = true; }

        /** Has packet been sent */
        bool hasPacketsInMemSystem() { return packetInFlight; }

        /** packetInFlight can become false again, so need to check
         *  packetSent */
        bool sentAllPackets() { return packetSent; }

        /** Keep the given packet as the response packet
         *  LSQRequest::packet */
        void retireResponse(PacketPtr packet_);

      public:
        SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_,
            bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL) :
            LSQRequest(port_, inst_, isLoad_, data_, res_),
            packetInFlight(false),
            packetSent(false)
        { }
    };

    class SplitDataRequest : public LSQRequest
    {
      protected:
        /** Event to step between translations */
        EventFunctionWrapper translationEvent;
      protected:
        /** Number of fragments this request is split into */
        unsigned int numFragments;

        /** Number of fragments in the address translation mechanism */
        unsigned int numInTranslationFragments;

        /** Number of fragments that have completed address translation,
         *  (numTranslatedFragments + numInTranslationFragments) <=
         *  numFragments.  When numTranslatedFramgents == numFragments,
         *  translation is complete */
        unsigned int numTranslatedFragments;

        /** Number of fragments already issued (<= numFragments) */
        unsigned int numIssuedFragments;

        /** Number of fragments retired back to this request */
        unsigned int numRetiredFragments;

        /** Fragment Requests corresponding to the address ranges of
         *  each fragment */
        std::vector<RequestPtr> fragmentRequests;

        /** Packets matching fragmentRequests to issue fragments to memory */
        std::vector<Packet *> fragmentPackets;

      protected:
        /** TLB response interface */
        void finish(const Fault &fault_, const RequestPtr &request_,
                    ThreadContext *tc, BaseTLB::Mode mode);

      public:
        SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
            bool isLoad_, PacketDataPtr data_ = NULL,
            uint64_t *res_ = NULL);

        ~SplitDataRequest();

      public:
        /** Make all the Requests for this transfer's fragments so that those
         *  requests can be sent for address translation */
        void makeFragmentRequests();

        /** Make the packets to go with the requests so they can be sent to
         *  the memory system */
        void makeFragmentPackets();

        /** Start a loop of do { sendNextFragmentToTranslation ;
         *  translateTiming ; finish } while (numTranslatedFragments !=
         *  numFragments) to complete all this requests' fragments' address
         *  translations */
        void startAddrTranslation();

        /** Get the head packet as counted by numIssuedFragments */
        PacketPtr getHeadPacket();

        /** Step on numIssuedFragments */
        void stepToNextPacket();

        bool hasPacketsInMemSystem()
        { return numIssuedFragments != numRetiredFragments; }

        /** Have we stepped past the end of fragmentPackets? */
        bool sentAllPackets() { return numIssuedFragments == numFragments; }

        /** For loads, paste the response data into the main
         *  response packet */
        void retireResponse(PacketPtr packet_);

        /** Part of the address translation loop, see startAddTranslation */
        void sendNextFragmentToTranslation();
    };

    /** Store buffer.  This contains stores which have been committed
     *  but whose memory transfers have not yet been issued. Load data
     *  can be forwarded out of the store buffer */
    class StoreBuffer : public Named
    {
      public:
        /** My owner */
        LSQ &lsq;

        /** Number of slots, this is a bound on the size of slots */
        const unsigned int numSlots;

        /** Maximum number of stores that can be issued per cycle */
        const unsigned int storeLimitPerCycle;

      public:
        /** Queue of store requests on their way to memory */
        std::deque<LSQRequestPtr> slots;

        /** Number of occupied slots which have not yet issued a
         *  memory access */
        unsigned int numUnissuedAccesses;

      public:
        StoreBuffer(std::string name_, LSQ &lsq_,
            unsigned int store_buffer_size,
            unsigned int store_limit_per_cycle);

      public:
        /** Can a new request be inserted into the queue? */
        bool canInsert() const;

        /** Delete the given request and free the slot it occupied */
        void deleteRequest(LSQRequestPtr request);

        /** Insert a request at the back of the queue */
        void insert(LSQRequestPtr request);

        /** Look for a store which satisfies the given load.  Returns an
         *  indication whether the forwarding request can be wholly,
         *  partly or not all all satisfied.  If the request can be
         *  wholly satisfied, the store buffer slot number which can be used
         *  is returned in found_slot */
        AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request,
            unsigned int &found_slot);

        /** Fill the given packet with appropriate date from slot
         *  slot_number */
        void forwardStoreData(LSQRequestPtr load, unsigned int slot_number);

        /** Number of stores in the store buffer which have not been
         *  completely issued to the memory system */
        unsigned int numUnissuedStores() { return numUnissuedAccesses; }

        /** Count a store being issued to memory by decrementing
         *  numUnissuedAccesses.  Does not count barrier requests as they
         *  will be handles as barriers are cleared from the buffer */
        void countIssuedStore(LSQRequestPtr request);

        /** Drained if there is absolutely nothing left in the buffer */
        bool isDrained() const { return slots.empty(); }

        /** Try to issue more stores to memory */
        void step();

        /** Report queue contents for MinorTrace */
        void minorTrace() const;
    };

  protected:
    /** Most recent execSeqNum of a memory barrier instruction or
     *  0 if there are no in-flight barriers.  Useful as a
     *  dependency for early-issued memory operations */
    std::vector<InstSeqNum> lastMemBarrier;

  public:
    /** Retry state of last issued memory transfer */
    MemoryState state;

    /** Maximum number of in-flight accesses issued to the memory system */
    const unsigned int inMemorySystemLimit;

    /** Memory system access width (and snap) in bytes */
    const unsigned int lineWidth;

  public:
    /** The LSQ consists of three queues: requests, transfers and the
     *  store buffer storeBuffer. */

    typedef Queue<LSQRequestPtr,
        ReportTraitsPtrAdaptor<LSQRequestPtr>,
        NoBubbleTraits<LSQRequestPtr> >
        LSQQueue;

    /** requests contains LSQRequests which have been issued to the TLB by
     *  calling ExecContext::readMem/writeMem (which in turn calls
     *  LSQ::pushRequest and LSQRequest::startAddrTranslation).  Once they
     *  have a physical address, requests at the head of requests can be
     *  issued to the memory system.  At this stage, it cannot be clear that
     *  memory accesses *must* happen (that there are no preceding faults or
     *  changes of flow of control) and so only cacheable reads are issued
     *  to memory.
     *  Cacheable stores are not issued at all (and just pass through
     *  'transfers' in order) and all other transfers are stalled in requests
     *  until their corresponding instructions are at the head of the
     *  inMemInsts instruction queue and have the right streamSeqNum. */
    LSQQueue requests;

    /** Once issued to memory (or, for stores, just had their
     *  state changed to StoreToStoreBuffer) LSQRequests pass through
     *  transfers waiting for memory responses.  At the head of transfers,
     *  Execute::commitInst can pick up the memory response for a request
     *  using LSQ::findResponse.  Responses to be committed can then
     *  have ExecContext::completeAcc on them.  Stores can then be pushed
     *  into the store buffer.  All other transfers will then be complete. */
    LSQQueue transfers;

    /* The store buffer contains committed cacheable stores on
     * their way to memory decoupled from subsequence instruction execution.
     * Before trying to issue a cacheable read from 'requests' to memory,
     * the store buffer is checked to see if a previous store contains the
     * needed data (StoreBuffer::canForwardDataToLoad) which can be
     * forwarded in lieu of a memory access.  If there are outstanding
     * stores in the transfers queue, they must be promoted to the store
     * buffer (and so be commited) before they can be correctly checked
     * for forwarding. */
    StoreBuffer storeBuffer;

  protected:
    /** Count of the number of mem. accesses which have left the
     *  requests queue and are in the 'wild' in the memory system and who
     *  *must not* be interrupted as they are not normal cacheable
     *  accesses.  This is a count of the number of in-flight requests
     *  with issuedToMemory set who have visited tryToSendRequest at least
     *  once */
    unsigned int numAccessesInMemorySystem;

    /** Number of requests in the DTLB in the requests queue */
    unsigned int numAccessesInDTLB;

    /** The number of stores in the transfers queue.  Useful when
     *  testing if the store buffer contains all the forwardable stores */
    unsigned int numStoresInTransfers;

    /** The number of accesses which have been issued to the memory
     *  system but have not been committed/discarded *excluding*
     *  cacheable normal loads which don't need to be tracked */
    unsigned int numAccessesIssuedToMemory;

    /** The request (from either requests or the store buffer) which is
     *  currently waiting have its memory access retried */
    LSQRequestPtr retryRequest;

    /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
    Addr cacheBlockMask;

  protected:
    /** Try and issue a memory access for a translated request at the
     *  head of the requests queue.  Also tries to move the request
     *  between queues */
    void tryToSendToTransfers(LSQRequestPtr request);

    /** Try to send (or resend) a memory request's next/only packet to
     *  the memory system.  Returns true if the request was successfully
     *  sent to memory (and was also the last packet in a transfer) */
    bool tryToSend(LSQRequestPtr request);

    /** Clear a barrier (if it's the last one marked up in lastMemBarrier) */
    void clearMemBarrier(MinorDynInstPtr inst);

    /** Move a request between queues */
    void moveFromRequestsToTransfers(LSQRequestPtr request);

    /** Can a request be sent to the memory system */
    bool canSendToMemorySystem();

    /** Snoop other threads monitors on memory system accesses */
    void threadSnoop(LSQRequestPtr request);

  public:
    LSQ(std::string name_, std::string dcache_port_name_,
        MinorCPU &cpu_, Execute &execute_,
        unsigned int max_accesses_in_memory_system, unsigned int line_width,
        unsigned int requests_queue_size, unsigned int transfers_queue_size,
        unsigned int store_buffer_size,
        unsigned int store_buffer_cycle_store_limit);

    virtual ~LSQ();

  public:
    /** Step checks the queues to see if their are issuable transfers
     *  which were not otherwise picked up by tests at the end of other
     *  events.
     *
     *  Steppable actions include deferred actions which couldn't be
     *  cascaded on the end of a memory response/TLB response event
     *  because of resource congestion. */
    void step();

    /** Is their space in the request queue to be able to push a request by
     *  issuing an isMemRef instruction */
    bool canRequest() { return requests.unreservedRemainingSpace() != 0; }

    /** Returns a response if it's at the head of the transfers queue and
     *  it's either complete or can be sent on to the store buffer.  After
     *  calling, the request still remains on the transfer queue until
     *  popResponse is called */
    LSQRequestPtr findResponse(MinorDynInstPtr inst);

    /** Sanity check and pop the head response */
    void popResponse(LSQRequestPtr response);

    /** Must check this before trying to insert into the store buffer */
    bool canPushIntoStoreBuffer() const { return storeBuffer.canInsert(); }

    /** A store has been committed, please move it to the store buffer */
    void sendStoreToStoreBuffer(LSQRequestPtr request);

    /** Are there any accesses other than normal cached loads in the
     *  memory system or having received responses which need to be
     *  handled for their instruction's to be completed */
    bool accessesInFlight() const
    { return numAccessesIssuedToMemory != 0; }

    /** A memory barrier instruction has been issued, remember its
     *  execSeqNum that we can avoid issuing memory ops until it is
     *  committed */
    void issuedMemBarrierInst(MinorDynInstPtr inst);

    /** Get the execSeqNum of the last issued memory barrier */
    InstSeqNum getLastMemBarrier(ThreadID thread_id) const
    { return lastMemBarrier[thread_id]; }

    /** Is there nothing left in the LSQ */
    bool isDrained();

    /** May need to be ticked next cycle as one of the queues contains
     *  an actionable transfers or address translation */
    bool needsToTick();

    /** Complete a barrier instruction.  Where committed, makes a
     *  BarrierDataRequest and pushed it into the store buffer */
    void completeMemBarrierInst(MinorDynInstPtr inst,
        bool committed);

    /** Single interface for readMem/writeMem to issue requests into
     *  the LSQ */
    void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                     unsigned int size, Addr addr, Request::Flags flags,
                     uint64_t *res);

    /** Push a predicate failed-representing request into the queues just
     *  to maintain commit order */
    void pushFailedRequest(MinorDynInstPtr inst);

    /** Memory interface */
    bool recvTimingResp(PacketPtr pkt);
    void recvReqRetry();
    void recvTimingSnoopReq(PacketPtr pkt);

    /** Return the raw-bindable port */
    MinorCPU::MinorCPUPort &getDcachePort() { return dcachePort; }

    void minorTrace() const;
};

/** Make a suitable packet for the given request.  If the request is a store,
 *  data will be the payload data.  If sender_state is NULL, it won't be
 *  pushed into the packet as senderState */
PacketPtr makePacketForRequest(const RequestPtr &request, bool isLoad,
    Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL);
}

#endif /* __CPU_MINOR_NEW_LSQ_HH__ */