author | Kevin Lim <ktlim@umich.edu> | 2006-06-02 18:15:20 -0400
---|---|---
committer | Kevin Lim <ktlim@umich.edu> | 2006-06-02 18:15:20 -0400
commit | 7940c10ace28d5b93a61d4d278e6647e0c497149 (patch)
tree | 902ac028a61bbcc93485d8efc6e7ba5fe8c47d00 /src/cpu/ozone/lw_lsq.hh
parent | d4b73086b6b0856c28433b55c8dd5c7b56a1b6df (diff)
download | gem5-7940c10ace28d5b93a61d4d278e6647e0c497149.tar.xz
Fixes to get compiling to work. This mainly involves fixing up some includes, changing functions within the XCs, and changing MemReqPtrs to Requests or Packets where appropriate.
Currently the O3 and Ozone CPUs do not work in the new memory system; I still need to fix up the ports and handle responses properly. This check-in is so that the merge between m5 and newmem is no longer outstanding.
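For context, the MemReqPtr-to-Request/Packet conversion mentioned above follows the pattern used in the timing load path of the diff below: a Request describes the access, a Packet is built around it, and the packet is sent through the CPU's data-cache port. The following is only a condensed sketch of that pattern under the newmem API as it appears in this revision; the helper name sendTimingLoad is illustrative, not part of the tree.

```cpp
// Condensed sketch of the newmem access pattern (assumed context: the gem5
// newmem headers; the helper name sendTimingLoad is illustrative only).
#include "mem/packet.hh"
#include "mem/port.hh"
#include "mem/request.hh"

bool
sendTimingLoad(RequestPtr req, uint8_t *data_buf, Port &dcache_port)
{
    // Old code handed a MemReqPtr straight to the memory interface; the new
    // code wraps the Request in a Packet and sends it through a Port.
    PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
    data_pkt->dataStatic(data_buf);  // packet writes into the caller's buffer

    // Returns false if the D-cache port cannot accept the packet this cycle;
    // the LSQ then marks the load blocked and retries it later.
    return dcache_port.sendTiming(data_pkt);
}
```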
src/SConscript:
Need to include FU Pool for new CPU model. I'll try to figure out a cleaner way to handle this in the future.
src/base/traceflags.py:
Include new trace flags, and fix up the mess from the merge.
src/cpu/SConscript:
Include base_dyn_inst.cc as one of the sources.
Don't compile the Ozone CPU for now.
src/cpu/base.cc:
Remove an extra } from the merge.
src/cpu/base_dyn_inst.cc:
Fixes to make compiling work. Don't instantiate the OzoneCPU for now.
src/cpu/base_dyn_inst.hh:
src/cpu/o3/2bit_local_pred.cc:
src/cpu/o3/alpha_cpu_builder.cc:
src/cpu/o3/alpha_cpu_impl.hh:
src/cpu/o3/alpha_dyn_inst.hh:
src/cpu/o3/alpha_params.hh:
src/cpu/o3/bpred_unit.cc:
src/cpu/o3/btb.hh:
src/cpu/o3/commit.hh:
src/cpu/o3/commit_impl.hh:
src/cpu/o3/cpu.cc:
src/cpu/o3/cpu.hh:
src/cpu/o3/fetch.hh:
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/free_list.hh:
src/cpu/o3/iew.hh:
src/cpu/o3/iew_impl.hh:
src/cpu/o3/inst_queue.hh:
src/cpu/o3/inst_queue_impl.hh:
src/cpu/o3/regfile.hh:
src/cpu/o3/sat_counter.hh:
src/cpu/op_class.hh:
src/cpu/ozone/cpu.hh:
src/cpu/checker/cpu.cc:
src/cpu/checker/cpu.hh:
src/cpu/checker/exec_context.hh:
src/cpu/checker/o3_cpu_builder.cc:
src/cpu/ozone/cpu_impl.hh:
src/mem/request.hh:
src/cpu/o3/fu_pool.hh:
src/cpu/o3/lsq.hh:
src/cpu/o3/lsq_unit.hh:
src/cpu/o3/lsq_unit_impl.hh:
src/cpu/o3/thread_state.hh:
src/cpu/ozone/back_end.hh:
src/cpu/ozone/dyn_inst.cc:
src/cpu/ozone/dyn_inst.hh:
src/cpu/ozone/front_end.hh:
src/cpu/ozone/inorder_back_end.hh:
src/cpu/ozone/lw_back_end.hh:
src/cpu/ozone/lw_lsq.hh:
src/cpu/ozone/ozone_impl.hh:
src/cpu/ozone/thread_state.hh:
Fixes to get compiling to work.
src/cpu/o3/alpha_cpu.hh:
Fixes to get compiling to work.
Float reg accessors have changed, and MemReqPtrs have been changed to RequestPtrs.
src/cpu/o3/alpha_dyn_inst_impl.hh:
Fixes to get compiling to work.
Pass the packet in to the completeAcc function (a rough sketch of this change follows below).
Fix up the syscall function.
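As a rough illustration of the alpha_dyn_inst_impl.hh note above: the completion hook now takes the response packet instead of a MemReqPtr, so the instruction pulls its data out of the packet it is handed. The body below is an assumption sketched from that note, not a verbatim excerpt of the file.

```cpp
// Assumed shape of the changed hook (alpha_dyn_inst_impl.hh); the exact body
// in the tree may differ -- this only illustrates passing the packet through.
template <class Impl>
Fault
AlphaDynInst<Impl>::completeAcc(Packet *pkt)
{
    // The memory response now arrives as a Packet, which is forwarded to the
    // static instruction's completion routine instead of a MemReqPtr.
    this->fault = this->staticInst->completeAcc(pkt, this, this->traceData);
    return this->fault;
}
```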
--HG--
rename : cpu/activity.cc => src/cpu/activity.cc
rename : cpu/activity.hh => src/cpu/activity.hh
rename : cpu/checker/cpu.cc => src/cpu/checker/cpu.cc
rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh
rename : cpu/checker/cpu_builder.cc => src/cpu/checker/cpu_builder.cc
rename : cpu/checker/exec_context.hh => src/cpu/checker/exec_context.hh
rename : cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_cpu_builder.cc
rename : cpu/o3/dep_graph.hh => src/cpu/o3/dep_graph.hh
rename : cpu/o3/fu_pool.cc => src/cpu/o3/fu_pool.cc
rename : cpu/o3/fu_pool.hh => src/cpu/o3/fu_pool.hh
rename : cpu/o3/lsq.cc => src/cpu/o3/lsq.cc
rename : cpu/o3/lsq.hh => src/cpu/o3/lsq.hh
rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh
rename : cpu/o3/lsq_unit.cc => src/cpu/o3/lsq_unit.cc
rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh
rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh
rename : cpu/o3/scoreboard.cc => src/cpu/o3/scoreboard.cc
rename : cpu/o3/scoreboard.hh => src/cpu/o3/scoreboard.hh
rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh
rename : cpu/ozone/back_end.cc => src/cpu/ozone/back_end.cc
rename : cpu/ozone/back_end.hh => src/cpu/ozone/back_end.hh
rename : cpu/ozone/back_end_impl.hh => src/cpu/ozone/back_end_impl.hh
rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc
rename : cpu/ozone/dyn_inst.cc => src/cpu/ozone/dyn_inst.cc
rename : cpu/ozone/dyn_inst.hh => src/cpu/ozone/dyn_inst.hh
rename : cpu/ozone/dyn_inst_impl.hh => src/cpu/ozone/dyn_inst_impl.hh
rename : cpu/ozone/front_end.cc => src/cpu/ozone/front_end.cc
rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh
rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh
rename : cpu/ozone/inorder_back_end.cc => src/cpu/ozone/inorder_back_end.cc
rename : cpu/ozone/inorder_back_end.hh => src/cpu/ozone/inorder_back_end.hh
rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh
rename : cpu/ozone/inst_queue.cc => src/cpu/ozone/inst_queue.cc
rename : cpu/ozone/inst_queue.hh => src/cpu/ozone/inst_queue.hh
rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh
rename : cpu/ozone/lsq_unit.cc => src/cpu/ozone/lsq_unit.cc
rename : cpu/ozone/lsq_unit.hh => src/cpu/ozone/lsq_unit.hh
rename : cpu/ozone/lsq_unit_impl.hh => src/cpu/ozone/lsq_unit_impl.hh
rename : cpu/ozone/lw_back_end.cc => src/cpu/ozone/lw_back_end.cc
rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh
rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh
rename : cpu/ozone/lw_lsq.cc => src/cpu/ozone/lw_lsq.cc
rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh
rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh
rename : cpu/ozone/null_predictor.hh => src/cpu/ozone/null_predictor.hh
rename : cpu/ozone/ozone_impl.hh => src/cpu/ozone/ozone_impl.hh
rename : cpu/ozone/rename_table.cc => src/cpu/ozone/rename_table.cc
rename : cpu/ozone/rename_table.hh => src/cpu/ozone/rename_table.hh
rename : cpu/ozone/rename_table_impl.hh => src/cpu/ozone/rename_table_impl.hh
rename : cpu/ozone/simple_impl.hh => src/cpu/ozone/simple_impl.hh
rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh
rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh
rename : cpu/quiesce_event.cc => src/cpu/quiesce_event.cc
rename : cpu/quiesce_event.hh => src/cpu/quiesce_event.hh
rename : cpu/thread_state.hh => src/cpu/thread_state.hh
rename : python/m5/objects/FUPool.py => src/python/m5/objects/FUPool.py
rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py
rename : python/m5/objects/SimpleOzoneCPU.py => src/python/m5/objects/SimpleOzoneCPU.py
extra : convert_revision : ca7f0fbf65ee1a70d482fb4eda9a1840c7f9b8f8
Diffstat (limited to 'src/cpu/ozone/lw_lsq.hh')
-rw-r--r-- | src/cpu/ozone/lw_lsq.hh | 656 |
1 files changed, 656 insertions, 0 deletions
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh new file mode 100644 index 000000000..e1488dd6f --- /dev/null +++ b/src/cpu/ozone/lw_lsq.hh @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_LW_LSQ_HH__ +#define __CPU_OZONE_LW_LSQ_HH__ + +#include <list> +#include <map> +#include <queue> +#include <algorithm> + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +//#include "mem/page_table.hh" +#include "sim/debug.hh" +#include "sim/sim_object.hh" + +//class PageTable; + +/** + * Class that implements the actual LQ and SQ for each specific thread. + * Both are circular queues; load entries are freed upon committing, while + * store entries are freed once they writeback. The LSQUnit tracks if there + * are memory ordering violations, and also detects partial load to store + * forwarding cases (a store only has part of a load's data) that requires + * the load to wait until the store writes back. In the former case it + * holds onto the instruction until the dependence unit looks at it, and + * in the latter it stalls the LSQ until the store writes back. At that + * point the load is replayed. + */ +template <class Impl> +class OzoneLWLSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::BackEnd BackEnd; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::IssueStruct IssueStruct; + + typedef TheISA::IntReg IntReg; + + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt; + + private: + class StoreCompletionEvent : public Event { + public: + /** Constructs a store completion event. */ + StoreCompletionEvent(DynInstPtr &inst, BackEnd *be, + Event *wb_event, OzoneLWLSQ *lsq_ptr); + + /** Processes the store completion event. 
*/ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** The store index of the store being written back. */ + DynInstPtr inst; + + BackEnd *be; + /** The writeback event for the store. Needed for store + * conditionals. + */ + public: + Event *wbEvent; + bool miss; + private: + /** The pointer to the LSQ unit that issued the store. */ + OzoneLWLSQ<Impl> *lsqPtr; + }; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + OzoneLWLSQ(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + /** Sets the back-end stage pointer. */ + void setBE(BackEnd *be_ptr) + { be = be_ptr; } + + /** Sets the page table pointer. */ +// void setPageTable(PageTable *pt_ptr); + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits loads older than a specific sequence number. */ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. */ + void writebackStores(); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. */ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. It clears + * the bool's value upon this being called. + */ + bool loadBlocked() + { return isLoadBlocked; } + + void clearLoadBlocked() + { isLoadBlocked = false; } + + bool isLoadBlockedHandled() + { return loadBlockedHandled; } + + void setLoadBlockedHandled() + { loadBlockedHandled = true; } + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. */ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. 
*/ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. */ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue.back().canWB && + !storeQueue.back().completed/* && + !dcacheInterface->isBlocked()*/; } + + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Pointer to the back-end stage. */ + BackEnd *be; + + MemObject *mem; + + class DcachePort : public Port + { + protected: + FullCPU *cpu; + + public: + DcachePort(const std::string &_name, FullCPU *_cpu) + : Port(_name), cpu(_cpu) + { } + + protected: + virtual Tick recvAtomic(PacketPtr pkt); + + virtual void recvFunctional(PacketPtr pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + virtual bool recvTiming(PacketPtr pkt); + + virtual void recvRetry(); + }; + + /** Pointer to the D-cache. */ + DcachePort dcachePort; + + /** Pointer to the page table. */ +// PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0), lqIt(NULL) + { } + + /** Constructs a store queue entry for a given instruction. */ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0), lqIt(NULL) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The memory request for the store. */ + RequestPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + + typename std::list<DynInstPtr>::iterator lqIt; + }; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissSwitch + }; + + private: + /** The OzoneLWLSQ thread id. */ + unsigned lsqID; + + /** The status of the LSQ unit. */ + Status _status; + + /** The store queue. */ + std::list<SQEntry> storeQueue; + /** The load queue. 
*/ + std::list<DynInstPtr> loadQueue; + + typedef typename std::list<SQEntry>::iterator SQIt; + typedef typename std::list<DynInstPtr>::iterator LQIt; + + + struct HashFn { + size_t operator() (const int a) const + { + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; + } + }; + + m5::hash_map<int, SQIt, HashFn> SQItHash; + std::queue<int> SQIndices; + m5::hash_map<int, LQIt, HashFn> LQItHash; + std::queue<int> LQIndices; + + typedef typename m5::hash_map<int, LQIt, HashFn>::iterator LQHashIt; + typedef typename m5::hash_map<int, SQIt, HashFn>::iterator SQHashIt; + // Consider making these 16 bits + /** The number of LQ entries. */ + unsigned LQEntries; + /** The number of SQ entries. */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ (excludes those waiting to + * writeback). + */ + int stores; + + int storesToWB; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. */ + int usedPorts; + + //list<InstSeqNum> mshrSeqNums; + + //Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + // Make these per thread? + /** Whether or not the LSQ is stalled. */ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The index of the above store. */ + LQIt stallingLoad; + + /** Whether or not a load is blocked due to the memory system. It is + * cleared when this value is checked via loadBlocked(). + */ + bool isLoadBlocked; + + bool loadBlockedHandled; + + InstSeqNum blockedLoadSeqNum; + + /** The oldest faulting load instruction. */ + DynInstPtr loadFaultInst; + /** The oldest faulting store instruction. */ + DynInstPtr storeFaultInst; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. + + public: + /** Executes the load at the given index. */ + template <class T> + Fault read(RequestPtr req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template <class T> + Fault write(RequestPtr req, T &data, int store_idx); + + /** Returns the sequence number of the head load instruction. */ + InstSeqNum getLoadHeadSeqNum() + { + if (!loadQueue.empty()) { + return loadQueue.back()->seqNum; + } else { + return 0; + } + + } + + /** Returns the sequence number of the head store instruction. */ + InstSeqNum getStoreHeadSeqNum() + { + if (!storeQueue.empty()) { + return storeQueue.back().inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. 
*/ + bool isStalled() { return stalled; } +}; + +template <class Impl> +template <class T> +Fault +OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) +{ + //Depending on issue2execute delay a squashed load could + //execute if it is found to be squashed in the same + //cycle it is scheduled to execute + typename m5::hash_map<int, LQIt, HashFn>::iterator + lq_hash_it = LQItHash.find(load_idx); + assert(lq_hash_it != LQItHash.end()); + DynInstPtr inst = (*(*lq_hash_it).second); + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + // @todo: Fix uncached accesses. + if (req->getFlags() & UNCACHEABLE && + (inst != loadQueue.back() || !inst->reachedCommit)) { + DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " + "commit/LSQ!\n", + inst->seqNum); + be->rescheduleMemInst(inst); + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + SQIt sq_it = storeQueue.begin(); + int store_size = 0; + + DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n", + load_idx, req->getPaddr()); + + while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum) + ++sq_it; + + while (1) { + // End once we've reached the top of the LSQ + if (sq_it == storeQueue.end()) { + break; + } + + assert((*sq_it).inst); + + store_size = (*sq_it).size; + + if (store_size == 0) { + sq_it++; + continue; + } + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. + bool store_has_lower_limit = + req->getVaddr() >= (*sq_it).inst->effAddr; + bool store_has_upper_limit = + (req->getVaddr() + req->getSize()) <= ((*sq_it).inst->effAddr + + store_size); + bool lower_load_has_store_part = + req->getVaddr() < ((*sq_it).inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->getVaddr() + req->getSize()) > (*sq_it).inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + int shift_amt = req->getVaddr() & (store_size - 1); + // Assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? + data = (*sq_it).data >> shift_amt; + + assert(!inst->memData); + inst->memData = new uint8_t[64]; + + memcpy(inst->memData, &data, req->getSize()); + + DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to " + "[sn:%lli] addr %#x, data %#x\n", + (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(inst->memData)); +/* + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(inst, + be); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // FIXME - Need to make this a parameter. + wb->schedule(curTick); +*/ + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if ((*sq_it).completed) { + sq_it++; + break; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. 
+ if (!stalled || + (stalled && + inst->seqNum < + (*stallingLoad)->seqNum)) { + stalled = true; + stallingStoreIsn = (*sq_it).inst->seqNum; + stallingLoad = (*lq_hash_it).second; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + be->rescheduleMemInst(inst); + + DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " + "Store [sn:%lli] to load addr %#x\n", + (*sq_it).inst->seqNum, req->vaddr); + + return NoFault; + } + sq_it++; + } + + // If there's no forwarding case, then go access memory + DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n", + inst->readPC()); + + assert(!inst->memData); + inst->memData = new uint8_t[64]; + + ++usedPorts; + + DPRINTF(OzoneLSQ, "Doing timing access for inst PC %#x\n", + inst->readPC()); + + PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + + // if we have a cache, do cache access too + if (!dcachePort.sendTiming(data_pkt)) { + // There's an older load that's already going to squash. + if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) + return NoFault; + + // Record that the load was blocked due to memory. This + // load will squash all instructions after it, be + // refetched, and re-executed. + isLoadBlocked = true; + loadBlockedHandled = false; + blockedLoadSeqNum = inst->seqNum; + // No fault occurred, even though the interface is blocked. + return NoFault; + } + + if (data_pkt->result != Packet::Success) { + DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + inst->seqNum); + } else { + DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache hit!\n"); + DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", + inst->seqNum); + } + + return NoFault; +} + +template <class Impl> +template <class T> +Fault +OzoneLWLSQ<Impl>::write(RequestPtr req, T &data, int store_idx) +{ + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + + SQIt sq_it = (*sq_hash_it).second; + assert((*sq_it).inst); + + DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" + " | [sn:%lli]\n", + store_idx, req->getPaddr(), data, (*sq_it).inst->seqNum); + + (*sq_it).req = req; + (*sq_it).size = sizeof(T); + (*sq_it).data = data; +/* + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); +*/ + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +#endif // __CPU_OZONE_LW_LSQ_HH__ |
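The store-to-load forwarding test in OzoneLWLSQ<Impl>::read() above boils down to four interval comparisons between the load's and the store's address ranges. The snippet below restates that logic standalone (plain integers instead of gem5 request types) as a worked example; the names checkForward and FwdResult are illustrative only.

```cpp
#include <cassert>
#include <cstdint>

// Standalone restatement of the forwarding bounds test used in
// OzoneLWLSQ<Impl>::read() above (gem5 types replaced with plain integers).
struct FwdResult { bool full; bool partial; };

FwdResult
checkForward(uint64_t ld_addr, unsigned ld_size,
             uint64_t st_addr, unsigned st_size)
{
    bool store_has_lower_limit     = ld_addr >= st_addr;
    bool store_has_upper_limit     = ld_addr + ld_size <= st_addr + st_size;
    bool lower_load_has_store_part = ld_addr < st_addr + st_size;
    bool upper_load_has_store_part = ld_addr + ld_size > st_addr;

    FwdResult r;
    // Full forwarding: the store covers every byte the load needs.
    r.full = store_has_lower_limit && store_has_upper_limit;
    // Partial overlap: the store supplies only some of the load's bytes,
    // so the load must stall until the store writes back.
    r.partial = !r.full &&
        ((store_has_lower_limit && lower_load_has_store_part) ||
         (store_has_upper_limit && upper_load_has_store_part) ||
         (lower_load_has_store_part && upper_load_has_store_part));
    return r;
}

int main()
{
    // An 8-byte store at 0x100 fully covers a 4-byte load at 0x104.
    assert(checkForward(0x104, 4, 0x100, 8).full);
    // A 4-byte store at 0x100 only partially covers an 8-byte load at 0x100.
    assert(checkForward(0x100, 8, 0x100, 4).partial);
    return 0;
}
```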