diff options
author | Kevin Lim <ktlim@umich.edu> | 2006-05-30 14:17:41 -0400 |
---|---|---|
committer | Kevin Lim <ktlim@umich.edu> | 2006-05-30 14:17:41 -0400 |
commit | 4a5b51b516853c9fcaabc44caacdd7e8e93dc0ef (patch) | |
tree | 4b7d92408a2b74a16ae6f7b4167ded00079355ef /cpu/o3 | |
parent | d308055afc1ace1f321b76e8a85a9a45165da2ce (diff) | |
parent | f1fab2a4469d6cb2e55ebac15da02f8c1fcb7055 (diff) | |
download | gem5-4a5b51b516853c9fcaabc44caacdd7e8e93dc0ef.tar.xz |
Merge ktlim@zizzer:/bk/m5
into zamp.eecs.umich.edu:/z/ktlim2/clean/newmem
SConstruct:
src/SConscript:
src/arch/SConscript:
src/arch/alpha/faults.cc:
src/arch/alpha/tlb.cc:
src/base/traceflags.py:
src/cpu/SConscript:
src/cpu/base.cc:
src/cpu/base.hh:
src/cpu/base_dyn_inst.cc:
src/cpu/cpu_exec_context.cc:
src/cpu/cpu_exec_context.hh:
src/cpu/exec_context.hh:
src/cpu/o3/alpha_cpu.hh:
src/cpu/o3/alpha_cpu_impl.hh:
src/cpu/o3/alpha_dyn_inst.hh:
src/cpu/o3/cpu.cc:
src/cpu/o3/cpu.hh:
src/cpu/o3/regfile.hh:
src/cpu/ozone/cpu.hh:
src/cpu/simple/base.cc:
src/cpu/base_dyn_inst.hh:
src/cpu/o3/2bit_local_pred.cc:
src/cpu/o3/2bit_local_pred.hh:
src/cpu/o3/alpha_cpu.cc:
src/cpu/o3/alpha_cpu_builder.cc:
src/cpu/o3/alpha_dyn_inst.cc:
src/cpu/o3/alpha_dyn_inst_impl.hh:
src/cpu/o3/alpha_impl.hh:
src/cpu/o3/alpha_params.hh:
src/cpu/o3/bpred_unit.cc:
src/cpu/o3/bpred_unit.hh:
src/cpu/o3/bpred_unit_impl.hh:
src/cpu/o3/btb.cc:
src/cpu/o3/btb.hh:
src/cpu/o3/comm.hh:
src/cpu/o3/commit.cc:
src/cpu/o3/commit.hh:
src/cpu/o3/commit_impl.hh:
src/cpu/o3/cpu_policy.hh:
src/cpu/o3/decode.cc:
src/cpu/o3/decode.hh:
src/cpu/o3/decode_impl.hh:
src/cpu/o3/fetch.cc:
src/cpu/o3/fetch.hh:
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/free_list.cc:
src/cpu/o3/free_list.hh:
src/cpu/o3/iew.cc:
src/cpu/o3/iew.hh:
src/cpu/o3/iew_impl.hh:
src/cpu/o3/inst_queue.cc:
src/cpu/o3/inst_queue.hh:
src/cpu/o3/inst_queue_impl.hh:
src/cpu/o3/mem_dep_unit.cc:
src/cpu/o3/mem_dep_unit.hh:
src/cpu/o3/mem_dep_unit_impl.hh:
src/cpu/o3/ras.cc:
src/cpu/o3/ras.hh:
src/cpu/o3/rename.cc:
src/cpu/o3/rename.hh:
src/cpu/o3/rename_impl.hh:
src/cpu/o3/rename_map.cc:
src/cpu/o3/rename_map.hh:
src/cpu/o3/rob.cc:
src/cpu/o3/rob.hh:
src/cpu/o3/rob_impl.hh:
src/cpu/o3/sat_counter.cc:
src/cpu/o3/sat_counter.hh:
src/cpu/o3/store_set.cc:
src/cpu/o3/store_set.hh:
src/cpu/o3/tournament_pred.cc:
src/cpu/o3/tournament_pred.hh:
Hand merges.
--HG--
rename : build/SConstruct => SConstruct
rename : SConscript => src/SConscript
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/pal.isa => src/arch/alpha/isa/pal.isa
rename : base/traceflags.py => src/base/traceflags.py
rename : cpu/SConscript => src/cpu/SConscript
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.cc => src/cpu/base_dyn_inst.cc
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/cpu_exec_context.cc => src/cpu/cpu_exec_context.cc
rename : cpu/cpu_exec_context.hh => src/cpu/cpu_exec_context.hh
rename : cpu/cpu_models.py => src/cpu/cpu_models.py
rename : cpu/exec_context.hh => src/cpu/exec_context.hh
rename : cpu/exetrace.cc => src/cpu/exetrace.cc
rename : cpu/exetrace.hh => src/cpu/exetrace.hh
rename : cpu/inst_seq.hh => src/cpu/inst_seq.hh
rename : cpu/o3/2bit_local_pred.cc => src/cpu/o3/2bit_local_pred.cc
rename : cpu/o3/2bit_local_pred.hh => src/cpu/o3/2bit_local_pred.hh
rename : cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha_cpu.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha_cpu_builder.cc
rename : cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha_cpu_impl.hh
rename : cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha_dyn_inst.hh
rename : cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha_dyn_inst_impl.hh
rename : cpu/o3/alpha_impl.hh => src/cpu/o3/alpha_impl.hh
rename : cpu/o3/alpha_params.hh => src/cpu/o3/alpha_params.hh
rename : cpu/o3/bpred_unit.cc => src/cpu/o3/bpred_unit.cc
rename : cpu/o3/bpred_unit.hh => src/cpu/o3/bpred_unit.hh
rename : cpu/o3/bpred_unit_impl.hh => src/cpu/o3/bpred_unit_impl.hh
rename : cpu/o3/btb.cc => src/cpu/o3/btb.cc
rename : cpu/o3/btb.hh => src/cpu/o3/btb.hh
rename : cpu/o3/comm.hh => src/cpu/o3/comm.hh
rename : cpu/o3/commit.cc => src/cpu/o3/commit.cc
rename : cpu/o3/commit.hh => src/cpu/o3/commit.hh
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/cpu.hh => src/cpu/o3/cpu.hh
rename : cpu/o3/cpu_policy.hh => src/cpu/o3/cpu_policy.hh
rename : cpu/o3/decode.cc => src/cpu/o3/decode.cc
rename : cpu/o3/decode.hh => src/cpu/o3/decode.hh
rename : cpu/o3/decode_impl.hh => src/cpu/o3/decode_impl.hh
rename : cpu/o3/fetch.cc => src/cpu/o3/fetch.cc
rename : cpu/o3/fetch.hh => src/cpu/o3/fetch.hh
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/free_list.cc => src/cpu/o3/free_list.cc
rename : cpu/o3/free_list.hh => src/cpu/o3/free_list.hh
rename : cpu/o3/iew.cc => src/cpu/o3/iew.cc
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.cc => src/cpu/o3/inst_queue.cc
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/mem_dep_unit.cc => src/cpu/o3/mem_dep_unit.cc
rename : cpu/o3/mem_dep_unit.hh => src/cpu/o3/mem_dep_unit.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/ras.cc => src/cpu/o3/ras.cc
rename : cpu/o3/ras.hh => src/cpu/o3/ras.hh
rename : cpu/o3/regfile.hh => src/cpu/o3/regfile.hh
rename : cpu/o3/rename.cc => src/cpu/o3/rename.cc
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/rename_map.cc => src/cpu/o3/rename_map.cc
rename : cpu/o3/rename_map.hh => src/cpu/o3/rename_map.hh
rename : cpu/o3/rob.hh => src/cpu/o3/rob.hh
rename : cpu/o3/rob_impl.hh => src/cpu/o3/rob_impl.hh
rename : cpu/o3/sat_counter.hh => src/cpu/o3/sat_counter.hh
rename : cpu/o3/store_set.cc => src/cpu/o3/store_set.cc
rename : cpu/o3/store_set.hh => src/cpu/o3/store_set.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/ozone/cpu.cc => src/cpu/ozone/cpu.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/static_inst.hh => src/cpu/static_inst.hh
rename : kern/system_events.cc => src/kern/system_events.cc
rename : kern/tru64/tru64.hh => src/kern/tru64/tru64.hh
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/AlphaFullCPU.py
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
extra : convert_revision : ff351fc0e3a7c0f23e59fdbec33d8209eb9280be
Diffstat (limited to 'cpu/o3')
-rw-r--r-- | cpu/o3/dep_graph.hh | 213 | ||||
-rw-r--r-- | cpu/o3/fu_pool.cc | 295 | ||||
-rw-r--r-- | cpu/o3/fu_pool.hh | 162 | ||||
-rw-r--r-- | cpu/o3/lsq.cc | 36 | ||||
-rw-r--r-- | cpu/o3/lsq.hh | 324 | ||||
-rw-r--r-- | cpu/o3/lsq_impl.hh | 538 | ||||
-rw-r--r-- | cpu/o3/lsq_unit.cc | 36 | ||||
-rw-r--r-- | cpu/o3/lsq_unit.hh | 632 | ||||
-rw-r--r-- | cpu/o3/lsq_unit_impl.hh | 873 | ||||
-rw-r--r-- | cpu/o3/scoreboard.cc | 106 | ||||
-rw-r--r-- | cpu/o3/scoreboard.hh | 114 | ||||
-rw-r--r-- | cpu/o3/thread_state.hh | 120 |
12 files changed, 3449 insertions, 0 deletions
diff --git a/cpu/o3/dep_graph.hh b/cpu/o3/dep_graph.hh new file mode 100644 index 000000000..f8ae38da4 --- /dev/null +++ b/cpu/o3/dep_graph.hh @@ -0,0 +1,213 @@ + +#ifndef __CPU_O3_DEP_GRAPH_HH__ +#define __CPU_O3_DEP_GRAPH_HH__ + +#include "cpu/o3/comm.hh" + +template <class DynInstPtr> +class DependencyEntry +{ + public: + DependencyEntry() + : inst(NULL), next(NULL) + { } + + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry<DynInstPtr> *next; +}; + +template <class DynInstPtr> +class DependencyGraph +{ + public: + typedef DependencyEntry<DynInstPtr> DepEntry; + + DependencyGraph() + : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) + { } + + void resize(int num_entries); + + void reset(); + + void insert(PhysRegIndex idx, DynInstPtr &new_inst); + + void setInst(PhysRegIndex idx, DynInstPtr &new_inst) + { dependGraph[idx].inst = new_inst; } + + void clearInst(PhysRegIndex idx) + { dependGraph[idx].inst = NULL; } + + void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove); + + DynInstPtr pop(PhysRegIndex idx); + + bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } + + /** Debugging function to dump out the dependency graph. + */ + void dump(); + + private: + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DepEntry *dependGraph; + + int numEntries; + + // Debug variable, remove when done testing. 
+ unsigned memAllocCounter; + + public: + uint64_t nodesTraversed; + uint64_t nodesRemoved; +}; + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::resize(int num_entries) +{ + numEntries = num_entries; + dependGraph = new DepEntry[numEntries]; +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::reset() +{ + // Clear the dependency graph + DepEntry *curr; + DepEntry *prev; + + for (int i = 0; i < numEntries; ++i) { + curr = dependGraph[i].next; + + while (curr) { + memAllocCounter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + if (dependGraph[i].inst) { + dependGraph[i].inst = NULL; + } + + dependGraph[i].next = NULL; + } +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DepEntry *new_entry = new DepEntry; + new_entry->next = dependGraph[idx].next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + dependGraph[idx].next = new_entry; + + ++memAllocCounter; +} + + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx, + DynInstPtr &inst_to_remove) +{ + DepEntry *prev = &dependGraph[idx]; + DepEntry *curr = dependGraph[idx].next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + nodesRemoved++; + + // Find the instruction to remove within the dependency linked list. + while (curr->inst != inst_to_remove) { + prev = curr; + curr = curr->next; + nodesTraversed++; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. 
+ prev->next = curr->next; + + --memAllocCounter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template <class DynInstPtr> +DynInstPtr +DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx) +{ + DepEntry *node; + node = dependGraph[idx].next; + DynInstPtr inst = NULL; + if (node) { + inst = node->inst; + dependGraph[idx].next = node->next; + node->inst = NULL; + memAllocCounter--; + delete node; + } + return inst; +} + +template <class DynInstPtr> +void +DependencyGraph<DynInstPtr>::dump() +{ + DepEntry *curr; + + for (int i = 0; i < numEntries; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", + i, curr->inst->readPC(), curr->inst->seqNum); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x [sn:%lli] ", + curr->inst->readPC(), curr->inst->seqNum); + } + + cprintf("\n"); + } + cprintf("memAllocCounter: %i\n", memAllocCounter); +} + +#endif // __CPU_O3_DEP_GRAPH_HH__ diff --git a/cpu/o3/fu_pool.cc b/cpu/o3/fu_pool.cc new file mode 100644 index 000000000..fb2b5c00d --- /dev/null +++ b/cpu/o3/fu_pool.cc @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sstream> + +#include "cpu/o3/fu_pool.hh" +#include "encumbered/cpu/full/fu_pool.hh" +#include "sim/builder.hh" + +using namespace std; + +//////////////////////////////////////////////////////////////////////////// +// +// A pool of function units +// + +inline void +FUPool::FUIdxQueue::addFU(int fu_idx) +{ + funcUnitsIdx.push_back(fu_idx); + ++size; +} + +inline int +FUPool::FUIdxQueue::getFU() +{ + int retval = funcUnitsIdx[idx++]; + + if (idx == size) + idx = 0; + + return retval; +} + +FUPool::~FUPool() +{ + fuListIterator i = funcUnits.begin(); + fuListIterator end = funcUnits.end(); + for (; i != end; ++i) + delete *i; +} + + +// Constructor +FUPool::FUPool(string name, vector<FUDesc *> paramList) + : SimObject(name) +{ + numFU = 0; + + funcUnits.clear(); + + for (int i = 0; i < Num_OpClasses; ++i) { + maxOpLatencies[i] = 0; + maxIssueLatencies[i] = 0; + } + + // + // Iterate through the list of FUDescData structures + // + for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) { + + // + // Don't bother with this if we're not going to create any FU's + // + if ((*i)->number) { + // + // Create the FuncUnit object from this structure + // - add the capabilities listed in the FU's operation + // description + // + // We create the first unit, then duplicate it as needed + // + FuncUnit *fu = new FuncUnit; + + OPDDiterator j = (*i)->opDescList.begin(); + OPDDiterator end = (*i)->opDescList.end(); + for (; j != end; ++j) { + // indicate that this pool has this capability + capabilityList.set((*j)->opClass); + + // Add each of the FU's that will have this capability to the + // appropriate queue. 
+ for (int k = 0; k < (*i)->number; ++k) + fuPerCapList[(*j)->opClass].addFU(numFU + k); + + // indicate that this FU has the capability + fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat); + + if ((*j)->opLat > maxOpLatencies[(*j)->opClass]) + maxOpLatencies[(*j)->opClass] = (*j)->opLat; + + if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass]) + maxIssueLatencies[(*j)->opClass] = (*j)->issueLat; + } + + numFU++; + + // Add the appropriate number of copies of this FU to the list + ostringstream s; + + s << (*i)->name() << "(0)"; + fu->name = s.str(); + funcUnits.push_back(fu); + + for (int c = 1; c < (*i)->number; ++c) { + ostringstream s; + numFU++; + FuncUnit *fu2 = new FuncUnit(*fu); + + s << (*i)->name() << "(" << c << ")"; + fu2->name = s.str(); + funcUnits.push_back(fu2); + } + } + } + + unitBusy.resize(numFU); + + for (int i = 0; i < numFU; i++) { + unitBusy[i] = false; + } +} + +void +FUPool::annotateMemoryUnits(unsigned hit_latency) +{ + maxOpLatencies[MemReadOp] = hit_latency; + + fuListIterator i = funcUnits.begin(); + fuListIterator iend = funcUnits.end(); + for (; i != iend; ++i) { + if ((*i)->provides(MemReadOp)) + (*i)->opLatency(MemReadOp) = hit_latency; + + if ((*i)->provides(MemWriteOp)) + (*i)->opLatency(MemWriteOp) = hit_latency; + } +} + +int +FUPool::getUnit(OpClass capability) +{ + // If this pool doesn't have the specified capability, + // return this information to the caller + if (!capabilityList[capability]) + return -2; + + int fu_idx = fuPerCapList[capability].getFU(); + int start_idx = fu_idx; + + // Iterate through the circular queue if needed, stopping if we've reached + // the first element again. 
+ while (unitBusy[fu_idx]) { + fu_idx = fuPerCapList[capability].getFU(); + if (fu_idx == start_idx) { + // No FU available + return -1; + } + } + + unitBusy[fu_idx] = true; + + return fu_idx; +} + +void +FUPool::freeUnitNextCycle(int fu_idx) +{ + assert(unitBusy[fu_idx]); + unitsToBeFreed.push_back(fu_idx); +} + +void +FUPool::processFreeUnits() +{ + while (!unitsToBeFreed.empty()) { + int fu_idx = unitsToBeFreed.back(); + unitsToBeFreed.pop_back(); + + assert(unitBusy[fu_idx]); + + unitBusy[fu_idx] = false; + } +} + +void +FUPool::dump() +{ + cout << "Function Unit Pool (" << name() << ")\n"; + cout << "======================================\n"; + cout << "Free List:\n"; + + for (int i = 0; i < numFU; ++i) { + if (unitBusy[i]) { + continue; + } + + cout << " [" << i << "] : "; + + cout << funcUnits[i]->name << " "; + + cout << "\n"; + } + + cout << "======================================\n"; + cout << "Busy List:\n"; + for (int i = 0; i < numFU; ++i) { + if (!unitBusy[i]) { + continue; + } + + cout << " [" << i << "] : "; + + cout << funcUnits[i]->name << " "; + + cout << "\n"; + } +} + +void +FUPool::switchOut() +{ +} + +void +FUPool::takeOverFrom() +{ + for (int i = 0; i < numFU; i++) { + unitBusy[i] = false; + } + unitsToBeFreed.clear(); +} + +// + +//////////////////////////////////////////////////////////////////////////// +// +// The SimObjects we use to get the FU information into the simulator +// +//////////////////////////////////////////////////////////////////////////// + +// +// FUPool - Contails a list of FUDesc objects to make available +// + +// +// The FuPool object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUPool) + + SimObjectVectorParam<FUDesc *> FUList; + +END_DECLARE_SIM_OBJECT_PARAMS(FUPool) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(FUPool) + + INIT_PARAM(FUList, "list of FU's for this pool") + +END_INIT_SIM_OBJECT_PARAMS(FUPool) + + +CREATE_SIM_OBJECT(FUPool) +{ + return new FUPool(getInstanceName(), FUList); +} + +REGISTER_SIM_OBJECT("FUPool", 
FUPool) + diff --git a/cpu/o3/fu_pool.hh b/cpu/o3/fu_pool.hh new file mode 100644 index 000000000..da6fdc802 --- /dev/null +++ b/cpu/o3/fu_pool.hh @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_O3_FU_POOL_HH__ +#define __CPU_O3_FU_POOL_HH__ + +#include <bitset> +#include <list> +#include <string> +#include <vector> + +#include "base/sched_list.hh" +#include "encumbered/cpu/full/op_class.hh" +#include "sim/sim_object.hh" + +class FUDesc; +class FuncUnit; + +/** + * Pool of FU's, specific to the new CPU model. The old FU pool had lists of + * free units and busy units, and whenever a FU was needed it would iterate + * through the free units to find a FU that provided the capability. This pool + * has lists of units specific to each of the capabilities, and whenever a FU + * is needed, it iterates through that list to find a free unit. The previous + * FU pool would have to be ticked each cycle to update which units became + * free. This FU pool lets the IEW stage handle freeing units, which frees + * them as their scheduled execution events complete. This limits units in this + * model to either have identical issue and op latencies, or 1 cycle issue + * latencies. + */ +class FUPool : public SimObject +{ + private: + /** Maximum op execution latencies, per op class. */ + unsigned maxOpLatencies[Num_OpClasses]; + /** Maximum issue latencies, per op class. */ + unsigned maxIssueLatencies[Num_OpClasses]; + + /** Bitvector listing capabilities of this FU pool. */ + std::bitset<Num_OpClasses> capabilityList; + + /** Bitvector listing which FUs are busy. */ + std::vector<bool> unitBusy; + + /** List of units to be freed at the end of this cycle. */ + std::vector<int> unitsToBeFreed; + + /** + * Class that implements a circular queue to hold FU indices. The hope is + * that FUs that have been just used will be moved to the end of the queue + * by iterating through it, thus leaving free units at the head of the + * queue. + */ + class FUIdxQueue { + public: + /** Constructs a circular queue of FU indices. */ + FUIdxQueue() + : idx(0), size(0) + { } + + /** Adds a FU to the queue. 
*/ + inline void addFU(int fu_idx); + + /** Returns the index of the FU at the head of the queue, and changes + * the index to the next element. + */ + inline int getFU(); + + private: + /** Circular queue index. */ + int idx; + + /** Size of the queue. */ + int size; + + /** Queue of FU indices. */ + std::vector<int> funcUnitsIdx; + }; + + /** Per op class queues of FUs that provide that capability. */ + FUIdxQueue fuPerCapList[Num_OpClasses]; + + /** Number of FUs. */ + int numFU; + + /** Functional units. */ + std::vector<FuncUnit *> funcUnits; + + typedef std::vector<FuncUnit *>::iterator fuListIterator; + + public: + + /** Constructs a FU pool. */ + FUPool(std::string name, std::vector<FUDesc *> l); + ~FUPool(); + + /** Annotates units that provide memory operations. Included only because + * old FU pool provided this function. + */ + void annotateMemoryUnits(unsigned hit_latency); + + /** + * Gets a FU providing the requested capability. Will mark the unit as busy, + * but leaves the freeing of the unit up to the IEW stage. + * @param capability The capability requested. + * @return Returns -2 if the FU pool does not have the capability, -1 if + * there is no free FU, and the FU's index otherwise. + */ + int getUnit(OpClass capability); + + /** Frees a FU at the end of this cycle. */ + void freeUnitNextCycle(int fu_idx); + + /** Frees all FUs on the list. */ + void processFreeUnits(); + + /** Returns the total number of FUs. */ + int size() { return numFU; } + + /** Debugging function used to dump FU information. */ + void dump(); + + /** Returns the operation execution latency of the given capability. */ + unsigned getOpLatency(OpClass capability) { + return maxOpLatencies[capability]; + } + + /** Returns the issue latency of the given capability. 
*/ + unsigned getIssueLatency(OpClass capability) { + return maxIssueLatencies[capability]; + } + + void switchOut(); + void takeOverFrom(); +}; + +#endif // __CPU_O3_FU_POOL_HH__ diff --git a/cpu/o3/lsq.cc b/cpu/o3/lsq.cc new file mode 100644 index 000000000..8991ab8f8 --- /dev/null +++ b/cpu/o3/lsq.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/lsq_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class LSQ<AlphaSimpleImpl>; + diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh new file mode 100644 index 000000000..a1eeccbe7 --- /dev/null +++ b/cpu/o3/lsq.hh @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_LSQ_HH__ +#define __CPU_O3_LSQ_HH__ + +#include <map> +#include <queue> + +#include "config/full_system.hh" +#include "cpu/inst_seq.hh" +//#include "cpu/o3/cpu_policy.hh" +#include "cpu/o3/lsq_unit.hh" +#include "mem/mem_interface.hh" +//#include "mem/page_table.hh" +#include "sim/sim_object.hh" + +template <class Impl> +class LSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQUnit LSQUnit; + + enum LSQPolicy { + Dynamic, + Partitioned, + Threshold + }; + + /** Constructs an LSQ with the given parameters. */ + LSQ(Params *params); + + /** Returns the name of the LSQ. */ + std::string name() const; + + /** Sets the pointer to the list of active threads. */ + void setActiveThreads(std::list<unsigned> *at_ptr); + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr); + /** Sets the IEW stage pointer. */ + void setIEW(IEW *iew_ptr); + /** Sets the page table pointer. */ +// void setPageTable(PageTable *pt_ptr); + + void switchOut(); + void takeOverFrom(); + + /** Number of entries needed for the given amount of threads.*/ + int entryAmount(int num_threads); + void removeEntries(unsigned tid); + /** Reset the max entries for each thread. */ + void resetEntries(); + /** Resize the max entries for a thread. 
*/ + void resizeEntries(unsigned size, unsigned tid); + + /** Ticks the LSQ. */ + void tick(); + /** Ticks a specific LSQ Unit. */ + void tick(unsigned tid) + { thread[tid].tick(); } + + /** Inserts a load into the LSQ. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store into the LSQ. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load. */ + Fault executeLoad(DynInstPtr &inst); + + Fault executeLoad(int lq_idx, unsigned tid) + { return thread[tid].executeLoad(lq_idx); } + + /** Executes a store. */ + Fault executeStore(DynInstPtr &inst); + + /** + * Commits loads up until the given sequence number for a specific thread. + */ + void commitLoads(InstSeqNum &youngest_inst, unsigned tid) + { thread[tid].commitLoads(youngest_inst); } + + /** + * Commits stores up until the given sequence number for a specific thread. + */ + void commitStores(InstSeqNum &youngest_inst, unsigned tid) + { thread[tid].commitStores(youngest_inst); } + + /** + * Attempts to write back stores until all cache ports are used or the + * interface becomes blocked. + */ + void writebackStores(); + /** Same as above, but only for one thread. */ + void writebackStores(unsigned tid); + + /** + * Squash instructions from a thread until the specified sequence number. + */ + void squash(const InstSeqNum &squashed_num, unsigned tid) + { thread[tid].squash(squashed_num); } + + /** Returns whether or not there was a memory ordering violation. */ + bool violation(); + /** + * Returns whether or not there was a memory ordering violation for a + * specific thread. + */ + bool violation(unsigned tid) + { return thread[tid].violation(); } + + /** Returns if a load is blocked due to the memory system for a specific + * thread. 
+ */ + bool loadBlocked(unsigned tid) + { return thread[tid].loadBlocked(); } + + bool isLoadBlockedHandled(unsigned tid) + { return thread[tid].isLoadBlockedHandled(); } + + void setLoadBlockedHandled(unsigned tid) + { thread[tid].setLoadBlockedHandled(); } + + /** Gets the instruction that caused the memory ordering violation. */ + DynInstPtr getMemDepViolator(unsigned tid) + { return thread[tid].getMemDepViolator(); } + + /** Returns the head index of the load queue for a specific thread. */ + int getLoadHead(unsigned tid) + { return thread[tid].getLoadHead(); } + + /** Returns the sequence number of the head of the load queue. */ + InstSeqNum getLoadHeadSeqNum(unsigned tid) + { + return thread[tid].getLoadHeadSeqNum(); + } + + /** Returns the head index of the store queue. */ + int getStoreHead(unsigned tid) + { return thread[tid].getStoreHead(); } + + /** Returns the sequence number of the head of the store queue. */ + InstSeqNum getStoreHeadSeqNum(unsigned tid) + { + return thread[tid].getStoreHeadSeqNum(); + } + + /** Returns the number of instructions in all of the queues. */ + int getCount(); + /** Returns the number of instructions in the queues of one thread. */ + int getCount(unsigned tid) + { return thread[tid].getCount(); } + + /** Returns the total number of loads in the load queue. */ + int numLoads(); + /** Returns the total number of loads for a single thread. */ + int numLoads(unsigned tid) + { return thread[tid].numLoads(); } + + /** Returns the total number of stores in the store queue. */ + int numStores(); + /** Returns the total number of stores for a single thread. */ + int numStores(unsigned tid) + { return thread[tid].numStores(); } + + /** Returns the total number of loads that are ready. */ + int numLoadsReady(); + /** Returns the number of loads that are ready for a single thread. */ + int numLoadsReady(unsigned tid) + { return thread[tid].numLoadsReady(); } + + /** Returns the number of free entries. 
*/
+    unsigned numFreeEntries();
+    /** Returns the number of free entries for a specific thread. */
+    unsigned numFreeEntries(unsigned tid);
+
+    /**
+     * Returns if the LSQ is full. As implemented this is true only when
+     * every active thread has either its LQ or its SQ full; a single active
+     * thread with room in both queues makes it false.
+     */
+    bool isFull();
+    /**
+     * Returns if the LSQ is full for a specific thread (either LQ or SQ is
+     * full). Under the Dynamic sharing policy the thread id is ignored and
+     * the result of isFull() is returned instead.
+     */
+    bool isFull(unsigned tid);
+
+    /**
+     * Returns if the LQs are full. As implemented this is true only when
+     * the LQ of every active thread is full.
+     */
+    bool lqFull();
+    /**
+     * Returns if the LQ of a given thread is full. Under the Dynamic
+     * sharing policy the result of lqFull() is returned instead.
+     */
+    bool lqFull(unsigned tid);
+
+    /**
+     * Returns if the SQs are full. As implemented this is true only when
+     * the SQ of every active thread is full.
+     */
+    bool sqFull();
+    /**
+     * Returns if the SQ of a given thread is full. Under the Dynamic
+     * sharing policy the result of sqFull() is returned instead.
+     */
+    bool sqFull(unsigned tid);
+
+    /**
+     * Returns if the LSQ is stalled due to a memory operation that must be
+     * replayed. As implemented this is true only when every active thread
+     * is stalled.
+     */
+    bool isStalled();
+    /**
+     * Returns if the LSQ of a specific thread is stalled due to a memory
+     * operation that must be replayed. Under the Dynamic sharing policy the
+     * result of isStalled() is returned instead.
+     */
+    bool isStalled(unsigned tid);
+
+    /** Returns whether or not there are any stores to write back to memory. */
+    bool hasStoresToWB();
+
+    /** Returns whether or not a specific thread has any stores to write back
+     * to memory.
+     */
+    bool hasStoresToWB(unsigned tid)
+    { return thread[tid].hasStoresToWB(); }
+
+    /** Returns the number of stores a specific thread has to write back. */
+    int numStoresToWB(unsigned tid)
+    { return thread[tid].numStoresToWB(); }
+
+    /** Returns if the LSQ will write back to memory this cycle. */
+    bool willWB();
+    /** Returns if the LSQ of a specific thread will write back to memory this
+     * cycle.
+     */
+    bool willWB(unsigned tid)
+    { return thread[tid].willWB(); }
+
+    /** Debugging function to print out all instructions. */
+    void dumpInsts();
+    /** Debugging function to print out instructions from a specific thread. */
+    void dumpInsts(unsigned tid)
+    { thread[tid].dumpInsts(); }
+
+    /** Executes a read operation, using the load specified at the load index.
*/
+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx);
+
+    /** Executes a store operation, using the store specified at the store
+     *   index.
+     */
+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx);
+
+  private:
+    /** The LSQ policy for SMT mode. */
+    LSQPolicy lsqPolicy;
+
+    /** The LSQ units for individual threads, indexed by thread id. */
+    LSQUnit thread[Impl::MaxThreads];
+
+    /** The CPU pointer. */
+    FullCPU *cpu;
+
+    /** The IEW stage pointer. */
+    IEW *iewStage;
+
+    /** The pointer to the page table (currently disabled). */
+//    PageTable *pTable;
+
+    /** List of Active Threads in System. */
+    std::list<unsigned> *activeThreads;
+
+    /** Total Size of LQ Entries. */
+    unsigned LQEntries;
+    /** Total Size of SQ Entries. */
+    unsigned SQEntries;
+
+    /** Max LQ Size - Used to Enforce Sharing Policies. */
+    unsigned maxLQEntries;
+
+    /** Max SQ Size - Used to Enforce Sharing Policies. */
+    unsigned maxSQEntries;
+
+    /** Number of Threads. */
+    unsigned numThreads;
+};
+
+/**
+ * Dispatches a read to the LSQ unit of the thread recorded in the request
+ * (req->thread_num) and returns that unit's fault.
+ */
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+    unsigned tid = req->thread_num;
+
+    return thread[tid].read(req, data, load_idx);
+}
+
+/**
+ * Dispatches a write to the LSQ unit of the thread recorded in the request
+ * (req->thread_num) and returns that unit's fault.
+ */
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{
+    unsigned tid = req->thread_num;
+
+    return thread[tid].write(req, data, store_idx);
+}
+
+#endif // __CPU_O3_LSQ_HH__
diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh
new file mode 100644
index 000000000..a6ad27522
--- /dev/null
+++ b/cpu/o3/lsq_impl.hh
@@ -0,0 +1,538 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <algorithm>
+#include <cctype>
+#include <string>
+
+#include "cpu/o3/lsq.hh"
+
+using namespace std;
+
+/**
+ * Constructs the LSQ: records the total LQ/SQ sizes and the thread count
+ * from params, selects the SMT sharing policy named by
+ * params->smtLSQPolicy (case-insensitive: "dynamic", "partitioned" or
+ * "threshold"), derives the per-thread maximum LQ/SQ sizes for that policy,
+ * and initializes one LSQ unit per thread.
+ */
+template <class Impl>
+LSQ<Impl>::LSQ(Params *params)
+    : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
+      numThreads(params->numberOfThreads)
+{
+    DPRINTF(LSQ, "Creating LSQ object.\n");
+
+    // thread[] is a fixed-size array of Impl::MaxThreads units and the
+    // Partitioned policy divides by the thread count, so reject
+    // configurations that would overrun the array or divide by zero.
+    assert(numThreads > 0);
+    assert(numThreads <= Impl::MaxThreads);
+
+    //**********************************************/
+    //************ Handle SMT Parameters ***********/
+    //**********************************************/
+    string policy = params->smtLSQPolicy;
+
+    //Convert string to lowercase
+    std::transform(policy.begin(), policy.end(), policy.begin(),
+                   (int(*)(int)) tolower);
+
+    //Figure out LSQ sharing policy
+    if (policy == "dynamic") {
+        lsqPolicy = Dynamic;
+
+        maxLQEntries = LQEntries;
+        maxSQEntries = SQEntries;
+
+        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
+
+    } else if (policy == "partitioned") {
+        lsqPolicy = Partitioned;
+
+        //@todo:make work if part_amt doesnt divide evenly.
+        maxLQEntries = LQEntries / numThreads;
+        maxSQEntries = SQEntries / numThreads;
+
+        // Logged under the LSQ flag like the other policies (was Fetch),
+        // and newline-terminated so it does not run into the next message.
+        DPRINTF(LSQ, "LSQ sharing policy set to Partitioned: "
+                "%i entries per LQ | %i entries per SQ\n",
+                maxLQEntries,maxSQEntries);
+
+    } else if (policy == "threshold") {
+        lsqPolicy = Threshold;
+
+        // NOTE(review): these require the threshold to exceed the total
+        // queue sizes, which looks inverted for a per-thread cap -- confirm
+        // the intended direction before changing.
+        assert(params->smtLSQThreshold > LQEntries);
+        assert(params->smtLSQThreshold > SQEntries);
+
+        //Divide up by threshold amount
+        //@todo: Should threads check the max and the total
+        //amount of the LSQ
+        maxLQEntries  = params->smtLSQThreshold;
+        maxSQEntries  = params->smtLSQThreshold;
+
+        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
+                "%i entries per LQ | %i entries per SQ\n",
+                maxLQEntries,maxSQEntries);
+
+    } else {
+        assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
+                    "Partitioned, Threshold}");
+    }
+
+    //Initialize LSQs
+    for (unsigned tid = 0; tid < numThreads; tid++) {
+        thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
+    }
+}
+
+
+template<class Impl>
+std::string
+LSQ<Impl>::name() const
+{
+    return iewStage->name() + ".lsq";
+}
+
+/** Records the list of active threads; the pointer must not be null. */
+template<class Impl>
+void
+LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
+    activeThreads = at_ptr;
+    assert(activeThreads != 0);
+}
+
+/** Sets the CPU pointer here and in every per-thread LSQ unit. */
+template<class Impl>
+void
+LSQ<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+    cpu = cpu_ptr;
+
+    for (unsigned tid = 0; tid < numThreads; tid++) {
+        thread[tid].setCPU(cpu_ptr);
+    }
+}
+
+/** Sets the IEW stage pointer here and in every per-thread LSQ unit. */
+template<class Impl>
+void
+LSQ<Impl>::setIEW(IEW *iew_ptr)
+{
+    iewStage = iew_ptr;
+
+    for (unsigned tid = 0; tid < numThreads; tid++) {
+        thread[tid].setIEW(iew_ptr);
+    }
+}
+
+#if 0
+template<class Impl>
+void
+LSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+    for (int tid=0; tid < numThreads; tid++) {
+        thread[tid].setPageTable(pt_ptr);
+    }
+}
+#endif
+
+/** Calls switchOut() on every per-thread LSQ unit. */
+template <class Impl>
+void
+LSQ<Impl>::switchOut()
+{
+    for (unsigned tid = 0; tid < numThreads; tid++) {
+        thread[tid].switchOut();
+    }
+}
+
+/** Calls takeOverFrom() on every per-thread LSQ unit. */
+template <class Impl>
+void
+LSQ<Impl>::takeOverFrom()
+{
+    for (unsigned tid = 0; tid < numThreads; tid++) {
+        thread[tid].takeOverFrom();
+    }
+}
+
+/**
+ * Returns the number of LQ entries each of num_threads threads gets under
+ * the Partitioned policy, or 0 under any other policy. A non-positive
+ * thread count also yields 0 instead of dividing by zero.
+ */
+template <class Impl>
+int
+LSQ<Impl>::entryAmount(int num_threads)
+{
+    if (lsqPolicy != Partitioned || num_threads <= 0) {
+        return 0;
+    }
+
+    return LQEntries / num_threads;
+}
+
+/**
+ * Resizes the LQ and SQ of every active thread according to the sharing
+ * policy: LQEntries divided by the number of active threads when
+ * Partitioned, LQEntries otherwise. Does nothing for a single-threaded
+ * Dynamic configuration or when no thread is active.
+ */
+template <class Impl>
+void
+LSQ<Impl>::resetEntries()
+{
+    if (lsqPolicy != Dynamic || numThreads > 1) {
+        int active_threads = (*activeThreads).size();
+
+        // Nothing to resize, and the Partitioned case below would divide
+        // by zero.
+        if (active_threads == 0) {
+            return;
+        }
+
+        list<unsigned>::iterator threads  = (*activeThreads).begin();
+        list<unsigned>::iterator list_end = (*activeThreads).end();
+
+        // Every policy other than Partitioned gives each thread the full
+        // LQEntries (the former Threshold branches assigned the same value).
+        int maxEntries = LQEntries;
+
+        if (lsqPolicy == Partitioned) {
+            maxEntries = LQEntries / active_threads;
+        }
+
+        while (threads != list_end) {
+            resizeEntries(maxEntries,*threads++);
+        }
+    }
+}
+
+/** Clears both the LQ and the SQ of the given thread. */
+template<class Impl>
+void
+LSQ<Impl>::removeEntries(unsigned tid)
+{
+    thread[tid].clearLQ();
+    thread[tid].clearSQ();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::resizeEntries(unsigned size,unsigned tid)
+{
+    thread[tid].resizeLQ(size);
+    thread[tid].resizeSQ(size);
+}
+
+/** Ticks the LSQ unit of every active thread. */
+template<class Impl>
+void
+LSQ<Impl>::tick()
+{
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        thread[*it].tick();
+    }
+}
+
+/** Inserts a load into the LSQ unit of the load's own thread. */
+template<class Impl>
+void
+LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+    const unsigned owner = load_inst->threadNumber;
+    thread[owner].insertLoad(load_inst);
+}
+
+/** Inserts a store into the LSQ unit of the store's own thread. */
+template<class Impl>
+void
+LSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+    const unsigned owner = store_inst->threadNumber;
+    thread[owner].insertStore(store_inst);
+}
+
+/** Executes a load in the LSQ unit of the instruction's thread. */
+template<class Impl>
+Fault
+LSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+    const unsigned owner = inst->threadNumber;
+    return thread[owner].executeLoad(inst);
+}
+
+/** Executes a store in the LSQ unit of the instruction's thread. */
+template<class Impl>
+Fault
+LSQ<Impl>::executeStore(DynInstPtr &inst)
+{
+    const unsigned owner = inst->threadNumber;
+    return thread[owner].executeStore(inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::writebackStores()
+{
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+
+        if (numStoresToWB(tid) > 0) {
+            DPRINTF(Writeback,"[tid:%i] Writing back stores. 
%i stores "
+                "available for Writeback.\n", tid, numStoresToWB(tid));
+        }
+
+        thread[tid].writebackStores();
+    }
+}
+
+/** Returns true if any active thread reports a memory ordering violation. */
+template<class Impl>
+bool
+LSQ<Impl>::violation()
+{
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        if (thread[*it].violation())
+            return true;
+    }
+
+    return false;
+}
+
+/** Sums the instruction counts of all active threads. */
+template<class Impl>
+int
+LSQ<Impl>::getCount()
+{
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        sum += getCount(*it);
+    }
+
+    return sum;
+}
+
+/** Sums the load counts of all active threads. */
+template<class Impl>
+int
+LSQ<Impl>::numLoads()
+{
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        sum += numLoads(*it);
+    }
+
+    return sum;
+}
+
+/** Sums the store counts of all active threads. */
+template<class Impl>
+int
+LSQ<Impl>::numStores()
+{
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        sum += thread[*it].numStores();
+    }
+
+    return sum;
+}
+
+/** Sums the ready-load counts of all active threads. */
+template<class Impl>
+int
+LSQ<Impl>::numLoadsReady()
+{
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        sum += thread[*it].numLoadsReady();
+    }
+
+    return sum;
+}
+
+/** Sums the free-entry counts of all active threads. */
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries()
+{
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        sum += thread[*it].numFreeEntries();
+    }
+
+    return sum;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries(unsigned tid)
+{
+    //if( lsqPolicy == Dynamic )
+    //return numFreeEntries();
+    //else
+        return thread[tid].numFreeEntries();
+}
+
+/**
+ * Returns true only when every active thread has either its LQ or its SQ
+ * full; the first active thread with room in both queues makes it false.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::isFull()
+{
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        if (! (thread[tid].lqFull() || thread[tid].sqFull()) )
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Per-thread full check; under the Dynamic policy the thread id is ignored
+ * and the all-threads check is used instead.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::isFull(unsigned tid)
+{
+    //@todo: Change to Calculate All Entries for
+    //Dynamic Policy
+    if( lsqPolicy == Dynamic )
+        return isFull();
+    else
+        return thread[tid].lqFull() || thread[tid].sqFull();
+}
+
+/** Returns true only when the LQ of every active thread is full. */
+template<class Impl>
+bool
+LSQ<Impl>::lqFull()
+{
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        if (!thread[tid].lqFull())
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Per-thread LQ full check; under the Dynamic policy the all-threads check
+ * is used instead.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::lqFull(unsigned tid)
+{
+    //@todo: Change to Calculate All Entries for
+    //Dynamic Policy
+    if( lsqPolicy == Dynamic )
+        return lqFull();
+    else
+        return thread[tid].lqFull();
+}
+
+/** Returns true only when the SQ of every active thread is full. */
+template<class Impl>
+bool
+LSQ<Impl>::sqFull()
+{
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        // Query the LSQ unit directly, as lqFull() does. Going through
+        // sqFull(tid) recursed without bound under the Dynamic policy,
+        // because sqFull(tid) calls back into sqFull().
+        if (!thread[tid].sqFull())
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Per-thread SQ full check; under the Dynamic policy the all-threads check
+ * is used instead.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::sqFull(unsigned tid)
+{
+    //@todo: Change to Calculate All Entries for
+    //Dynamic Policy
+    if( lsqPolicy == Dynamic )
+        return sqFull();
+    else
+        return thread[tid].sqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled()
+{
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        if (!thread[tid].isStalled())
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Per-thread stall check; under the Dynamic policy the all-threads check is
+ * used instead.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::isStalled(unsigned tid)
+{
+    if( lsqPolicy == Dynamic )
+        return isStalled();
+    else
+        return thread[tid].isStalled();
+}
+
+/**
+ * Returns whether any active thread has stores to write back. The previous
+ * form returned true only if every active thread had pending stores, so a
+ * single idle thread hid the pending stores of the others.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::hasStoresToWB()
+{
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        if (hasStoresToWB(tid))
+            return true;
+    }
+
+    return false;
+}
+
+/**
+ * Returns whether any active thread will write back to memory this cycle.
+ * The previous form required every active thread to be writing back.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::willWB()
+{
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        if (willWB(tid))
+            return true;
+    }
+
+    return false;
+}
+
+/** Dumps the instructions held by the LSQ unit of every active thread. */
+template<class Impl>
+void
+LSQ<Impl>::dumpInsts()
+{
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        thread[tid].dumpInsts();
+    }
+}
diff --git a/cpu/o3/lsq_unit.cc b/cpu/o3/lsq_unit.cc
new file mode 100644
index 000000000..dd29007bc
--- /dev/null
+++ b/cpu/o3/lsq_unit.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/lsq_unit_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class LSQUnit<AlphaSimpleImpl>; + diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh new file mode 100644 index 000000000..942b4583d --- /dev/null +++ b/cpu/o3/lsq_unit.hh @@ -0,0 +1,632 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_LSQ_UNIT_HH__ +#define __CPU_O3_LSQ_UNIT_HH__ + +#include <algorithm> +#include <map> +#include <queue> + +#include "arch/faults.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/mem_interface.hh" +//#include "mem/page_table.hh" +//#include "sim/debug.hh" +//#include "sim/sim_object.hh" + +/** + * Class that implements the actual LQ and SQ for each specific + * thread. 
Both are circular queues; load entries are freed upon
+ * committing, while store entries are freed once they write back. The
+ * LSQUnit tracks if there are memory ordering violations, and also
+ * detects partial load to store forwarding cases (a store only has
+ * part of a load's data) that require the load to wait until the
+ * store writes back. In the former case it holds onto the instruction
+ * until the dependence unit looks at it, and in the latter it stalls
+ * the LSQ until the store writes back. At that point the load is
+ * replayed.
+ */
+template <class Impl>
+class LSQUnit {
+  protected:
+    /** Integer register type of the ISA; used to hold store data. */
+    typedef TheISA::IntReg IntReg;
+  public:
+    // Types supplied by the CPU implementation policy.
+    typedef typename Impl::Params Params;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::CPUPol::IEW IEW;
+    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+
+  private:
+    /** Event tied to the store at a given store queue index. */
+    class StoreCompletionEvent : public Event {
+      public:
+        /** Constructs a store completion event. */
+        StoreCompletionEvent(int store_idx, Event *wb_event, LSQUnit *lsq_ptr);
+
+        /** Processes the store completion event. */
+        void process();
+
+        /** Returns the description of this event. */
+        const char *description();
+
+        /** The writeback event for the store.  Needed for store
+         * conditionals.
+         */
+        Event *wbEvent;
+
+      private:
+        /** The store index of the store being written back. */
+        int storeIdx;
+      // NOTE(review): redundant second access specifier; harmless.
+      private:
+        /** The pointer to the LSQ unit that issued the store. */
+        LSQUnit<Impl> *lsqPtr;
+    };
+
+  public:
+    /** Constructs an LSQ unit. init() must be called prior to use. */
+    LSQUnit();
+
+    /** Initializes the LSQ unit with the specified number of entries. */
+    void init(Params *params, unsigned maxLQEntries,
+              unsigned maxSQEntries, unsigned id);
+
+    /** Returns the name of the LSQ unit. */
+    std::string name() const;
+
+    /** Sets the CPU pointer. */
+    void setCPU(FullCPU *cpu_ptr)
+    { cpu = cpu_ptr; }
+
+    /** Sets the IEW stage pointer.
*/
+    void setIEW(IEW *iew_ptr)
+    { iewStage = iew_ptr; }
+
+    /** Sets the page table pointer. */
+//    void setPageTable(PageTable *pt_ptr);
+
+    /** Switches this LSQ unit out (defined in the implementation file). */
+    void switchOut();
+
+    /**
+     * Counterpart of switchOut() (defined in the implementation file).
+     * NOTE(review): presumably resets state after a CPU switch -- confirm
+     * against the implementation.
+     */
+    void takeOverFrom();
+
+    /** Returns the switchedOut flag. */
+    bool isSwitchedOut() { return switchedOut; }
+
+    /** Ticks the LSQ unit, which in this case only resets the number of
+     * used cache ports.
+     * @todo: Move the number of used ports up to the LSQ level so it can
+     * be shared by all LSQ units.
+     */
+    void tick() { usedPorts = 0; }
+
+    /** Inserts an instruction. */
+    void insert(DynInstPtr &inst);
+    /** Inserts a load instruction. */
+    void insertLoad(DynInstPtr &load_inst);
+    /** Inserts a store instruction. */
+    void insertStore(DynInstPtr &store_inst);
+
+    /** Executes a load instruction. */
+    Fault executeLoad(DynInstPtr &inst);
+
+    /**
+     * Index-based load execution. Not implemented: panics when called; the
+     * return statement follows the panic() call.
+     */
+    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
+    /** Executes a store instruction. */
+    Fault executeStore(DynInstPtr &inst);
+
+    /** Commits the head load. */
+    void commitLoad();
+    /** Commits loads older than a specific sequence number. */
+    void commitLoads(InstSeqNum &youngest_inst);
+
+    /** Commits stores older than a specific sequence number. */
+    void commitStores(InstSeqNum &youngest_inst);
+
+    /** Writes back stores. */
+    void writebackStores();
+
+    // @todo: Include stats in the LSQ unit.
+    //void regStats();
+
+    /** Clears all the entries in the LQ. */
+    void clearLQ();
+
+    /** Clears all the entries in the SQ. */
+    void clearSQ();
+
+    /** Resizes the LQ to a given size. */
+    void resizeLQ(unsigned size);
+
+    /** Resizes the SQ to a given size. */
+    void resizeSQ(unsigned size);
+
+    /** Squashes all instructions younger than a specific sequence number. */
+    void squash(const InstSeqNum &squashed_num);
+
+    /** Returns if there is a memory ordering violation. Value is reset upon
+     * call to getMemDepViolator().
+     */
+    bool violation() { return memDepViolator; }
+
+    /** Returns the memory ordering violator.
*/
+    DynInstPtr getMemDepViolator();
+
+    /** Returns if a load became blocked due to the memory system. */
+    bool loadBlocked()
+    { return isLoadBlocked; }
+
+    /** Clears the load-blocked flag. */
+    void clearLoadBlocked()
+    { isLoadBlocked = false; }
+
+    /** Returns whether the blocked load has been marked as handled. */
+    bool isLoadBlockedHandled()
+    { return loadBlockedHandled; }
+
+    /** Marks the blocked load as handled. */
+    void setLoadBlockedHandled()
+    { loadBlockedHandled = true; }
+
+    /** Returns the number of free entries (min of free LQ and SQ entries). */
+    unsigned numFreeEntries();
+
+    /** Returns the number of loads ready to execute. */
+    int numLoadsReady();
+
+    /** Returns the number of loads in the LQ. */
+    int numLoads() { return loads; }
+
+    /** Returns the number of stores in the SQ. */
+    int numStores() { return stores; }
+
+    /** Returns if either the LQ or SQ is full. */
+    bool isFull() { return lqFull() || sqFull(); }
+
+    /** Returns if the LQ is full. LQEntries includes a sentinel entry, hence
+     * the comparison against LQEntries - 1.
+     */
+    bool lqFull() { return loads >= (LQEntries - 1); }
+
+    /** Returns if the SQ is full. SQEntries includes a sentinel entry, hence
+     * the comparison against SQEntries - 1.
+     */
+    bool sqFull() { return stores >= (SQEntries - 1); }
+
+    /** Returns the number of instructions in the LSQ. */
+    unsigned getCount() { return loads + stores; }
+
+    /** Returns if there are any stores to writeback. */
+    bool hasStoresToWB() { return storesToWB; }
+
+    /** Returns the number of stores to writeback. */
+    int numStoresToWB() { return storesToWB; }
+
+    /** Returns if the LSQ unit will writeback on this cycle: the store at
+     * storeWBIdx can write back, is not yet completed, and the D-cache
+     * interface is not blocked.
+     * NOTE(review): dcacheInterface is dereferenced unconditionally here,
+     * while read() checks it for null first -- confirm it cannot be null.
+     */
+    bool willWB() { return storeQueue[storeWBIdx].canWB &&
+                        !storeQueue[storeWBIdx].completed &&
+                        !dcacheInterface->isBlocked(); }
+
+  private:
+    /** Completes the store at the specified index. */
+    void completeStore(int store_idx);
+
+    /** Increments the given store index (circular queue). */
+    inline void incrStIdx(int &store_idx);
+    /** Decrements the given store index (circular queue). */
+    inline void decrStIdx(int &store_idx);
+    /** Increments the given load index (circular queue). */
+    inline void incrLdIdx(int &load_idx);
+    /** Decrements the given load index (circular queue).
*/
+    inline void decrLdIdx(int &load_idx);
+
+  public:
+    /** Debugging function to dump instructions in the LSQ. */
+    void dumpInsts();
+
+  private:
+    /** Pointer to the CPU. */
+    FullCPU *cpu;
+
+    /** Pointer to the IEW stage. */
+    IEW *iewStage;
+
+    /** Pointer to the D-cache. */
+    MemInterface *dcacheInterface;
+
+    /** Pointer to the page table (currently disabled). */
+//    PageTable *pTable;
+
+  public:
+    /** One store queue entry: the store instruction, its memory request and
+     * data, and its writeback status flags.
+     */
+    struct SQEntry {
+        /** Constructs an empty store queue entry. */
+        SQEntry()
+            : inst(NULL), req(NULL), size(0), data(0),
+              canWB(0), committed(0), completed(0)
+        { }
+
+        /** Constructs a store queue entry for a given instruction. */
+        SQEntry(DynInstPtr &_inst)
+            : inst(_inst), req(NULL), size(0), data(0),
+              canWB(0), committed(0), completed(0)
+        { }
+
+        /** The store instruction. */
+        DynInstPtr inst;
+        /** The memory request for the store. */
+        MemReqPtr req;
+        /** The size of the store. */
+        int size;
+        /** The store data. */
+        IntReg data;
+        /** Whether or not the store can writeback. */
+        bool canWB;
+        /** Whether or not the store is committed. */
+        bool committed;
+        /** Whether or not the store is completed. */
+        bool completed;
+    };
+
+  private:
+    /** The LSQUnit thread id. */
+    unsigned lsqID;
+
+    /** The store queue. */
+    std::vector<SQEntry> storeQueue;
+
+    /** The load queue. */
+    std::vector<DynInstPtr> loadQueue;
+
+    /** The number of LQ entries, plus a sentinel entry (circular queue).
+     *  @todo: Consider having var that records the true number of LQ entries.
+     */
+    unsigned LQEntries;
+    /** The number of SQ entries, plus a sentinel entry (circular queue).
+     *  @todo: Consider having var that records the true number of SQ entries.
+     */
+    unsigned SQEntries;
+
+    /** The number of load instructions in the LQ. */
+    int loads;
+    /** The number of store instructions in the SQ. */
+    int stores;
+    /** The number of store instructions in the SQ waiting to writeback. */
+    int storesToWB;
+
+    /** The index of the head instruction in the LQ.
*/
+    int loadHead;
+    /** The index of the tail instruction in the LQ. */
+    int loadTail;
+
+    /** The index of the head instruction in the SQ. */
+    int storeHead;
+    /** The index of the first instruction that may be ready to be
+     * written back, and has not yet been written back.
+     */
+    int storeWBIdx;
+    /** The index of the tail instruction in the SQ. */
+    int storeTail;
+
+    /// @todo Consider moving to a more advanced model with write vs read ports
+    /** The number of cache ports available each cycle. */
+    int cachePorts;
+
+    /** The number of used cache ports in this cycle. */
+    int usedPorts;
+
+    /** Flag reported by isSwitchedOut(). */
+    bool switchedOut;
+
+    //list<InstSeqNum> mshrSeqNums;
+
+    /** Wire to read information from the issue stage time queue. */
+    typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+    /** Whether or not the LSQ is stalled. */
+    bool stalled;
+    /** The sequence number of the store that causes the stall due to
+     * partial store to load forwarding.
+     */
+    InstSeqNum stallingStoreIsn;
+    /** The load queue index of the load that is stalled on the above
+     * store.
+     */
+    int stallingLoadIdx;
+
+    /** Whether or not a load is blocked due to the memory system. */
+    bool isLoadBlocked;
+
+    /** Whether the blocked load has been handled; cleared when a load
+     * becomes blocked and set by setLoadBlockedHandled().
+     */
+    bool loadBlockedHandled;
+
+    /** The sequence number of the load that is blocked. */
+    InstSeqNum blockedLoadSeqNum;
+
+    /** The oldest load that caused a memory ordering violation. */
+    DynInstPtr memDepViolator;
+
+    // Will also need how many read/write ports the Dcache has.  Or keep track
+    // of that in stage that is one level up, and only call executeLoad/Store
+    // the appropriate number of times.
+/*
+    // total number of loads forwarded from LSQ stores
+    Stats::Vector<> lsq_forw_loads;
+
+    // total number of loads ignored due to invalid addresses
+    Stats::Vector<> inv_addr_loads;
+
+    // total number of software prefetches ignored due to invalid addresses
+    Stats::Vector<> inv_addr_swpfs;
+
+    // total non-speculative bogus addresses seen (debug var)
+    Counter sim_invalid_addrs;
+    Stats::Vector<> fu_busy;  //cumulative fu busy
+
+    // ready loads blocked due to memory disambiguation
+    Stats::Vector<> lsq_blocked_loads;
+
+    Stats::Scalar<> lsqInversion;
+*/
+  public:
+    /** Executes the load at the given index. */
+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx);
+
+    /** Executes the store at the given index. */
+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx);
+
+    /** Returns the index of the head load instruction. */
+    int getLoadHead() { return loadHead; }
+    /** Returns the sequence number of the head load instruction, or 0 when
+     * the head slot of the load queue holds no instruction.
+     */
+    InstSeqNum getLoadHeadSeqNum()
+    {
+        if (loadQueue[loadHead])
+            return loadQueue[loadHead]->seqNum;
+
+        return 0;
+    }
+
+    /** Returns the index of the head store instruction. */
+    int getStoreHead() { return storeHead; }
+    /** Returns the sequence number of the head store instruction, or 0 when
+     * the head slot of the store queue holds no instruction.
+     */
+    InstSeqNum getStoreHeadSeqNum()
+    {
+        if (storeQueue[storeHead].inst)
+            return storeQueue[storeHead].inst->seqNum;
+
+        return 0;
+    }
+
+    /** Returns whether or not the LSQ unit is stalled. */
+    bool isStalled() { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+    assert(loadQueue[load_idx]);
+
+    assert(!loadQueue[load_idx]->isExecuted());
+
+    // Make sure this isn't an uncacheable access
+    // A bit of a hackish way to get uncached accesses to work only if they're
+    // at the head of the LSQ and are ready to commit (at the head of the ROB
+    // too).
+ if (req->flags & UNCACHEABLE && + (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) { + iewStage->rescheduleMemInst(loadQueue[load_idx]); + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + int store_idx = loadQueue[load_idx]->sqIdx; + + int store_size = 0; + + DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " + "storeHead: %i addr: %#x\n", + load_idx, store_idx, storeHead, req->paddr); + +#if 0 + if (req->flags & LOCKED) { + cpu->lockAddr = req->paddr; + cpu->lockFlag = true; + } +#endif + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; + + while (store_idx != -1) { + // End once we've reached the top of the LSQ + if (store_idx == storeWBIdx) { + break; + } + + // Move the index to one younger + if (--store_idx < 0) + store_idx += SQEntries; + + assert(storeQueue[store_idx].inst); + + store_size = storeQueue[store_idx].size; + + if (store_size == 0) + continue; + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. + bool store_has_lower_limit = + req->vaddr >= storeQueue[store_idx].inst->effAddr; + bool store_has_upper_limit = + (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr + + store_size); + bool lower_load_has_store_part = + req->vaddr < (storeQueue[store_idx].inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + // Get shift amount for offset into the store's data. + int shift_amt = req->vaddr & (store_size - 1); + // @todo: Magic number, assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? 
+ data = storeQueue[store_idx].data >> shift_amt; + + assert(!req->data); + req->data = new uint8_t[64]; + + memcpy(req->data, &data, req->size); + + DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " + "addr %#x, data %#x\n", + store_idx, req->vaddr, *(req->data)); + + typename IEW::LdWritebackEvent *wb = + new typename IEW::LdWritebackEvent(loadQueue[load_idx], + iewStage); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // @todo: Need to make this a parameter. + wb->schedule(curTick); + + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if (storeQueue[store_idx].completed) { + continue; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. + if (!stalled || + (stalled && + loadQueue[load_idx]->seqNum < + loadQueue[stallingLoadIdx]->seqNum)) { + stalled = true; + stallingStoreIsn = storeQueue[store_idx].inst->seqNum; + stallingLoadIdx = load_idx; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + iewStage->rescheduleMemInst(loadQueue[load_idx]); + + // Do not generate a writeback event as this instruction is not + // complete. + DPRINTF(LSQUnit, "Load-store forwarding mis-match. 
" + "Store idx %i to load addr %#x\n", + store_idx, req->vaddr); + + return NoFault; + } + } + + // If there's no forwarding case, then go access memory + DynInstPtr inst = loadQueue[load_idx]; + + DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC()); + + assert(!req->data); + req->data = new uint8_t[64]; + Fault fault = cpu->read(req, data); + memcpy(req->data, &data, sizeof(T)); + + ++usedPorts; + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + if (dcacheInterface->isBlocked()) { + // There's an older load that's already going to squash. + if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) + return NoFault; + + // Record that the load was blocked due to memory. This + // load will squash all instructions after it, be + // refetched, and re-executed. + isLoadBlocked = true; + loadBlockedHandled = false; + blockedLoadSeqNum = inst->seqNum; + // No fault occurred, even though the interface is blocked. 
+ return NoFault; + } + + DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", + loadQueue[load_idx]->readPC()); + + assert(!req->completionEvent); + req->completionEvent = + new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage); + MemAccessResult result = dcacheInterface->access(req); + + assert(dcacheInterface->doEvents()); + + if (result != MA_HIT) { + DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + inst->seqNum); + } else { + DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); + DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", + inst->seqNum); + } + } + + return fault; +} + +template <class Impl> +template <class T> +Fault +LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx) +{ + assert(storeQueue[store_idx].inst); + + DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" + " | storeHead:%i [sn:%i]\n", + store_idx, req->paddr, data, storeHead, + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].req = req; + storeQueue[store_idx].size = sizeof(T); + storeQueue[store_idx].data = data; + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +#endif // __CPU_O3_LSQ_UNIT_HH__ diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh new file mode 100644 index 000000000..7974ddaad --- /dev/null +++ b/cpu/o3/lsq_unit_impl.hh @@ -0,0 +1,873 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/checker/cpu.hh" +#include "cpu/o3/lsq_unit.hh" +#include "base/str.hh" + +template <class Impl> +LSQUnit<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx, + Event *wb_event, + LSQUnit<Impl> *lsq_ptr) + : Event(&mainEventQueue), + wbEvent(wb_event), + storeIdx(store_idx), + lsqPtr(lsq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template <class Impl> +void +LSQUnit<Impl>::StoreCompletionEvent::process() +{ + DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx); + DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + + if (lsqPtr->isSwitchedOut()) + return; + + lsqPtr->cpu->wakeCPU(); + if (wbEvent) + wbEvent->process(); + lsqPtr->completeStore(storeIdx); +} + +template <class Impl> +const char * +LSQUnit<Impl>::StoreCompletionEvent::description() +{ + return "LSQ store completion event"; +} + +template <class Impl> +LSQUnit<Impl>::LSQUnit() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), + loadBlockedHandled(false) +{ +} + +template<class Impl> +void +LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) + +{ + DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); + + switchedOut = false; + + lsqID = id; + + // Add 1 for the sentinel entry (they are circular queues). + LQEntries = maxLQEntries + 1; + SQEntries = maxSQEntries + 1; + + loadQueue.resize(LQEntries); + storeQueue.resize(SQEntries); + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + cachePorts = params->cachePorts; + + dcacheInterface = params->dcacheInterface; + + memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} + +template<class Impl> +std::string +LSQUnit<Impl>::name() const +{ + if (Impl::MaxThreads == 1) { + return iewStage->name() + ".lsq"; + } else { + return iewStage->name() + ".lsq.thread." 
+ to_string(lsqID); + } +} + +template<class Impl> +void +LSQUnit<Impl>::clearLQ() +{ + loadQueue.clear(); +} + +template<class Impl> +void +LSQUnit<Impl>::clearSQ() +{ + storeQueue.clear(); +} + +#if 0 +template<class Impl> +void +LSQUnit<Impl>::setPageTable(PageTable *pt_ptr) +{ + DPRINTF(LSQUnit, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} +#endif + +template<class Impl> +void +LSQUnit<Impl>::switchOut() +{ + switchedOut = true; + for (int i = 0; i < loadQueue.size(); ++i) + loadQueue[i] = NULL; + + assert(storesToWB == 0); + + while (storesToWB > 0 && + storeWBIdx != storeTail && + storeQueue[storeWBIdx].inst && + storeQueue[storeWBIdx].canWB) { + + if (storeQueue[storeWBIdx].size == 0 || + storeQueue[storeWBIdx].inst->isDataPrefetch() || + storeQueue[storeWBIdx].committed || + storeQueue[storeWBIdx].req->flags & LOCKED) { + incrStIdx(storeWBIdx); + + continue; + } + + assert(storeQueue[storeWBIdx].req); + assert(!storeQueue[storeWBIdx].committed); + + MemReqPtr req = storeQueue[storeWBIdx].req; + storeQueue[storeWBIdx].committed = true; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); + + DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), + req->paddr, *(req->data), + storeQueue[storeWBIdx].inst->seqNum); + + switch(storeQueue[storeWBIdx].size) { + case 1: + cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data); + break; + case 2: + cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data); + break; + case 4: + cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data); + break; + case 8: + cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data); + break; + default: + panic("Unexpected store size!\n"); + } + incrStIdx(storeWBIdx); + } +} + +template<class Impl> +void 
+LSQUnit<Impl>::takeOverFrom() +{ + switchedOut = false; + loads = stores = storesToWB = 0; + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + + memDepViolator = NULL; + + blockedLoadSeqNum = 0; + + stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; +} + +template<class Impl> +void +LSQUnit<Impl>::resizeLQ(unsigned size) +{ + unsigned size_plus_sentinel = size + 1; + assert(size_plus_sentinel >= LQEntries); + + if (size_plus_sentinel > LQEntries) { + while (size_plus_sentinel > loadQueue.size()) { + DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size_plus_sentinel; + } + +} + +template<class Impl> +void +LSQUnit<Impl>::resizeSQ(unsigned size) +{ + unsigned size_plus_sentinel = size + 1; + if (size_plus_sentinel > SQEntries) { + while (size_plus_sentinel > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size_plus_sentinel; + } +} + +template <class Impl> +void +LSQUnit<Impl>::insert(DynInstPtr &inst) +{ + assert(inst->isMemRef()); + + assert(inst->isLoad() || inst->isStore()); + + if (inst->isLoad()) { + insertLoad(inst); + } else { + insertStore(inst); + } + + inst->setInLSQ(); +} + +template <class Impl> +void +LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst) +{ + assert((loadTail + 1) % LQEntries != loadHead); + assert(loads < LQEntries); + + DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n", + load_inst->readPC(), loadTail, load_inst->seqNum); + + load_inst->lqIdx = loadTail; + + if (stores == 0) { + load_inst->sqIdx = -1; + } else { + load_inst->sqIdx = storeTail; + } + + loadQueue[loadTail] = load_inst; + + incrLdIdx(loadTail); + + ++loads; +} + +template <class Impl> +void +LSQUnit<Impl>::insertStore(DynInstPtr &store_inst) +{ + // Make sure it is not full before inserting an instruction. 
+ assert((storeTail + 1) % SQEntries != storeHead); + assert(stores < SQEntries); + + DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n", + store_inst->readPC(), storeTail, store_inst->seqNum); + + store_inst->sqIdx = storeTail; + store_inst->lqIdx = loadTail; + + storeQueue[storeTail] = SQEntry(store_inst); + + incrStIdx(storeTail); + + ++stores; +} + +template <class Impl> +typename Impl::DynInstPtr +LSQUnit<Impl>::getMemDepViolator() +{ + DynInstPtr temp = memDepViolator; + + memDepViolator = NULL; + + return temp; +} + +template <class Impl> +unsigned +LSQUnit<Impl>::numFreeEntries() +{ + unsigned free_lq_entries = LQEntries - loads; + unsigned free_sq_entries = SQEntries - stores; + + // Both the LQ and SQ entries have an extra dummy entry to differentiate + // empty/full conditions. Subtract 1 from the free entries. + if (free_lq_entries < free_sq_entries) { + return free_lq_entries - 1; + } else { + return free_sq_entries - 1; + } +} + +template <class Impl> +int +LSQUnit<Impl>::numLoadsReady() +{ + int load_idx = loadHead; + int retval = 0; + + while (load_idx != loadTail) { + assert(loadQueue[load_idx]); + + if (loadQueue[load_idx]->readyToIssue()) { + ++retval; + } + } + + return retval; +} + +template <class Impl> +Fault +LSQUnit<Impl>::executeLoad(DynInstPtr &inst) +{ + // Execute a specific load. + Fault load_fault = NoFault; + + DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n", + inst->readPC(),inst->seqNum); + +// load_fault = inst->initiateAcc(); + load_fault = inst->execute(); + + // If the instruction faulted, then we need to send it along to commit + // without the instruction completing. + if (load_fault != NoFault) { + // Send this instruction to commit, also make sure iew stage + // realizes there is activity. 
+ iewStage->instToCommit(inst); + iewStage->activityThisCycle(); + } + + return load_fault; +} + +template <class Impl> +Fault +LSQUnit<Impl>::executeStore(DynInstPtr &store_inst) +{ + using namespace TheISA; + // Make sure that a store exists. + assert(stores != 0); + + int store_idx = store_inst->sqIdx; + + DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n", + store_inst->readPC(), store_inst->seqNum); + + // Check the recently completed loads to see if any match this store's + // address. If so, then we have a memory ordering violation. + int load_idx = store_inst->lqIdx; + + Fault store_fault = store_inst->initiateAcc(); +// Fault store_fault = store_inst->execute(); + + if (storeQueue[store_idx].size == 0) { + DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", + store_inst->readPC(),store_inst->seqNum); + + return store_fault; + } + + assert(store_fault == NoFault); + + if (store_inst->isStoreConditional()) { + // Store conditionals need to set themselves as able to + // writeback if we haven't had a fault by here. + storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + + if (!memDepViolator) { + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + // It's safe to check all loads because effAddr is set to + // InvalAddr when the dyn inst is created. + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if ((loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + memDepViolator = loadQueue[load_idx]; + + return genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } + + // If we've reached this point, there was no violation. 
+ memDepViolator = NULL; + } + + return store_fault; +} + +template <class Impl> +void +LSQUnit<Impl>::commitLoad() +{ + assert(loadQueue[loadHead]); + + DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n", + loadQueue[loadHead]->readPC()); + + + loadQueue[loadHead] = NULL; + + incrLdIdx(loadHead); + + --loads; +} + +template <class Impl> +void +LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || loadQueue[loadHead]); + + while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template <class Impl> +void +LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || storeQueue[storeHead].inst); + + int store_idx = storeHead; + + while (store_idx != storeTail) { + assert(storeQueue[store_idx].inst); + // Mark any stores that are now committed and have not yet + // been marked as able to write back. + if (!storeQueue[store_idx].canWB) { + if (storeQueue[store_idx].inst->seqNum > youngest_inst) { + break; + } + DPRINTF(LSQUnit, "Marking store as able to write back, PC " + "%#x [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + + incrStIdx(store_idx); + } +} + +template <class Impl> +void +LSQUnit<Impl>::writebackStores() +{ + while (storesToWB > 0 && + storeWBIdx != storeTail && + storeQueue[storeWBIdx].inst && + storeQueue[storeWBIdx].canWB && + usedPorts < cachePorts) { + + // Store didn't write any data so no need to write it back to + // memory. 
+ if (storeQueue[storeWBIdx].size == 0) { + completeStore(storeWBIdx); + + incrStIdx(storeWBIdx); + + continue; + } + + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(LSQUnit, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + + ++usedPorts; + + if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { + incrStIdx(storeWBIdx); + + continue; + } + + assert(storeQueue[storeWBIdx].req); + assert(!storeQueue[storeWBIdx].committed); + + MemReqPtr req = storeQueue[storeWBIdx].req; + storeQueue[storeWBIdx].committed = true; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); + + DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), + req->paddr, *(req->data), + storeQueue[storeWBIdx].inst->seqNum); + + switch(storeQueue[storeWBIdx].size) { + case 1: + cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data); + break; + case 2: + cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data); + break; + case 4: + cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data); + break; + case 8: + cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data); + break; + default: + panic("Unexpected store size!\n"); + } + + // Stores other than store conditionals are completed at this + // time. Mark them as completed and, if we have a checker, + // tell it that the instruction is completed. + // @todo: Figure out what time I can say stores are complete in + // the timing memory. 
+ if (!(req->flags & LOCKED)) { + storeQueue[storeWBIdx].inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick(storeQueue[storeWBIdx].inst); + } + } + + if (dcacheInterface) { + assert(!req->completionEvent); + StoreCompletionEvent *store_event = new + StoreCompletionEvent(storeWBIdx, NULL, this); + req->completionEvent = store_event; + + MemAccessResult result = dcacheInterface->access(req); + + if (isStalled() && + storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { + DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + iewStage->replayMemInst(loadQueue[stallingLoadIdx]); + } + + typename IEW::LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + // Stx_C should not generate a system port transaction + // if it misses in the cache, but that might be hard + // to accomplish without explicit cache support. + wb = new typename + IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, + iewStage); + store_event->wbEvent = wb; + } + + if (result != MA_HIT && dcacheInterface->doEvents()) { + DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n", + storeWBIdx); + + DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", + storeQueue[storeWBIdx].inst->seqNum); + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // @todo: Increment stat here. + } else { + DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n", + storeWBIdx); + + DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", + storeQueue[storeWBIdx].inst->seqNum); + } + + incrStIdx(storeWBIdx); + } else { + panic("Must HAVE DCACHE!!!!!\n"); + } + } + + // Not sure this should set it to 0. 
+ usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +/*template <class Impl> +void +LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum) +{ + list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(), + mshrSeqNums.end(), + seqNum); + + if (mshr_it != mshrSeqNums.end()) { + mshrSeqNums.erase(mshr_it); + DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); + } +}*/ + +template <class Impl> +void +LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" + "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); + + int load_idx = loadTail; + decrLdIdx(load_idx); + + while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { + DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + loadQueue[load_idx]->readPC(), + loadQueue[load_idx]->seqNum); + + if (isStalled() && load_idx == stallingLoadIdx) { + stalled = false; + stallingStoreIsn = 0; + stallingLoadIdx = 0; + } + + // Clear the smart pointer to make sure it is decremented. + loadQueue[load_idx]->squashed = true; + loadQueue[load_idx] = NULL; + --loads; + + // Inefficient! + loadTail = load_idx; + + decrLdIdx(load_idx); + } + + if (isLoadBlocked) { + if (squashed_num < blockedLoadSeqNum) { + isLoadBlocked = false; + loadBlockedHandled = false; + blockedLoadSeqNum = 0; + } + } + + int store_idx = storeTail; + decrStIdx(store_idx); + + while (stores != 0 && + storeQueue[store_idx].inst->seqNum > squashed_num) { + // Instructions marked as can WB are already committed. + if (storeQueue[store_idx].canWB) { + break; + } + + DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, " + "idx:%i [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + store_idx, storeQueue[store_idx].inst->seqNum); + + // I don't think this can happen. It should have been cleared + // by the stalling load. 
+ if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + + // Clear the smart pointer to make sure it is decremented. + storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst = NULL; + storeQueue[store_idx].canWB = 0; + + if (storeQueue[store_idx].req) { + // There should not be a completion event if the store has + // not yet committed. + assert(!storeQueue[store_idx].req->completionEvent); + } + + storeQueue[store_idx].req = NULL; + --stores; + + // Inefficient! + storeTail = store_idx; + + decrStIdx(store_idx); + } +} + +template <class Impl> +void +LSQUnit<Impl>::completeStore(int store_idx) +{ + assert(storeQueue[store_idx].inst); + storeQueue[store_idx].completed = true; + --storesToWB; + // A bit conservative because a store completion may not free up entries, + // but hopefully avoids two store completions in one cycle from making + // the CPU tick twice. + cpu->activityThisCycle(); + + if (store_idx == storeHead) { + do { + incrStIdx(storeHead); + + --stores; + } while (storeQueue[storeHead].completed && + storeHead != storeTail); + + iewStage->updateLSQNextCycle = true; + } + + DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " + "idx:%i\n", + storeQueue[store_idx].inst->seqNum, store_idx, storeHead); + + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + iewStage->replayMemInst(loadQueue[stallingLoadIdx]); + } + + storeQueue[store_idx].inst->setCompleted(); + + // Tell the checker we've completed this instruction. Some stores + // may get reported twice to the checker, but the checker can + // handle that case. 
+ if (cpu->checker) { + cpu->checker->tick(storeQueue[store_idx].inst); + } +} + +template <class Impl> +inline void +LSQUnit<Impl>::incrStIdx(int &store_idx) +{ + if (++store_idx >= SQEntries) + store_idx = 0; +} + +template <class Impl> +inline void +LSQUnit<Impl>::decrStIdx(int &store_idx) +{ + if (--store_idx < 0) + store_idx += SQEntries; +} + +template <class Impl> +inline void +LSQUnit<Impl>::incrLdIdx(int &load_idx) +{ + if (++load_idx >= LQEntries) + load_idx = 0; +} + +template <class Impl> +inline void +LSQUnit<Impl>::decrLdIdx(int &load_idx) +{ + if (--load_idx < 0) + load_idx += LQEntries; +} + +template <class Impl> +void +LSQUnit<Impl>::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + int load_idx = loadHead; + + while (load_idx != loadTail && loadQueue[load_idx]) { + cprintf("%#x ", loadQueue[load_idx]->readPC()); + + incrLdIdx(load_idx); + } + + cprintf("Store queue size: %i\n", stores); + cprintf("Store queue: "); + + int store_idx = storeHead; + + while (store_idx != storeTail && storeQueue[store_idx].inst) { + cprintf("%#x ", storeQueue[store_idx].inst->readPC()); + + incrStIdx(store_idx); + } + + cprintf("\n"); +} diff --git a/cpu/o3/scoreboard.cc b/cpu/o3/scoreboard.cc new file mode 100644 index 000000000..b0e433620 --- /dev/null +++ b/cpu/o3/scoreboard.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/o3/scoreboard.hh" + +Scoreboard::Scoreboard(unsigned activeThreads, + unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + unsigned _zeroRegIdx) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numMiscRegs(_numMiscRegs), + zeroRegIdx(_zeroRegIdx) +{ + //Get Register Sizes + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + + //Resize scoreboard appropriately + regScoreBoard.resize(numPhysicalRegs + (numMiscRegs * activeThreads)); + + //Initialize values + for (int i=0; i < numLogicalIntRegs * activeThreads; i++) { + regScoreBoard[i] = 1; + } + + for (int i= numPhysicalIntRegs; + i < numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads); + i++) { + regScoreBoard[i] = 1; + } + + for (int i = numPhysicalRegs; + i < numPhysicalRegs + (numMiscRegs * activeThreads); + i++) { + regScoreBoard[i] = 1; + } +} + +std::string +Scoreboard::name() const +{ + return "cpu.scoreboard"; +} + +bool +Scoreboard::getReg(PhysRegIndex phys_reg) +{ + // Always ready if int or fp zero reg. + if (phys_reg == zeroRegIdx || + phys_reg == (zeroRegIdx + numPhysicalIntRegs)) { + return 1; + } + + return regScoreBoard[phys_reg]; +} + +void +Scoreboard::setReg(PhysRegIndex phys_reg) +{ + DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg); + + regScoreBoard[phys_reg] = 1; +} + +void +Scoreboard::unsetReg(PhysRegIndex ready_reg) +{ + if (ready_reg == zeroRegIdx || + ready_reg == (zeroRegIdx + numPhysicalIntRegs)) { + // Don't do anything if int or fp zero reg. 
+ return; + } + + regScoreBoard[ready_reg] = 0; +} diff --git a/cpu/o3/scoreboard.hh b/cpu/o3/scoreboard.hh new file mode 100644 index 000000000..77f2cf157 --- /dev/null +++ b/cpu/o3/scoreboard.hh @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef __CPU_O3_SCOREBOARD_HH__
+#define __CPU_O3_SCOREBOARD_HH__
+
+#include <iostream>
+#include <utility>
+#include <vector>
+#include "arch/alpha/isa_traits.hh"
+#include "base/trace.hh"
+#include "base/traceflags.hh"
+#include "cpu/o3/comm.hh"
+
+/**
+ * Implements a simple scoreboard to track which registers are ready.
+ * This class assumes that the fp registers start, index wise, right after
+ * the integer registers, and that the misc. registers start, index wise,
+ * right after the fp registers.
+ *
+ * NOTE(review): name() returns std::string, but <string> is not among the
+ * headers included directly above; it presumably arrives transitively --
+ * confirm.
+ *
+ * @todo Fix up handling of the zero register in case the decoder does not
+ * automatically make insts that write the zero register into nops.
+ */
+class Scoreboard
+{
+  public:
+    /** Constructs a scoreboard.
+     *  @param activeThreads The number of active threads.
+     *  @param _numLogicalIntRegs Number of logical integer registers.
+     *  @param _numPhysicalIntRegs Number of physical integer registers.
+     *  @param _numLogicalFloatRegs Number of logical fp registers.
+     *  @param _numPhysicalFloatRegs Number of physical fp registers.
+     *  @param _numMiscRegs Number of miscellaneous registers.
+     *  @param _zeroRegIdx Index of the zero register.
+     *
+     *  NOTE(review): these parameters are unsigned, while the similarly
+     *  named count members declared below are int.
+     */
+    Scoreboard(unsigned activeThreads,
+               unsigned _numLogicalIntRegs,
+               unsigned _numPhysicalIntRegs,
+               unsigned _numLogicalFloatRegs,
+               unsigned _numPhysicalFloatRegs,
+               unsigned _numMiscRegs,
+               unsigned _zeroRegIdx);
+
+    /** Destructor. Empty; the class declares no raw owning pointers. */
+    ~Scoreboard() {}
+
+    /** Returns the name of the scoreboard. */
+    std::string name() const;
+
+    /** Checks if the register is ready.
+     *  @param ready_reg Physical register index to look up.
+     *  @return Presumably true when the register is marked ready; the
+     *  definition is not part of this header.
+     */
+    bool getReg(PhysRegIndex ready_reg);
+
+    /** Sets the register as ready.
+     *  @param phys_reg Physical register index to mark as ready.
+     */
+    void setReg(PhysRegIndex phys_reg);
+
+    /** Sets the register as not ready.
+     *  @param ready_reg Physical register index to mark as not ready.
+     */
+    void unsetReg(PhysRegIndex ready_reg);
+
+  private:
+    /** Per-register ready flags. The earlier wording said "physical integer
+     *  registers", but the class comment places the fp and misc. registers
+     *  after the integer ones in the same index space, so this presumably
+     *  covers all of them -- confirm against scoreboard.cc.
+     */
+    std::vector<bool> regScoreBoard;
+
+    /** Number of logical integer registers. */
+    int numLogicalIntRegs;
+
+    /** Number of physical integer registers. */
+    int numPhysicalIntRegs;
+
+    /** Number of logical floating point registers. */
+    int numLogicalFloatRegs;
+
+    /** Number of physical floating point registers. */
+    int numPhysicalFloatRegs;
+
+    /** Number of miscellaneous registers. */
+    int numMiscRegs;
+
+    /** Number of logical integer + float registers. */
+    int numLogicalRegs;
+
+    /** Number of physical integer + float registers. */
+    int numPhysicalRegs;
+
+    /** The logical index of the zero register. */
+    int zeroRegIdx;
+};
+
+#endif
diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh
new file mode 100644
index 000000000..2c9788e4b
--- /dev/null
+++ b/cpu/o3/thread_state.hh
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_THREAD_STATE_HH__ +#define __CPU_O3_THREAD_STATE_HH__ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "cpu/exec_context.hh" +#include "cpu/thread_state.hh" + +class Event; +class Process; + +#if FULL_SYSTEM +class EndQuiesceEvent; +class FunctionProfile; +class ProfileNode; +#else +class FunctionalMemory; +class Process; +#endif + +/** + * Class that has various thread state, such as the status, the + * current instruction being processed, whether or not the thread has + * a trap pending or is being externally updated, the ExecContext + * proxy pointer, etc. It also handles anything related to a specific + * thread's process, such as syscalls and checking valid addresses. 
+ */ +template <class Impl> +struct O3ThreadState : public ThreadState { + typedef ExecContext::Status Status; + typedef typename Impl::FullCPU FullCPU; + + Status _status; + + // Current instruction + TheISA::MachInst inst; + private: + FullCPU *cpu; + public: + + bool inSyscall; + + bool trapPending; + +#if FULL_SYSTEM + O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) + : ThreadState(-1, _thread_num, _mem), + inSyscall(0), trapPending(0) + { } +#else + O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) + : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { } + + O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem, + int _asid) + : ThreadState(-1, _thread_num, _mem, NULL, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { } +#endif + + ExecContext *xcProxy; + + ExecContext *getXCProxy() { return xcProxy; } + + Status status() const { return _status; } + + void setStatus(Status new_status) { _status = new_status; } + +#if !FULL_SYSTEM + bool validInstAddr(Addr addr) + { return process->validInstAddr(addr); } + + bool validDataAddr(Addr addr) + { return process->validDataAddr(addr); } +#endif + + bool misspeculating() { return false; } + + void setInst(TheISA::MachInst _inst) { inst = _inst; } + + Counter readFuncExeInst() { return funcExeInst; } + + void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } + +#if !FULL_SYSTEM + void syscall() { process->syscall(xcProxy); } +#endif +}; + +#endif // __CPU_O3_THREAD_STATE_HH__ |