Diffstat (limited to 'src/cpu/o3')
-rwxr-xr-x  src/cpu/o3/SConscript                86
-rw-r--r--  src/cpu/o3/SConsopts                 34
-rw-r--r--  src/cpu/o3/alpha/cpu_builder.cc       7
-rw-r--r--  src/cpu/o3/alpha/cpu_impl.hh          1
-rw-r--r--  src/cpu/o3/commit.hh                 15
-rw-r--r--  src/cpu/o3/commit_impl.hh           142
-rw-r--r--  src/cpu/o3/cpu.cc                    10
-rw-r--r--  src/cpu/o3/cpu.hh                    16
-rw-r--r--  src/cpu/o3/decode_impl.hh             6
-rw-r--r--  src/cpu/o3/fetch.hh                   9
-rw-r--r--  src/cpu/o3/fetch_impl.hh             69
-rw-r--r--  src/cpu/o3/iew_impl.hh               69
-rw-r--r--  src/cpu/o3/inst_queue_impl.hh        29
-rw-r--r--  src/cpu/o3/lsq_impl.hh               13
-rw-r--r--  src/cpu/o3/lsq_unit.hh               25
-rw-r--r--  src/cpu/o3/lsq_unit_impl.hh         102
-rw-r--r--  src/cpu/o3/mem_dep_unit_impl.hh      10
-rw-r--r--  src/cpu/o3/rename_impl.hh            10
-rw-r--r--  src/cpu/o3/rename_map.cc              2
-rw-r--r--  src/cpu/o3/rob_impl.hh                4
-rw-r--r--  src/cpu/o3/sparc/cpu_builder.cc       7
-rwxr-xr-x  src/cpu/o3/thread_context_impl.hh     2
22 files changed, 438 insertions, 230 deletions
diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript
index afbd4c533..bb1dfb613 100755
--- a/src/cpu/o3/SConscript
+++ b/src/cpu/o3/SConscript
@@ -26,52 +26,56 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
-# Authors: Korey Sewell
+# Authors: Nathan Binkert
-import os
-import os.path
import sys
-# Import build environment variable from SConstruct.
-Import('env')
+Import('*')
+if 'O3CPU' in env['CPU_MODELS']:
+ Source('base_dyn_inst.cc')
+ Source('bpred_unit.cc')
+ Source('commit.cc')
+ Source('cpu.cc')
+ Source('decode.cc')
+ Source('fetch.cc')
+ Source('free_list.cc')
+ Source('fu_pool.cc')
+ Source('iew.cc')
+ Source('inst_queue.cc')
+ Source('lsq.cc')
+ Source('lsq_unit.cc')
+ Source('mem_dep_unit.cc')
+ Source('rename.cc')
+ Source('rename_map.cc')
+ Source('rob.cc')
+ Source('scoreboard.cc')
+ Source('store_set.cc')
-#################################################################
-#
-# Include ISA-specific files for the O3 CPU-model
-#
-#################################################################
-
-sources = []
-
-if env['TARGET_ISA'] == 'alpha':
- sources += Split('''
- alpha/dyn_inst.cc
- alpha/cpu.cc
- alpha/thread_context.cc
- alpha/cpu_builder.cc
- ''')
-elif env['TARGET_ISA'] == 'mips':
- sources += Split('''
- mips/dyn_inst.cc
- mips/cpu.cc
- mips/thread_context.cc
- mips/cpu_builder.cc
- ''')
-elif env['TARGET_ISA'] == 'sparc':
- sources += Split('''
- sparc/dyn_inst.cc
- sparc/cpu.cc
- sparc/thread_context.cc
- sparc/cpu_builder.cc
- ''')
-else:
- sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA'])
-
+ if env['TARGET_ISA'] == 'alpha':
+ Source('alpha/cpu.cc')
+ Source('alpha/cpu_builder.cc')
+ Source('alpha/dyn_inst.cc')
+ Source('alpha/thread_context.cc')
+ elif env['TARGET_ISA'] == 'mips':
+ Source('mips/cpu.cc')
+ Source('mips/cpu_builder.cc')
+ Source('mips/dyn_inst.cc')
+ Source('mips/thread_context.cc')
+ elif env['TARGET_ISA'] == 'sparc':
+ Source('sparc/cpu.cc')
+ Source('sparc/cpu_builder.cc')
+ Source('sparc/dyn_inst.cc')
+ Source('sparc/thread_context.cc')
+ else:
+ sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA'])
-# Convert file names to SCons File objects. This takes care of the
-# path relative to the top of the directory tree.
-sources = [File(s) for s in sources]
+ if env['USE_CHECKER']:
+ Source('checker_builder.cc')
-Return('sources')
+if 'O3CPU' in env['CPU_MODELS'] or 'OzoneCPU' in env['CPU_MODELS']:
+ Source('2bit_local_pred.cc')
+ Source('btb.cc')
+ Source('ras.cc')
+ Source('tournament_pred.cc')
diff --git a/src/cpu/o3/SConsopts b/src/cpu/o3/SConsopts
new file mode 100644
index 000000000..040352e6a
--- /dev/null
+++ b/src/cpu/o3/SConsopts
@@ -0,0 +1,34 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+all_cpu_list.append('O3CPU')
+default_cpus.append('O3CPU')
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 5a375a4b8..34754d3c5 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
Param<int> clock;
Param<int> phase;
Param<int> numThreads;
+Param<int> cpu_id;
Param<int> activity;
#if FULL_SYSTEM
SimObjectParam<System *> system;
-Param<int> cpu_id;
SimObjectParam<AlphaISA::ITB *> itb;
SimObjectParam<AlphaISA::DTB *> dtb;
Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(clock, "clock speed"),
INIT_PARAM_DFLT(phase, "clock phase", 0),
INIT_PARAM(numThreads, "number of HW thread contexts"),
+ INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM_DFLT(activity, "Initial activity count", 0),
#if FULL_SYSTEM
INIT_PARAM(system, "System object"),
- INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
AlphaSimpleParams *params = new AlphaSimpleParams;
params->clock = clock;
+ params->phase = phase;
params->name = getInstanceName();
params->numberOfThreads = actual_num_threads;
+ params->cpu_id = cpu_id;
params->activity = activity;
#if FULL_SYSTEM
params->system = system;
- params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
params->profile = profile;
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index b91972704..304ee6c38 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -114,6 +114,7 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
#endif
// Give the thread the TC.
this->thread[i]->tc = tc;
+ this->thread[i]->setCpuId(params->cpu_id);
// Add the TC to the CPU's list of TC's.
this->threadContexts.push_back(tc);
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 0d7d82529..e2ad23954 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -247,6 +247,11 @@ class DefaultCommit
/** Handles squashing due to an TC write. */
void squashFromTC(unsigned tid);
+#if FULL_SYSTEM
+ /** Handles processing an interrupt. */
+ void handleInterrupt();
+#endif // FULL_SYSTEM
+
/** Commits as many instructions as possible. */
void commitInsts();
@@ -409,6 +414,16 @@ class DefaultCommit
/** The sequence number of the youngest valid instruction in the ROB. */
InstSeqNum youngestSeqNum[Impl::MaxThreads];
+ /** Records if there is a trap currently in flight. */
+ bool trapInFlight[Impl::MaxThreads];
+
+ /** Records if there were any stores committed this cycle. */
+ bool committedStores[Impl::MaxThreads];
+
+ /** Records if commit should check if the ROB is truly empty (see
+ commit_impl.hh). */
+ bool checkEmptyROB[Impl::MaxThreads];
+
/** Pointer to the list of active threads. */
std::list<unsigned> *activeThreads;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 18fb2aaa3..65e36d99a 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -96,7 +96,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
if (policy == "aggressive"){
commitPolicy = Aggressive;
- DPRINTF(Commit,"Commit Policy set to Aggressive.");
+// DPRINTF(Commit,"Commit Policy set to Aggressive.");
} else if (policy == "roundrobin"){
commitPolicy = RoundRobin;
@@ -105,11 +105,11 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
priority_list.push_back(tid);
}
- DPRINTF(Commit,"Commit Policy set to Round Robin.");
+// DPRINTF(Commit,"Commit Policy set to Round Robin.");
} else if (policy == "oldestready"){
commitPolicy = OldestReady;
- DPRINTF(Commit,"Commit Policy set to Oldest Ready.");
+// DPRINTF(Commit,"Commit Policy set to Oldest Ready.");
} else {
assert(0 && "Invalid SMT Commit Policy. Options Are: {Aggressive,"
"RoundRobin,OldestReady}");
@@ -118,6 +118,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
for (int i=0; i < numThreads; i++) {
commitStatus[i] = Idle;
changedROBNumEntries[i] = false;
+ checkEmptyROB[i] = false;
+ trapInFlight[i] = false;
+ committedStores[i] = false;
trapSquash[i] = false;
tcSquash[i] = false;
PC[i] = nextPC[i] = nextNPC[i] = 0;
@@ -226,8 +229,8 @@ template <class Impl>
void
DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
{
- DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
cpu = cpu_ptr;
+ DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
// Commit must broadcast the number of free entries it has at the start of
// the simulation, so it starts as active.
@@ -247,7 +250,6 @@ template <class Impl>
void
DefaultCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
- DPRINTF(Commit, "Commit: Setting time buffer pointer.\n");
timeBuffer = tb_ptr;
// Setup wire to send information back to IEW.
@@ -261,7 +263,6 @@ template <class Impl>
void
DefaultCommit<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
- DPRINTF(Commit, "Commit: Setting fetch queue pointer.\n");
fetchQueue = fq_ptr;
// Setup wire to get instructions from rename (for the ROB).
@@ -272,7 +273,6 @@ template <class Impl>
void
DefaultCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
- DPRINTF(Commit, "Commit: Setting rename queue pointer.\n");
renameQueue = rq_ptr;
// Setup wire to get instructions from rename (for the ROB).
@@ -283,7 +283,6 @@ template <class Impl>
void
DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
{
- DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n");
iewQueue = iq_ptr;
// Setup wire to get instructions from IEW.
@@ -301,7 +300,6 @@ template<class Impl>
void
DefaultCommit<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
- DPRINTF(Commit, "Commit: Setting active threads list pointer.\n");
activeThreads = at_ptr;
}
@@ -309,8 +307,6 @@ template <class Impl>
void
DefaultCommit<Impl>::setRenameMap(RenameMap rm_ptr[])
{
- DPRINTF(Commit, "Setting rename map pointers.\n");
-
for (int i=0; i < numThreads; i++) {
renameMap[i] = &rm_ptr[i];
}
@@ -320,7 +316,6 @@ template <class Impl>
void
DefaultCommit<Impl>::setROB(ROB *rob_ptr)
{
- DPRINTF(Commit, "Commit: Setting ROB pointer.\n");
rob = rob_ptr;
}
@@ -335,6 +330,7 @@ DefaultCommit<Impl>::initStage()
for (int i=0; i < numThreads; i++) {
toIEW->commitInfo[i].usedROB = true;
toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i);
+ toIEW->commitInfo[i].emptyROB = true;
}
cpu->activityThisCycle();
@@ -473,14 +469,14 @@ DefaultCommit<Impl>::generateTrapEvent(unsigned tid)
TrapEvent *trap = new TrapEvent(this, tid);
trap->schedule(curTick + trapLatency);
-
- thread[tid]->trapPending = true;
+ trapInFlight[tid] = true;
}
template <class Impl>
void
DefaultCommit<Impl>::generateTCEvent(unsigned tid)
{
+ assert(!trapInFlight[tid]);
DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid);
tcSquash[tid] = true;
@@ -495,7 +491,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
// Hopefully this doesn't mess things up. Basically I want to squash
// all instructions of this thread.
InstSeqNum squashed_inst = rob->isEmpty() ?
- 0 : rob->readHeadInst(tid)->seqNum - 1;;
+ 0 : rob->readHeadInst(tid)->seqNum - 1;
// All younger instructions will be squashed. Set the sequence
// number as the youngest instruction in the ROB (0 in this case.
@@ -532,6 +528,7 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
thread[tid]->trapPending = false;
thread[tid]->inSyscall = false;
+ trapInFlight[tid] = false;
trapSquash[tid] = false;
@@ -580,6 +577,10 @@ DefaultCommit<Impl>::tick()
while (threads != end) {
unsigned tid = *threads++;
+ // Clear the bit saying if the thread has committed stores
+ // this cycle.
+ committedStores[tid] = false;
+
if (commitStatus[tid] == ROBSquashing) {
if (rob->isDoneSquashing(tid)) {
@@ -635,16 +636,11 @@ DefaultCommit<Impl>::tick()
updateStatus();
}
+#if FULL_SYSTEM
template <class Impl>
void
-DefaultCommit<Impl>::commit()
+DefaultCommit<Impl>::handleInterrupt()
{
-
- //////////////////////////////////////
- // Check for interrupts
- //////////////////////////////////////
-
-#if FULL_SYSTEM
if (interrupt != NoFault) {
// Wait until the ROB is empty and all stores have drained in
// order to enter the interrupt.
@@ -653,6 +649,12 @@ DefaultCommit<Impl>::commit()
// an interrupt needed to be handled.
DPRINTF(Commit, "Interrupt detected.\n");
+ Fault new_interrupt = cpu->getInterrupts();
+ assert(new_interrupt != NoFault);
+
+ // Clear the interrupt now that it's going to be handled
+ toIEW->commitInfo[0].clearInterrupt = true;
+
assert(!thread[0]->inSyscall);
thread[0]->inSyscall = true;
@@ -666,16 +668,14 @@ DefaultCommit<Impl>::commit()
// Generate trap squash event.
generateTrapEvent(0);
- // Clear the interrupt now that it's been handled
- toIEW->commitInfo[0].clearInterrupt = true;
interrupt = NoFault;
} else {
DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n");
}
- } else if (cpu->check_interrupts(cpu->tcBase(0)) &&
- commitStatus[0] != TrapPending &&
- !trapSquash[0] &&
- !tcSquash[0]) {
+ } else if (commitStatus[0] != TrapPending &&
+ cpu->check_interrupts(cpu->tcBase(0)) &&
+ !trapSquash[0] &&
+ !tcSquash[0]) {
// Process interrupts if interrupts are enabled, not in PAL
// mode, and no other traps or external squashes are currently
// pending.
@@ -691,7 +691,21 @@ DefaultCommit<Impl>::commit()
toIEW->commitInfo[0].interruptPending = true;
}
}
+}
+#endif // FULL_SYSTEM
+
+template <class Impl>
+void
+DefaultCommit<Impl>::commit()
+{
+#if FULL_SYSTEM
+ // Check for any interrupt, and start processing it. Or if we
+ // have an outstanding interrupt and are at a point when it is
+ // valid to take an interrupt, process it.
+ if (cpu->check_interrupts(cpu->tcBase(0))) {
+ handleInterrupt();
+ }
#endif // FULL_SYSTEM
////////////////////////////////////
@@ -709,6 +723,7 @@ DefaultCommit<Impl>::commit()
assert(!tcSquash[tid]);
squashFromTrap(tid);
} else if (tcSquash[tid] == true) {
+ assert(commitStatus[tid] != TrapPending);
squashFromTC(tid);
}
@@ -753,6 +768,7 @@ DefaultCommit<Impl>::commit()
bdelay_done_seq_num--;
#endif
}
+
// All younger instructions will be squashed. Set the sequence
// number as the youngest instruction in the ROB.
youngestSeqNum[tid] = squashed_inst;
@@ -817,13 +833,29 @@ DefaultCommit<Impl>::commit()
toIEW->commitInfo[tid].usedROB = true;
toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
- if (rob->isEmpty(tid)) {
- toIEW->commitInfo[tid].emptyROB = true;
- }
-
wroteToTimeBuffer = true;
changedROBNumEntries[tid] = false;
+ if (rob->isEmpty(tid))
+ checkEmptyROB[tid] = true;
}
+
+ // ROB is only considered "empty" for previous stages if: a)
+ // ROB is empty, b) there are no outstanding stores, c) IEW
+ // stage has received any information regarding stores that
+ // committed.
+ // c) is checked by making sure to not consider the ROB empty
+ // on the same cycle as when stores have been committed.
+ // @todo: Make this handle multi-cycle communication between
+ // commit and IEW.
+ if (checkEmptyROB[tid] && rob->isEmpty(tid) &&
+ !iewStage->hasStoresToWB() && !committedStores[tid]) {
+ checkEmptyROB[tid] = false;
+ toIEW->commitInfo[tid].usedROB = true;
+ toIEW->commitInfo[tid].emptyROB = true;
+ toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
+ wroteToTimeBuffer = true;
+ }
+
}
}
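
The comment block above states the new rule: commit only reports the ROB as empty to earlier stages when the ROB really has no entries, IEW holds no committed stores still waiting to write back, and no store retired during the same cycle. A minimal standalone sketch of that predicate (the struct and names below are illustrative, not the actual DefaultCommit members):

    #include <cstdio>

    // Illustrative per-thread commit bookkeeping (invented names, not the
    // real DefaultCommit fields).
    struct CommitState {
        bool robEmpty;                  // ROB has no entries for this thread
        bool storesPendingWB;           // IEW still holds committed stores
        bool committedStoreThisCycle;   // a store retired this cycle
    };

    // The ROB is reported "empty" only when all three conditions hold;
    // otherwise a store could still be outstanding when, e.g., an
    // interrupt or quiesce is processed.
    static bool robTrulyEmpty(const CommitState &c)
    {
        return c.robEmpty && !c.storesPendingWB && !c.committedStoreThisCycle;
    }

    int main()
    {
        CommitState c{true, false, true};
        std::printf("empty this cycle? %d\n", robTrulyEmpty(c)); // 0: wait
        c.committedStoreThisCycle = false;
        std::printf("empty next cycle? %d\n", robTrulyEmpty(c)); // 1
    }

Deferring the "empty" report by a cycle when a store has just committed is what keeps the single-cycle commit-to-IEW handshake safe; the @todo notes that multi-cycle communication would need more than this.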
@@ -966,8 +998,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// and committed this instruction.
thread[tid]->funcExeInst--;
- head_inst->setAtCommit();
-
if (head_inst->isNonSpeculative() ||
head_inst->isStoreConditional() ||
head_inst->isMemBarrier() ||
@@ -977,19 +1007,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
"instruction [sn:%lli] at the head of the ROB, PC %#x.\n",
head_inst->seqNum, head_inst->readPC());
- // Hack to make sure syscalls/memory barriers/quiesces
- // aren't executed until all stores write back their data.
- // This direct communication shouldn't be used for
- // anything other than this.
- if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
- head_inst->isQuiesce()) &&
- iewStage->hasStoresToWB())
- {
+ if (inst_num > 0 || iewStage->hasStoresToWB()) {
DPRINTF(Commit, "Waiting for all stores to writeback.\n");
return false;
- } else if (inst_num > 0 || iewStage->hasStoresToWB()) {
- DPRINTF(Commit, "Waiting to become head of commit.\n");
- return false;
}
toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
@@ -1002,6 +1022,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
return false;
} else if (head_inst->isLoad()) {
+ if (inst_num > 0 || iewStage->hasStoresToWB()) {
+ DPRINTF(Commit, "Waiting for all stores to writeback.\n");
+ return false;
+ }
+
+ assert(head_inst->uncacheable());
DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n",
head_inst->seqNum, head_inst->readPC());
@@ -1025,8 +1051,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
panic("Thread sync instructions are not handled yet.\n");
}
+ // Check if the instruction caused a fault. If so, trap.
+ Fault inst_fault = head_inst->getFault();
+
// Stores mark themselves as completed.
- if (!head_inst->isStore()) {
+ if (!head_inst->isStore() && inst_fault == NoFault) {
head_inst->setCompleted();
}
@@ -1038,9 +1067,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
}
#endif
- // Check if the instruction caused a fault. If so, trap.
- Fault inst_fault = head_inst->getFault();
-
// DTB will sometimes need the machine instruction for when
// faults happen. So we will set it here, prior to the DTB
// possibly needing it for its fault.
@@ -1048,7 +1074,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
if (inst_fault != NoFault) {
- head_inst->setCompleted();
DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
head_inst->seqNum, head_inst->readPC());
@@ -1057,6 +1082,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
return false;
}
+ head_inst->setCompleted();
+
#if USE_CHECKER
if (cpu->checker && head_inst->isStore()) {
cpu->checker->verify(head_inst);
@@ -1082,6 +1109,14 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
commitStatus[tid] = TrapPending;
+ if (head_inst->traceData) {
+ head_inst->traceData->setFetchSeq(head_inst->seqNum);
+ head_inst->traceData->setCPSeq(thread[tid]->numInst);
+ head_inst->traceData->dump();
+ delete head_inst->traceData;
+ head_inst->traceData = NULL;
+ }
+
// Generate trap squash event.
generateTrapEvent(tid);
// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
@@ -1123,6 +1158,10 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Finally clear the head ROB entry.
rob->retireHead(tid);
+ // If this was a store, record it for this cycle.
+ if (head_inst->isStore())
+ committedStores[tid] = true;
+
// Return true to indicate that we have committed an instruction.
return true;
}
@@ -1167,7 +1206,8 @@ DefaultCommit<Impl>::getInsts()
int tid = inst->threadNumber;
if (!inst->isSquashed() &&
- commitStatus[tid] != ROBSquashing) {
+ commitStatus[tid] != ROBSquashing &&
+ commitStatus[tid] != TrapPending) {
changedROBNumEntries[tid] = true;
DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n",
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 38e6a0b5b..354e3c490 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -466,7 +466,7 @@ FullO3CPU<Impl>::tick()
lastRunningCycle = curTick;
timesIdled++;
} else {
- tickEvent.schedule(curTick + cycles(1));
+ tickEvent.schedule(nextCycle(curTick + cycles(1)));
DPRINTF(O3CPU, "Scheduling next tick!\n");
}
}
@@ -886,7 +886,7 @@ FullO3CPU<Impl>::resume()
#endif
if (!tickEvent.scheduled())
- tickEvent.schedule(curTick);
+ tickEvent.schedule(nextCycle());
_status = Running;
}
@@ -979,11 +979,11 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
ThreadContext *tc = threadContexts[i];
if (tc->status() == ThreadContext::Active && _status != Running) {
_status = Running;
- tickEvent.schedule(curTick);
+ tickEvent.schedule(nextCycle());
}
}
if (!tickEvent.scheduled())
- tickEvent.schedule(curTick);
+ tickEvent.schedule(nextCycle());
}
template <class Impl>
@@ -1393,7 +1393,7 @@ FullO3CPU<Impl>::wakeCPU()
idleCycles += (curTick - 1) - lastRunningCycle;
- tickEvent.schedule(curTick);
+ tickEvent.schedule(nextCycle());
}
template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index ea374dd57..0ab20ba2a 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU
void scheduleTickEvent(int delay)
{
if (tickEvent.squashed())
- tickEvent.reschedule(curTick + cycles(delay));
+ tickEvent.reschedule(nextCycle(curTick + cycles(delay)));
else if (!tickEvent.scheduled())
- tickEvent.schedule(curTick + cycles(delay));
+ tickEvent.schedule(nextCycle(curTick + cycles(delay)));
}
/** Unschedule tick event, regardless of its current state. */
@@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU
{
// Schedule thread to activate, regardless of its current state.
if (activateThreadEvent[tid].squashed())
- activateThreadEvent[tid].reschedule(curTick + cycles(delay));
+ activateThreadEvent[tid].
+ reschedule(nextCycle(curTick + cycles(delay)));
else if (!activateThreadEvent[tid].scheduled())
- activateThreadEvent[tid].schedule(curTick + cycles(delay));
+ activateThreadEvent[tid].
+ schedule(nextCycle(curTick + cycles(delay)));
}
/** Unschedule actiavte thread event, regardless of its current state. */
@@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU
{
// Schedule thread to activate, regardless of its current state.
if (deallocateContextEvent[tid].squashed())
- deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+ deallocateContextEvent[tid].
+ reschedule(nextCycle(curTick + cycles(delay)));
else if (!deallocateContextEvent[tid].scheduled())
- deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+ deallocateContextEvent[tid].
+ schedule(nextCycle(curTick + cycles(delay)));
}
/** Unschedule thread deallocation in CPU */
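
Both cpu.cc and cpu.hh stop scheduling the tick and per-thread events at a raw curTick + cycles(delay) and instead round the target time through nextCycle(), so a CPU that wakes up, resumes, or takes over from another CPU starts ticking on its own clock edge, respecting its clock period and phase. A self-contained sketch of that rounding; the helper name and exact semantics here are an assumption, not gem5's implementation:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    using Tick = uint64_t;

    // Round `when` up to the next clock edge of a CPU with the given
    // period and phase offset; a time already on an edge is unchanged.
    static Tick nextClockEdge(Tick when, Tick period, Tick phase)
    {
        assert(period > 0 && when >= phase);
        Tick past = (when - phase) % period;   // distance past previous edge
        return past == 0 ? when : when + (period - past);
    }

    int main()
    {
        // 500-tick clock, phase 100: edges at 100, 600, 1100, ...
        std::printf("%llu\n", (unsigned long long)nextClockEdge(601, 500, 100)); // 1100
        std::printf("%llu\n", (unsigned long long)nextClockEdge(600, 500, 100)); // 600
    }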
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 79a0bfdbf..93d02bfcd 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -114,15 +114,14 @@ template<class Impl>
void
DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr)
{
- DPRINTF(Decode, "Setting CPU pointer.\n");
cpu = cpu_ptr;
+ DPRINTF(Decode, "Setting CPU pointer.\n");
}
template<class Impl>
void
DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
- DPRINTF(Decode, "Setting time buffer pointer.\n");
timeBuffer = tb_ptr;
// Setup wire to write information back to fetch.
@@ -138,7 +137,6 @@ template<class Impl>
void
DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
{
- DPRINTF(Decode, "Setting decode queue pointer.\n");
decodeQueue = dq_ptr;
// Setup wire to write information to proper place in decode queue.
@@ -149,7 +147,6 @@ template<class Impl>
void
DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
- DPRINTF(Decode, "Setting fetch queue pointer.\n");
fetchQueue = fq_ptr;
// Setup wire to read information from fetch queue.
@@ -160,7 +157,6 @@ template<class Impl>
void
DefaultDecode<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
- DPRINTF(Decode, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
}
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 8347ed775..811f4d2bc 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -33,6 +33,7 @@
#define __CPU_O3_FETCH_HH__
#include "arch/utility.hh"
+#include "arch/predecoder.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/pc_event.hh"
@@ -85,6 +86,8 @@ class DefaultFetch
bool snoopRangeSent;
+ virtual void setPeer(Port *port);
+
protected:
/** Atomic version of receive. Panics. */
virtual Tick recvAtomic(PacketPtr pkt);
@@ -183,6 +186,9 @@ class DefaultFetch
/** Initialize stage. */
void initStage();
+ /** Tells the fetch stage that the Icache is set. */
+ void setIcache();
+
/** Processes cache completion event. */
void processCacheCompletion(PacketPtr pkt);
@@ -338,6 +344,9 @@ class DefaultFetch
/** BPredUnit. */
BPredUnit branchPred;
+ /** Predecoder. */
+ TheISA::Predecoder predecoder;
+
/** Per-thread fetch PC. */
Addr PC[Impl::MaxThreads];
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index ac0149d18..85885906d 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -51,6 +51,15 @@
#include <algorithm>
template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::setPeer(Port *port)
+{
+ Port::setPeer(port);
+
+ fetch->setIcache();
+}
+
+template<class Impl>
Tick
DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
{
@@ -103,6 +112,7 @@ DefaultFetch<Impl>::IcachePort::recvRetry()
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(Params *params)
: branchPred(params),
+ predecoder(NULL),
decodeToFetchDelay(params->decodeToFetchDelay),
renameToFetchDelay(params->renameToFetchDelay),
iewToFetchDelay(params->iewToFetchDelay),
@@ -256,8 +266,8 @@ template<class Impl>
void
DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
{
- DPRINTF(Fetch, "Setting the CPU pointer.\n");
cpu = cpu_ptr;
+ DPRINTF(Fetch, "Setting the CPU pointer.\n");
// Name is finally available, so create the port.
icachePort = new IcachePort(this);
@@ -282,7 +292,6 @@ template<class Impl>
void
DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
- DPRINTF(Fetch, "Setting the time buffer pointer.\n");
timeBuffer = time_buffer;
// Create wires to get information from proper places in time buffer.
@@ -296,7 +305,6 @@ template<class Impl>
void
DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
- DPRINTF(Fetch, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
}
@@ -304,7 +312,6 @@ template<class Impl>
void
DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
- DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
fetchQueue = fq_ptr;
// Create wire to write information to proper place in fetch queue.
@@ -322,12 +329,6 @@ DefaultFetch<Impl>::initStage()
nextNPC[tid] = cpu->readNextNPC(tid);
}
- // Size of cache block.
- cacheBlkSize = icachePort->peerBlockSize();
-
- // Create mask to get rid of offset bits.
- cacheBlkMask = (cacheBlkSize - 1);
-
for (int tid=0; tid < numThreads; tid++) {
fetchStatus[tid] = Running;
@@ -336,11 +337,6 @@ DefaultFetch<Impl>::initStage()
memReq[tid] = NULL;
- // Create space to store a cache line.
- cacheData[tid] = new uint8_t[cacheBlkSize];
- cacheDataPC[tid] = 0;
- cacheDataValid[tid] = false;
-
stalls[tid].decode = false;
stalls[tid].rename = false;
stalls[tid].iew = false;
@@ -350,6 +346,24 @@ DefaultFetch<Impl>::initStage()
template<class Impl>
void
+DefaultFetch<Impl>::setIcache()
+{
+ // Size of cache block.
+ cacheBlkSize = icachePort->peerBlockSize();
+
+ // Create mask to get rid of offset bits.
+ cacheBlkMask = (cacheBlkSize - 1);
+
+ for (int tid=0; tid < numThreads; tid++) {
+ // Create space to store a cache line.
+ cacheData[tid] = new uint8_t[cacheBlkSize];
+ cacheDataPC[tid] = 0;
+ cacheDataValid[tid] = false;
+ }
+}
+
+template<class Impl>
+void
DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
{
unsigned tid = pkt->req->getThreadNum();
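
Previously initStage() read the I-cache block size and allocated the per-thread line buffers immediately, which assumed the instruction port already had a peer. The overridden IcachePort::setPeer() now calls back into the new setIcache(), so that sizing happens only once the connection exists. A small sketch of the same deferred-initialization pattern, using hypothetical class names:

    #include <cstdint>
    #include <memory>
    #include <vector>

    // Hypothetical stand-ins for the fetch stage and its I-cache port.
    class ToyFetch {
      public:
        // Called once the port's peer (and hence the block size) is known.
        void setIcache(unsigned blockSize, unsigned numThreads)
        {
            cacheBlkSize = blockSize;
            cacheBlkMask = blockSize - 1;       // offset mask, power of two
            lineBuffers.resize(numThreads);
            for (auto &buf : lineBuffers)
                buf = std::make_unique<uint8_t[]>(blockSize);
        }
      private:
        unsigned cacheBlkSize = 0;
        unsigned cacheBlkMask = 0;
        std::vector<std::unique_ptr<uint8_t[]>> lineBuffers;
    };

    class ToyIcachePort {
      public:
        explicit ToyIcachePort(ToyFetch *f) : fetch(f) {}
        // Mirrors the overridden setPeer(): connect, then tell fetch the
        // cache side is ready so it can size its buffers.
        void setPeer(unsigned peerBlockSize, unsigned numThreads)
        {
            fetch->setIcache(peerBlockSize, numThreads);
        }
      private:
        ToyFetch *fetch;
    };

    int main()
    {
        ToyFetch fetch;
        ToyIcachePort port(&fetch);
        port.setPeer(64, 2);   // buffers exist only from this point on
    }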
@@ -619,6 +633,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
fault = TheISA::genMachineCheckFault();
delete mem_req;
memReq[tid] = NULL;
+ warn("Bad address!\n");
}
assert(retryPkt == NULL);
assert(retryTid == -1);
@@ -669,11 +684,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
// Get rid of the retrying packet if it was from this thread.
if (retryTid == tid) {
assert(cacheBlocked);
- cacheBlocked = false;
- retryTid = -1;
- delete retryPkt->req;
- delete retryPkt;
+ if (retryPkt) {
+ delete retryPkt->req;
+ delete retryPkt;
+ }
retryPkt = NULL;
+ retryTid = -1;
}
fetchStatus[tid] = Squashing;
@@ -1117,13 +1133,10 @@ DefaultFetch<Impl>::fetch(bool &status_change)
inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
(&cacheData[tid][offset]));
-#if THE_ISA == ALPHA_ISA
- ext_inst = TheISA::makeExtMI(inst, fetch_PC);
-#elif THE_ISA == SPARC_ISA
- ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC());
-#elif THE_ISA == MIPS_ISA
- ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC());
-#endif
+ predecoder.setTC(cpu->thread[tid]->getTC());
+ predecoder.moreBytes(fetch_PC, 0, inst);
+
+ ext_inst = predecoder.getExtMachInst();
// Create a new DynInst from the instruction fetched.
DynInstPtr instruction = new DynInst(ext_inst,
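
Instruction extension is no longer a per-ISA #if chain around makeExtMI(); fetch now drives one predecoder protocol for every ISA: point it at the thread context, feed it the fetched bytes at the fetch PC, then read back the extended machine instruction. The stand-in below only mimics the shape of that three-call protocol; the real TheISA::Predecoder carries actual ISA state:

    #include <cstdint>
    #include <cstdio>

    // Minimal stand-in for an ISA predecoder: it is fed raw instruction
    // words and hands back an "extended" machine instruction. Here the
    // extension is just a tag, purely for illustration.
    struct ToyPredecoder {
        uint64_t ext = 0;
        void setContext(int /*threadId*/) {}            // stands in for setTC()
        void moreBytes(uint64_t pc, uint32_t machInst)  // feed one fetched word
        {
            ext = (static_cast<uint64_t>(machInst) << 8) | (pc & 0xff);
        }
        uint64_t getExtMachInst() const { return ext; }
    };

    int main()
    {
        ToyPredecoder pd;
        pd.setContext(0);
        pd.moreBytes(0x1000, 0x401f0000u);
        std::printf("ext inst: %#llx\n", (unsigned long long)pd.getExtMachInst());
    }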
@@ -1152,7 +1165,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
///FIXME This needs to be more robust in dealing with delay slots
#if !ISA_HAS_DELAY_SLOT
- predicted_branch |=
+// predicted_branch |=
#endif
lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
predicted_branch |= (next_PC != fetch_NPC);
@@ -1223,7 +1236,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
// until commit handles the fault. The only other way it can
// wake up is if a squash comes along and changes the PC.
#if FULL_SYSTEM
- assert(numInst != fetchWidth);
+ assert(numInst < fetchWidth);
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
// We will use a nop in order to carry the fault.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index f24eaf2c4..d2948a525 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -282,8 +282,8 @@ template<class Impl>
void
DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr)
{
- DPRINTF(IEW, "Setting CPU pointer.\n");
cpu = cpu_ptr;
+ DPRINTF(IEW, "Setting CPU pointer.\n");
instQueue.setCPU(cpu_ptr);
ldstQueue.setCPU(cpu_ptr);
@@ -295,7 +295,6 @@ template<class Impl>
void
DefaultIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
- DPRINTF(IEW, "Setting time buffer pointer.\n");
timeBuffer = tb_ptr;
// Setup wire to read information from time buffer, from commit.
@@ -314,7 +313,6 @@ template<class Impl>
void
DefaultIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
- DPRINTF(IEW, "Setting rename queue pointer.\n");
renameQueue = rq_ptr;
// Setup wire to read information from rename queue.
@@ -325,7 +323,6 @@ template<class Impl>
void
DefaultIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
{
- DPRINTF(IEW, "Setting IEW queue pointer.\n");
iewQueue = iq_ptr;
// Setup wire to write instructions to commit.
@@ -336,7 +333,6 @@ template<class Impl>
void
DefaultIEW<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
- DPRINTF(IEW, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
ldstQueue.setActiveThreads(at_ptr);
@@ -347,7 +343,6 @@ template<class Impl>
void
DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
{
- DPRINTF(IEW, "Setting scoreboard pointer.\n");
scoreboard = sb_ptr;
}
@@ -1153,19 +1148,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
inst->setCanCommit();
instQueue.insertBarrier(inst);
add_to_iq = false;
- } else if (inst->isNonSpeculative()) {
- DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
- "encountered, skipping.\n", tid);
-
- // Same as non-speculative stores.
- inst->setCanCommit();
-
- // Specifically insert it as nonspeculative.
- instQueue.insertNonSpec(inst);
-
- ++iewDispNonSpecInsts;
-
- add_to_iq = false;
} else if (inst->isNop()) {
DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, "
"skipping.\n", tid);
@@ -1193,6 +1175,20 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
} else {
add_to_iq = true;
}
+ if (inst->isNonSpeculative()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
+ "encountered, skipping.\n", tid);
+
+ // Same as non-speculative stores.
+ inst->setCanCommit();
+
+ // Specifically insert it as nonspeculative.
+ instQueue.insertNonSpec(inst);
+
+ ++iewDispNonSpecInsts;
+
+ add_to_iq = false;
+ }
// If the instruction queue is not full, then add the
// instruction.
@@ -1379,6 +1375,7 @@ DefaultIEW<Impl>::executeInsts()
predictedNotTakenIncorrect++;
}
} else if (ldstQueue.violation(tid)) {
+ assert(inst->isMemRef());
// If there was an ordering violation, then get the
// DynInst that caused the violation. Note that this
// clears the violation signal.
@@ -1391,10 +1388,10 @@ DefaultIEW<Impl>::executeInsts()
// Ensure the violating instruction is older than
// current squash
- if (fetchRedirect[tid] &&
- violator->seqNum >= toCommit->squashedSeqNum[tid])
+/* if (fetchRedirect[tid] &&
+ violator->seqNum >= toCommit->squashedSeqNum[tid] + 1)
continue;
-
+*/
fetchRedirect[tid] = true;
// Tell the instruction queue that a violation has occured.
@@ -1414,6 +1411,33 @@ DefaultIEW<Impl>::executeInsts()
squashDueToMemBlocked(inst, tid);
}
+ } else {
+ // Reset any state associated with redirects that will not
+ // be used.
+ if (ldstQueue.violation(tid)) {
+ assert(inst->isMemRef());
+
+ DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
+
+ DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
+ "%#x, inst PC: %#x. Addr is: %#x.\n",
+ violator->readPC(), inst->readPC(), inst->physEffAddr);
+ DPRINTF(IEW, "Violation will not be handled because "
+ "already squashing\n");
+
+ ++memOrderViolationEvents;
+ }
+ if (ldstQueue.loadBlocked(tid) &&
+ !ldstQueue.isLoadBlockedHandled(tid)) {
+ DPRINTF(IEW, "Load operation couldn't execute because the "
+ "memory system is blocked. PC: %#x [sn:%lli]\n",
+ inst->readPC(), inst->seqNum);
+ DPRINTF(IEW, "Blocked load will not be handled because "
+ "already squashing\n");
+
+ ldstQueue.setLoadBlockedHandled(tid);
+ }
+
}
}
@@ -1563,6 +1587,7 @@ DefaultIEW<Impl>::tick()
//DPRINTF(IEW,"NonspecInst from thread %i",tid);
if (fromCommit->commitInfo[tid].uncached) {
instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
+ fromCommit->commitInfo[tid].uncachedLoad->setAtCommit();
} else {
instQueue.scheduleNonSpec(
fromCommit->commitInfo[tid].nonSpecSeqNum);
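
In dispatch, the non-speculative case is no longer a competing arm of the else-if chain; it is checked after the class-specific handling, so the check applies to whatever falls out of that chain. A control-flow sketch of that shape (names and printouts are invented for illustration):

    #include <cstdio>

    struct ToyInst {
        bool isBarrier;
        bool isNop;
        bool isNonSpeculative;
    };

    // Class-specific handling decides where the instruction goes first;
    // the non-speculative flag is then tested separately rather than as
    // another else-if branch.
    static void dispatch(const ToyInst &inst)
    {
        bool addToIQ = true;

        if (inst.isBarrier) {
            std::printf("insert as barrier\n");
            addToIQ = false;
        } else if (inst.isNop) {
            std::printf("nop: mark executed\n");
            addToIQ = false;
        }

        if (inst.isNonSpeculative) {
            std::printf("insert as non-speculative\n");
            addToIQ = false;
        }

        if (addToIQ)
            std::printf("insert into instruction queue\n");
    }

    int main()
    {
        dispatch({true, false, true});    // barrier that is also non-spec
        dispatch({false, false, false});  // ordinary instruction
    }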
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index d5781d89d..4d99fb520 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -81,8 +81,6 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
// Set the number of physical registers as the number of int + float
numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
- DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
-
//Create an entry for each physical register within the
//dependency graph.
dependGraph.resize(numPhysRegs);
@@ -124,8 +122,10 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
maxEntries[i] = part_amt;
}
+/*
DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
"%i entries per thread.\n",part_amt);
+*/
} else if (policy == "threshold") {
iqPolicy = Threshold;
@@ -139,8 +139,10 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
maxEntries[i] = thresholdIQ;
}
+/*
DPRINTF(IQ, "IQ sharing policy set to Threshold:"
"%i entries per thread.\n",thresholdIQ);
+*/
} else {
assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
"Partitioned, Threshold}");
@@ -360,7 +362,6 @@ template <class Impl>
void
InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
- DPRINTF(IQ, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
}
@@ -368,15 +369,13 @@ template <class Impl>
void
InstructionQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
{
- DPRINTF(IQ, "Set the issue to execute queue.\n");
- issueToExecuteQueue = i2e_ptr;
+ issueToExecuteQueue = i2e_ptr;
}
template <class Impl>
void
InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
- DPRINTF(IQ, "Set the time buffer.\n");
timeBuffer = tb_ptr;
fromCommit = timeBuffer->getWire(-commitToIEWDelay);
@@ -829,6 +828,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
unsigned tid = (*inst_it).second->threadNumber;
+ (*inst_it).second->setAtCommit();
+
(*inst_it).second->setCanIssue();
if (!(*inst_it).second->isMemRef()) {
@@ -960,6 +961,8 @@ template <class Impl>
void
InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
{
+ DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+ resched_inst->clearCanIssue();
memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
}
@@ -984,7 +987,6 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
completed_inst->memOpDone = true;
memDepUnit[tid].completed(completed_inst);
-
count[tid]--;
}
@@ -1084,16 +1086,21 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
++iqSquashedOperandsExamined;
}
- } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
+ } else if (!squashed_inst->isStoreConditional() ||
+ !squashed_inst->isCompleted()) {
NonSpecMapIt ns_inst_it =
nonSpecInsts.find(squashed_inst->seqNum);
assert(ns_inst_it != nonSpecInsts.end());
+ if (ns_inst_it == nonSpecInsts.end()) {
+ assert(squashed_inst->getFault() != NoFault);
+ } else {
- (*ns_inst_it).second = NULL;
+ (*ns_inst_it).second = NULL;
- nonSpecInsts.erase(ns_inst_it);
+ nonSpecInsts.erase(ns_inst_it);
- ++iqSquashedNonSpecRemoved;
+ ++iqSquashedNonSpecRemoved;
+ }
}
// Might want to also clear out the head of the dependency graph.
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index d4994fcb7..02cc5784c 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -112,8 +112,6 @@ LSQ<Impl>::LSQ(Params *params)
SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
retryTid(-1)
{
- DPRINTF(LSQ, "Creating LSQ object.\n");
-
dcachePort.snoopRangeSent = false;
//**********************************************/
@@ -131,20 +129,20 @@ LSQ<Impl>::LSQ(Params *params)
maxLQEntries = LQEntries;
maxSQEntries = SQEntries;
-
+/*
DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
-
+*/
} else if (policy == "partitioned") {
lsqPolicy = Partitioned;
//@todo:make work if part_amt doesnt divide evenly.
maxLQEntries = LQEntries / numThreads;
maxSQEntries = SQEntries / numThreads;
-
+/*
DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
"%i entries per LQ | %i entries per SQ",
maxLQEntries,maxSQEntries);
-
+*/
} else if (policy == "threshold") {
lsqPolicy = Threshold;
@@ -156,10 +154,11 @@ LSQ<Impl>::LSQ(Params *params)
//amount of the LSQ
maxLQEntries = params->smtLSQThreshold;
maxSQEntries = params->smtLSQThreshold;
-
+/*
DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
"%i entries per LQ | %i entries per SQ",
maxLQEntries,maxSQEntries);
+*/
} else {
assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 2419afe29..1b10843f5 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
(load_idx != loadHead || !load_inst->isAtCommit())) {
iewStage->rescheduleMemInst(load_inst);
++lsqRescheduledLoads;
+
+ // Must delete request now that it wasn't handed off to
+ // memory. This is quite ugly. @todo: Figure out the proper
+ // place to really handle request deletes.
+ delete req;
return TheISA::genMachineCheckFault();
}
@@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
if (store_size == 0)
continue;
+ else if (storeQueue[store_idx].inst->uncacheable())
+ continue;
+
+ assert(storeQueue[store_idx].inst->effAddrValid);
// Check if the store data is within the lower and upper bounds of
// addresses that the request needs.
@@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
storeQueue[store_idx].inst->effAddr;
// If the store's data has all of the data needed, we can forward.
- if (store_has_lower_limit && store_has_upper_limit) {
+ if ((store_has_lower_limit && store_has_upper_limit)) {
// Get shift amount for offset into the store's data.
int shift_amt = req->getVaddr() & (store_size - 1);
// @todo: Magic number, assumes byte addressing
@@ -596,6 +605,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// If it's already been written back, then don't worry about
// stalling on it.
if (storeQueue[store_idx].completed) {
+ panic("Should not check one of these");
continue;
}
@@ -614,6 +624,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
iewStage->decrWb(load_inst->seqNum);
+ load_inst->clearIssued();
++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
@@ -622,7 +633,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
"Store idx %i to load addr %#x\n",
store_idx, req->getVaddr());
- ++lsqBlockedLoads;
+ // Must delete request now that it wasn't handed off to
+ // memory. This is quite ugly. @todo: Figure out the
+ // proper place to really handle request deletes.
+ delete req;
+
return NoFault;
}
}
@@ -654,8 +669,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// Delete state and data packet because a load retry
// initiates a pipeline restart; it does not retry.
delete state;
+ delete data_pkt->req;
delete data_pkt;
+ req = NULL;
+
if (result == Packet::BadAddress) {
return TheISA::genMachineCheckFault();
}
@@ -669,6 +687,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// If the cache was blocked, or has become blocked due to the access,
// handle it.
if (lsq->cacheBlocked()) {
+ if (req)
+ delete req;
+
++lsqCacheBlocked;
iewStage->decrWb(load_inst->seqNum);
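
The forwarding path in read() only forwards store data when the older store's bytes completely cover the load's request (the store_has_lower_limit / store_has_upper_limit pair); a partial overlap instead stalls the load behind the store. The hunks above also make the LSQ delete the Request whenever an access is bounced rather than handed off to memory. A standalone sketch of just the coverage test, with assumed names:

    #include <cstdint>
    #include <cstdio>

    using Addr = uint64_t;

    // Does an older store fully cover the bytes a load needs? Only then
    // can its data be forwarded directly; a partial overlap forces the
    // load to wait for the store to write back. Sketch only, not the
    // LSQUnit<Impl>::read() code itself.
    static bool storeCoversLoad(Addr storeAddr, unsigned storeSize,
                                Addr loadAddr, unsigned loadSize)
    {
        bool hasLower = loadAddr >= storeAddr;
        bool hasUpper = loadAddr + loadSize <= storeAddr + storeSize;
        return hasLower && hasUpper;
    }

    int main()
    {
        std::printf("%d\n", storeCoversLoad(0x1000, 8, 0x1004, 4)); // 1: forward
        std::printf("%d\n", storeCoversLoad(0x1000, 8, 0x1006, 4)); // 0: partial
    }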
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3ba22a530..0a3021046 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
if (isSwitchedOut() || inst->isSquashed()) {
iewStage->decrWb(inst->seqNum);
delete state;
+ delete pkt->req;
delete pkt;
return;
} else {
@@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
delete state;
+ delete pkt->req;
delete pkt;
}
@@ -110,7 +112,7 @@ void
LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id)
{
- DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
+// DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
switchedOut = false;
@@ -403,12 +405,15 @@ template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
{
+ using namespace TheISA;
// Execute a specific load.
Fault load_fault = NoFault;
DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
inst->readPC(),inst->seqNum);
+ assert(!inst->isSquashed());
+
load_fault = inst->initiateAcc();
// If the instruction faulted, then we need to send it along to commit
@@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
// realizes there is activity.
// Mark it as executed unless it is an uncached load that
// needs to hit the head of commit.
- if (!(inst->req && inst->req->isUncacheable()) ||
+ if (!(inst->hasRequest() && inst->uncacheable()) ||
inst->isAtCommit()) {
inst->setExecuted();
}
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
+ } else if (!loadBlocked()) {
+ assert(inst->effAddrValid);
+ int load_idx = inst->lqIdx;
+ incrLdIdx(load_idx);
+ while (load_idx != loadTail) {
+ // Really only need to check loads that have actually executed
+
+ // @todo: For now this is extra conservative, detecting a
+ // violation if the addresses match assuming all accesses
+ // are quad word accesses.
+
+ // @todo: Fix this, magic number being used here
+ if (loadQueue[load_idx]->effAddrValid &&
+ (loadQueue[load_idx]->effAddr >> 8) ==
+ (inst->effAddr >> 8)) {
+ // A load incorrectly passed this load. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ DynInstPtr violator = loadQueue[load_idx];
+ if (!memDepViolator ||
+ (violator->seqNum < memDepViolator->seqNum)) {
+ memDepViolator = violator;
+ } else {
+ break;
+ }
+
+ ++lsqMemOrderViolation;
+
+ return genMachineCheckFault();
+ }
+
+ incrLdIdx(load_idx);
+ }
}
return load_fault;
@@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
store_inst->readPC(), store_inst->seqNum);
+ assert(!store_inst->isSquashed());
+
// Check the recently completed loads to see if any match this store's
// address. If so, then we have a memory ordering violation.
int load_idx = store_inst->lqIdx;
@@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
++storesToWB;
}
- if (!memDepViolator) {
- while (load_idx != loadTail) {
- // Really only need to check loads that have actually executed
- // It's safe to check all loads because effAddr is set to
- // InvalAddr when the dyn inst is created.
-
- // @todo: For now this is extra conservative, detecting a
- // violation if the addresses match assuming all accesses
- // are quad word accesses.
-
- // @todo: Fix this, magic number being used here
- if ((loadQueue[load_idx]->effAddr >> 8) ==
- (store_inst->effAddr >> 8)) {
- // A load incorrectly passed this store. Squash and refetch.
- // For now return a fault to show that it was unsuccessful.
- memDepViolator = loadQueue[load_idx];
- ++lsqMemOrderViolation;
-
- return genMachineCheckFault();
+ assert(store_inst->effAddrValid);
+ while (load_idx != loadTail) {
+ // Really only need to check loads that have actually executed
+ // It's safe to check all loads because effAddr is set to
+ // InvalAddr when the dyn inst is created.
+
+ // @todo: For now this is extra conservative, detecting a
+ // violation if the addresses match assuming all accesses
+ // are quad word accesses.
+
+ // @todo: Fix this, magic number being used here
+ if (loadQueue[load_idx]->effAddrValid &&
+ (loadQueue[load_idx]->effAddr >> 8) ==
+ (store_inst->effAddr >> 8)) {
+ // A load incorrectly passed this store. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ DynInstPtr violator = loadQueue[load_idx];
+ if (!memDepViolator ||
+ (violator->seqNum < memDepViolator->seqNum)) {
+ memDepViolator = violator;
+ } else {
+ break;
}
- incrLdIdx(load_idx);
+ ++lsqMemOrderViolation;
+
+ return genMachineCheckFault();
}
- // If we've reached this point, there was no violation.
- memDepViolator = NULL;
+ incrLdIdx(load_idx);
}
return store_fault;
@@ -660,7 +703,7 @@ LSQUnit<Impl>::writebackStores()
panic("LSQ sent out a bad address for a completed store!");
}
// Need to handle becoming blocked on a store.
- DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
+ DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
"retry later\n",
inst->seqNum);
isStoreBlocked = true;
@@ -735,6 +778,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
}
+ if (memDepViolator && squashed_num < memDepViolator->seqNum) {
+ memDepViolator = NULL;
+ }
+
int store_idx = storeTail;
decrStIdx(store_idx);
@@ -764,6 +811,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;
+ // Must delete request now that it wasn't handed off to
+ // memory. This is quite ugly. @todo: Figure out the proper
+ // place to really handle request deletes.
+ delete storeQueue[store_idx].req;
+
storeQueue[store_idx].req = NULL;
--stores;
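
Both executeLoad() and executeStore() now scan the younger loads themselves, require effAddrValid before comparing, match at a coarse granularity (effAddr >> 8, the magic number the @todo flags), and keep the oldest matching load as the violator so the squash restarts from the earliest offender. A standalone sketch of that selection; the types and names are invented for illustration:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    using Addr = uint64_t;
    using SeqNum = uint64_t;

    // One entry per in-flight load younger than the instruction checked.
    struct ToyLoad {
        SeqNum seq;
        Addr effAddr;
        bool addrValid;
    };

    // Conservative check mirroring the shape of the hunk: compare
    // effective addresses at a coarse granularity and remember the
    // oldest younger load that matches.
    static const ToyLoad *findViolator(Addr myAddr,
                                       const std::vector<ToyLoad> &younger)
    {
        const ToyLoad *violator = nullptr;
        for (const ToyLoad &ld : younger) {
            if (!ld.addrValid)
                continue;
            if ((ld.effAddr >> 8) == (myAddr >> 8) &&
                (!violator || ld.seq < violator->seq))
                violator = &ld;
        }
        return violator;
    }

    int main()
    {
        std::vector<ToyLoad> younger = {
            {42, 0x10080, true},   // same 256-byte block as 0x100f0
            {37, 0x100f8, true},   // older match -> reported
            {50, 0x20000, true},
        };
        const ToyLoad *v = findViolator(0x100f0, younger);
        std::printf("violator seq: %llu\n",
                    v ? (unsigned long long)v->seq : 0ULL);   // 37
    }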
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index f19980fd5..64558efaa 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -214,6 +214,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
inst_entry->regsReady = true;
}
+ // Clear the bit saying this instruction can issue.
+ inst->clearCanIssue();
+
// Add this instruction to the list of dependents.
store_entry->dependInsts.push_back(inst_entry);
@@ -357,7 +360,6 @@ void
MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
{
DynInstPtr temp_inst;
- bool found_inst = false;
// For now this replay function replays all waiting memory ops.
while (!instsToReplay.empty()) {
@@ -371,14 +373,8 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
moveToReady(inst_entry);
- if (temp_inst == inst) {
- found_inst = true;
- }
-
instsToReplay.pop_front();
}
-
- assert(found_inst);
}
template <class MemDepPred, class Impl>
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index e303f1cee..eb04ca733 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -168,15 +168,14 @@ template <class Impl>
void
DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr)
{
- DPRINTF(Rename, "Setting CPU pointer.\n");
cpu = cpu_ptr;
+ DPRINTF(Rename, "Setting CPU pointer.\n");
}
template <class Impl>
void
DefaultRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
- DPRINTF(Rename, "Setting time buffer pointer.\n");
timeBuffer = tb_ptr;
// Setup wire to read information from time buffer, from IEW stage.
@@ -193,7 +192,6 @@ template <class Impl>
void
DefaultRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
- DPRINTF(Rename, "Setting rename queue pointer.\n");
renameQueue = rq_ptr;
// Setup wire to write information to future stages.
@@ -204,7 +202,6 @@ template <class Impl>
void
DefaultRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
{
- DPRINTF(Rename, "Setting decode queue pointer.\n");
decodeQueue = dq_ptr;
// Setup wire to get information from decode.
@@ -228,7 +225,6 @@ template<class Impl>
void
DefaultRename<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
- DPRINTF(Rename, "Setting active threads list pointer.\n");
activeThreads = at_ptr;
}
@@ -237,8 +233,6 @@ template <class Impl>
void
DefaultRename<Impl>::setRenameMap(RenameMap rm_ptr[])
{
- DPRINTF(Rename, "Setting rename map pointers.\n");
-
for (int i=0; i<numThreads; i++) {
renameMap[i] = &rm_ptr[i];
}
@@ -248,7 +242,6 @@ template <class Impl>
void
DefaultRename<Impl>::setFreeList(FreeList *fl_ptr)
{
- DPRINTF(Rename, "Setting free list pointer.\n");
freeList = fl_ptr;
}
@@ -256,7 +249,6 @@ template<class Impl>
void
DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
{
- DPRINTF(Rename, "Setting scoreboard pointer.\n");
scoreboard = _scoreboard;
}
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 620daf691..b436ec1c3 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg)
// known that the prev reg was outside the range of normal registers
// so the free list can avoid adding it.
prev_reg = renamed_reg;
-
- assert(renamed_reg < numPhysicalRegs + numMiscRegs);
}
DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n",
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index fde636754..975aba379 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -66,7 +66,7 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
} else if (policy == "partitioned") {
robPolicy = Partitioned;
- DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");
+// DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");
//@todo:make work if part_amt doesnt divide evenly.
int part_amt = numEntries / numThreads;
@@ -78,7 +78,7 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
} else if (policy == "threshold") {
robPolicy = Threshold;
- DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");
+// DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");
int threshold = _smtROBThreshold;;
diff --git a/src/cpu/o3/sparc/cpu_builder.cc b/src/cpu/o3/sparc/cpu_builder.cc
index 3cac89bad..35badce2c 100644
--- a/src/cpu/o3/sparc/cpu_builder.cc
+++ b/src/cpu/o3/sparc/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
Param<int> clock;
Param<int> phase;
Param<int> numThreads;
+ Param<int> cpu_id;
Param<int> activity;
#if FULL_SYSTEM
SimObjectParam<System *> system;
- Param<int> cpu_id;
SimObjectParam<SparcISA::ITB *> itb;
SimObjectParam<SparcISA::DTB *> dtb;
Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(clock, "clock speed"),
INIT_PARAM_DFLT(phase, "clock phase", 0),
INIT_PARAM(numThreads, "number of HW thread contexts"),
+ INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM_DFLT(activity, "Initial activity count", 0),
#if FULL_SYSTEM
INIT_PARAM(system, "System object"),
- INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
SparcSimpleParams *params = new SparcSimpleParams;
params->clock = clock;
+ params->phase = phase;
params->name = getInstanceName();
params->numberOfThreads = actual_num_threads;
+ params->cpu_id = cpu_id;
params->activity = activity;
#if FULL_SYSTEM
params->system = system;
- params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
params->profile = profile;
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index d2acc6232..a145e046e 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -103,7 +103,7 @@ void
O3ThreadContext<Impl>::delVirtPort(VirtualPort *vp)
{
if (vp != thread->getVirtPort()) {
- delete vp->getPeer();
+ vp->removeConn();
delete vp;
}
}