diff options
author | Korey Sewell <ksewell@umich.edu> | 2006-06-14 14:43:45 -0400 |
---|---|---|
committer | Korey Sewell <ksewell@umich.edu> | 2006-06-14 14:43:45 -0400 |
commit | 2a9becba44f1d70b05100c04b95d475c43099fa9 (patch) | |
tree | 46ef0e871f3329698feeee25d0d6935f21661b78 /src/cpu | |
parent | e715e298e044af3727423b27d08327d72b2d74fa (diff) | |
parent | 7709e6ba93be4b67b22e2f3f9c853a3e1ab4458b (diff) | |
download | gem5-2a9becba44f1d70b05100c04b95d475c43099fa9.tar.xz |
Merge zizzer:/bk/newmem
into zazzer.eecs.umich.edu:/.automount/zooks/y/ksewell/research/m5-sim/newmem-release
--HG--
extra : convert_revision : 9b5b1419e8e22bce16ed97fc02c2008ca0181afc
Diffstat (limited to 'src/cpu')
-rw-r--r-- | src/cpu/base_dyn_inst.cc | 68 | ||||
-rw-r--r-- | src/cpu/base_dyn_inst.hh | 38 | ||||
-rw-r--r-- | src/cpu/checker/cpu.cc | 2 | ||||
-rw-r--r-- | src/cpu/o3/alpha_cpu_builder.cc | 15 | ||||
-rw-r--r-- | src/cpu/o3/alpha_params.hh | 5 | ||||
-rw-r--r-- | src/cpu/o3/commit.hh | 5 | ||||
-rw-r--r-- | src/cpu/o3/commit_impl.hh | 48 | ||||
-rw-r--r-- | src/cpu/o3/decode_impl.hh | 2 | ||||
-rw-r--r-- | src/cpu/o3/fetch.hh | 1 | ||||
-rw-r--r-- | src/cpu/o3/fetch_impl.hh | 16 | ||||
-rw-r--r-- | src/cpu/o3/iew.hh | 46 | ||||
-rw-r--r-- | src/cpu/o3/iew_impl.hh | 144 | ||||
-rw-r--r-- | src/cpu/o3/inst_queue.hh | 12 | ||||
-rw-r--r-- | src/cpu/o3/inst_queue_impl.hh | 17 | ||||
-rw-r--r-- | src/cpu/o3/lsq.hh | 3 | ||||
-rw-r--r-- | src/cpu/o3/lsq_impl.hh | 12 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit.hh | 50 | ||||
-rw-r--r-- | src/cpu/o3/lsq_unit_impl.hh | 47 | ||||
-rw-r--r-- | src/cpu/o3/rename_impl.hh | 2 |
19 files changed, 218 insertions, 315 deletions
diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc index 30fa10a6b..e3829297d 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -103,6 +103,8 @@ BaseDynInst<Impl>::initVars() readyRegs = 0; + instResult.integer = 0; + // May want to turn this into a bit vector or something. completed = false; resultReady = false; @@ -242,31 +244,7 @@ template <class Impl> void BaseDynInst<Impl>::writeHint(Addr addr, int size, unsigned flags) { - // Need to create a MemReq here so we can do a translation. This - // will casue a TLB miss trap if necessary... not sure whether - // that's the best thing to do or not. We don't really need the - // MemReq otherwise, since wh64 has no functional effect. -/* - MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags); - req->asid = asid; - - fault = cpu->translateDataWriteReq(req); - - if (fault == NoFault && !(req->flags & UNCACHEABLE)) { - // Record key MemReq parameters so we can generate another one - // just like it for the timing access without calling translate() - // again (which might mess up the TLB). - effAddr = req->vaddr; - physEffAddr = req->paddr; - memReqFlags = req->flags; - } else { - // ignore faults & accesses to uncacheable space... treat as no-op - effAddr = physEffAddr = MemReq::inval_addr; - } - - storeSize = size; - storeData = 0; -*/ + // Not currently supported. } /** @@ -276,22 +254,7 @@ template <class Impl> Fault BaseDynInst<Impl>::copySrcTranslate(Addr src) { -/* - MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64); - req->asid = asid; - - // translate to physical address - Fault fault = cpu->translateDataReadReq(req); - - if (fault == NoFault) { - thread->copySrcAddr = src; - thread->copySrcPhysAddr = req->paddr; - } else { - thread->copySrcAddr = 0; - thread->copySrcPhysAddr = 0; - } - return fault; -*/ + // Not currently supported. return NoFault; } @@ -302,26 +265,7 @@ template <class Impl> Fault BaseDynInst<Impl>::copy(Addr dest) { -/* - uint8_t data[64]; - FunctionalMemory *mem = thread->mem; - assert(thread->copySrcPhysAddr); - MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64); - req->asid = asid; - - // translate to physical address - Fault fault = cpu->translateDataWriteReq(req); - - if (fault == NoFault) { - Addr dest_addr = req->paddr; - // Need to read straight from memory since we have more than 8 bytes. - req->paddr = thread->copySrcPhysAddr; - mem->read(req, data); - req->paddr = dest_addr; - mem->write(req, data); - } - return fault; -*/ + // Not currently supported. return NoFault; } diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 948ee058a..fc9bf8b94 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,12 +44,6 @@ #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "sim/system.hh" -/* -#include "encumbered/cpu/full/bpred_update.hh" -#include "encumbered/cpu/full/spec_memory.hh" -#include "encumbered/cpu/full/spec_state.hh" -#include "encumbered/mem/functional/main.hh" -*/ /** * @file @@ -202,10 +196,9 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault fault; /** The memory request. */ -// MemReqPtr req; Request *req; -// Packet pkt; + /** Pointer to the data for the memory access. */ uint8_t *memData; /** The effective virtual address (lds & stores only). */ @@ -288,21 +281,6 @@ class BaseDynInst : public FastAlloc, public RefCounted void initVars(); public: - /** - * @todo: Make this function work; currently it is a dummy function. - * @param fault Last fault. - * @param cmd Last command. - * @param addr Virtual address of access. - * @param p Memory accessed. - * @param nbytes Access size. - */ -// void -// trace_mem(Fault fault, -// MemCmd cmd, -// Addr addr, -// void *p, -// int nbytes); - /** Dumps out contents of this BaseDynInst. */ void dump(); @@ -439,11 +417,13 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns the result of a floating point (double) instruction. */ double readDoubleResult() { return instResult.dbl; } + /** Records an integer register being set to a value. */ void setIntReg(const StaticInst *si, int idx, uint64_t val) { instResult.integer = val; } + /** Records an fp register being set to a value. */ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) { if (width == 32) @@ -454,16 +434,19 @@ class BaseDynInst : public FastAlloc, public RefCounted panic("Unsupported width!"); } + /** Records an fp register being set to a value. */ void setFloatReg(const StaticInst *si, int idx, FloatReg val) { instResult.fp = val; } + /** Records an fp register being set to an integer value. */ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val, int width) { instResult.integer = val; } + /** Records an fp register being set to an integer value. */ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val) { instResult.integer = val; @@ -590,14 +573,15 @@ class BaseDynInst : public FastAlloc, public RefCounted void setNextPC(uint64_t val) { nextPC = val; -// instResult.integer = val; } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } - void setThread(unsigned tid) { threadNumber = tid; } + /** Sets the thread id. */ + void setTid(unsigned tid) { threadNumber = tid; } - void setState(ImplState *state) { thread = state; } + void setThreadState(ImplState *state) { thread = state; } /** Returns the thread context. */ diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index 37c5a3aa2..6971ab37f 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -85,6 +85,8 @@ CheckerCPU::CheckerCPU(Params *p) #else process = p->process; #endif + + result.integer = 0; } CheckerCPU::~CheckerCPU() diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc index 828977ccb..a6fbe34d7 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha_cpu_builder.cc @@ -92,11 +92,6 @@ Param<unsigned> commitToIEWDelay; Param<unsigned> renameToIEWDelay; Param<unsigned> issueToExecuteDelay; Param<unsigned> issueWidth; -Param<unsigned> executeWidth; -Param<unsigned> executeIntWidth; -Param<unsigned> executeFloatWidth; -Param<unsigned> executeBranchWidth; -Param<unsigned> executeMemoryWidth; SimObjectParam<FUPool *> fuPool; Param<unsigned> iewToCommitDelay; @@ -213,11 +208,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -344,11 +334,6 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh index f3cf36887..2ece7fb7f 100644 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/alpha_params.hh @@ -105,11 +105,6 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned renameToIEWDelay; unsigned issueToExecuteDelay; unsigned issueWidth; - unsigned executeWidth; - unsigned executeIntWidth; - unsigned executeFloatWidth; - unsigned executeBranchWidth; - unsigned executeMemoryWidth; FUPool *fuPool; // diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index b7404c488..0b31cb9c8 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -365,11 +365,6 @@ class DefaultCommit */ unsigned renameWidth; - /** IEW width, in instructions. Used so ROB knows how many - * instructions to get from the IEW instruction queue. - */ - unsigned iewWidth; - /** Commit width, in instructions. */ unsigned commitWidth; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index ceb2918e0..021d3ef90 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -72,7 +72,6 @@ DefaultCommit<Impl>::DefaultCommit(Params *params) renameToROBDelay(params->renameToROBDelay), fetchToCommitDelay(params->commitToFetchDelay), renameWidth(params->renameWidth), - iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), switchPending(false), @@ -205,19 +204,6 @@ DefaultCommit<Impl>::regStats() .flags(total) ; - // - // Commit-Eligible instructions... - // - // -> The number of instructions eligible to commit in those - // cycles where we reached our commit BW limit (less the number - // actually committed) - // - // -> The average value is computed over ALL CYCLES... not just - // the BW limited cycles - // - // -> The standard deviation is computed only over cycles where - // we reached the BW limit - // commitEligible .init(cpu->number_of_threads) .name(name() + ".COM:bw_limited") @@ -434,7 +420,7 @@ DefaultCommit<Impl>::setNextStatus() } } - assert(squashes == squashCounter); + squashCounter = squashes; // If commit is currently squashing, then it will have activity for the // next cycle. Set its next status as active. @@ -539,8 +525,6 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid) commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); - - ++squashCounter; } template <class Impl> @@ -558,8 +542,6 @@ DefaultCommit<Impl>::squashFromTC(unsigned tid) cpu->activityThisCycle(); tcSquash[tid] = false; - - ++squashCounter; } template <class Impl> @@ -585,10 +567,12 @@ DefaultCommit<Impl>::tick() if (rob->isDoneSquashing(tid)) { commitStatus[tid] = Running; - --squashCounter; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" "insts this cycle.\n", tid); + rob->doSquash(tid); + toIEW->commitInfo[tid].robSquashing = true; + wroteToTimeBuffer = true; } } } @@ -694,29 +678,7 @@ DefaultCommit<Impl>::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; -/* - if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { - // Record the fault. Wait until it's empty in the ROB. - // Then handle the trap. Ignore it if there's already a - // trap pending as fetch will be redirected. - fetchFault = fromFetch->fetchFault; - fetchFaultTick = curTick + fetchTrapLatency; - commitStatus[0] = FetchTrapPending; - DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " - "ROB empties without squashing the fault.\n"); - fetchTrapWait = 0; - } - // Fetch may tell commit to clear the trap if it's been squashed. - if (fromFetch->clearFetchFault) { - DPRINTF(Commit, "Received clear fetch fault signal\n"); - fetchTrapWait = 0; - if (commitStatus[0] == FetchTrapPending) { - DPRINTF(Commit, "Clearing fault from fetch\n"); - commitStatus[0] = Running; - } - } -*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. if (trapSquash[tid] == true) { @@ -744,8 +706,6 @@ DefaultCommit<Impl>::commit() commitStatus[tid] = ROBSquashing; - ++squashCounter; - // If we want to include the squashing instruction in the squash, // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8a6ea6626..0748ddb3b 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -515,7 +515,7 @@ DefaultDecode<Impl>::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Decode, "[tid:%]: ROB is still squashing.\n",tid); + DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. decodeStatus[tid] = Squashing; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 76b32de68..962d46437 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -421,6 +421,7 @@ class DefaultFetch Stats::Scalar<> icacheStallCycles; /** Stat for total number of fetched instructions. */ Stats::Scalar<> fetchedInsts; + /** Total number of fetched branches. */ Stats::Scalar<> fetchedBranches; /** Stat for total number of predicted branches. */ Stats::Scalar<> predictedBranches; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index c0a2a5d09..477a1469c 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -817,7 +817,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Fetch, "[tid:%u]: ROB is still squashing Thread %u.\n", tid); + DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. fetchStatus[tid] = Squashing; @@ -915,7 +915,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); if (!fetch_success) { - ++fetchMiscStallCycles; + if (cacheBlocked) { + ++icacheStallCycles; + } else { + ++fetchMiscStallCycles; + } return; } } else { @@ -984,11 +988,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, next_PC, inst_seq, cpu); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " "[sn:%lli]\n", @@ -1065,11 +1069,11 @@ DefaultFetch<Impl>::fetch(bool &status_change) next_PC, inst_seq, cpu); instruction->setPredTarg(next_PC + instSize); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); instruction->traceData = NULL; diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2e61af5fc..615022dc9 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -261,6 +261,9 @@ class DefaultIEW /** Processes inputs and changes state accordingly. */ void checkSignalsAndUpdate(unsigned tid); + /** Removes instructions from rename from a thread's instruction list. */ + void emptyRenameInsts(unsigned tid); + /** Sorts instructions coming from rename into lists separated by thread. */ void sortInsts(); @@ -390,11 +393,6 @@ class DefaultIEW /** Width of issue, in instructions. */ unsigned issueWidth; - /** Width of execute, in instructions. Might make more sense to break - * down into FP vs int. - */ - unsigned executeWidth; - /** Index into queue of instructions being written back. */ unsigned wbNumInst; @@ -439,14 +437,6 @@ class DefaultIEW Stats::Scalar<> iewIQFullEvents; /** Stat for number of times the LSQ becomes full. */ Stats::Scalar<> iewLSQFullEvents; - /** Stat for total number of executed instructions. */ - Stats::Scalar<> iewExecutedInsts; - /** Stat for total number of executed load instructions. */ - Stats::Vector<> iewExecLoadInsts; - /** Stat for total number of executed store instructions. */ -// Stats::Scalar<> iewExecStoreInsts; - /** Stat for total number of squashed instructions skipped at execute. */ - Stats::Scalar<> iewExecSquashedInsts; /** Stat for total number of memory ordering violation events. */ Stats::Scalar<> memOrderViolationEvents; /** Stat for total number of incorrect predicted taken branches. */ @@ -456,28 +446,25 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Stat for total number of executed instructions. */ + Stats::Scalar<> iewExecutedInsts; + /** Stat for total number of executed load instructions. */ + Stats::Vector<> iewExecLoadInsts; + /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar<> iewExecSquashedInsts; /** Number of executed software prefetches. */ - Stats::Vector<> exeSwp; + Stats::Vector<> iewExecutedSwp; /** Number of executed nops. */ - Stats::Vector<> exeNop; + Stats::Vector<> iewExecutedNop; /** Number of executed meomory references. */ - Stats::Vector<> exeRefs; + Stats::Vector<> iewExecutedRefs; /** Number of executed branches. */ - Stats::Vector<> exeBranches; - -// Stats::Vector<> issued_ops; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; -*/ - /** Number of instructions issued per cycle. */ - Stats::Formula issueRate; + Stats::Vector<> iewExecutedBranches; /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; -// Stats::Formula issue_op_rate; -// Stats::Formula fu_busy_rate; + /** Number of instructions executed per cycle. */ + Stats::Formula iewExecRate; + /** Number of instructions sent to commit. */ Stats::Vector<> iewInstsToCommit; /** Number of instructions that writeback. */ @@ -490,7 +477,6 @@ class DefaultIEW * to resource contention. */ Stats::Vector<> wbPenalized; - /** Number of instructions per cycle written back. */ Stats::Formula wbRate; /** Average number of woken instructions per writeback. */ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 3929f2e19..b02ee8555 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -52,7 +52,6 @@ DefaultIEW<Impl>::DefaultIEW(Params *params) issueToExecuteDelay(params->issueToExecuteDelay), issueReadWidth(params->issueWidth), issueWidth(params->issueWidth), - executeWidth(params->executeWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -94,6 +93,7 @@ DefaultIEW<Impl>::regStats() using namespace Stats; instQueue.regStats(); + ldstQueue.regStats(); iewIdleCycles .name(name() + ".iewIdleCycles") @@ -139,20 +139,6 @@ DefaultIEW<Impl>::regStats() .name(name() + ".iewLSQFullEvents") .desc("Number of times the LSQ has become full, causing a stall"); - iewExecutedInsts - .name(name() + ".iewExecutedInsts") - .desc("Number of executed instructions"); - - iewExecLoadInsts - .init(cpu->number_of_threads) - .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed") - .flags(total); - - iewExecSquashedInsts - .name(name() + ".iewExecSquashedInsts") - .desc("Number of squashed instructions skipped in execute"); - memOrderViolationEvents .name(name() + ".memOrderViolationEvents") .desc("Number of memory order violations"); @@ -171,114 +157,105 @@ DefaultIEW<Impl>::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exeSwp + iewExecutedInsts + .name(name() + ".EXEC:insts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .init(cpu->number_of_threads) + .name(name() + ".EXEC:loads") + .desc("Number of load instructions executed") + .flags(total); + + iewExecSquashedInsts + .name(name() + ".EXEC:squashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + iewExecutedSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") - .flags(total) - ; + .flags(total); - exeNop + iewExecutedNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") - .flags(total) - ; + .flags(total); - exeRefs + iewExecutedRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") - .flags(total) - ; + .flags(total); - exeBranches + iewExecutedBranches .init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") - .flags(total) - ; - - issueRate - .name(name() + ".EXEC:rate") - .desc("Inst execution rate") - .flags(total) - ; - issueRate = iewExecutedInsts / cpu->numCycles; + .flags(total); iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") - .flags(total) - ; - iewExecStoreInsts = exeRefs - iewExecLoadInsts; -/* - for (int i=0; i<Num_OpClasses; ++i) { - stringstream subname; - subname << opClassStrings[i] << "_delay"; - issue_delay_dist.subname(i, subname.str()); - } -*/ - // - // Other stats - // + .flags(total); + iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts; + + iewExecRate + .name(name() + ".EXEC:rate") + .desc("Inst execution rate") + .flags(total); + + iewExecRate = iewExecutedInsts / cpu->numCycles; iewInstsToCommit .init(cpu->number_of_threads) .name(name() + ".WB:sent") .desc("cumulative count of insts sent to commit") - .flags(total) - ; + .flags(total); writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") - .flags(total) - ; + .flags(total); producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") - .flags(total) - ; + .flags(total); consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") - .flags(total) - ; + .flags(total); wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate = wbPenalized / writebackCount; wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") - .flags(total) - ; + .flags(total); wbFanout = producerInst / consumerInst; wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") - .flags(total) - ; + .flags(total); wbRate = writebackCount / cpu->numCycles; } @@ -456,16 +433,7 @@ DefaultIEW<Impl>::squash(unsigned tid) skidBuffer[tid].pop(); } - while (!insts[tid].empty()) { - if (insts[tid].front()->isLoad() || - insts[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; - } - - toRename->iewInfo[tid].dispatched++; - - insts[tid].pop(); - } + emptyRenameInsts(tid); } template<class Impl> @@ -799,10 +767,12 @@ DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid) } if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n"); + DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid); dispatchStatus[tid] = Squashing; + emptyRenameInsts(tid); + wroteToTimeBuffer = true; return; } @@ -853,6 +823,22 @@ DefaultIEW<Impl>::sortInsts() template <class Impl> void +DefaultIEW<Impl>::emptyRenameInsts(unsigned tid) +{ + while (!insts[tid].empty()) { + if (insts[tid].front()->isLoad() || + insts[tid].front()->isStore() ) { + toRename->iewInfo[tid].dispatchedToLSQ++; + } + + toRename->iewInfo[tid].dispatched++; + + insts[tid].pop(); + } +} + +template <class Impl> +void DefaultIEW<Impl>::wakeCPU() { cpu->wakeCPU(); @@ -1090,7 +1076,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid) instQueue.recordProducer(inst); - exeNop[tid]++; + iewExecutedNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1501,9 +1487,9 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exeSwp[thread_number]++; + iewExecutedSwp[thread_number]++; else - iewExecutedInsts++; + iewIewExecutedcutedInsts++; #else iewExecutedInsts++; #endif @@ -1512,13 +1498,13 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exeBranches[thread_number]++; + iewExecutedBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exeRefs[thread_number]++; + iewExecutedRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 60a713020..6fd3c6d0b 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -474,12 +474,17 @@ class InstructionQueue /** Stat for number of non-speculative instructions removed due to a squash. */ Stats::Scalar<> iqSquashedNonSpecRemoved; + // Also include number of instructions rescheduled and replayed. - /** Distribution of number of instructions in the queue. */ + /** Distribution of number of instructions in the queue. + * @todo: Need to create struct to track the entry time for each + * instruction. */ Stats::VectorDistribution<> queueResDist; /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; - /** Distribution of the cycles it takes to issue an instruction. */ + /** Distribution of the cycles it takes to issue an instruction. + * @todo: Need to create struct to track the ready time for each + * instruction. */ Stats::VectorDistribution<> issueDelayDist; /** Number of times an instruction could not be issued because a @@ -492,8 +497,7 @@ class InstructionQueue /** Number of instructions issued per cycle. */ Stats::Formula issueRate; -// Stats::Formula issue_stores; -// Stats::Formula issue_op_rate; + /** Number of times the FU was busy. */ Stats::Vector<> fuBusy; /** Number of times the FU was busy per instruction issued. */ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 06a052c6f..66d4a54c6 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -289,22 +289,7 @@ InstructionQueue<Impl>::regStats() .flags(total) ; issueRate = iqInstsIssued / cpu->numCycles; -/* - issue_stores - .name(name() + ".ISSUE:stores") - .desc("Number of stores issued") - .flags(total) - ; - issue_stores = exe_refs - exe_loads; -*/ -/* - issue_op_rate - .name(name() + ".ISSUE:op_rate") - .desc("Operation issue rate") - .flags(total) - ; - issue_op_rate = issued_ops / numCycles; -*/ + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index bc4154c85..1dbd46b8e 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -62,6 +62,9 @@ class LSQ { /** Returns the name of the LSQ. */ std::string name() const; + /** Registers statistics of each LSQ unit. */ + void regStats(); + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list<unsigned> *at_ptr); /** Sets the CPU pointer. */ diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 27aa0dc3c..0b6c6f542 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2005-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -108,6 +108,16 @@ LSQ<Impl>::name() const template<class Impl> void +LSQ<Impl>::regStats() +{ + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + +template<class Impl> +void LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr) { activeThreads = at_ptr; diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index ce0cdd36f..3de581519 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -77,6 +77,9 @@ class LSQUnit { /** Returns the name of the LSQ unit. */ std::string name() const; + /** Registers statistics. */ + void regStats(); + /** Sets the CPU pointer. */ void setCPU(FullCPU *cpu_ptr); @@ -127,9 +130,6 @@ class LSQUnit { void completeDataAccess(PacketPtr pkt); - // @todo: Include stats in the LSQ unit. - //void regStats(); - /** Clears all the entries in the LQ. */ void clearLQ(); @@ -443,25 +443,35 @@ class LSQUnit { // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. -/* - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; + /** Total number of loads forwaded from LSQ stores. */ + Stats::Scalar<> lsqForwLoads; + + /** Total number of loads ignored due to invalid addresses. */ + Stats::Scalar<> invAddrLoads; + + /** Total number of squashed loads. */ + Stats::Scalar<> lsqSquashedLoads; + + /** Total number of responses from the memory system that are + * ignored due to the instruction already being squashed. */ + Stats::Scalar<> lsqIgnoredResponses; + + /** Total number of squashed stores. */ + Stats::Scalar<> lsqSquashedStores; + + /** Total number of software prefetches ignored due to invalid addresses. */ + Stats::Scalar<> invAddrSwpfs; - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; + /** Ready loads blocked due to partial store-forwarding. */ + Stats::Scalar<> lsqBlockedLoads; - // total non-speculative bogus addresses seen (debug var) - Counter sim_invalid_addrs; - Stats::Vector<> fu_busy; //cumulative fu busy + /** Number of loads that were rescheduled. */ + Stats::Scalar<> lsqRescheduledLoads; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; + /** Number of times the LSQ is blocked due to the cache. */ + Stats::Scalar<> lsqCacheBlocked; - Stats::Scalar<> lsqInversion; -*/ public: /** Executes the load at the given index. */ template <class T> @@ -519,6 +529,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) if (req->getFlags() & UNCACHEABLE && (load_idx != loadHead || !load_inst->reachedCommit)) { iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; return TheISA::genMachineCheckFault(); } @@ -598,7 +609,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // @todo: Need to make this a parameter. wb->schedule(curTick); - // Should keep track of stat for forwarded data + ++lsqForwLoads; return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || (store_has_upper_limit && upper_load_has_store_part) || @@ -626,6 +637,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not // complete. @@ -633,6 +645,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); + ++lsqBlockedLoads; return NoFault; } } @@ -660,6 +673,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // if we have a cache, do cache access too if (!dcachePort->sendTiming(data_pkt)) { + ++lsqCacheBlocked; // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) return NoFault; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 4c01f29a2..a5c1eb12a 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -198,6 +198,47 @@ LSQUnit<Impl>::name() const template<class Impl> void +LSQUnit<Impl>::regStats() +{ + lsqForwLoads + .name(name() + ".forwLoads") + .desc("Number of loads that had data forwarded from stores"); + + invAddrLoads + .name(name() + ".invAddrLoads") + .desc("Number of loads ignored due to an invalid address"); + + lsqSquashedLoads + .name(name() + ".squashedLoads") + .desc("Number of loads squashed"); + + lsqIgnoredResponses + .name(name() + ".ignoredResponses") + .desc("Number of memory responses ignored because the instruction is squashed"); + + lsqSquashedStores + .name(name() + ".squashedStores") + .desc("Number of stores squashed"); + + invAddrSwpfs + .name(name() + ".invAddrSwpfs") + .desc("Number of software prefetches ignored due to an invalid address"); + + lsqBlockedLoads + .name(name() + ".blockedLoads") + .desc("Number of blocked loads due to partial load-store forwarding"); + + lsqRescheduledLoads + .name(name() + ".rescheduledLoads") + .desc("Number of loads that were rescheduled"); + + lsqCacheBlocked + .name(name() + ".cacheBlocked") + .desc("Number of times an access to memory failed due to the cache being blocked"); +} + +template<class Impl> +void LSQUnit<Impl>::clearLQ() { loadQueue.clear(); @@ -618,7 +659,7 @@ LSQUnit<Impl>::writebackStores() if (!dcachePort->sendTiming(data_pkt)) { // Need to handle becoming blocked on a store. isStoreBlocked = true; - + ++lsqCacheBlocked; assert(retryPkt == NULL); retryPkt = data_pkt; } else { @@ -677,6 +718,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) loadTail = load_idx; decrLdIdx(load_idx); + ++lsqSquashedLoads; } if (isLoadBlocked) { @@ -723,6 +765,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num) storeTail = store_idx; decrStIdx(store_idx); + ++lsqSquashedStores; } } @@ -782,6 +825,7 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt) // Squashed instructions do not need to complete their access. if (inst->isSquashed()) { assert(!inst->isStore()); + ++lsqIgnoredResponses; return; } @@ -858,6 +902,7 @@ LSQUnit<Impl>::recvRetry() isStoreBlocked = false; } else { // Still blocked! + ++lsqCacheBlocked; } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df33b98ee..f9e2a03ee 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -1206,7 +1206,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid) } DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename." - " Adding to front of list.", tid); + " Adding to front of list.\n", tid); serializeInst[tid] = NULL; |