summary refs log tree commit diff
diff options
context:
space:
mode:
author Kevin Lim <ktlim@umich.edu> 2006-08-24 17:43:08 -0400
committer Kevin Lim <ktlim@umich.edu> 2006-08-24 17:43:08 -0400
commit ad2fa1e1c9587e8c2a2b7f3e5a9c592312042eb4 (patch)
tree e52b0443cc937e127c5d31b49f0c2dcf280c4d50
parent 74e8abd37ecd637a607f90e36aed1a3a16eea7da (diff)
download gem5-ad2fa1e1c9587e8c2a2b7f3e5a9c592312042eb4.tar.xz
Support profiling.
--HG-- extra : convert_revision : eab02dea68442bd3f8c5d1d16b7f93f43cbda2a5
-rw-r--r-- cpu/o3/alpha_cpu_impl.hh 24
-rw-r--r-- cpu/o3/commit_impl.hh 14
-rw-r--r-- cpu/o3/thread_state.hh 29
-rw-r--r-- cpu/ozone/thread_state.hh 26
-rw-r--r-- cpu/thread_state.hh 15
-rw-r--r-- python/m5/objects/AlphaFullCPU.py 2
-rw-r--r-- python/m5/objects/OzoneCPU.py 4
7 files changed, 100 insertions, 14 deletions
diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh
index 1bf0652cd..071a870ef 100644
--- a/cpu/o3/alpha_cpu_impl.hh
+++ b/cpu/o3/alpha_cpu_impl.hh
@@ -153,15 +153,6 @@ AlphaFullCPU<Impl>::regStats()
this->commit.regStats();
}
-#if FULL_SYSTEM
-template <class Impl>
-void
-AlphaFullCPU<Impl>::AlphaXC::dumpFuncProfile()
-{
- // Currently not supported
-}
-#endif
-
template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
@@ -336,13 +327,24 @@ AlphaFullCPU<Impl>::AlphaXC::readLastSuspend()
template <class Impl>
void
+AlphaFullCPU<Impl>::AlphaXC::dumpFuncProfile()
+{
+ thread->dumpFuncProfile();
+}
+
+template <class Impl>
+void
AlphaFullCPU<Impl>::AlphaXC::profileClear()
-{}
+{
+ thread->profileClear();
+}
template <class Impl>
void
AlphaFullCPU<Impl>::AlphaXC::profileSample()
-{}
+{
+ thread->profileSample();
+}
#endif
template <class Impl>
diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh
index 364e685c2..cd10ec6b2 100644
--- a/cpu/o3/commit_impl.hh
+++ b/cpu/o3/commit_impl.hh
@@ -1035,6 +1035,20 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
updateComInstStats(head_inst);
+#if FULL_SYSTEM
+ if (thread[tid]->profile) {
+// bool usermode =
+// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
+// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
+ thread[tid]->profilePC = head_inst->readPC();
+ ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(),
+ head_inst->staticInst);
+
+ if (node)
+ thread[tid]->profileNode = node;
+ }
+#endif
+
if (head_inst->traceData) {
head_inst->traceData->setFetchSeq(head_inst->seqNum);
head_inst->traceData->setCPSeq(thread[tid]->numInst);
diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh
index 3f1208ea0..28f488143 100644
--- a/cpu/o3/thread_state.hh
+++ b/cpu/o3/thread_state.hh
@@ -31,8 +31,11 @@
#include "arch/faults.hh"
#include "arch/isa_traits.hh"
+#include "base/callback.hh"
+#include "base/output.hh"
#include "cpu/exec_context.hh"
#include "cpu/thread_state.hh"
+#include "sim/sim_exit.hh"
class Event;
class Process;
@@ -83,8 +86,22 @@ struct O3ThreadState : public ThreadState {
#if FULL_SYSTEM
O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
: ThreadState(-1, _thread_num, _mem),
- inSyscall(0), trapPending(0)
- { }
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ {
+ if (cpu->params->profile) {
+ profile = new FunctionProfile(cpu->params->system->kernelSymtab);
+ Callback *cb =
+ new MakeCallback<O3ThreadState,
+ &O3ThreadState::dumpFuncProfile>(this);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ profileNode = &dummyNode;
+ profilePC = 3;
+ }
#else
O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
: ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
@@ -138,6 +155,14 @@ struct O3ThreadState : public ThreadState {
/** Handles the syscall. */
void syscall() { process->syscall(xcProxy); }
#endif
+
+#if FULL_SYSTEM
+ void dumpFuncProfile()
+ {
+ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+ profile->dump(xcProxy, *os);
+ }
+#endif
};
#endif // __CPU_O3_THREAD_STATE_HH__
diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh
index f104dff23..93a56da1b 100644
--- a/cpu/ozone/thread_state.hh
+++ b/cpu/ozone/thread_state.hh
@@ -31,9 +31,12 @@
#include "arch/faults.hh"
#include "arch/isa_traits.hh"
+#include "base/callback.hh"
+#include "base/output.hh"
#include "cpu/exec_context.hh"
#include "cpu/thread_state.hh"
#include "sim/process.hh"
+#include "sim/sim_exit.hh"
class Event;
//class Process;
@@ -62,9 +65,22 @@ struct OzoneThreadState : public ThreadState {
#if FULL_SYSTEM
OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
: ThreadState(-1, _thread_num, _mem),
- inSyscall(0), trapPending(0)
+ cpu(_cpu), inSyscall(0), trapPending(0)
{
memset(&regs, 0, sizeof(TheISA::RegFile));
+ if (cpu->params->profile) {
+ profile = new FunctionProfile(cpu->params->system->kernelSymtab);
+ Callback *cb =
+ new MakeCallback<OzoneThreadState,
+ &OzoneThreadState::dumpFuncProfile>(this);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ profileNode = &dummyNode;
+ profilePC = 3;
}
#else
OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
@@ -187,6 +203,14 @@ struct OzoneThreadState : public ThreadState {
Counter readFuncExeInst() { return funcExeInst; }
void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+
+#if FULL_SYSTEM
+ void dumpFuncProfile()
+ {
+ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+ profile->dump(xcProxy, *os);
+ }
+#endif
};
#endif // __CPU_OZONE_THREAD_STATE_HH__
diff --git a/cpu/thread_state.hh b/cpu/thread_state.hh
index 12146bd11..7a19963c8 100644
--- a/cpu/thread_state.hh
+++ b/cpu/thread_state.hh
@@ -30,6 +30,7 @@
#define __CPU_THREAD_STATE_HH__
#include "cpu/exec_context.hh"
+#include "cpu/profile.hh"
#if FULL_SYSTEM
class EndQuiesceEvent;
@@ -103,6 +104,20 @@ struct ThreadState {
#endif
+#if FULL_SYSTEM
+ void profileClear()
+ {
+ if (profile)
+ profile->clear();
+ }
+
+ void profileSample()
+ {
+ if (profile)
+ profile->sample(profileNode, profilePC);
+ }
+#endif
+
/**
* Temporary storage to pass the source address from copy_load to
* copy_store.
diff --git a/python/m5/objects/AlphaFullCPU.py b/python/m5/objects/AlphaFullCPU.py
index 015e9d872..5b6fa1063 100644
--- a/python/m5/objects/AlphaFullCPU.py
+++ b/python/m5/objects/AlphaFullCPU.py
@@ -10,6 +10,8 @@ class DerivAlphaFullCPU(BaseCPU):
mem = Param.FunctionalMemory(NULL, "memory")
checker = Param.BaseCPU(NULL, "checker")
+ if build_env['FULL_SYSTEM']:
+ profile = Param.Latency('0ns', "trace the kernel stack")
cachePorts = Param.Unsigned("Cache Ports")
diff --git a/python/m5/objects/OzoneCPU.py b/python/m5/objects/OzoneCPU.py
index ea8b6b537..dadca7990 100644
--- a/python/m5/objects/OzoneCPU.py
+++ b/python/m5/objects/OzoneCPU.py
@@ -10,9 +10,12 @@ class DerivOzoneCPU(BaseCPU):
mem = Param.FunctionalMemory(NULL, "memory")
checker = Param.BaseCPU("Checker CPU")
+ if build_env['FULL_SYSTEM']:
+ profile = Param.Latency('0ns', "trace the kernel stack")
width = Param.Unsigned("Width")
frontEndWidth = Param.Unsigned("Front end width")
+ frontEndLatency = Param.Unsigned("Front end latency")
backEndWidth = Param.Unsigned("Back end width")
backEndSquashLatency = Param.Unsigned("Back end squash latency")
backEndLatency = Param.Unsigned("Back end latency")
@@ -75,6 +78,7 @@ class DerivOzoneCPU(BaseCPU):
LQEntries = Param.Unsigned("Number of load queue entries")
SQEntries = Param.Unsigned("Number of store queue entries")
+ lsqLimits = Param.Bool(True, "LSQ size limits dispatch")
LFSTSize = Param.Unsigned("Last fetched store table size")
SSITSize = Param.Unsigned("Store set ID table size")