summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDam Sunwoo <dam.sunwoo@arm.com>2012-11-02 11:32:01 -0500
committerDam Sunwoo <dam.sunwoo@arm.com>2012-11-02 11:32:01 -0500
commit81406018b0688e956452cd3e00c1ab9aeb9af764 (patch)
treea25309e3a443f1c41a33585c3e7d1a55c2213c49
parent322daba74c122c4ba8c89b73ca8107be02990e5c (diff)
downloadgem5-81406018b0688e956452cd3e00c1ab9aeb9af764.tar.xz
ARM: dump stats and process info on context switches
This patch enables dumping statistics and Linux process information on context switch boundaries (__switch_to() calls) that are used for Streamline integration (a graphical statistics viewer from ARM).
-rw-r--r--src/arch/arm/ArmSystem.py1
-rw-r--r--src/arch/arm/linux/system.cc104
-rw-r--r--src/arch/arm/linux/system.hh42
-rw-r--r--src/cpu/base.cc11
-rw-r--r--src/cpu/base.hh19
-rw-r--r--src/cpu/pc_event.cc1
-rw-r--r--src/mem/request.hh34
-rw-r--r--src/sim/serialize.hh2
-rwxr-xr-xutil/cpt_upgrader.py13
9 files changed, 222 insertions, 5 deletions
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index db0febe18..3ca9b8573 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -71,3 +71,4 @@ class LinuxArmSystem(ArmSystem):
"File that contains the Device Tree Blob. Don't use DTB if empty.")
early_kernel_symbols = Param.Bool(False,
"enable early kernel symbol tables before MMU")
+ enable_context_switch_stats_dump = Param.Bool(False, "enable stats/task info dumping at context switch boundaries")
diff --git a/src/arch/arm/linux/system.cc b/src/arch/arm/linux/system.cc
index 1347e472d..b06439406 100644
--- a/src/arch/arm/linux/system.cc
+++ b/src/arch/arm/linux/system.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2012 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -44,19 +44,24 @@
#include "arch/arm/linux/system.hh"
#include "arch/arm/isa_traits.hh"
#include "arch/arm/utility.hh"
+#include "arch/generic/linux/threadinfo.hh"
#include "base/loader/object_file.hh"
#include "base/loader/symtab.hh"
+#include "cpu/base.hh"
+#include "cpu/pc_event.hh"
#include "cpu/thread_context.hh"
#include "debug/Loader.hh"
#include "kern/linux/events.hh"
#include "mem/fs_translating_port_proxy.hh"
#include "mem/physical.hh"
+#include "sim/stat_control.hh"
using namespace ArmISA;
using namespace Linux;
LinuxArmSystem::LinuxArmSystem(Params *p)
- : ArmSystem(p)
+ : ArmSystem(p),
+ enableContextSwitchStatsDump(p->enable_context_switch_stats_dump)
{
#ifndef NDEBUG
kernelPanicEvent = addKernelFuncEvent<BreakPCEvent>("panic");
@@ -206,6 +211,9 @@ LinuxArmSystem::~LinuxArmSystem()
delete uDelaySkipEvent;
if (constUDelaySkipEvent)
delete constUDelaySkipEvent;
+
+ if (dumpStatsPCEvent)
+ delete dumpStatsPCEvent;
}
LinuxArmSystem *
@@ -213,3 +221,95 @@ LinuxArmSystemParams::create()
{
return new LinuxArmSystem(this);
}
+
+/**
+ * Set up context-switch stats dumping, if it was requested.
+ *
+ * When enableContextSwitchStatsDump is set this attaches a
+ * DumpStatsPCEvent to the kernel function "__switch_to", creates the
+ * "<name>.tasks.txt" output stream, and assigns a task id to every CPU
+ * that already carries a valid pid. Otherwise it does nothing.
+ *
+ * NOTE(review): dumpStatsPCEvent is only assigned in this function; the
+ * visible part of the constructor does not initialise it, yet the
+ * destructor tests it before deleting -- confirm it is set to NULL
+ * somewhere when the feature is disabled.
+ */
+void
+LinuxArmSystem::startup()
+{
+    // Feature is opt-in; leave everything untouched when it is off.
+    if (!enableContextSwitchStatsDump)
+        return;
+
+    dumpStatsPCEvent = addKernelFuncEvent<DumpStatsPCEvent>("__switch_to");
+    if (!dumpStatsPCEvent)
+        panic("dumpStatsPCEvent not created!");
+
+    std::string task_filename = "tasks.txt";
+    taskFile = simout.create(name() + "." + task_filename);
+
+    // Give every context whose CPU already has a pid a task id.
+    for (int ctx = 0; ctx < _numContexts; ctx++) {
+        ThreadContext *tc = threadContexts[ctx];
+        uint32_t pid = tc->getCpuPtr()->getPid();
+        if (pid == Request::invldPid)
+            continue;
+
+        mapPid(tc, pid);
+        tc->getCpuPtr()->taskId(taskMap[pid]);
+    }
+}
+
+/**
+ * Assign an internal task id to an OS pid the first time it is seen.
+ *
+ * Ids are handed out in order of first appearance (the size of taskMap
+ * at the time of insertion). Normal ids run from 0 up to and including
+ * ContextSwitchTaskId::MaxNormalTaskId; once that range is used up,
+ * every further pid is mapped to ContextSwitchTaskId::Unknown so that
+ * the reserved Prefetcher and DMA ids are never given to a process.
+ * A pid that is already present in taskMap is left unchanged.
+ *
+ * @param tc thread context the pid was observed on (not used here)
+ * @param pid OS process id to map
+ */
+void
+LinuxArmSystem::mapPid(ThreadContext *tc, uint32_t pid)
+{
+    // Already mapped: keep the existing id.
+    if (taskMap.find(pid) != taskMap.end())
+        return;
+
+    // The next free id is the number of pids mapped so far.
+    const uint32_t next_id = taskMap.size();
+    if (next_id > ContextSwitchTaskId::MaxNormalTaskId) {
+        // Going any further would alias the reserved special ids.
+        warn_once("Error out of identifiers for cache occupancy stats");
+        taskMap[pid] = ContextSwitchTaskId::Unknown;
+    } else {
+        taskMap[pid] = next_id;
+    }
+}
+
+/**
+ * Handler invoked whenever the kernel function "__switch_to" is
+ * called to change running tasks.
+ *
+ * Register contents on entry:
+ *   r0 = task_struct of the previously running process
+ *   r1 = task_info of the previously running process
+ *   r2 = task_info of the next process to run
+ *
+ * Looks up the incoming task's pid/tgid/name, updates the CPU's pid and
+ * task id, appends one line to the system's task file, and schedules a
+ * stats dump and reset.
+ */
+void
+DumpStatsPCEvent::process(ThreadContext *tc)
+{
+    Linux::ThreadInfo ti(tc);
+    Addr next_task = tc->readIntReg(2);
+    uint32_t pid = ti.curTaskPID(next_task);
+    uint32_t tgid = ti.curTaskTGID(next_task);
+    std::string next_task_str = ti.curTaskName(next_task);
+
+    // Streamline treats pid == -1 as the kernel process.
+    // Also pid == 0 implies idle process (except during Linux boot)
+    int32_t mm = ti.curTaskMm(next_task);
+    if (mm == 0 && pid != 0) {
+        pid = -1;
+        tgid = -1;
+        next_task_str = "kernel";
+    }
+
+    LinuxArmSystem *sys = dynamic_cast<LinuxArmSystem *>(tc->getSystemPtr());
+    if (!sys)
+        panic("System is not LinuxArmSystem while getting Linux process info!");
+
+    // Make sure the pid has a task id, then fetch it once.
+    sys->mapPid(tc, pid);
+    std::map<uint32_t, uint32_t> &task_map = sys->taskMap;
+    const uint32_t task_id = task_map[pid];
+
+    // Set cpu task id and pid
+    tc->getCpuPtr()->taskId(task_id);
+    tc->getCpuPtr()->setPid(pid);
+
+    // Task file is read by cache occupancy plotting script or
+    // Streamline conversion script.
+    std::ostream &task_out = *sys->taskFile;
+    ccprintf(task_out,
+             "tick=%lld %d cpu_id=%d next_pid=%d next_tgid=%d next_task=%s\n",
+             curTick(), task_id, tc->cpuId(), static_cast<int>(pid),
+             static_cast<int>(tgid), next_task_str);
+    task_out.flush();
+
+    // Dump and reset statistics
+    Stats::schedStatEvent(true, true, curTick(), 0);
+}
+
diff --git a/src/arch/arm/linux/system.hh b/src/arch/arm/linux/system.hh
index caf018cb9..feed8cfaa 100644
--- a/src/arch/arm/linux/system.hh
+++ b/src/arch/arm/linux/system.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2012 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -43,15 +43,24 @@
#ifndef __ARCH_ARM_LINUX_SYSTEM_HH__
#define __ARCH_ARM_LINUX_SYSTEM_HH__
+#include <cstdio>
+#include <map>
#include <string>
#include <vector>
#include "arch/arm/system.hh"
+#include "base/output.hh"
#include "kern/linux/events.hh"
#include "params/LinuxArmSystem.hh"
+#include "sim/core.hh"
+
+class DumpStatsPCEvent;
class LinuxArmSystem : public ArmSystem
{
+ protected:
+ DumpStatsPCEvent *dumpStatsPCEvent;
+
public:
/** Boilerplate params code */
typedef LinuxArmSystemParams Params;
@@ -61,6 +70,20 @@ class LinuxArmSystem : public ArmSystem
return dynamic_cast<const Params *>(_params);
}
+ /** When enabled, dump stats/task info on context switches for
+ * Streamline and per-thread cache occupancy studies, etc. */
+ bool enableContextSwitchStatsDump;
+
+ /** This map stores a mapping of OS process IDs to internal Task IDs. The
+ * mapping is done because the stats system doesn't tend to like vectors
+ * that are much greater than 1000 items and the entire process space is
+ * 65K. */
+ std::map<uint32_t, uint32_t> taskMap;
+
+ /** This is a file that is placed in the run directory that prints out
+ * mappings between taskIds and OS process IDs */
+ std::ostream* taskFile;
+
LinuxArmSystem(Params *p);
~LinuxArmSystem();
@@ -68,6 +91,12 @@ class LinuxArmSystem : public ArmSystem
bool adderBootUncacheable(Addr a);
+ void startup();
+
+ /** This function creates a new task Id for the given pid.
+ * @param tc thread context that is currently executing */
+ void mapPid(ThreadContext* tc, uint32_t pid);
+
private:
#ifndef NDEBUG
/** Event to halt the simulator if the kernel calls panic() */
@@ -97,5 +126,16 @@ class LinuxArmSystem : public ArmSystem
Addr penReleaseAddr;
};
+class DumpStatsPCEvent : public PCEvent // PC event that LinuxArmSystem::startup() attaches to "__switch_to"
+{
+ public:
+ DumpStatsPCEvent(PCEventQueue *q, const std::string &desc, Addr addr) // all arguments are passed straight to PCEvent
+ : PCEvent(q, desc, addr)
+ {}
+
+ virtual void process(ThreadContext* tc); // defined in system.cc: records the incoming task and dumps stats
+};
+
+
#endif // __ARCH_ARM_LINUX_SYSTEM_HH__
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 93c9f8629..aaf9c9cbc 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -118,6 +118,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
: MemObject(p), instCnt(0), _cpuId(p->cpu_id),
_instMasterId(p->system->getMasterId(name() + ".inst")),
_dataMasterId(p->system->getMasterId(name() + ".data")),
+ _taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
interrupts(p->interrupts), profileEvent(NULL),
numThreads(p->numThreads), system(p->system)
{
@@ -359,6 +360,8 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
{
assert(threadContexts.size() == oldCPU->threadContexts.size());
assert(_cpuId == oldCPU->cpuId());
+ _pid = oldCPU->getPid();
+ _taskId = oldCPU->taskId();
ThreadID size = threadContexts.size();
for (ThreadID i = 0; i < size; ++i) {
@@ -489,6 +492,13 @@ void
BaseCPU::serialize(std::ostream &os)
{
SERIALIZE_SCALAR(instCnt);
+
+ /* Unlike _pid, _taskId is not serialized: task ids are dynamically
+ * assigned unique ids that are only meaningful for the duration of
+ * a specific run. Preserving them would require serializing the
+ * entire taskMap in the system. */
+ SERIALIZE_SCALAR(_pid);
+
interrupts->serialize(os);
}
@@ -496,6 +506,7 @@ void
BaseCPU::unserialize(Checkpoint *cp, const std::string &section)
{
UNSERIALIZE_SCALAR(instCnt);
+ UNSERIALIZE_SCALAR(_pid);
interrupts->unserialize(cp, section);
}
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 91cef24ed..6552be0d6 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -103,6 +103,17 @@ class BaseCPU : public MemObject
/** data side request id that must be placed in all requests */
MasterID _dataMasterId;
+ /** An internal representation of a task identifier within gem5. The
+ * CPU attaches this taskId (gem5's internal stand-in for the OS process
+ * ID) to each request so that components in the memory system can
+ * track which processes are ultimately interacting with them
+ */
+ uint32_t _taskId;
+
+ /** The current OS process ID that is executing on this processor. This is
+ * used to generate a taskId */
+ uint32_t _pid;
+
/**
* Define a base class for the CPU ports (instruction and data)
* that is refined in the subclasses. This class handles the
@@ -174,6 +185,14 @@ class BaseCPU : public MemObject
BaseMasterPort &getMasterPort(const std::string &if_name,
PortID idx = InvalidPortID);
+ /** Get cpu task id */
+ uint32_t taskId() const { return _taskId; }
+ /** Set cpu task id */
+ void taskId(uint32_t id) { _taskId = id; }
+
+ uint32_t getPid() const { return _pid; }
+ void setPid(uint32_t pid) { _pid = pid; }
+
inline void workItemBegin() { numWorkItemsStarted++; }
inline void workItemEnd() { numWorkItemsCompleted++; }
// @todo remove me after debugging with legion done
diff --git a/src/cpu/pc_event.cc b/src/cpu/pc_event.cc
index 2b54ee5fb..c957fe4d5 100644
--- a/src/cpu/pc_event.cc
+++ b/src/cpu/pc_event.cc
@@ -30,7 +30,6 @@
*/
#include <algorithm>
-#include <map>
#include <string>
#include <utility>
diff --git a/src/mem/request.hh b/src/mem/request.hh
index f6406e2c5..11f1c74b3 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -1,4 +1,16 @@
/*
+ * Copyright (c) 2012 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2002-2005 The Regents of The University of Michigan
* All rights reserved.
*
@@ -47,6 +59,24 @@
#include "base/types.hh"
#include "sim/core.hh"
+/**
+ * Special TaskIds that are used for per-context-switch stats dumps
+ * and cache occupancy. Having too many tasks seems to be a problem
+ * with vector stats. 1024 seems to be a reasonable number that
+ * doesn't cause a problem with stats and is large enough for realistic
+ * benchmarks (Linux/Android boot, BBench, etc.)
+ */
+
+namespace ContextSwitchTaskId {
+ enum TaskId {
+ MaxNormalTaskId = 1021, /* Top of the id range for normal (per-pid) tasks */
+ Prefetcher = 1022, /* For cache lines brought in by prefetcher */
+ DMA = 1023, /* Mostly Table Walker */
+ Unknown = 1024, /* Initial CPU task id; also the fallback when ids run out */
+ NumTaskId /* One past Unknown, i.e. 1025 */
+ };
+} // namespace ContextSwitchTaskId
+
class Request;
typedef Request* RequestPtr;
@@ -117,6 +147,10 @@ class Request
static const MasterID invldMasterId = USHRT_MAX;
/** @} */
+ /** Invalid or unknown Pid. Possible when operating system is not present
+ * or has not assigned a pid yet */
+ static const uint32_t invldPid = UINT_MAX;
+
private:
typedef uint8_t PrivateFlagsType;
typedef ::Flags<PrivateFlagsType> PrivateFlags;
diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh
index c0c0b63ff..531b2e1cd 100644
--- a/src/sim/serialize.hh
+++ b/src/sim/serialize.hh
@@ -57,7 +57,7 @@ class SimObject;
* SimObject shouldn't cause the version number to increase, only changes to
* existing objects such as serializing/unserializing more state, changing sizes
* of serialized arrays, etc. */
-static const uint64_t gem5CheckpointVersion = 0x0000000000000002;
+static const uint64_t gem5CheckpointVersion = 0x0000000000000003;
template <class T>
void paramOut(std::ostream &os, const std::string &name, const T &param);
diff --git a/util/cpt_upgrader.py b/util/cpt_upgrader.py
index 09e6ef194..ead3d9cbb 100755
--- a/util/cpt_upgrader.py
+++ b/util/cpt_upgrader.py
@@ -105,9 +105,22 @@ def from_1(cpt):
# the system, thus starting at 0
raise ValueError("more than one memory detected (" + sec + ")")
+def from_2(cpt):
+ for sec in cpt.sections():
+ import re
+ # Search for a CPUs
+ if re.search('.*sys.*cpu', sec):
+ try:
+ junk = cpt.get(sec, 'instCnt')
+ cpt.set(sec, '_pid', '0')
+ except ConfigParser.NoOptionError:
+ pass
+
+
migrations = []
migrations.append(from_0)
migrations.append(from_1)
+migrations.append(from_2)
verbose_print = False