summaryrefslogtreecommitdiff
path: root/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'cpu')
-rw-r--r--cpu/base_cpu.cc94
-rw-r--r--cpu/base_cpu.hh33
-rw-r--r--cpu/exec_context.cc90
-rw-r--r--cpu/exec_context.hh31
-rw-r--r--cpu/exetrace.cc16
-rw-r--r--cpu/memtest/memtest.cc52
-rw-r--r--cpu/memtest/memtest.hh15
-rw-r--r--cpu/pc_event.cc4
-rw-r--r--cpu/pc_event.hh4
-rw-r--r--cpu/simple_cpu/simple_cpu.cc103
-rw-r--r--cpu/simple_cpu/simple_cpu.hh14
-rw-r--r--cpu/static_inst.hh1
-rw-r--r--cpu/trace/opt_cpu.cc240
-rw-r--r--cpu/trace/opt_cpu.hh222
-rw-r--r--cpu/trace/reader/itx_reader.cc10
-rw-r--r--cpu/trace/reader/itx_reader.hh1
-rw-r--r--cpu/trace/trace_cpu.cc102
-rw-r--r--cpu/trace/trace_cpu.hh24
18 files changed, 823 insertions, 233 deletions
diff --git a/cpu/base_cpu.cc b/cpu/base_cpu.cc
index 988c7a602..2275f12e3 100644
--- a/cpu/base_cpu.cc
+++ b/cpu/base_cpu.cc
@@ -30,10 +30,11 @@
#include <sstream>
#include <iostream>
-#include "cpu/base_cpu.hh"
#include "base/cprintf.hh"
-#include "cpu/exec_context.hh"
+#include "base/loader/symtab.hh"
#include "base/misc.hh"
+#include "cpu/base_cpu.hh"
+#include "cpu/exec_context.hh"
#include "sim/param.hh"
#include "sim/sim_events.hh"
@@ -50,21 +51,25 @@ int maxThreadsPerCPU = 1;
extern void debug_break();
#ifdef FULL_SYSTEM
-BaseCPU::BaseCPU(const string &_name, int _number_of_threads,
+BaseCPU::BaseCPU(const string &_name, int _number_of_threads, bool _def_reg,
Counter max_insts_any_thread,
Counter max_insts_all_threads,
Counter max_loads_any_thread,
Counter max_loads_all_threads,
- System *_system, Tick freq)
- : SimObject(_name), frequency(freq),
- number_of_threads(_number_of_threads), system(_system)
+ System *_system, Tick freq,
+ bool _function_trace, Tick _function_trace_start)
+ : SimObject(_name), frequency(freq), checkInterrupts(true),
+ deferRegistration(_def_reg), number_of_threads(_number_of_threads),
+ system(_system)
#else
-BaseCPU::BaseCPU(const string &_name, int _number_of_threads,
+BaseCPU::BaseCPU(const string &_name, int _number_of_threads, bool _def_reg,
Counter max_insts_any_thread,
Counter max_insts_all_threads,
Counter max_loads_any_thread,
- Counter max_loads_all_threads)
- : SimObject(_name), number_of_threads(_number_of_threads)
+ Counter max_loads_all_threads,
+ bool _function_trace, Tick _function_trace_start)
+ : SimObject(_name), deferRegistration(_def_reg),
+ number_of_threads(_number_of_threads)
#endif
{
DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this);
@@ -134,10 +139,47 @@ BaseCPU::BaseCPU(const string &_name, int _number_of_threads,
memset(interrupts, 0, sizeof(interrupts));
intstatus = 0;
#endif
+
+ functionTracingEnabled = false;
+ if (_function_trace) {
+ std::string filename = csprintf("ftrace.%s", name());
+ functionTraceStream = makeOutputStream(filename);
+ currentFunctionStart = currentFunctionEnd = 0;
+ functionEntryTick = _function_trace_start;
+
+ if (_function_trace_start == 0) {
+ functionTracingEnabled = true;
+ } else {
+ Event *e =
+ new EventWrapper<BaseCPU, &BaseCPU::enableFunctionTrace>(this,
+ true);
+ e->schedule(_function_trace_start);
+ }
+ }
+}
+
+
+void
+BaseCPU::enableFunctionTrace()
+{
+ functionTracingEnabled = true;
+}
+
+BaseCPU::~BaseCPU()
+{
+ if (functionTracingEnabled)
+ closeOutputStream(functionTraceStream);
}
void
+BaseCPU::init()
+{
+ if (!deferRegistration)
+ registerExecContexts();
+}
+
+void
BaseCPU::regStats()
{
using namespace Stats;
@@ -195,10 +237,10 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
newXC->takeOverFrom(oldXC);
assert(newXC->cpu_id == oldXC->cpu_id);
#ifdef FULL_SYSTEM
- system->replaceExecContext(newXC->cpu_id, newXC);
+ system->replaceExecContext(newXC, newXC->cpu_id);
#else
assert(newXC->process == oldXC->process);
- newXC->process->replaceExecContext(newXC->cpu_id, newXC);
+ newXC->process->replaceExecContext(newXC, newXC->cpu_id);
#endif
}
@@ -222,7 +264,7 @@ BaseCPU::post_interrupt(int int_num, int index)
if (index < 0 || index >= sizeof(uint64_t) * 8)
panic("int_num out of bounds\n");
- AlphaISA::check_interrupts = 1;
+ checkInterrupts = true;
interrupts[int_num] |= 1 << index;
intstatus |= (ULL(1) << int_num);
}
@@ -269,4 +311,32 @@ BaseCPU::unserialize(Checkpoint *cp, const std::string &section)
#endif // FULL_SYSTEM
+void
+BaseCPU::traceFunctionsInternal(Addr pc)
+{
+ if (!debugSymbolTable)
+ return;
+
+ // if pc enters different function, print new function symbol and
+ // update saved range. Otherwise do nothing.
+ if (pc < currentFunctionStart || pc >= currentFunctionEnd) {
+ string sym_str;
+ bool found = debugSymbolTable->findNearestSymbol(pc, sym_str,
+ currentFunctionStart,
+ currentFunctionEnd);
+
+ if (!found) {
+ // no symbol found: use addr as label
+ sym_str = csprintf("0x%x", pc);
+ currentFunctionStart = pc;
+ currentFunctionEnd = pc + 1;
+ }
+
+ ccprintf(*functionTraceStream, " (%d)\n%d: %s",
+ curTick - functionEntryTick, curTick, sym_str);
+ functionEntryTick = curTick;
+ }
+}
+
+
DEFINE_SIM_OBJECT_CLASS_NAME("BaseCPU", BaseCPU)
diff --git a/cpu/base_cpu.hh b/cpu/base_cpu.hh
index f75f00409..baa956aa8 100644
--- a/cpu/base_cpu.hh
+++ b/cpu/base_cpu.hh
@@ -55,6 +55,7 @@ class BaseCPU : public SimObject
virtual void post_interrupt(int int_num, int index);
virtual void clear_interrupt(int int_num, int index);
virtual void clear_interrupts();
+ bool checkInterrupts;
bool check_interrupt(int int_num) const {
if (int_num > NumInterruptLevels)
@@ -91,22 +92,26 @@ class BaseCPU : public SimObject
public:
#ifdef FULL_SYSTEM
- BaseCPU(const std::string &_name, int _number_of_threads,
+ BaseCPU(const std::string &_name, int _number_of_threads, bool _def_reg,
Counter max_insts_any_thread, Counter max_insts_all_threads,
Counter max_loads_any_thread, Counter max_loads_all_threads,
- System *_system, Tick freq);
+ System *_system, Tick freq,
+ bool _function_trace = false, Tick _function_trace_start = 0);
#else
- BaseCPU(const std::string &_name, int _number_of_threads,
+ BaseCPU(const std::string &_name, int _number_of_threads, bool _def_reg,
Counter max_insts_any_thread = 0,
Counter max_insts_all_threads = 0,
Counter max_loads_any_thread = 0,
- Counter max_loads_all_threads = 0);
+ Counter max_loads_all_threads = 0,
+ bool _function_trace = false, Tick _function_trace_start = 0);
#endif
- virtual ~BaseCPU() {}
+ virtual ~BaseCPU();
+ virtual void init();
virtual void regStats();
+ bool deferRegistration;
void registerExecContexts();
/// Prepare for another CPU to take over execution. Called by
@@ -140,7 +145,6 @@ class BaseCPU : public SimObject
#ifdef FULL_SYSTEM
System *system;
-
/**
* Serialize this object to the given output stream.
* @param os The stream to serialize to.
@@ -164,6 +168,23 @@ class BaseCPU : public SimObject
virtual Counter totalInstructions() const { return 0; }
+ // Function tracing
+ private:
+ bool functionTracingEnabled;
+ std::ostream *functionTraceStream;
+ Addr currentFunctionStart;
+ Addr currentFunctionEnd;
+ Tick functionEntryTick;
+ void enableFunctionTrace();
+ void traceFunctionsInternal(Addr pc);
+
+ protected:
+ void traceFunctions(Addr pc)
+ {
+ if (functionTracingEnabled)
+ traceFunctionsInternal(pc);
+ }
+
private:
static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
diff --git a/cpu/exec_context.cc b/cpu/exec_context.cc
index 9c21b3a56..1cb33f13e 100644
--- a/cpu/exec_context.cc
+++ b/cpu/exec_context.cc
@@ -32,6 +32,9 @@
#include "cpu/exec_context.hh"
#ifdef FULL_SYSTEM
+#include "base/cprintf.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/serialize.hh"
#include "sim/system.hh"
#else
#include "sim/process.hh"
@@ -44,12 +47,13 @@ using namespace std;
ExecContext::ExecContext(BaseCPU *_cpu, int _thread_num, System *_sys,
AlphaITB *_itb, AlphaDTB *_dtb,
FunctionalMemory *_mem)
- : _status(ExecContext::Unallocated),
- kernelStats(this, _cpu), cpu(_cpu), thread_num(_thread_num),
+ : _status(ExecContext::Unallocated), cpu(_cpu), thread_num(_thread_num),
cpu_id(-1), mem(_mem), itb(_itb), dtb(_dtb), system(_sys),
- memCtrl(_sys->memCtrl), physmem(_sys->physmem),
- swCtx(NULL), func_exe_inst(0), storeCondFailures(0)
+ memctrl(_sys->memctrl), physmem(_sys->physmem),
+ kernelBinning(system->kernelBinning), bin(kernelBinning->bin),
+ fnbin(kernelBinning->fnbin), func_exe_inst(0), storeCondFailures(0)
{
+ kernelStats = new Kernel::Statistics(this);
memset(&regs, 0, sizeof(RegFile));
}
#else
@@ -72,6 +76,13 @@ ExecContext::ExecContext(BaseCPU *_cpu, int _thread_num,
}
#endif
+ExecContext::~ExecContext()
+{
+#ifdef FULL_SYSTEM
+ delete kernelStats;
+#endif
+}
+
void
ExecContext::takeOverFrom(ExecContext *oldContext)
@@ -86,9 +97,6 @@ ExecContext::takeOverFrom(ExecContext *oldContext)
// copy over functional state
_status = oldContext->_status;
-#ifdef FULL_SYSTEM
- kernelStats = oldContext->kernelStats;
-#endif
regs = oldContext->regs;
cpu_id = oldContext->cpu_id;
func_exe_inst = oldContext->func_exe_inst;
@@ -98,6 +106,14 @@ ExecContext::takeOverFrom(ExecContext *oldContext)
oldContext->_status = ExecContext::Unallocated;
}
+#ifdef FULL_SYSTEM
+void
+ExecContext::execute(const StaticInstBase *inst)
+{
+ assert(kernelStats);
+ system->kernelBinning->execute(this, inst);
+}
+#endif
void
ExecContext::serialize(ostream &os)
@@ -109,31 +125,8 @@ ExecContext::serialize(ostream &os)
SERIALIZE_SCALAR(inst);
#ifdef FULL_SYSTEM
- bool ctx = false;
- if (swCtx) {
- ctx = true;
- SERIALIZE_SCALAR(ctx);
- SERIALIZE_SCALAR(swCtx->calls);
- std::stack<fnCall *> *stack = &(swCtx->callStack);
- fnCall *top;
- int size = stack->size();
- SERIALIZE_SCALAR(size);
-
- for (int j=0; j<size; ++j) {
- top = stack->top();
- paramOut(os, csprintf("stackpos[%d]",j), top->name);
- delete top;
- stack->pop();
- }
- } else {
- SERIALIZE_SCALAR(ctx);
- }
- if (system->bin) {
- Stats::MainBin *cur = Stats::MainBin::curBin();
- string bin_name = cur->name();
- SERIALIZE_SCALAR(bin_name);
- }
-#endif //FULL_SYSTEM
+ kernelStats->serialize(os);
+#endif
}
@@ -147,35 +140,8 @@ ExecContext::unserialize(Checkpoint *cp, const std::string &section)
UNSERIALIZE_SCALAR(inst);
#ifdef FULL_SYSTEM
- bool ctx;
- UNSERIALIZE_SCALAR(ctx);
- if (ctx) {
- swCtx = new SWContext;
- UNSERIALIZE_SCALAR(swCtx->calls);
- int size;
- UNSERIALIZE_SCALAR(size);
-
- vector<fnCall *> calls;
- fnCall *call;
- for (int i=0; i<size; ++i) {
- call = new fnCall;
- paramIn(cp, section, csprintf("stackpos[%d]",i), call->name);
- call->myBin = system->getBin(call->name);
- calls.push_back(call);
- }
-
- for (int i=size-1; i>=0; --i) {
- swCtx->callStack.push(calls[i]);
- }
-
- }
-
- if (system->bin) {
- string bin_name;
- UNSERIALIZE_SCALAR(bin_name);
- system->getBin(bin_name)->activate();
- }
-#endif //FULL_SYSTEM
+ kernelStats->unserialize(cp, section);
+#endif
}
@@ -232,7 +198,7 @@ void
ExecContext::regStats(const string &name)
{
#ifdef FULL_SYSTEM
- kernelStats.regStats(name + ".kern");
+ kernelStats->regStats(name + ".kern");
#endif
}
diff --git a/cpu/exec_context.hh b/cpu/exec_context.hh
index b47f5cd08..8437a5585 100644
--- a/cpu/exec_context.hh
+++ b/cpu/exec_context.hh
@@ -42,12 +42,12 @@ class BaseCPU;
#ifdef FULL_SYSTEM
+#include "sim/system.hh"
#include "targetarch/alpha_memory.hh"
-class MemoryController;
-#include "kern/kernel_stats.hh"
-#include "sim/system.hh"
-#include "sim/sw_context.hh"
+class MemoryController;
+class StaticInstBase;
+namespace Kernel { class Binning; class Statistics; }
#else // !FULL_SYSTEM
@@ -105,11 +105,6 @@ class ExecContext
/// Set the status to Halted.
void halt();
-#ifdef FULL_SYSTEM
- public:
- KernelStats kernelStats;
-#endif
-
public:
RegFile regs; // correct-path register context
@@ -127,7 +122,6 @@ class ExecContext
int cpu_id;
#ifdef FULL_SYSTEM
-
FunctionalMemory *mem;
AlphaITB *itb;
AlphaDTB *dtb;
@@ -136,10 +130,15 @@ class ExecContext
// the following two fields are redundant, since we can always
// look them up through the system pointer, but we'll leave them
// here for now for convenience
- MemoryController *memCtrl;
+ MemoryController *memctrl;
PhysicalMemory *physmem;
- SWContext *swCtx;
+ Kernel::Binning *kernelBinning;
+ Kernel::Statistics *kernelStats;
+ bool bin;
+ bool fnbin;
+ void execute(const StaticInstBase *inst);
+
#else
Process *process;
@@ -185,7 +184,7 @@ class ExecContext
ExecContext(BaseCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
int _asid);
#endif
- virtual ~ExecContext() {}
+ virtual ~ExecContext();
virtual void takeOverFrom(ExecContext *oldContext);
@@ -197,8 +196,8 @@ class ExecContext
#ifdef FULL_SYSTEM
bool validInstAddr(Addr addr) { return true; }
bool validDataAddr(Addr addr) { return true; }
- int getInstAsid() { return ITB_ASN_ASN(regs.ipr[TheISA::IPR_ITB_ASN]); }
- int getDataAsid() { return DTB_ASN_ASN(regs.ipr[TheISA::IPR_DTB_ASN]); }
+ int getInstAsid() { return regs.instAsid(); }
+ int getDataAsid() { return regs.dataAsid(); }
Fault translateInstReq(MemReqPtr &req)
{
@@ -411,7 +410,7 @@ class ExecContext
int readIntrFlag() { return regs.intrflag; }
void setIntrFlag(int val) { regs.intrflag = val; }
Fault hwrei();
- bool inPalMode() { return PC_PAL(regs.pc); }
+ bool inPalMode() { return AlphaISA::PcPAL(regs.pc); }
void ev5_trap(Fault fault);
bool simPalCheck(int palFunc);
#endif
diff --git a/cpu/exetrace.cc b/cpu/exetrace.cc
index e31c3590c..ff7e90c9e 100644
--- a/cpu/exetrace.cc
+++ b/cpu/exetrace.cc
@@ -48,8 +48,6 @@ using namespace std;
//
-SymbolTable *debugSymbolTable = NULL;
-
void
Trace::InstRecord::dump(ostream &outs)
{
@@ -66,11 +64,17 @@ Trace::InstRecord::dump(ostream &outs)
outs << "T" << thread << " : ";
- std::string str;
- if ((debugSymbolTable) && (debugSymbolTable->findNearestSymbol(PC, str)))
- outs << "@" << setw(17) << str << " : ";
- else
+ std::string sym_str;
+ Addr sym_addr;
+ if (debugSymbolTable
+ && debugSymbolTable->findNearestSymbol(PC, sym_str, sym_addr)) {
+ if (PC != sym_addr)
+ sym_str += csprintf("+%d", PC - sym_addr);
+ outs << "@" << sym_str << " : ";
+ }
+ else {
outs << "0x" << hex << PC << " : ";
+ }
//
// Print decoded instruction
diff --git a/cpu/memtest/memtest.cc b/cpu/memtest/memtest.cc
index b55af332a..e967c79da 100644
--- a/cpu/memtest/memtest.cc
+++ b/cpu/memtest/memtest.cc
@@ -28,9 +28,10 @@
// FIX ME: make trackBlkAddr use blocksize from actual cache, not hard coded
-#include <string>
-#include <sstream>
#include <iomanip>
+#include <set>
+#include <sstream>
+#include <string>
#include <vector>
#include "base/misc.hh"
@@ -44,6 +45,8 @@
using namespace std;
+int TESTER_ALLOCATOR=0;
+
MemTest::MemTest(const string &name,
MemInterface *_cache_interface,
FunctionalMemory *main_mem,
@@ -58,7 +61,8 @@ MemTest::MemTest(const string &name,
Addr _traceAddr,
Counter max_loads_any_thread,
Counter max_loads_all_threads)
- : BaseCPU(name, 1, 0, 0, max_loads_any_thread, max_loads_all_threads),
+ : BaseCPU(name, 1, true, 0, 0, max_loads_any_thread,
+ max_loads_all_threads),
tickEvent(this),
cacheInterface(_cache_interface),
mainMem(main_mem),
@@ -110,6 +114,8 @@ MemTest::MemTest(const string &name,
noResponseCycles = 0;
numReads = 0;
tickEvent.schedule(0);
+
+ id = TESTER_ALLOCATOR++;
}
static void
@@ -126,6 +132,11 @@ printData(ostream &os, uint8_t *data, int nbytes)
void
MemTest::completeRequest(MemReqPtr &req, uint8_t *data)
{
+ //Remove the address from the list of outstanding
+ std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->paddr);
+ assert(removeAddr != outstandingAddrs.end());
+ outstandingAddrs.erase(removeAddr);
+
switch (req->cmd) {
case Read:
if (memcmp(req->data, data, req->size) != 0) {
@@ -157,6 +168,10 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data)
break;
case Copy:
+ //Also remove dest from outstanding list
+ removeAddr = outstandingAddrs.find(req->dest);
+ assert(removeAddr != outstandingAddrs.end());
+ outstandingAddrs.erase(removeAddr);
numCopiesStat++;
break;
@@ -211,7 +226,7 @@ MemTest::tick()
if (!tickEvent.scheduled())
tickEvent.schedule(curTick + 1);
- if (++noResponseCycles >= 5000) {
+ if (++noResponseCycles >= 500000) {
cerr << name() << ": deadlocked at cycle " << curTick << endl;
fatal("");
}
@@ -231,6 +246,16 @@ MemTest::tick()
unsigned source_align = rand() % 100;
unsigned dest_align = rand() % 100;
+ //If we aren't doing copies, use id as offset, and do a false sharing
+ //mem tester
+ if (percentCopies == 0) {
+ //We can eliminate the lower bits of the offset, and then use the id
+ //to offset within the blks
+ offset1 &= ~63; //Not the low order bits
+ offset1 += id;
+ access_size = 0;
+ }
+
MemReqPtr req = new MemReq();
if (cacheable < percentUncacheable) {
@@ -250,6 +275,13 @@ MemTest::tick()
if (cmd < percentReads) {
// read
+
+ //For now we only allow one outstanding request per address per tester
+ //This means we assume CPU does write forwarding to reads that alias something
+ //in the cpu store buffer.
+ if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return;
+ else outstandingAddrs.insert(req->paddr);
+
req->cmd = Read;
uint8_t *result = new uint8_t[8];
checkMem->access(Read, req->paddr, result, req->size);
@@ -272,6 +304,13 @@ MemTest::tick()
}
} else if (cmd < (100 - percentCopies)){
// write
+
+ //For now we only allow one outstanding request per address per tester
+ //This means we assume CPU does write forwarding to reads that alias something
+ //in the cpu store buffer.
+ if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return;
+ else outstandingAddrs.insert(req->paddr);
+
req->cmd = Write;
memcpy(req->data, &data, req->size);
checkMem->access(Write, req->paddr, req->data, req->size);
@@ -297,6 +336,11 @@ MemTest::tick()
// copy
Addr source = ((base) ? baseAddr1 : baseAddr2) + offset1;
Addr dest = ((base) ? baseAddr2 : baseAddr1) + offset2;
+ if (outstandingAddrs.find(source) != outstandingAddrs.end()) return;
+ else outstandingAddrs.insert(source);
+ if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return;
+ else outstandingAddrs.insert(dest);
+
if (source_align >= percentSourceUnaligned) {
source = blockAddr(source);
}
diff --git a/cpu/memtest/memtest.hh b/cpu/memtest/memtest.hh
index 72e0709d9..43b17a713 100644
--- a/cpu/memtest/memtest.hh
+++ b/cpu/memtest/memtest.hh
@@ -29,13 +29,14 @@
#ifndef __MEMTEST_HH__
#define __MEMTEST_HH__
-#include "sim/sim_object.hh"
-#include "mem/mem_interface.hh"
-#include "mem/functional_mem/functional_memory.hh"
-#include "cpu/base_cpu.hh"
-#include "cpu/exec_context.hh"
+#include <set>
#include "base/statistics.hh"
+#include "cpu/base_cpu.hh"
+#include "cpu/exec_context.hh"
+#include "mem/functional_mem/functional_memory.hh"
+#include "mem/mem_interface.hh"
+#include "sim/sim_object.hh"
#include "sim/stats.hh"
class MemTest : public BaseCPU
@@ -87,6 +88,10 @@ class MemTest : public BaseCPU
unsigned percentCopies; // target percentage of copy accesses
unsigned percentUncacheable;
+ int id;
+
+ std::set<unsigned> outstandingAddrs;
+
unsigned blockSize;
Addr blockAddrMask;
diff --git a/cpu/pc_event.cc b/cpu/pc_event.cc
index a86c017d4..8f046a7a4 100644
--- a/cpu/pc_event.cc
+++ b/cpu/pc_event.cc
@@ -77,7 +77,7 @@ PCEventQueue::schedule(PCEvent *event)
bool
PCEventQueue::doService(ExecContext *xc)
{
- Addr pc = xc->regs.pc;
+ Addr pc = xc->regs.pc & ~0x3;
int serviced = 0;
range_t range = equal_range(pc);
for (iterator i = range.first; i != range.second; ++i) {
@@ -85,7 +85,7 @@ PCEventQueue::doService(ExecContext *xc)
// another event. This for example, prevents two invocations
// of the SkipFuncEvent. Maybe we should have separate PC
// event queues for each processor?
- if (pc != xc->regs.pc)
+ if (pc != (xc->regs.pc & ~0x3))
continue;
DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n",
diff --git a/cpu/pc_event.hh b/cpu/pc_event.hh
index 131016fc6..9983d679b 100644
--- a/cpu/pc_event.hh
+++ b/cpu/pc_event.hh
@@ -143,7 +143,7 @@ PCEvent::schedule(Addr pc)
{
if (evpc != badpc)
panic("cannot switch PC");
- evpc = pc;
+ evpc = pc & ~0x3;
return schedule();
}
@@ -158,7 +158,7 @@ PCEvent::schedule(PCEventQueue *q, Addr pc)
panic("cannot switch addresses");
queue = q;
- evpc = pc;
+ evpc = pc & ~0x3;
return schedule();
}
diff --git a/cpu/simple_cpu/simple_cpu.cc b/cpu/simple_cpu/simple_cpu.cc
index 6c22d7c81..d48f93663 100644
--- a/cpu/simple_cpu/simple_cpu.cc
+++ b/cpu/simple_cpu/simple_cpu.cc
@@ -123,11 +123,12 @@ SimpleCPU::SimpleCPU(const string &_name,
FunctionalMemory *mem,
MemInterface *icache_interface,
MemInterface *dcache_interface,
- bool _def_reg, Tick freq)
- : BaseCPU(_name, /* number_of_threads */ 1,
+ bool _def_reg, Tick freq,
+ bool _function_trace, Tick _function_trace_start)
+ : BaseCPU(_name, /* number_of_threads */ 1, _def_reg,
max_insts_any_thread, max_insts_all_threads,
max_loads_any_thread, max_loads_all_threads,
- _system, freq),
+ _system, freq, _function_trace, _function_trace_start),
#else
SimpleCPU::SimpleCPU(const string &_name, Process *_process,
Counter max_insts_any_thread,
@@ -136,13 +137,14 @@ SimpleCPU::SimpleCPU(const string &_name, Process *_process,
Counter max_loads_all_threads,
MemInterface *icache_interface,
MemInterface *dcache_interface,
- bool _def_reg)
- : BaseCPU(_name, /* number_of_threads */ 1,
+ bool _def_reg,
+ bool _function_trace, Tick _function_trace_start)
+ : BaseCPU(_name, /* number_of_threads */ 1, _def_reg,
max_insts_any_thread, max_insts_all_threads,
- max_loads_any_thread, max_loads_all_threads),
+ max_loads_any_thread, max_loads_all_threads,
+ _function_trace, _function_trace_start),
#endif
- tickEvent(this), xc(NULL), defer_registration(_def_reg),
- cacheCompletionEvent(this)
+ tickEvent(this), xc(NULL), cacheCompletionEvent(this)
{
_status = Idle;
#ifdef FULL_SYSTEM
@@ -176,13 +178,6 @@ SimpleCPU::~SimpleCPU()
{
}
-void SimpleCPU::init()
-{
- if (!defer_registration) {
- this->registerExecContexts();
- }
-}
-
void
SimpleCPU::switchOut()
{
@@ -338,16 +333,30 @@ change_thread_state(int thread_number, int activate, int priority)
Fault
SimpleCPU::copySrcTranslate(Addr src)
{
- memReq->reset(src, (dcacheInterface) ?
- dcacheInterface->getBlockSize()
- : 64);
+ static bool no_warn = true;
+ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+ // Only support block sizes of 64 atm.
+ assert(blk_size == 64);
+ int offset = src & (blk_size - 1);
+
+ // Make sure block doesn't span page
+ if (no_warn &&
+ (src & TheISA::PageMask) != ((src + blk_size) & TheISA::PageMask) &&
+ (src >> 40) != 0xfffffc) {
+ warn("Copied block source spans pages %x.", src);
+ no_warn = false;
+ }
+
+ memReq->reset(src & ~(blk_size - 1), blk_size);
// translate to physical address
Fault fault = xc->translateDataReadReq(memReq);
+ assert(fault != Alignment_Fault);
+
if (fault == No_Fault) {
xc->copySrcAddr = src;
- xc->copySrcPhysAddr = memReq->paddr;
+ xc->copySrcPhysAddr = memReq->paddr + offset;
} else {
xc->copySrcAddr = 0;
xc->copySrcPhysAddr = 0;
@@ -358,19 +367,44 @@ SimpleCPU::copySrcTranslate(Addr src)
Fault
SimpleCPU::copy(Addr dest)
{
+ static bool no_warn = true;
int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+ // Only support block sizes of 64 atm.
+ assert(blk_size == 64);
uint8_t data[blk_size];
- assert(xc->copySrcAddr);
- memReq->reset(dest, blk_size);
+ //assert(xc->copySrcAddr);
+ int offset = dest & (blk_size - 1);
+
+ // Make sure block doesn't span page
+ if (no_warn &&
+ (dest & TheISA::PageMask) != ((dest + blk_size) & TheISA::PageMask) &&
+ (dest >> 40) != 0xfffffc) {
+ no_warn = false;
+ warn("Copied block destination spans pages %x. ", dest);
+ }
+
+ memReq->reset(dest & ~(blk_size -1), blk_size);
// translate to physical address
Fault fault = xc->translateDataWriteReq(memReq);
+
+ assert(fault != Alignment_Fault);
+
if (fault == No_Fault) {
- Addr dest_addr = memReq->paddr;
+ Addr dest_addr = memReq->paddr + offset;
// Need to read straight from memory since we have more than 8 bytes.
memReq->paddr = xc->copySrcPhysAddr;
xc->mem->read(memReq, data);
memReq->paddr = dest_addr;
xc->mem->write(memReq, data);
+ if (dcacheInterface) {
+ memReq->cmd = Copy;
+ memReq->completionEvent = NULL;
+ memReq->paddr = xc->copySrcPhysAddr;
+ memReq->dest = dest_addr;
+ memReq->size = 64;
+ memReq->time = curTick;
+ dcacheInterface->access(memReq);
+ }
}
return fault;
}
@@ -610,13 +644,11 @@ SimpleCPU::tick()
Fault fault = No_Fault;
#ifdef FULL_SYSTEM
- if (AlphaISA::check_interrupts &&
- xc->cpu->check_interrupts() &&
- !PC_PAL(xc->regs.pc) &&
+ if (checkInterrupts && check_interrupts() && !xc->inPalMode() &&
status() != IcacheMissComplete) {
int ipl = 0;
int summary = 0;
- AlphaISA::check_interrupts = 0;
+ checkInterrupts = false;
IntReg *ipr = xc->regs.ipr;
if (xc->regs.ipr[TheISA::IPR_SIRR]) {
@@ -733,9 +765,8 @@ SimpleCPU::tick()
fault = si->execute(this, traceData);
#ifdef FULL_SYSTEM
- SWContext *ctx = xc->swCtx;
- if (ctx)
- ctx->process(xc, si.get());
+ if (xc->fnbin)
+ xc->execute(si.get());
#endif
if (si->isMemRef()) {
@@ -750,6 +781,8 @@ SimpleCPU::tick()
if (traceData)
traceData->finalize();
+ traceFunctions(xc->regs.pc);
+
} // if (fault == No_Fault)
if (fault != No_Fault) {
@@ -808,6 +841,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
Param<bool> defer_registration;
Param<int> multiplier;
+ Param<bool> function_trace;
+ Param<Tick> function_trace_start;
END_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU)
@@ -841,7 +876,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
INIT_PARAM_DFLT(defer_registration, "defer registration with system "
"(for sampling)", false),
- INIT_PARAM_DFLT(multiplier, "clock multiplier", 1)
+ INIT_PARAM_DFLT(multiplier, "clock multiplier", 1),
+ INIT_PARAM_DFLT(function_trace, "Enable function trace", false),
+ INIT_PARAM_DFLT(function_trace_start, "Cycle to start function trace", 0)
END_INIT_SIM_OBJECT_PARAMS(SimpleCPU)
@@ -860,7 +897,8 @@ CREATE_SIM_OBJECT(SimpleCPU)
(icache) ? icache->getInterface() : NULL,
(dcache) ? dcache->getInterface() : NULL,
defer_registration,
- ticksPerSecond * mult);
+ ticksPerSecond * mult,
+ function_trace, function_trace_start);
#else
cpu = new SimpleCPU(getInstanceName(), workload,
@@ -868,7 +906,8 @@ CREATE_SIM_OBJECT(SimpleCPU)
max_loads_any_thread, max_loads_all_threads,
(icache) ? icache->getInterface() : NULL,
(dcache) ? dcache->getInterface() : NULL,
- defer_registration);
+ defer_registration,
+ function_trace, function_trace_start);
#endif // FULL_SYSTEM
diff --git a/cpu/simple_cpu/simple_cpu.hh b/cpu/simple_cpu/simple_cpu.hh
index 6ab231e7e..341a0da23 100644
--- a/cpu/simple_cpu/simple_cpu.hh
+++ b/cpu/simple_cpu/simple_cpu.hh
@@ -31,7 +31,6 @@
#include "cpu/base_cpu.hh"
#include "sim/eventq.hh"
-#include "base/loader/symtab.hh"
#include "cpu/pc_event.hh"
#include "base/statistics.hh"
#include "cpu/exec_context.hh"
@@ -40,7 +39,6 @@
// forward declarations
#ifdef FULL_SYSTEM
class Processor;
-class Kernel;
class AlphaITB;
class AlphaDTB;
class PhysicalMemory;
@@ -144,7 +142,8 @@ class SimpleCPU : public BaseCPU
Counter max_loads_any_thread, Counter max_loads_all_threads,
AlphaITB *itb, AlphaDTB *dtb, FunctionalMemory *mem,
MemInterface *icache_interface, MemInterface *dcache_interface,
- bool _def_reg, Tick freq);
+ bool _def_reg, Tick freq,
+ bool _function_trace, Tick _function_trace_start);
#else
@@ -154,12 +153,12 @@ class SimpleCPU : public BaseCPU
Counter max_loads_any_thread,
Counter max_loads_all_threads,
MemInterface *icache_interface, MemInterface *dcache_interface,
- bool _def_reg);
+ bool _def_reg,
+ bool _function_trace, Tick _function_trace_start);
#endif
virtual ~SimpleCPU();
- virtual void init();
// execution context
ExecContext *xc;
@@ -179,8 +178,6 @@ class SimpleCPU : public BaseCPU
// L1 data cache
MemInterface *dcacheInterface;
- bool defer_registration;
-
// current instruction
MachInst inst;
@@ -250,8 +247,7 @@ class SimpleCPU : public BaseCPU
Fault read(Addr addr, T &data, unsigned flags);
template <class T>
- Fault write(T data, Addr addr, unsigned flags,
- uint64_t *res);
+ Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
void prefetch(Addr addr, unsigned flags)
{
diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh
index 71e9ef441..c47fa32db 100644
--- a/cpu/static_inst.hh
+++ b/cpu/static_inst.hh
@@ -48,6 +48,7 @@ class AlphaDynInst;
class FastCPU;
class SimpleCPU;
+class InorderCPU;
class SymbolTable;
namespace Trace {
diff --git a/cpu/trace/opt_cpu.cc b/cpu/trace/opt_cpu.cc
new file mode 100644
index 000000000..77211e382
--- /dev/null
+++ b/cpu/trace/opt_cpu.cc
@@ -0,0 +1,240 @@
+
+/*
+ * Copyright (c) 2004 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Definition of a memory trace CPU object for optimal caches. Uses a memory
+ * trace to access a set-associative cache with optimal replacement in each set.
+ */
+
+#include <algorithm> // For heap functions.
+
+#include "cpu/trace/opt_cpu.hh"
+#include "cpu/trace/reader/mem_trace_reader.hh"
+
+#include "sim/builder.hh"
+#include "sim/sim_events.hh"
+
+using namespace std;
+
+OptCPU::OptCPU(const string &name,  // loads the whole trace and precomputes per-set next-use times
+ MemTraceReader *_trace,
+ int block_size,
+ int cache_size,
+ int _assoc)
+ : BaseCPU(name, 1, true), tickEvent(this), trace(_trace),
+ numBlks(cache_size/block_size), assoc(_assoc), numSets(numBlks/assoc),
+ setMask(numSets - 1)  // NOTE(review): only a usable mask when numSets is a power of two; not checked here
+{
+ int log_block_size = 0;  // floor(log2(block_size)), computed by repeated halving
+ int tmp_block_size = block_size;
+ while (tmp_block_size > 1) {
+ ++log_block_size;
+ tmp_block_size = tmp_block_size >> 1;
+ }
+ assert(1<<log_block_size == block_size);  // block_size must be a power of two
+ MemReqPtr req;
+ trace->getNextReq(req);  // NOTE(review): trace is dereferenced unchecked, yet the data_trace param defaults to NULL
+ refInfo.resize(numSets);
+ while (req) {  // read until the reader hands back a null request, bucketing each reference by set
+ RefInfo temp;
+ temp.addr = req->paddr >> log_block_size;  // keep the block address, not the byte address
+ int set = temp.addr & setMask;
+ refInfo[set].push_back(temp);
+ trace->getNextReq(req);
+ }
+
+ // Initialize top level of lookup table (16 entries, selected by address bits 35..32).
+ lookupTable.resize(16);
+
+ // Annotate each reference with the index of the next reference to the same block.
+ for (int k = 0; k < numSets; ++k) {
+ for (RefIndex i = refInfo[k].size() - 1; i >= 0; --i) {  // walk backwards so the table holds the nearest later use
+ Addr addr = refInfo[k][i].addr;
+ initTable(addr, InfiniteRef);  // freshly allocated entries read as InfiniteRef (no later use seen)
+ refInfo[k][i].nextRefTime = lookupValue(addr);
+ setValue(addr, i);
+ }
+ }
+
+ // Reset every allocated lookup-table entry to -1, which processSet treats as "block not cached".
+ for (int j = 0; j < 16; ++j) {
+ if (lookupTable[j].size() == (1<<16)) {
+ for (int k = 0; k < (1<<16); ++k) {
+ if (lookupTable[j][k].size() == (1<<16)) {
+ for (int l = 0; l < (1<<16); ++l) {
+ lookupTable[j][k][l] = -1;
+ }
+ }
+ }
+ }
+ }
+
+ tickEvent.schedule(0);  // tick() performs the whole simulation when this event is processed
+
+ hits = 0;
+ misses = 0;
+}
+
+void
+OptCPU::processSet(int set)  // replays one set's references, updating the hits and misses counters
+{
+ // Warm-up: fill the cache until assoc distinct blocks are resident
+ int blks_in_cache = 0;
+ RefIndex i = 0;
+ cacheHeap.clear();
+ cacheHeap.resize(assoc);
+
+ while (blks_in_cache < assoc) {
+ RefIndex cache_index = lookupValue(refInfo[set][i].addr);
+ if (cache_index == -1) {
+ // First reference to this block: take the next free slot
+ misses++;
+ cache_index = blks_in_cache++;
+ setValue(refInfo[set][i].addr, cache_index);
+ } else {
+ hits++;
+ }
+ // update cache heap to most recent reference
+ cacheHeap[cache_index] = i;
+ if (++i >= refInfo[set].size()) {
+ return;  // references ran out during warm-up; nothing left to replace
+ }
+ }
+ for (int start = assoc/2; start >= 0; --start) {  // build the max-heap bottom-up
+ heapify(set,start);
+ }
+ //verifyHeap(set,0);
+
+ for (; i < refInfo[set].size(); ++i) {  // steady state: every miss now evicts a block
+ RefIndex cache_index = lookupValue(refInfo[set][i].addr);
+ if (cache_index == -1) {
+ // miss
+ misses++;
+ // replace from cacheHeap[0], the slot heapify keeps at the largest nextRefTime
+ // mark replaced block as absent
+ setValue(refInfo[set][cacheHeap[0]].addr, -1);
+ setValue(refInfo[set][i].addr, 0);
+ cacheHeap[0] = i;
+ heapify(set, 0);
+ // Make sure it's in the cache
+ assert(lookupValue(refInfo[set][i].addr) != -1);
+ } else {
+ // hit
+ hits++;
+ assert(refInfo[set][cacheHeap[cache_index]].addr ==
+ refInfo[set][i].addr);
+ assert(refInfo[set][cacheHeap[cache_index]].nextRefTime == i);
+ assert(heapLeft(cache_index) >= assoc);  // the slot has no child inside the heap, i.e. it is a leaf
+
+ cacheHeap[cache_index] = i;
+ processRankIncrease(set, cache_index);  // sift the slot up while its rank exceeds its parent's
+ assert(lookupValue(refInfo[set][i].addr) != -1);
+ }
+ }
+}
+void
+OptCPU::tick()  // runs the whole simulation in a single call, one set at a time
+{
+ // Run the optimal-replacement simulation for every set that has references
+
+ int references = 0;
+ for (int set = 0; set < numSets; ++set) {
+ if (!refInfo[set].empty()) {
+ processSet(set);
+ }
+ references += refInfo[set].size();  // total reference count across all sets
+ }
+ // Report the totals on stderr.
+ fprintf(stderr,"sys.cpu.misses %d #opt cache misses\n",misses);
+ fprintf(stderr,"sys.cpu.hits %d #opt cache hits\n", hits);
+ fprintf(stderr,"sys.cpu.accesses %d #opt cache acceses\n", references);  // NOTE(review): "acceses" is misspelled in the emitted text
+ new SimExitEvent("Finshed Memory Trace");  // presumably requests simulation exit; pointer is not kept here -- confirm SimExitEvent manages itself
+}
+
+void
+OptCPU::initTable(Addr addr, RefIndex index)  // allocates the lookup-table levels covering addr on first touch
+{
+ int l1_index = (addr >> 32) & 0x0f;  // address bits 35..32
+ int l2_index = (addr >> 16) & 0xffff;  // address bits 31..16
+ assert(l1_index == addr >> 32);  // addr must fit in 36 bits
+ if (lookupTable[l1_index].size() != (1<<16)) {
+ lookupTable[l1_index].resize(1<<16);
+ }
+ if (lookupTable[l1_index][l2_index].size() != (1<<16)) {
+ lookupTable[l1_index][l2_index].resize(1<<16, index);  // new entries start as index; an already-sized table is left untouched
+ }
+}
+
+OptCPU::TickEvent::TickEvent(OptCPU *c)  // remembers the owning CPU so process() can call back into it
+ : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)  // passes &mainEventQueue and CPU_Tick_Pri to the Event base
+{
+}
+
+void
+OptCPU::TickEvent::process()  // forwards the event to the owning CPU
+{
+ cpu->tick();
+}
+
+const char *
+OptCPU::TickEvent::description()  // returns a fixed string literal naming this event type
+{
+ return "OptCPU tick event";
+}
+
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(OptCPU)
+
+ SimObjectParam<MemTraceReader *> data_trace;  // trace reader handed to the OptCPU constructor
+ Param<int> size;  // passed as the constructor's cache_size argument
+ Param<int> block_size;  // the constructor asserts this is a power of two
+Param<int> assoc;
+
+END_DECLARE_SIM_OBJECT_PARAMS(OptCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(OptCPU)
+
+ INIT_PARAM_DFLT(data_trace, "memory trace", NULL),  // the only parameter given a default here
+ INIT_PARAM(size, "cache size"),
+ INIT_PARAM(block_size, "block size"),
+ INIT_PARAM(assoc,"associativity")
+
+END_INIT_SIM_OBJECT_PARAMS(OptCPU)
+
+CREATE_SIM_OBJECT(OptCPU)
+{
+ return new OptCPU(getInstanceName(),
+ data_trace,
+ block_size,  // the constructor takes block_size before cache_size, so block_size precedes size
+ size,
+ assoc);
+}
+
+REGISTER_SIM_OBJECT("OptCPU", OptCPU)
diff --git a/cpu/trace/opt_cpu.hh b/cpu/trace/opt_cpu.hh
new file mode 100644
index 000000000..847147b3c
--- /dev/null
+++ b/cpu/trace/opt_cpu.hh
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2004 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Declaration of a memory trace CPU object for optimal caches. Uses a memory
+ * trace to access a set-associative cache with optimal replacement in each set.
+ */
+
+#ifndef __OPT_CPU_HH__
+#define __OPT_CPU_HH__
+
+#include <vector>
+
+#include "cpu/base_cpu.hh"
+#include "mem/mem_req.hh" // for MemReqPtr
+#include "sim/eventq.hh" // for Event
+
+// Forward Declaration
+class MemTraceReader;
+
+/**
+ * A CPU object to simulate a set-associative cache with optimal replacement.
+ */
+class OptCPU : public BaseCPU
+{
+ typedef int RefIndex;  // index of a reference within a set's list; also used for cacheHeap slot numbers
+
+ typedef std::vector<RefIndex> L3Table;
+ typedef std::vector<L3Table> L2Table;
+ typedef std::vector<L2Table> L1Table;
+
+ /**
+ * Event to call OptCPU::tick
+ */
+ class TickEvent : public Event
+ {
+ private:
+ /** The associated CPU */
+ OptCPU *cpu;
+
+ public:
+ /**
+ * Construct this event.
+ */
+ TickEvent(OptCPU *c);
+
+ /**
+ * Call the tick function.
+ */
+ void process();
+
+ /**
+ * Return a string description of this event.
+ */
+ const char *description();
+ };
+
+ TickEvent tickEvent;
+
+ class RefInfo
+ {
+ public:
+ RefIndex nextRefTime;  // index of the next reference to the same block, or InfiniteRef
+ Addr addr;  // block address (paddr shifted right by log2 of the block size)
+ };
+
+ /** Reference Information, per set. */
+ std::vector<std::vector<RefInfo> > refInfo;
+
+ /** Three-level table keyed by address: next-reference indices while the constructor runs, then cacheHeap slots (-1 = not cached) */
+ L1Table lookupTable;
+
+ /**
+ * Return the lookup table entry for addr (addr must fit in 36 bits).
+ */
+ RefIndex lookupValue(Addr addr)
+ {
+ int l1_index = (addr >> 32) & 0x0f;  // address bits 35..32
+ int l2_index = (addr >> 16) & 0xffff;  // address bits 31..16
+ int l3_index = addr & 0xffff;  // address bits 15..0
+ assert(l1_index == addr >> 32);
+ return lookupTable[l1_index][l2_index][l3_index];
+ }
+
+ /**
+ * Set the lookup table entry for addr (addr must fit in 36 bits).
+ */
+ void setValue(Addr addr, RefIndex index)
+ {
+ int l1_index = (addr >> 32) & 0x0f;
+ int l2_index = (addr >> 16) & 0xffff;
+ int l3_index = addr & 0xffff;
+ assert(l1_index == addr >> 32);
+ lookupTable[l1_index][l2_index][l3_index]=index;
+ }
+
+ /**
+ * Allocate the table levels covering addr; newly created entries are set to index.
+ */
+ void initTable(Addr addr, RefIndex index);
+
+ void heapSwap(int set, int a, int b) {  // swap two heap slots and keep the lookup table in step
+ RefIndex tmp = cacheHeap[a];
+ cacheHeap[a] = cacheHeap[b];
+ cacheHeap[b] = tmp;
+
+ setValue(refInfo[set][cacheHeap[a]].addr, a);
+ setValue(refInfo[set][cacheHeap[b]].addr, b);
+ }
+
+ int heapLeft(int index) { return index + index + 1; }
+ int heapRight(int index) { return index + index + 2; }
+ int heapParent(int index) { return (index - 1) >> 1; }
+
+ RefIndex heapRank(int set, int index) {  // a slot's rank is the next-reference time of the reference it holds
+ return refInfo[set][cacheHeap[index]].nextRefTime;
+ }
+
+ void heapify(int set, int start){  // sift-down: restores max-heap order on rank below start
+ int left = heapLeft(start);
+ int right = heapRight(start);
+ int max = start;
+ if (left < assoc && heapRank(set, left) > heapRank(set, start)) {
+ max = left;
+ }
+ if (right < assoc && heapRank(set, right) > heapRank(set, max)) {
+ max = right;
+ }
+
+ if (max != start) {
+ heapSwap(set, start, max);
+ heapify(set, max);
+ }
+ }
+
+ void verifyHeap(int set, int start) {  // debug helper: asserts every parent's rank >= its children's
+ int left = heapLeft(start);
+ int right = heapRight(start);
+
+ if (left < assoc) {
+ assert(heapRank(set, start) >= heapRank(set, left));
+ verifyHeap(set, left);
+ }
+ if (right < assoc) {
+ assert(heapRank(set, start) >= heapRank(set, right));
+ verifyHeap(set, right);
+ }
+ }
+
+ void processRankIncrease(int set, int start) {  // sift-up: swap with the parent while the parent's rank is smaller
+ int parent = heapParent(start);
+ while (start > 0 && heapRank(set,parent) < heapRank(set,start)) {
+ heapSwap(set, parent, start);
+ start = parent;
+ parent = heapParent(start);
+ }
+ }
+
+ void processSet(int set);
+
+ static const RefIndex InfiniteRef = 0x7fffffff;  // next-reference time for a block that is never referenced again
+
+ /** Memory reference trace. */
+ MemTraceReader *trace;
+
+ /** Cache heap for replacement. */
+ std::vector<RefIndex> cacheHeap;
+
+ /** The number of blocks in the cache. */
+ const int numBlks;
+
+ const int assoc;  // blocks per set; also the size of cacheHeap
+ const int numSets;  // numBlks / assoc
+ const int setMask;  // numSets - 1, ANDed with the block address to select a set
+
+
+ int misses;  // counted in processSet, printed by tick
+ int hits;  // counted in processSet, printed by tick
+
+ public:
+ /**
+ * Construct an OptCPU object.
+ */
+ OptCPU(const std::string &name,
+ MemTraceReader *_trace,
+ int block_size,
+ int cache_size,
+ int assoc);
+
+ /**
+ * Perform the optimal replacement simulation.
+ */
+ void tick();
+};
+
+#endif
diff --git a/cpu/trace/reader/itx_reader.cc b/cpu/trace/reader/itx_reader.cc
index 593d383ec..006fcc9dd 100644
--- a/cpu/trace/reader/itx_reader.cc
+++ b/cpu/trace/reader/itx_reader.cc
@@ -102,6 +102,7 @@ ITXReader::getNextReq(MemReqPtr &req)
} else {
codePhysAddr += tmp_req->size;
}
+ assert(tmp_req->paddr >> 36 == 0);
} else {
codePhysValid = false;
}
@@ -130,12 +131,13 @@ ITXReader::getNextReq(MemReqPtr &req)
// Get the page offset from the virtual address.
tmp_req->paddr = tmp_req->vaddr & 0xfff;
tmp_req->paddr |= (c & 0xf0) << 8;
+ tmp_req->paddr |= (Addr)(c & 0x0f) << 32;
for (int i = 2; i < 4; ++i) {
c = getc(trace);
if (c == EOF) {
fatal("Unexpected end of trace file.");
}
- tmp_req->paddr |= (c & 0xff) << (8 * i);
+ tmp_req->paddr |= (Addr)(c & 0xff) << (8 * i);
}
if (type == ITXCode) {
if (((tmp_req->paddr & 0xfff) + tmp_req->size)
@@ -148,6 +150,7 @@ ITXReader::getNextReq(MemReqPtr &req)
codePhysValid = true;
}
}
+ assert(tmp_req->paddr >> 36 == 0);
} else if (type == ITXCode) {
codePhysValid = false;
}
@@ -158,8 +161,12 @@ ITXReader::getNextReq(MemReqPtr &req)
case ITXWrite:
tmp_req->cmd = Write;
break;
+ case ITXWriteback:
+ tmp_req->cmd = Writeback;
+ break;
case ITXCode:
tmp_req->cmd = Read;
+ tmp_req->flags |= INST_READ;
break;
default:
fatal("Unknown ITX type");
@@ -173,6 +180,7 @@ ITXReader::getNextReq(MemReqPtr &req)
}
} while (!phys_val);
req = tmp_req;
+ assert(!req || (req->paddr >> 36) == 0);
return 0;
}
diff --git a/cpu/trace/reader/itx_reader.hh b/cpu/trace/reader/itx_reader.hh
index 0e08d5db5..d45a16a69 100644
--- a/cpu/trace/reader/itx_reader.hh
+++ b/cpu/trace/reader/itx_reader.hh
@@ -35,6 +35,7 @@
#define __ITX_READER_HH__
#include <stdio.h>
+#include <string>
#include "cpu/trace/reader/mem_trace_reader.hh"
#include "mem/mem_req.hh"
diff --git a/cpu/trace/trace_cpu.cc b/cpu/trace/trace_cpu.cc
index 94f311d4b..f1160337a 100644
--- a/cpu/trace/trace_cpu.cc
+++ b/cpu/trace/trace_cpu.cc
@@ -46,23 +46,13 @@ using namespace std;
TraceCPU::TraceCPU(const string &name,
MemInterface *icache_interface,
MemInterface *dcache_interface,
- MemTraceReader *inst_trace,
- MemTraceReader *data_trace,
- int icache_ports,
- int dcache_ports)
- : BaseCPU(name, 4), icacheInterface(icache_interface),
- dcacheInterface(dcache_interface), instTrace(inst_trace),
- dataTrace(data_trace), icachePorts(icache_ports),
- dcachePorts(dcache_ports), outstandingRequests(0), tickEvent(this)
+ MemTraceReader *data_trace)
+ : BaseCPU(name, 4, true), icacheInterface(icache_interface),
+ dcacheInterface(dcache_interface),
+ dataTrace(data_trace), outstandingRequests(0), tickEvent(this)
{
- if (instTrace) {
- assert(icacheInterface);
- nextInstCycle = instTrace->getNextReq(nextInstReq);
- }
- if (dataTrace) {
- assert(dcacheInterface);
- nextDataCycle = dataTrace->getNextReq(nextDataReq);
- }
+ assert(dcacheInterface);
+ nextCycle = dataTrace->getNextReq(nextReq);
tickEvent.schedule(0);
}
@@ -74,41 +64,46 @@ TraceCPU::tick()
int instReqs = 0;
int dataReqs = 0;
- // Do data first to match tracing with FullCPU dumps
-
- while (nextDataReq && (dataReqs < dcachePorts) &&
- curTick >= nextDataCycle) {
- assert(nextDataReq->thread_num < 4 && "Not enough threads");
- if (dcacheInterface->isBlocked())
- break;
-
- ++dataReqs;
- nextDataReq->time = curTick;
- nextDataReq->completionEvent =
- new TraceCompleteEvent(nextDataReq, this);
- dcacheInterface->access(nextDataReq);
- nextDataCycle = dataTrace->getNextReq(nextDataReq);
- }
-
- while (nextInstReq && (instReqs < icachePorts) &&
- curTick >= nextInstCycle) {
- assert(nextInstReq->thread_num < 4 && "Not enough threads");
- if (icacheInterface->isBlocked())
- break;
-
- nextInstReq->time = curTick;
- if (nextInstReq->cmd == Squash) {
- icacheInterface->squash(nextInstReq->asid);
+ while (nextReq && curTick >= nextCycle) {
+ assert(nextReq->thread_num < 4 && "Not enough threads");
+ if (nextReq->isInstRead() && icacheInterface) {
+ if (icacheInterface->isBlocked())
+ break;
+
+ nextReq->time = curTick;
+ if (nextReq->cmd == Squash) {
+ icacheInterface->squash(nextReq->asid);
+ } else {
+ ++instReqs;
+ if (icacheInterface->doEvents()) {
+ nextReq->completionEvent =
+ new TraceCompleteEvent(nextReq, this);
+ icacheInterface->access(nextReq);
+ } else {
+ icacheInterface->access(nextReq);
+ completeRequest(nextReq);
+ }
+ }
} else {
- ++instReqs;
- nextInstReq->completionEvent =
- new TraceCompleteEvent(nextInstReq, this);
- icacheInterface->access(nextInstReq);
+ if (dcacheInterface->isBlocked())
+ break;
+
+ ++dataReqs;
+ nextReq->time = curTick;
+ if (dcacheInterface->doEvents()) {
+ nextReq->completionEvent =
+ new TraceCompleteEvent(nextReq, this);
+ dcacheInterface->access(nextReq);
+ } else {
+ dcacheInterface->access(nextReq);
+ completeRequest(nextReq);
+ }
+
}
- nextInstCycle = instTrace->getNextReq(nextInstReq);
+ nextCycle = dataTrace->getNextReq(nextReq);
}
- if (!nextInstReq && !nextDataReq) {
+ if (!nextReq) {
// No more requests to send. Finish trailing events and exit.
if (mainEventQueue.empty()) {
new SimExitEvent("Finshed Memory Trace");
@@ -116,8 +111,7 @@ TraceCPU::tick()
tickEvent.schedule(mainEventQueue.nextEventTime() + 1);
}
} else {
- tickEvent.schedule(max(curTick + 1,
- min(nextInstCycle, nextDataCycle)));
+ tickEvent.schedule(max(curTick + 1, nextCycle));
}
}
@@ -161,10 +155,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TraceCPU)
SimObjectParam<BaseMem *> icache;
SimObjectParam<BaseMem *> dcache;
- SimObjectParam<MemTraceReader *> inst_trace;
SimObjectParam<MemTraceReader *> data_trace;
- Param<int> inst_ports;
- Param<int> data_ports;
END_DECLARE_SIM_OBJECT_PARAMS(TraceCPU)
@@ -172,10 +163,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TraceCPU)
INIT_PARAM_DFLT(icache, "instruction cache", NULL),
INIT_PARAM_DFLT(dcache, "data cache", NULL),
- INIT_PARAM_DFLT(inst_trace, "instruction trace", NULL),
- INIT_PARAM_DFLT(data_trace, "data trace", NULL),
- INIT_PARAM_DFLT(inst_ports, "instruction cache read ports", 4),
- INIT_PARAM_DFLT(data_ports, "data cache read/write ports", 4)
+ INIT_PARAM_DFLT(data_trace, "data trace", NULL)
END_INIT_SIM_OBJECT_PARAMS(TraceCPU)
@@ -184,7 +172,7 @@ CREATE_SIM_OBJECT(TraceCPU)
return new TraceCPU(getInstanceName(),
(icache) ? icache->getInterface() : NULL,
(dcache) ? dcache->getInterface() : NULL,
- inst_trace, data_trace, inst_ports, data_ports);
+ data_trace);
}
REGISTER_SIM_OBJECT("TraceCPU", TraceCPU)
diff --git a/cpu/trace/trace_cpu.hh b/cpu/trace/trace_cpu.hh
index 6f3ef50a6..1711646a8 100644
--- a/cpu/trace/trace_cpu.hh
+++ b/cpu/trace/trace_cpu.hh
@@ -55,28 +55,17 @@ class TraceCPU : public BaseCPU
/** Interface for data trace requests, if any. */
MemInterface *dcacheInterface;
- /** Instruction reference trace. */
- MemTraceReader *instTrace;
/** Data reference trace. */
MemTraceReader *dataTrace;
- /** Number of Icache read ports. */
- int icachePorts;
- /** Number of Dcache read/write ports. */
- int dcachePorts;
-
/** Number of outstanding requests. */
int outstandingRequests;
- /** Cycle of the next instruction request, 0 if not available. */
- Tick nextInstCycle;
- /** Cycle of the next data request, 0 if not available. */
- Tick nextDataCycle;
+ /** Cycle of the next request, 0 if not available. */
+ Tick nextCycle;
- /** Next instruction request. */
- MemReqPtr nextInstReq;
- /** Next data request. */
- MemReqPtr nextDataReq;
+ /** Next request. */
+ MemReqPtr nextReq;
/**
* Event to call the TraceCPU::tick
@@ -113,10 +102,7 @@ class TraceCPU : public BaseCPU
TraceCPU(const std::string &name,
MemInterface *icache_interface,
MemInterface *dcache_interface,
- MemTraceReader *inst_trace,
- MemTraceReader *data_trace,
- int icache_ports,
- int dcache_ports);
+ MemTraceReader *data_trace);
/**
* Perform all the accesses for one cycle.