Diffstat (limited to 'cpu')
-rw-r--r-- | cpu/base_cpu.cc | 94
-rw-r--r-- | cpu/base_cpu.hh | 33
-rw-r--r-- | cpu/exec_context.cc | 90
-rw-r--r-- | cpu/exec_context.hh | 31
-rw-r--r-- | cpu/exetrace.cc | 16
-rw-r--r-- | cpu/memtest/memtest.cc | 52
-rw-r--r-- | cpu/memtest/memtest.hh | 15
-rw-r--r-- | cpu/pc_event.cc | 4
-rw-r--r-- | cpu/pc_event.hh | 4
-rw-r--r-- | cpu/simple_cpu/simple_cpu.cc | 103
-rw-r--r-- | cpu/simple_cpu/simple_cpu.hh | 14
-rw-r--r-- | cpu/static_inst.hh | 1
-rw-r--r-- | cpu/trace/opt_cpu.cc | 240
-rw-r--r-- | cpu/trace/opt_cpu.hh | 222
-rw-r--r-- | cpu/trace/reader/itx_reader.cc | 10
-rw-r--r-- | cpu/trace/reader/itx_reader.hh | 1
-rw-r--r-- | cpu/trace/trace_cpu.cc | 102
-rw-r--r-- | cpu/trace/trace_cpu.hh | 24
18 files changed, 823 insertions, 233 deletions
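Note: one of the changes in the diff below is easier to follow outside the patch context. base_cpu.cc/base_cpu.hh gain per-CPU function tracing: traceFunctionsInternal() caches the [start, end) address range of the last symbol it reported and only consults the symbol table (and writes a trace line) when the PC leaves that range. What follows is a minimal, self-contained C++ sketch of that mechanism, not code from the patch: the FunctionTracer class name, the toy SymbolTable, and the Addr/Tick typedefs are stand-ins for the m5 types of the same names (base/loader/symtab, sim/host).

// Sketch only: the function-tracing logic added to BaseCPU, reduced to a
// standalone form under the assumptions stated above.
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

typedef uint64_t Addr;
typedef int64_t Tick;

// Stand-in for m5's SymbolTable: a list of [start, end) ranges with names.
struct SymbolTable {
    struct Sym { Addr start, end; std::string name; };
    std::vector<Sym> syms;

    bool findNearestSymbol(Addr pc, std::string &name,
                           Addr &start, Addr &end) const
    {
        for (const Sym &s : syms) {
            if (pc >= s.start && pc < s.end) {
                name = s.name; start = s.start; end = s.end;
                return true;
            }
        }
        return false;
    }
};

class FunctionTracer {
    const SymbolTable &symtab;
    std::ostream &out;
    Addr curStart = 0, curEnd = 0;  // cached range of the current function
    Tick entryTick = 0;             // tick at which it was entered

  public:
    FunctionTracer(const SymbolTable &s, std::ostream &o)
        : symtab(s), out(o) {}

    void trace(Addr pc, Tick now)
    {
        // Fast path: still inside the function we last reported.
        if (pc >= curStart && pc < curEnd)
            return;

        std::string sym;
        if (!symtab.findNearestSymbol(pc, sym, curStart, curEnd)) {
            // No symbol found: label the entry with the raw PC, as the
            // patch does with csprintf("0x%x", pc).
            char buf[32];
            std::snprintf(buf, sizeof(buf), "0x%llx",
                          (unsigned long long)pc);
            sym = buf;
            curStart = pc;
            curEnd = pc + 1;
        }

        // Print the time spent in the previous function, then the new
        // entry, mirroring the "(%d)\n%d: %s" format in the patch.
        out << " (" << (now - entryTick) << ")\n" << now << ": " << sym;
        entryTick = now;
    }
};

The cached range makes the common case (consecutive PCs inside the same function) a two-comparison check, which is why the patch guards the symbol-table work behind the inline traceFunctions() test of functionTracingEnabled.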
diff --git a/cpu/base_cpu.cc b/cpu/base_cpu.cc index 988c7a602..2275f12e3 100644 --- a/cpu/base_cpu.cc +++ b/cpu/base_cpu.cc @@ -30,10 +30,11 @@ #include <sstream> #include <iostream> -#include "cpu/base_cpu.hh" #include "base/cprintf.hh" -#include "cpu/exec_context.hh" +#include "base/loader/symtab.hh" #include "base/misc.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" #include "sim/param.hh" #include "sim/sim_events.hh" @@ -50,21 +51,25 @@ int maxThreadsPerCPU = 1; extern void debug_break(); #ifdef FULL_SYSTEM -BaseCPU::BaseCPU(const string &_name, int _number_of_threads, +BaseCPU::BaseCPU(const string &_name, int _number_of_threads, bool _def_reg, Counter max_insts_any_thread, Counter max_insts_all_threads, Counter max_loads_any_thread, Counter max_loads_all_threads, - System *_system, Tick freq) - : SimObject(_name), frequency(freq), - number_of_threads(_number_of_threads), system(_system) + System *_system, Tick freq, + bool _function_trace, Tick _function_trace_start) + : SimObject(_name), frequency(freq), checkInterrupts(true), + deferRegistration(_def_reg), number_of_threads(_number_of_threads), + system(_system) #else -BaseCPU::BaseCPU(const string &_name, int _number_of_threads, +BaseCPU::BaseCPU(const string &_name, int _number_of_threads, bool _def_reg, Counter max_insts_any_thread, Counter max_insts_all_threads, Counter max_loads_any_thread, - Counter max_loads_all_threads) - : SimObject(_name), number_of_threads(_number_of_threads) + Counter max_loads_all_threads, + bool _function_trace, Tick _function_trace_start) + : SimObject(_name), deferRegistration(_def_reg), + number_of_threads(_number_of_threads) #endif { DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this); @@ -134,10 +139,47 @@ BaseCPU::BaseCPU(const string &_name, int _number_of_threads, memset(interrupts, 0, sizeof(interrupts)); intstatus = 0; #endif + + functionTracingEnabled = false; + if (_function_trace) { + std::string filename = csprintf("ftrace.%s", name()); + functionTraceStream = makeOutputStream(filename); + currentFunctionStart = currentFunctionEnd = 0; + functionEntryTick = _function_trace_start; + + if (_function_trace_start == 0) { + functionTracingEnabled = true; + } else { + Event *e = + new EventWrapper<BaseCPU, &BaseCPU::enableFunctionTrace>(this, + true); + e->schedule(_function_trace_start); + } + } +} + + +void +BaseCPU::enableFunctionTrace() +{ + functionTracingEnabled = true; +} + +BaseCPU::~BaseCPU() +{ + if (functionTracingEnabled) + closeOutputStream(functionTraceStream); } void +BaseCPU::init() +{ + if (!deferRegistration) + registerExecContexts(); +} + +void BaseCPU::regStats() { using namespace Stats; @@ -195,10 +237,10 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU) newXC->takeOverFrom(oldXC); assert(newXC->cpu_id == oldXC->cpu_id); #ifdef FULL_SYSTEM - system->replaceExecContext(newXC->cpu_id, newXC); + system->replaceExecContext(newXC, newXC->cpu_id); #else assert(newXC->process == oldXC->process); - newXC->process->replaceExecContext(newXC->cpu_id, newXC); + newXC->process->replaceExecContext(newXC, newXC->cpu_id); #endif } @@ -222,7 +264,7 @@ BaseCPU::post_interrupt(int int_num, int index) if (index < 0 || index >= sizeof(uint64_t) * 8) panic("int_num out of bounds\n"); - AlphaISA::check_interrupts = 1; + checkInterrupts = true; interrupts[int_num] |= 1 << index; intstatus |= (ULL(1) << int_num); } @@ -269,4 +311,32 @@ BaseCPU::unserialize(Checkpoint *cp, const std::string §ion) #endif // FULL_SYSTEM +void +BaseCPU::traceFunctionsInternal(Addr pc) +{ + 
if (!debugSymbolTable) + return; + + // if pc enters different function, print new function symbol and + // update saved range. Otherwise do nothing. + if (pc < currentFunctionStart || pc >= currentFunctionEnd) { + string sym_str; + bool found = debugSymbolTable->findNearestSymbol(pc, sym_str, + currentFunctionStart, + currentFunctionEnd); + + if (!found) { + // no symbol found: use addr as label + sym_str = csprintf("0x%x", pc); + currentFunctionStart = pc; + currentFunctionEnd = pc + 1; + } + + ccprintf(*functionTraceStream, " (%d)\n%d: %s", + curTick - functionEntryTick, curTick, sym_str); + functionEntryTick = curTick; + } +} + + DEFINE_SIM_OBJECT_CLASS_NAME("BaseCPU", BaseCPU) diff --git a/cpu/base_cpu.hh b/cpu/base_cpu.hh index f75f00409..baa956aa8 100644 --- a/cpu/base_cpu.hh +++ b/cpu/base_cpu.hh @@ -55,6 +55,7 @@ class BaseCPU : public SimObject virtual void post_interrupt(int int_num, int index); virtual void clear_interrupt(int int_num, int index); virtual void clear_interrupts(); + bool checkInterrupts; bool check_interrupt(int int_num) const { if (int_num > NumInterruptLevels) @@ -91,22 +92,26 @@ class BaseCPU : public SimObject public: #ifdef FULL_SYSTEM - BaseCPU(const std::string &_name, int _number_of_threads, + BaseCPU(const std::string &_name, int _number_of_threads, bool _def_reg, Counter max_insts_any_thread, Counter max_insts_all_threads, Counter max_loads_any_thread, Counter max_loads_all_threads, - System *_system, Tick freq); + System *_system, Tick freq, + bool _function_trace = false, Tick _function_trace_start = 0); #else - BaseCPU(const std::string &_name, int _number_of_threads, + BaseCPU(const std::string &_name, int _number_of_threads, bool _def_reg, Counter max_insts_any_thread = 0, Counter max_insts_all_threads = 0, Counter max_loads_any_thread = 0, - Counter max_loads_all_threads = 0); + Counter max_loads_all_threads = 0, + bool _function_trace = false, Tick _function_trace_start = 0); #endif - virtual ~BaseCPU() {} + virtual ~BaseCPU(); + virtual void init(); virtual void regStats(); + bool deferRegistration; void registerExecContexts(); /// Prepare for another CPU to take over execution. Called by @@ -140,7 +145,6 @@ class BaseCPU : public SimObject #ifdef FULL_SYSTEM System *system; - /** * Serialize this object to the given output stream. * @param os The stream to serialize to. 
@@ -164,6 +168,23 @@ class BaseCPU : public SimObject virtual Counter totalInstructions() const { return 0; } + // Function tracing + private: + bool functionTracingEnabled; + std::ostream *functionTraceStream; + Addr currentFunctionStart; + Addr currentFunctionEnd; + Tick functionEntryTick; + void enableFunctionTrace(); + void traceFunctionsInternal(Addr pc); + + protected: + void traceFunctions(Addr pc) + { + if (functionTracingEnabled) + traceFunctionsInternal(pc); + } + private: static std::vector<BaseCPU *> cpuList; //!< Static global cpu list diff --git a/cpu/exec_context.cc b/cpu/exec_context.cc index 9c21b3a56..1cb33f13e 100644 --- a/cpu/exec_context.cc +++ b/cpu/exec_context.cc @@ -32,6 +32,9 @@ #include "cpu/exec_context.hh" #ifdef FULL_SYSTEM +#include "base/cprintf.hh" +#include "kern/kernel_stats.hh" +#include "sim/serialize.hh" #include "sim/system.hh" #else #include "sim/process.hh" @@ -44,12 +47,13 @@ using namespace std; ExecContext::ExecContext(BaseCPU *_cpu, int _thread_num, System *_sys, AlphaITB *_itb, AlphaDTB *_dtb, FunctionalMemory *_mem) - : _status(ExecContext::Unallocated), - kernelStats(this, _cpu), cpu(_cpu), thread_num(_thread_num), + : _status(ExecContext::Unallocated), cpu(_cpu), thread_num(_thread_num), cpu_id(-1), mem(_mem), itb(_itb), dtb(_dtb), system(_sys), - memCtrl(_sys->memCtrl), physmem(_sys->physmem), - swCtx(NULL), func_exe_inst(0), storeCondFailures(0) + memctrl(_sys->memctrl), physmem(_sys->physmem), + kernelBinning(system->kernelBinning), bin(kernelBinning->bin), + fnbin(kernelBinning->fnbin), func_exe_inst(0), storeCondFailures(0) { + kernelStats = new Kernel::Statistics(this); memset(®s, 0, sizeof(RegFile)); } #else @@ -72,6 +76,13 @@ ExecContext::ExecContext(BaseCPU *_cpu, int _thread_num, } #endif +ExecContext::~ExecContext() +{ +#ifdef FULL_SYSTEM + delete kernelStats; +#endif +} + void ExecContext::takeOverFrom(ExecContext *oldContext) @@ -86,9 +97,6 @@ ExecContext::takeOverFrom(ExecContext *oldContext) // copy over functional state _status = oldContext->_status; -#ifdef FULL_SYSTEM - kernelStats = oldContext->kernelStats; -#endif regs = oldContext->regs; cpu_id = oldContext->cpu_id; func_exe_inst = oldContext->func_exe_inst; @@ -98,6 +106,14 @@ ExecContext::takeOverFrom(ExecContext *oldContext) oldContext->_status = ExecContext::Unallocated; } +#ifdef FULL_SYSTEM +void +ExecContext::execute(const StaticInstBase *inst) +{ + assert(kernelStats); + system->kernelBinning->execute(this, inst); +} +#endif void ExecContext::serialize(ostream &os) @@ -109,31 +125,8 @@ ExecContext::serialize(ostream &os) SERIALIZE_SCALAR(inst); #ifdef FULL_SYSTEM - bool ctx = false; - if (swCtx) { - ctx = true; - SERIALIZE_SCALAR(ctx); - SERIALIZE_SCALAR(swCtx->calls); - std::stack<fnCall *> *stack = &(swCtx->callStack); - fnCall *top; - int size = stack->size(); - SERIALIZE_SCALAR(size); - - for (int j=0; j<size; ++j) { - top = stack->top(); - paramOut(os, csprintf("stackpos[%d]",j), top->name); - delete top; - stack->pop(); - } - } else { - SERIALIZE_SCALAR(ctx); - } - if (system->bin) { - Stats::MainBin *cur = Stats::MainBin::curBin(); - string bin_name = cur->name(); - SERIALIZE_SCALAR(bin_name); - } -#endif //FULL_SYSTEM + kernelStats->serialize(os); +#endif } @@ -147,35 +140,8 @@ ExecContext::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_SCALAR(inst); #ifdef FULL_SYSTEM - bool ctx; - UNSERIALIZE_SCALAR(ctx); - if (ctx) { - swCtx = new SWContext; - UNSERIALIZE_SCALAR(swCtx->calls); - int size; - UNSERIALIZE_SCALAR(size); - - vector<fnCall 
*> calls; - fnCall *call; - for (int i=0; i<size; ++i) { - call = new fnCall; - paramIn(cp, section, csprintf("stackpos[%d]",i), call->name); - call->myBin = system->getBin(call->name); - calls.push_back(call); - } - - for (int i=size-1; i>=0; --i) { - swCtx->callStack.push(calls[i]); - } - - } - - if (system->bin) { - string bin_name; - UNSERIALIZE_SCALAR(bin_name); - system->getBin(bin_name)->activate(); - } -#endif //FULL_SYSTEM + kernelStats->unserialize(cp, section); +#endif } @@ -232,7 +198,7 @@ void ExecContext::regStats(const string &name) { #ifdef FULL_SYSTEM - kernelStats.regStats(name + ".kern"); + kernelStats->regStats(name + ".kern"); #endif } diff --git a/cpu/exec_context.hh b/cpu/exec_context.hh index b47f5cd08..8437a5585 100644 --- a/cpu/exec_context.hh +++ b/cpu/exec_context.hh @@ -42,12 +42,12 @@ class BaseCPU; #ifdef FULL_SYSTEM +#include "sim/system.hh" #include "targetarch/alpha_memory.hh" -class MemoryController; -#include "kern/kernel_stats.hh" -#include "sim/system.hh" -#include "sim/sw_context.hh" +class MemoryController; +class StaticInstBase; +namespace Kernel { class Binning; class Statistics; } #else // !FULL_SYSTEM @@ -105,11 +105,6 @@ class ExecContext /// Set the status to Halted. void halt(); -#ifdef FULL_SYSTEM - public: - KernelStats kernelStats; -#endif - public: RegFile regs; // correct-path register context @@ -127,7 +122,6 @@ class ExecContext int cpu_id; #ifdef FULL_SYSTEM - FunctionalMemory *mem; AlphaITB *itb; AlphaDTB *dtb; @@ -136,10 +130,15 @@ class ExecContext // the following two fields are redundant, since we can always // look them up through the system pointer, but we'll leave them // here for now for convenience - MemoryController *memCtrl; + MemoryController *memctrl; PhysicalMemory *physmem; - SWContext *swCtx; + Kernel::Binning *kernelBinning; + Kernel::Statistics *kernelStats; + bool bin; + bool fnbin; + void execute(const StaticInstBase *inst); + #else Process *process; @@ -185,7 +184,7 @@ class ExecContext ExecContext(BaseCPU *_cpu, int _thread_num, FunctionalMemory *_mem, int _asid); #endif - virtual ~ExecContext() {} + virtual ~ExecContext(); virtual void takeOverFrom(ExecContext *oldContext); @@ -197,8 +196,8 @@ class ExecContext #ifdef FULL_SYSTEM bool validInstAddr(Addr addr) { return true; } bool validDataAddr(Addr addr) { return true; } - int getInstAsid() { return ITB_ASN_ASN(regs.ipr[TheISA::IPR_ITB_ASN]); } - int getDataAsid() { return DTB_ASN_ASN(regs.ipr[TheISA::IPR_DTB_ASN]); } + int getInstAsid() { return regs.instAsid(); } + int getDataAsid() { return regs.dataAsid(); } Fault translateInstReq(MemReqPtr &req) { @@ -411,7 +410,7 @@ class ExecContext int readIntrFlag() { return regs.intrflag; } void setIntrFlag(int val) { regs.intrflag = val; } Fault hwrei(); - bool inPalMode() { return PC_PAL(regs.pc); } + bool inPalMode() { return AlphaISA::PcPAL(regs.pc); } void ev5_trap(Fault fault); bool simPalCheck(int palFunc); #endif diff --git a/cpu/exetrace.cc b/cpu/exetrace.cc index e31c3590c..ff7e90c9e 100644 --- a/cpu/exetrace.cc +++ b/cpu/exetrace.cc @@ -48,8 +48,6 @@ using namespace std; // -SymbolTable *debugSymbolTable = NULL; - void Trace::InstRecord::dump(ostream &outs) { @@ -66,11 +64,17 @@ Trace::InstRecord::dump(ostream &outs) outs << "T" << thread << " : "; - std::string str; - if ((debugSymbolTable) && (debugSymbolTable->findNearestSymbol(PC, str))) - outs << "@" << setw(17) << str << " : "; - else + std::string sym_str; + Addr sym_addr; + if (debugSymbolTable + && debugSymbolTable->findNearestSymbol(PC, sym_str, 
sym_addr)) { + if (PC != sym_addr) + sym_str += csprintf("+%d", PC - sym_addr); + outs << "@" << sym_str << " : "; + } + else { outs << "0x" << hex << PC << " : "; + } // // Print decoded instruction diff --git a/cpu/memtest/memtest.cc b/cpu/memtest/memtest.cc index b55af332a..e967c79da 100644 --- a/cpu/memtest/memtest.cc +++ b/cpu/memtest/memtest.cc @@ -28,9 +28,10 @@ // FIX ME: make trackBlkAddr use blocksize from actual cache, not hard coded -#include <string> -#include <sstream> #include <iomanip> +#include <set> +#include <sstream> +#include <string> #include <vector> #include "base/misc.hh" @@ -44,6 +45,8 @@ using namespace std; +int TESTER_ALLOCATOR=0; + MemTest::MemTest(const string &name, MemInterface *_cache_interface, FunctionalMemory *main_mem, @@ -58,7 +61,8 @@ MemTest::MemTest(const string &name, Addr _traceAddr, Counter max_loads_any_thread, Counter max_loads_all_threads) - : BaseCPU(name, 1, 0, 0, max_loads_any_thread, max_loads_all_threads), + : BaseCPU(name, 1, true, 0, 0, max_loads_any_thread, + max_loads_all_threads), tickEvent(this), cacheInterface(_cache_interface), mainMem(main_mem), @@ -110,6 +114,8 @@ MemTest::MemTest(const string &name, noResponseCycles = 0; numReads = 0; tickEvent.schedule(0); + + id = TESTER_ALLOCATOR++; } static void @@ -126,6 +132,11 @@ printData(ostream &os, uint8_t *data, int nbytes) void MemTest::completeRequest(MemReqPtr &req, uint8_t *data) { + //Remove the address from the list of outstanding + std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->paddr); + assert(removeAddr != outstandingAddrs.end()); + outstandingAddrs.erase(removeAddr); + switch (req->cmd) { case Read: if (memcmp(req->data, data, req->size) != 0) { @@ -157,6 +168,10 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data) break; case Copy: + //Also remove dest from outstanding list + removeAddr = outstandingAddrs.find(req->dest); + assert(removeAddr != outstandingAddrs.end()); + outstandingAddrs.erase(removeAddr); numCopiesStat++; break; @@ -211,7 +226,7 @@ MemTest::tick() if (!tickEvent.scheduled()) tickEvent.schedule(curTick + 1); - if (++noResponseCycles >= 5000) { + if (++noResponseCycles >= 500000) { cerr << name() << ": deadlocked at cycle " << curTick << endl; fatal(""); } @@ -231,6 +246,16 @@ MemTest::tick() unsigned source_align = rand() % 100; unsigned dest_align = rand() % 100; + //If we aren't doing copies, use id as offset, and do a false sharing + //mem tester + if (percentCopies == 0) { + //We can eliminate the lower bits of the offset, and then use the id + //to offset within the blks + offset1 &= ~63; //Not the low order bits + offset1 += id; + access_size = 0; + } + MemReqPtr req = new MemReq(); if (cacheable < percentUncacheable) { @@ -250,6 +275,13 @@ MemTest::tick() if (cmd < percentReads) { // read + + //For now we only allow one outstanding request per addreess per tester + //This means we assume CPU does write forwarding to reads that alias something + //in the cpu store buffer. + if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(req->paddr); + req->cmd = Read; uint8_t *result = new uint8_t[8]; checkMem->access(Read, req->paddr, result, req->size); @@ -272,6 +304,13 @@ MemTest::tick() } } else if (cmd < (100 - percentCopies)){ // write + + //For now we only allow one outstanding request per addreess per tester + //This means we assume CPU does write forwarding to reads that alias something + //in the cpu store buffer. 
+ if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(req->paddr); + req->cmd = Write; memcpy(req->data, &data, req->size); checkMem->access(Write, req->paddr, req->data, req->size); @@ -297,6 +336,11 @@ MemTest::tick() // copy Addr source = ((base) ? baseAddr1 : baseAddr2) + offset1; Addr dest = ((base) ? baseAddr2 : baseAddr1) + offset2; + if (outstandingAddrs.find(source) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(source); + if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(dest); + if (source_align >= percentSourceUnaligned) { source = blockAddr(source); } diff --git a/cpu/memtest/memtest.hh b/cpu/memtest/memtest.hh index 72e0709d9..43b17a713 100644 --- a/cpu/memtest/memtest.hh +++ b/cpu/memtest/memtest.hh @@ -29,13 +29,14 @@ #ifndef __MEMTEST_HH__ #define __MEMTEST_HH__ -#include "sim/sim_object.hh" -#include "mem/mem_interface.hh" -#include "mem/functional_mem/functional_memory.hh" -#include "cpu/base_cpu.hh" -#include "cpu/exec_context.hh" +#include <set> #include "base/statistics.hh" +#include "cpu/base_cpu.hh" +#include "cpu/exec_context.hh" +#include "mem/functional_mem/functional_memory.hh" +#include "mem/mem_interface.hh" +#include "sim/sim_object.hh" #include "sim/stats.hh" class MemTest : public BaseCPU @@ -87,6 +88,10 @@ class MemTest : public BaseCPU unsigned percentCopies; // target percentage of copy accesses unsigned percentUncacheable; + int id; + + std::set<unsigned> outstandingAddrs; + unsigned blockSize; Addr blockAddrMask; diff --git a/cpu/pc_event.cc b/cpu/pc_event.cc index a86c017d4..8f046a7a4 100644 --- a/cpu/pc_event.cc +++ b/cpu/pc_event.cc @@ -77,7 +77,7 @@ PCEventQueue::schedule(PCEvent *event) bool PCEventQueue::doService(ExecContext *xc) { - Addr pc = xc->regs.pc; + Addr pc = xc->regs.pc & ~0x3; int serviced = 0; range_t range = equal_range(pc); for (iterator i = range.first; i != range.second; ++i) { @@ -85,7 +85,7 @@ PCEventQueue::doService(ExecContext *xc) // another event. This for example, prevents two invocations // of the SkipFuncEvent. Maybe we should have separate PC // event queues for each processor? 
- if (pc != xc->regs.pc) + if (pc != (xc->regs.pc & ~0x3)) continue; DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n", diff --git a/cpu/pc_event.hh b/cpu/pc_event.hh index 131016fc6..9983d679b 100644 --- a/cpu/pc_event.hh +++ b/cpu/pc_event.hh @@ -143,7 +143,7 @@ PCEvent::schedule(Addr pc) { if (evpc != badpc) panic("cannot switch PC"); - evpc = pc; + evpc = pc & ~0x3; return schedule(); } @@ -158,7 +158,7 @@ PCEvent::schedule(PCEventQueue *q, Addr pc) panic("cannot switch addresses"); queue = q; - evpc = pc; + evpc = pc & ~0x3; return schedule(); } diff --git a/cpu/simple_cpu/simple_cpu.cc b/cpu/simple_cpu/simple_cpu.cc index 6c22d7c81..d48f93663 100644 --- a/cpu/simple_cpu/simple_cpu.cc +++ b/cpu/simple_cpu/simple_cpu.cc @@ -123,11 +123,12 @@ SimpleCPU::SimpleCPU(const string &_name, FunctionalMemory *mem, MemInterface *icache_interface, MemInterface *dcache_interface, - bool _def_reg, Tick freq) - : BaseCPU(_name, /* number_of_threads */ 1, + bool _def_reg, Tick freq, + bool _function_trace, Tick _function_trace_start) + : BaseCPU(_name, /* number_of_threads */ 1, _def_reg, max_insts_any_thread, max_insts_all_threads, max_loads_any_thread, max_loads_all_threads, - _system, freq), + _system, freq, _function_trace, _function_trace_start), #else SimpleCPU::SimpleCPU(const string &_name, Process *_process, Counter max_insts_any_thread, @@ -136,13 +137,14 @@ SimpleCPU::SimpleCPU(const string &_name, Process *_process, Counter max_loads_all_threads, MemInterface *icache_interface, MemInterface *dcache_interface, - bool _def_reg) - : BaseCPU(_name, /* number_of_threads */ 1, + bool _def_reg, + bool _function_trace, Tick _function_trace_start) + : BaseCPU(_name, /* number_of_threads */ 1, _def_reg, max_insts_any_thread, max_insts_all_threads, - max_loads_any_thread, max_loads_all_threads), + max_loads_any_thread, max_loads_all_threads, + _function_trace, _function_trace_start), #endif - tickEvent(this), xc(NULL), defer_registration(_def_reg), - cacheCompletionEvent(this) + tickEvent(this), xc(NULL), cacheCompletionEvent(this) { _status = Idle; #ifdef FULL_SYSTEM @@ -176,13 +178,6 @@ SimpleCPU::~SimpleCPU() { } -void SimpleCPU::init() -{ - if (!defer_registration) { - this->registerExecContexts(); - } -} - void SimpleCPU::switchOut() { @@ -338,16 +333,30 @@ change_thread_state(int thread_number, int activate, int priority) Fault SimpleCPU::copySrcTranslate(Addr src) { - memReq->reset(src, (dcacheInterface) ? - dcacheInterface->getBlockSize() - : 64); + static bool no_warn = true; + int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; + // Only support block sizes of 64 atm. + assert(blk_size == 64); + int offset = src & (blk_size - 1); + + // Make sure block doesn't span page + if (no_warn && + (src & TheISA::PageMask) != ((src + blk_size) & TheISA::PageMask) && + (src >> 40) != 0xfffffc) { + warn("Copied block source spans pages %x.", src); + no_warn = false; + } + + memReq->reset(src & ~(blk_size - 1), blk_size); // translate to physical address Fault fault = xc->translateDataReadReq(memReq); + assert(fault != Alignment_Fault); + if (fault == No_Fault) { xc->copySrcAddr = src; - xc->copySrcPhysAddr = memReq->paddr; + xc->copySrcPhysAddr = memReq->paddr + offset; } else { xc->copySrcAddr = 0; xc->copySrcPhysAddr = 0; @@ -358,19 +367,44 @@ SimpleCPU::copySrcTranslate(Addr src) Fault SimpleCPU::copy(Addr dest) { + static bool no_warn = true; int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; + // Only support block sizes of 64 atm. 
+ assert(blk_size == 64); uint8_t data[blk_size]; - assert(xc->copySrcAddr); - memReq->reset(dest, blk_size); + //assert(xc->copySrcAddr); + int offset = dest & (blk_size - 1); + + // Make sure block doesn't span page + if (no_warn && + (dest & TheISA::PageMask) != ((dest + blk_size) & TheISA::PageMask) && + (dest >> 40) != 0xfffffc) { + no_warn = false; + warn("Copied block destination spans pages %x. ", dest); + } + + memReq->reset(dest & ~(blk_size -1), blk_size); // translate to physical address Fault fault = xc->translateDataWriteReq(memReq); + + assert(fault != Alignment_Fault); + if (fault == No_Fault) { - Addr dest_addr = memReq->paddr; + Addr dest_addr = memReq->paddr + offset; // Need to read straight from memory since we have more than 8 bytes. memReq->paddr = xc->copySrcPhysAddr; xc->mem->read(memReq, data); memReq->paddr = dest_addr; xc->mem->write(memReq, data); + if (dcacheInterface) { + memReq->cmd = Copy; + memReq->completionEvent = NULL; + memReq->paddr = xc->copySrcPhysAddr; + memReq->dest = dest_addr; + memReq->size = 64; + memReq->time = curTick; + dcacheInterface->access(memReq); + } } return fault; } @@ -610,13 +644,11 @@ SimpleCPU::tick() Fault fault = No_Fault; #ifdef FULL_SYSTEM - if (AlphaISA::check_interrupts && - xc->cpu->check_interrupts() && - !PC_PAL(xc->regs.pc) && + if (checkInterrupts && check_interrupts() && !xc->inPalMode() && status() != IcacheMissComplete) { int ipl = 0; int summary = 0; - AlphaISA::check_interrupts = 0; + checkInterrupts = false; IntReg *ipr = xc->regs.ipr; if (xc->regs.ipr[TheISA::IPR_SIRR]) { @@ -733,9 +765,8 @@ SimpleCPU::tick() fault = si->execute(this, traceData); #ifdef FULL_SYSTEM - SWContext *ctx = xc->swCtx; - if (ctx) - ctx->process(xc, si.get()); + if (xc->fnbin) + xc->execute(si.get()); #endif if (si->isMemRef()) { @@ -750,6 +781,8 @@ SimpleCPU::tick() if (traceData) traceData->finalize(); + traceFunctions(xc->regs.pc); + } // if (fault == No_Fault) if (fault != No_Fault) { @@ -808,6 +841,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU) Param<bool> defer_registration; Param<int> multiplier; + Param<bool> function_trace; + Param<Tick> function_trace_start; END_DECLARE_SIM_OBJECT_PARAMS(SimpleCPU) @@ -841,7 +876,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleCPU) INIT_PARAM_DFLT(defer_registration, "defer registration with system " "(for sampling)", false), - INIT_PARAM_DFLT(multiplier, "clock multiplier", 1) + INIT_PARAM_DFLT(multiplier, "clock multiplier", 1), + INIT_PARAM_DFLT(function_trace, "Enable function trace", false), + INIT_PARAM_DFLT(function_trace_start, "Cycle to start function trace", 0) END_INIT_SIM_OBJECT_PARAMS(SimpleCPU) @@ -860,7 +897,8 @@ CREATE_SIM_OBJECT(SimpleCPU) (icache) ? icache->getInterface() : NULL, (dcache) ? dcache->getInterface() : NULL, defer_registration, - ticksPerSecond * mult); + ticksPerSecond * mult, + function_trace, function_trace_start); #else cpu = new SimpleCPU(getInstanceName(), workload, @@ -868,7 +906,8 @@ CREATE_SIM_OBJECT(SimpleCPU) max_loads_any_thread, max_loads_all_threads, (icache) ? icache->getInterface() : NULL, (dcache) ? 
dcache->getInterface() : NULL, - defer_registration); + defer_registration, + function_trace, function_trace_start); #endif // FULL_SYSTEM diff --git a/cpu/simple_cpu/simple_cpu.hh b/cpu/simple_cpu/simple_cpu.hh index 6ab231e7e..341a0da23 100644 --- a/cpu/simple_cpu/simple_cpu.hh +++ b/cpu/simple_cpu/simple_cpu.hh @@ -31,7 +31,6 @@ #include "cpu/base_cpu.hh" #include "sim/eventq.hh" -#include "base/loader/symtab.hh" #include "cpu/pc_event.hh" #include "base/statistics.hh" #include "cpu/exec_context.hh" @@ -40,7 +39,6 @@ // forward declarations #ifdef FULL_SYSTEM class Processor; -class Kernel; class AlphaITB; class AlphaDTB; class PhysicalMemory; @@ -144,7 +142,8 @@ class SimpleCPU : public BaseCPU Counter max_loads_any_thread, Counter max_loads_all_threads, AlphaITB *itb, AlphaDTB *dtb, FunctionalMemory *mem, MemInterface *icache_interface, MemInterface *dcache_interface, - bool _def_reg, Tick freq); + bool _def_reg, Tick freq, + bool _function_trace, Tick _function_trace_start); #else @@ -154,12 +153,12 @@ class SimpleCPU : public BaseCPU Counter max_loads_any_thread, Counter max_loads_all_threads, MemInterface *icache_interface, MemInterface *dcache_interface, - bool _def_reg); + bool _def_reg, + bool _function_trace, Tick _function_trace_start); #endif virtual ~SimpleCPU(); - virtual void init(); // execution context ExecContext *xc; @@ -179,8 +178,6 @@ class SimpleCPU : public BaseCPU // L1 data cache MemInterface *dcacheInterface; - bool defer_registration; - // current instruction MachInst inst; @@ -250,8 +247,7 @@ class SimpleCPU : public BaseCPU Fault read(Addr addr, T &data, unsigned flags); template <class T> - Fault write(T data, Addr addr, unsigned flags, - uint64_t *res); + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); void prefetch(Addr addr, unsigned flags) { diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh index 71e9ef441..c47fa32db 100644 --- a/cpu/static_inst.hh +++ b/cpu/static_inst.hh @@ -48,6 +48,7 @@ class AlphaDynInst; class FastCPU; class SimpleCPU; +class InorderCPU; class SymbolTable; namespace Trace { diff --git a/cpu/trace/opt_cpu.cc b/cpu/trace/opt_cpu.cc new file mode 100644 index 000000000..77211e382 --- /dev/null +++ b/cpu/trace/opt_cpu.cc @@ -0,0 +1,240 @@ + +/* + * Copyright (c) 2004 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definition of a memory trace CPU object for optimal caches. Uses a memory + * trace to access a fully associative cache with optimal replacement. + */ + +#include <algorithm> // For heap functions. + +#include "cpu/trace/opt_cpu.hh" +#include "cpu/trace/reader/mem_trace_reader.hh" + +#include "sim/builder.hh" +#include "sim/sim_events.hh" + +using namespace std; + +OptCPU::OptCPU(const string &name, + MemTraceReader *_trace, + int block_size, + int cache_size, + int _assoc) + : BaseCPU(name, 1, true), tickEvent(this), trace(_trace), + numBlks(cache_size/block_size), assoc(_assoc), numSets(numBlks/assoc), + setMask(numSets - 1) +{ + int log_block_size = 0; + int tmp_block_size = block_size; + while (tmp_block_size > 1) { + ++log_block_size; + tmp_block_size = tmp_block_size >> 1; + } + assert(1<<log_block_size == block_size); + MemReqPtr req; + trace->getNextReq(req); + refInfo.resize(numSets); + while (req) { + RefInfo temp; + temp.addr = req->paddr >> log_block_size; + int set = temp.addr & setMask; + refInfo[set].push_back(temp); + trace->getNextReq(req); + } + + // Initialize top level of lookup table. + lookupTable.resize(16); + + // Annotate references with next ref time. + for (int k = 0; k < numSets; ++k) { + for (RefIndex i = refInfo[k].size() - 1; i >= 0; --i) { + Addr addr = refInfo[k][i].addr; + initTable(addr, InfiniteRef); + refInfo[k][i].nextRefTime = lookupValue(addr); + setValue(addr, i); + } + } + + // Reset the lookup table + for (int j = 0; j < 16; ++j) { + if (lookupTable[j].size() == (1<<16)) { + for (int k = 0; k < (1<<16); ++k) { + if (lookupTable[j][k].size() == (1<<16)) { + for (int l = 0; l < (1<<16); ++l) { + lookupTable[j][k][l] = -1; + } + } + } + } + } + + tickEvent.schedule(0); + + hits = 0; + misses = 0; +} + +void +OptCPU::processSet(int set) +{ + // Initialize cache + int blks_in_cache = 0; + RefIndex i = 0; + cacheHeap.clear(); + cacheHeap.resize(assoc); + + while (blks_in_cache < assoc) { + RefIndex cache_index = lookupValue(refInfo[set][i].addr); + if (cache_index == -1) { + // First reference to this block + misses++; + cache_index = blks_in_cache++; + setValue(refInfo[set][i].addr, cache_index); + } else { + hits++; + } + // update cache heap to most recent reference + cacheHeap[cache_index] = i; + if (++i >= refInfo[set].size()) { + return; + } + } + for (int start = assoc/2; start >= 0; --start) { + heapify(set,start); + } + //verifyHeap(set,0); + + for (; i < refInfo[set].size(); ++i) { + RefIndex cache_index = lookupValue(refInfo[set][i].addr); + if (cache_index == -1) { + // miss + misses++; + // replace from cacheHeap[0] + // mark replaced block as absent + setValue(refInfo[set][cacheHeap[0]].addr, -1); + setValue(refInfo[set][i].addr, 0); + cacheHeap[0] = i; + heapify(set, 0); + // Make sure its in the cache + assert(lookupValue(refInfo[set][i].addr) != -1); + } else { + // hit + hits++; + assert(refInfo[set][cacheHeap[cache_index]].addr == + refInfo[set][i].addr); + 
assert(refInfo[set][cacheHeap[cache_index]].nextRefTime == i); + assert(heapLeft(cache_index) >= assoc); + + cacheHeap[cache_index] = i; + processRankIncrease(set, cache_index); + assert(lookupValue(refInfo[set][i].addr) != -1); + } + } +} +void +OptCPU::tick() +{ + // Do opt simulation + + int references = 0; + for (int set = 0; set < numSets; ++set) { + if (!refInfo[set].empty()) { + processSet(set); + } + references += refInfo[set].size(); + } + // exit; + fprintf(stderr,"sys.cpu.misses %d #opt cache misses\n",misses); + fprintf(stderr,"sys.cpu.hits %d #opt cache hits\n", hits); + fprintf(stderr,"sys.cpu.accesses %d #opt cache acceses\n", references); + new SimExitEvent("Finshed Memory Trace"); +} + +void +OptCPU::initTable(Addr addr, RefIndex index) +{ + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + assert(l1_index == addr >> 32); + if (lookupTable[l1_index].size() != (1<<16)) { + lookupTable[l1_index].resize(1<<16); + } + if (lookupTable[l1_index][l2_index].size() != (1<<16)) { + lookupTable[l1_index][l2_index].resize(1<<16, index); + } +} + +OptCPU::TickEvent::TickEvent(OptCPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +void +OptCPU::TickEvent::process() +{ + cpu->tick(); +} + +const char * +OptCPU::TickEvent::description() +{ + return "OptCPU tick event"; +} + + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(OptCPU) + + SimObjectParam<MemTraceReader *> data_trace; + Param<int> size; + Param<int> block_size; +Param<int> assoc; + +END_DECLARE_SIM_OBJECT_PARAMS(OptCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(OptCPU) + + INIT_PARAM_DFLT(data_trace, "memory trace", NULL), + INIT_PARAM(size, "cache size"), + INIT_PARAM(block_size, "block size"), + INIT_PARAM(assoc,"associativity") + +END_INIT_SIM_OBJECT_PARAMS(OptCPU) + +CREATE_SIM_OBJECT(OptCPU) +{ + return new OptCPU(getInstanceName(), + data_trace, + block_size, + size, + assoc); +} + +REGISTER_SIM_OBJECT("OptCPU", OptCPU) diff --git a/cpu/trace/opt_cpu.hh b/cpu/trace/opt_cpu.hh new file mode 100644 index 000000000..847147b3c --- /dev/null +++ b/cpu/trace/opt_cpu.hh @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2004 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a memory trace CPU object for optimal caches. Uses a memory + * trace to access a fully associative cache with optimal replacement. + */ + +#ifndef __OPT_CPU_HH__ +#define __OPT_CPU_HH__ + +#include <vector> + +#include "cpu/base_cpu.hh" +#include "mem/mem_req.hh" // for MemReqPtr +#include "sim/eventq.hh" // for Event + +// Forward Declaration +class MemTraceReader; + +/** + * A CPU object to simulate a fully-associative cache with optimal replacement. + */ +class OptCPU : public BaseCPU +{ + typedef int RefIndex; + + typedef std::vector<RefIndex> L3Table; + typedef std::vector<L3Table> L2Table; + typedef std::vector<L2Table> L1Table; + + /** + * Event to call OptCPU::tick + */ + class TickEvent : public Event + { + private: + /** The associated CPU */ + OptCPU *cpu; + + public: + /** + * Construct this event; + */ + TickEvent(OptCPU *c); + + /** + * Call the tick function. + */ + void process(); + + /** + * Return a string description of this event. + */ + const char *description(); + }; + + TickEvent tickEvent; + + class RefInfo + { + public: + RefIndex nextRefTime; + Addr addr; + }; + + /** Reference Information, per set. */ + std::vector<std::vector<RefInfo> > refInfo; + + /** Lookup table to track blocks in the cache heap */ + L1Table lookupTable; + + /** + * Return the correct value in the lookup table. + */ + RefIndex lookupValue(Addr addr) + { + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + int l3_index = addr & 0xffff; + assert(l1_index == addr >> 32); + return lookupTable[l1_index][l2_index][l3_index]; + } + + /** + * Set the value in the lookup table. + */ + void setValue(Addr addr, RefIndex index) + { + int l1_index = (addr >> 32) & 0x0f; + int l2_index = (addr >> 16) & 0xffff; + int l3_index = addr & 0xffff; + assert(l1_index == addr >> 32); + lookupTable[l1_index][l2_index][l3_index]=index; + } + + /** + * Initialize the lookup table to the given value. 
+ */ + void initTable(Addr addr, RefIndex index); + + void heapSwap(int set, int a, int b) { + RefIndex tmp = cacheHeap[a]; + cacheHeap[a] = cacheHeap[b]; + cacheHeap[b] = tmp; + + setValue(refInfo[set][cacheHeap[a]].addr, a); + setValue(refInfo[set][cacheHeap[b]].addr, b); + } + + int heapLeft(int index) { return index + index + 1; } + int heapRight(int index) { return index + index + 2; } + int heapParent(int index) { return (index - 1) >> 1; } + + RefIndex heapRank(int set, int index) { + return refInfo[set][cacheHeap[index]].nextRefTime; + } + + void heapify(int set, int start){ + int left = heapLeft(start); + int right = heapRight(start); + int max = start; + if (left < assoc && heapRank(set, left) > heapRank(set, start)) { + max = left; + } + if (right < assoc && heapRank(set, right) > heapRank(set, max)) { + max = right; + } + + if (max != start) { + heapSwap(set, start, max); + heapify(set, max); + } + } + + void verifyHeap(int set, int start) { + int left = heapLeft(start); + int right = heapRight(start); + + if (left < assoc) { + assert(heapRank(set, start) >= heapRank(set, left)); + verifyHeap(set, left); + } + if (right < assoc) { + assert(heapRank(set, start) >= heapRank(set, right)); + verifyHeap(set, right); + } + } + + void processRankIncrease(int set, int start) { + int parent = heapParent(start); + while (start > 0 && heapRank(set,parent) < heapRank(set,start)) { + heapSwap(set, parent, start); + start = parent; + parent = heapParent(start); + } + } + + void processSet(int set); + + static const RefIndex InfiniteRef = 0x7fffffff; + + /** Memory reference trace. */ + MemTraceReader *trace; + + /** Cache heap for replacement. */ + std::vector<RefIndex> cacheHeap; + + /** The number of blocks in the cache. */ + const int numBlks; + + const int assoc; + const int numSets; + const int setMask; + + + int misses; + int hits; + + public: + /** + * Construct a OptCPU object. + */ + OptCPU(const std::string &name, + MemTraceReader *_trace, + int block_size, + int cache_size, + int assoc); + + /** + * Perform the optimal replacement simulation. + */ + void tick(); +}; + +#endif diff --git a/cpu/trace/reader/itx_reader.cc b/cpu/trace/reader/itx_reader.cc index 593d383ec..006fcc9dd 100644 --- a/cpu/trace/reader/itx_reader.cc +++ b/cpu/trace/reader/itx_reader.cc @@ -102,6 +102,7 @@ ITXReader::getNextReq(MemReqPtr &req) } else { codePhysAddr += tmp_req->size; } + assert(tmp_req->paddr >> 36 == 0); } else { codePhysValid = false; } @@ -130,12 +131,13 @@ ITXReader::getNextReq(MemReqPtr &req) // Get the page offset from the virtual address. 
tmp_req->paddr = tmp_req->vaddr & 0xfff; tmp_req->paddr |= (c & 0xf0) << 8; + tmp_req->paddr |= (Addr)(c & 0x0f) << 32; for (int i = 2; i < 4; ++i) { c = getc(trace); if (c == EOF) { fatal("Unexpected end of trace file."); } - tmp_req->paddr |= (c & 0xff) << (8 * i); + tmp_req->paddr |= (Addr)(c & 0xff) << (8 * i); } if (type == ITXCode) { if (((tmp_req->paddr & 0xfff) + tmp_req->size) @@ -148,6 +150,7 @@ ITXReader::getNextReq(MemReqPtr &req) codePhysValid = true; } } + assert(tmp_req->paddr >> 36 == 0); } else if (type == ITXCode) { codePhysValid = false; } @@ -158,8 +161,12 @@ ITXReader::getNextReq(MemReqPtr &req) case ITXWrite: tmp_req->cmd = Write; break; + case ITXWriteback: + tmp_req->cmd = Writeback; + break; case ITXCode: tmp_req->cmd = Read; + tmp_req->flags |= INST_READ; break; default: fatal("Unknown ITX type"); @@ -173,6 +180,7 @@ ITXReader::getNextReq(MemReqPtr &req) } } while (!phys_val); req = tmp_req; + assert(!req || (req->paddr >> 36) == 0); return 0; } diff --git a/cpu/trace/reader/itx_reader.hh b/cpu/trace/reader/itx_reader.hh index 0e08d5db5..d45a16a69 100644 --- a/cpu/trace/reader/itx_reader.hh +++ b/cpu/trace/reader/itx_reader.hh @@ -35,6 +35,7 @@ #define __ITX_READER_HH__ #include <stdio.h> +#include <string> #include "cpu/trace/reader/mem_trace_reader.hh" #include "mem/mem_req.hh" diff --git a/cpu/trace/trace_cpu.cc b/cpu/trace/trace_cpu.cc index 94f311d4b..f1160337a 100644 --- a/cpu/trace/trace_cpu.cc +++ b/cpu/trace/trace_cpu.cc @@ -46,23 +46,13 @@ using namespace std; TraceCPU::TraceCPU(const string &name, MemInterface *icache_interface, MemInterface *dcache_interface, - MemTraceReader *inst_trace, - MemTraceReader *data_trace, - int icache_ports, - int dcache_ports) - : BaseCPU(name, 4), icacheInterface(icache_interface), - dcacheInterface(dcache_interface), instTrace(inst_trace), - dataTrace(data_trace), icachePorts(icache_ports), - dcachePorts(dcache_ports), outstandingRequests(0), tickEvent(this) + MemTraceReader *data_trace) + : BaseCPU(name, 4, true), icacheInterface(icache_interface), + dcacheInterface(dcache_interface), + dataTrace(data_trace), outstandingRequests(0), tickEvent(this) { - if (instTrace) { - assert(icacheInterface); - nextInstCycle = instTrace->getNextReq(nextInstReq); - } - if (dataTrace) { - assert(dcacheInterface); - nextDataCycle = dataTrace->getNextReq(nextDataReq); - } + assert(dcacheInterface); + nextCycle = dataTrace->getNextReq(nextReq); tickEvent.schedule(0); } @@ -74,41 +64,46 @@ TraceCPU::tick() int instReqs = 0; int dataReqs = 0; - // Do data first to match tracing with FullCPU dumps - - while (nextDataReq && (dataReqs < dcachePorts) && - curTick >= nextDataCycle) { - assert(nextDataReq->thread_num < 4 && "Not enough threads"); - if (dcacheInterface->isBlocked()) - break; - - ++dataReqs; - nextDataReq->time = curTick; - nextDataReq->completionEvent = - new TraceCompleteEvent(nextDataReq, this); - dcacheInterface->access(nextDataReq); - nextDataCycle = dataTrace->getNextReq(nextDataReq); - } - - while (nextInstReq && (instReqs < icachePorts) && - curTick >= nextInstCycle) { - assert(nextInstReq->thread_num < 4 && "Not enough threads"); - if (icacheInterface->isBlocked()) - break; - - nextInstReq->time = curTick; - if (nextInstReq->cmd == Squash) { - icacheInterface->squash(nextInstReq->asid); + while (nextReq && curTick >= nextCycle) { + assert(nextReq->thread_num < 4 && "Not enough threads"); + if (nextReq->isInstRead() && icacheInterface) { + if (icacheInterface->isBlocked()) + break; + + nextReq->time = curTick; + if 
(nextReq->cmd == Squash) { + icacheInterface->squash(nextReq->asid); + } else { + ++instReqs; + if (icacheInterface->doEvents()) { + nextReq->completionEvent = + new TraceCompleteEvent(nextReq, this); + icacheInterface->access(nextReq); + } else { + icacheInterface->access(nextReq); + completeRequest(nextReq); + } + } } else { - ++instReqs; - nextInstReq->completionEvent = - new TraceCompleteEvent(nextInstReq, this); - icacheInterface->access(nextInstReq); + if (dcacheInterface->isBlocked()) + break; + + ++dataReqs; + nextReq->time = curTick; + if (dcacheInterface->doEvents()) { + nextReq->completionEvent = + new TraceCompleteEvent(nextReq, this); + dcacheInterface->access(nextReq); + } else { + dcacheInterface->access(nextReq); + completeRequest(nextReq); + } + } - nextInstCycle = instTrace->getNextReq(nextInstReq); + nextCycle = dataTrace->getNextReq(nextReq); } - if (!nextInstReq && !nextDataReq) { + if (!nextReq) { // No more requests to send. Finish trailing events and exit. if (mainEventQueue.empty()) { new SimExitEvent("Finshed Memory Trace"); @@ -116,8 +111,7 @@ TraceCPU::tick() tickEvent.schedule(mainEventQueue.nextEventTime() + 1); } } else { - tickEvent.schedule(max(curTick + 1, - min(nextInstCycle, nextDataCycle))); + tickEvent.schedule(max(curTick + 1, nextCycle)); } } @@ -161,10 +155,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TraceCPU) SimObjectParam<BaseMem *> icache; SimObjectParam<BaseMem *> dcache; - SimObjectParam<MemTraceReader *> inst_trace; SimObjectParam<MemTraceReader *> data_trace; - Param<int> inst_ports; - Param<int> data_ports; END_DECLARE_SIM_OBJECT_PARAMS(TraceCPU) @@ -172,10 +163,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TraceCPU) INIT_PARAM_DFLT(icache, "instruction cache", NULL), INIT_PARAM_DFLT(dcache, "data cache", NULL), - INIT_PARAM_DFLT(inst_trace, "instruction trace", NULL), - INIT_PARAM_DFLT(data_trace, "data trace", NULL), - INIT_PARAM_DFLT(inst_ports, "instruction cache read ports", 4), - INIT_PARAM_DFLT(data_ports, "data cache read/write ports", 4) + INIT_PARAM_DFLT(data_trace, "data trace", NULL) END_INIT_SIM_OBJECT_PARAMS(TraceCPU) @@ -184,7 +172,7 @@ CREATE_SIM_OBJECT(TraceCPU) return new TraceCPU(getInstanceName(), (icache) ? icache->getInterface() : NULL, (dcache) ? dcache->getInterface() : NULL, - inst_trace, data_trace, inst_ports, data_ports); + data_trace); } REGISTER_SIM_OBJECT("TraceCPU", TraceCPU) diff --git a/cpu/trace/trace_cpu.hh b/cpu/trace/trace_cpu.hh index 6f3ef50a6..1711646a8 100644 --- a/cpu/trace/trace_cpu.hh +++ b/cpu/trace/trace_cpu.hh @@ -55,28 +55,17 @@ class TraceCPU : public BaseCPU /** Interface for data trace requests, if any. */ MemInterface *dcacheInterface; - /** Instruction reference trace. */ - MemTraceReader *instTrace; /** Data reference trace. */ MemTraceReader *dataTrace; - /** Number of Icache read ports. */ - int icachePorts; - /** Number of Dcache read/write ports. */ - int dcachePorts; - /** Number of outstanding requests. */ int outstandingRequests; - /** Cycle of the next instruction request, 0 if not available. */ - Tick nextInstCycle; - /** Cycle of the next data request, 0 if not available. */ - Tick nextDataCycle; + /** Cycle of the next request, 0 if not available. */ + Tick nextCycle; - /** Next instruction request. */ - MemReqPtr nextInstReq; - /** Next data request. */ - MemReqPtr nextDataReq; + /** Next request. 
*/ + MemReqPtr nextReq; /** * Event to call the TraceCPU::tick @@ -113,10 +102,7 @@ class TraceCPU : public BaseCPU TraceCPU(const std::string &name, MemInterface *icache_interface, MemInterface *dcache_interface, - MemTraceReader *inst_trace, - MemTraceReader *data_trace, - int icache_ports, - int dcache_ports); + MemTraceReader *data_trace); /** * Perform all the accesses for one cycle. |
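For reference, the new cpu/trace/opt_cpu.{cc,hh} above implement Belady's optimal (MIN) replacement: each reference is first annotated with the index of its next use, and on a miss the set evicts the resident block whose next use lies farthest in the future, which OptCPU tracks with a per-set max-heap plus a three-level lookup table keyed on the block address. The C++ sketch below is an assumption, not the patch's code: it reduces the same two passes to standard containers (a lazily cleaned priority_queue instead of the hand-rolled heap, an unordered_map instead of the lookup table) and handles a single fully-associative set.

// Sketch only: Belady's MIN replacement for one set. "blocks" is the
// sequence of referenced block addresses (already shifted by
// log2(block_size) and selected for this set, as OptCPU's constructor
// does); "assoc" is how many blocks the set can hold.
#include <cstdint>
#include <limits>
#include <queue>
#include <unordered_map>
#include <utility>
#include <vector>

typedef uint64_t Addr;

struct OptResult { long hits = 0; long misses = 0; };

OptResult simulateOpt(const std::vector<Addr> &blocks, int assoc)
{
    const long INF = std::numeric_limits<long>::max();
    const long n = static_cast<long>(blocks.size());

    // Pass 1: for each reference, the index of the next reference to the
    // same block (INF if it is never referenced again).
    std::vector<long> nextUse(n);
    std::unordered_map<Addr, long> lastSeen;
    for (long i = n - 1; i >= 0; --i) {
        auto it = lastSeen.find(blocks[i]);
        nextUse[i] = (it == lastSeen.end()) ? INF : it->second;
        lastSeen[blocks[i]] = i;
    }

    // Pass 2: simulate. "resident" maps each cached block to its current
    // next-use index; the max-heap proposes eviction victims and is
    // cleaned lazily when stale entries surface at the top.
    OptResult res;
    std::unordered_map<Addr, long> resident;
    std::priority_queue<std::pair<long, Addr> > victims;

    for (long i = 0; i < n; ++i) {
        const Addr a = blocks[i];
        if (resident.count(a)) {
            ++res.hits;
        } else {
            ++res.misses;
            if (static_cast<int>(resident.size()) == assoc) {
                // Evict the block whose next use is farthest away.
                for (;;) {
                    std::pair<long, Addr> top = victims.top();
                    victims.pop();
                    auto it = resident.find(top.second);
                    if (it != resident.end() && it->second == top.first) {
                        resident.erase(it);
                        break;
                    }
                    // Otherwise the entry was stale; keep popping.
                }
            }
        }
        resident[a] = nextUse[i];                 // (re)insert with fresh key
        victims.push(std::make_pair(nextUse[i], a));
    }
    return res;
}

Counting hits and misses this way corresponds to the sys.cpu.hits / sys.cpu.misses totals that OptCPU::tick() prints for all sets before scheduling its SimExitEvent.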