author:    Gabe Black <gblack@eecs.umich.edu>  2012-01-16 04:27:10 -0800
committer: Gabe Black <gblack@eecs.umich.edu>  2012-01-16 04:27:10 -0800
commit:    da2a4acc26ba264c3c4a12495776fd6a1c4fb133
tree:      f142100388b9d1403492c97b0d323728ce18ef8a /src
parent:    241cc0c8402f1b9f2ec20d1cc152d96930959b2a
parent:    a7394ad6807bd5e85f680184bf308673ca00534a
Merge yet again with the main repository.
Diffstat (limited to 'src')
70 files changed, 1235 insertions(+), 759 deletions(-)
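
Of note among the MIPS fixes below is an operator-precedence correction in src/arch/mips/faults.cc: the conditional operator binds more loosely than subtraction in C++, so the old expression "pc.pc() - delay_slot ? sizeof(MachInst) : 0" computed (pc.pc() - delay_slot) first and then chose between the two constants, instead of subtracting the instruction size only for delay-slot faults. The following is a minimal standalone sketch with hypothetical values, not gem5 code, showing why the added parentheses matter:

    // Standalone illustration of the precedence fix; values are hypothetical.
    #include <cassert>
    #include <cstdint>

    int main()
    {
        const uint64_t pc = 0x1000;   // faulting PC
        const uint64_t inst = 4;      // sizeof(MachInst) on MIPS
        const bool delay_slot = true; // fault occurred in a branch delay slot

        // Old form: '-' binds tighter than '?:', so this evaluates as
        // (pc - delay_slot) ? inst : 0, which yields 4 here.
        uint64_t epc_old = pc - delay_slot ? inst : 0;

        // Patched form: subtract the instruction size only when the
        // fault is in a delay slot, so EPC points back at the branch.
        uint64_t epc_new = pc - (delay_slot ? inst : 0);

        assert(epc_old == inst);      // wrong: EPC collapses to 4
        assert(epc_new == pc - inst); // right: 0xffc
        return 0;
    }
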
diff --git a/src/SConscript b/src/SConscript index 0a4bb57f4..7fb03e821 100755 --- a/src/SConscript +++ b/src/SConscript @@ -851,8 +851,8 @@ def makeEnv(label, objsfx, strip = False, **kwargs): swig_env.Append(CCFLAGS='-Wno-uninitialized') swig_env.Append(CCFLAGS='-Wno-sign-compare') swig_env.Append(CCFLAGS='-Wno-parentheses') + swig_env.Append(CCFLAGS='-Wno-unused-label') if compareVersions(env['GCC_VERSION'], '4.6.0') != -1: - swig_env.Append(CCFLAGS='-Wno-unused-label') swig_env.Append(CCFLAGS='-Wno-unused-but-set-variable') werror_env = new_env.Clone() diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa index a157b414c..da2e10886 100644 --- a/src/arch/arm/isa/insts/m5ops.isa +++ b/src/arch/arm/isa/insts/m5ops.isa @@ -190,12 +190,15 @@ let {{ exec_output += PredOpExecute.subst(loadsymbolIop) initparamCode = ''' - Rt = PseudoInst::initParam(xc->tcBase()); + uint64_t ip_val = PseudoInst::initParam(xc->tcBase()); + R0 = bits(ip_val, 31, 0); + R1 = bits(ip_val, 63, 32); ''' initparamIop = InstObjParams("initparam", "Initparam", "PredOp", { "code": initparamCode, - "predicate_test": predicateTest }) + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) header_output += BasicDeclare.subst(initparamIop) decoder_output += BasicConstructor.subst(initparamIop) exec_output += PredOpExecute.subst(initparamIop) diff --git a/src/arch/mips/faults.cc b/src/arch/mips/faults.cc index 524efa178..3076e0afe 100644 --- a/src/arch/mips/faults.cc +++ b/src/arch/mips/faults.cc @@ -29,6 +29,8 @@ * Authors: Gabe Black * Korey Sewell * Jaidev Patwardhan + * Zhengxing Li + * Deyuan Guo */ #include "arch/mips/faults.hh" @@ -118,7 +120,7 @@ MipsFaultBase::setExceptionState(ThreadContext *tc, uint8_t excCode) DPRINTF(MipsPRA, "PC: %s\n", pc); bool delay_slot = pc.pc() + sizeof(MachInst) != pc.npc(); tc->setMiscRegNoEffect(MISCREG_EPC, - pc.pc() - delay_slot ? sizeof(MachInst) : 0); + pc.pc() - (delay_slot ? sizeof(MachInst) : 0)); // Set Cause_EXCCODE field CauseReg cause = tc->readMiscReg(MISCREG_CAUSE); diff --git a/src/arch/mips/faults.hh b/src/arch/mips/faults.hh index bce828ec1..b90c38e99 100644 --- a/src/arch/mips/faults.hh +++ b/src/arch/mips/faults.hh @@ -29,6 +29,8 @@ * Authors: Gabe Black * Korey Sewell * Jaidev Patwardhan + * Zhengxing Li + * Deyuan Guo */ #ifndef __MIPS_FAULTS_HH__ @@ -88,7 +90,7 @@ class MipsFaultBase : public FaultBase virtual FaultVect base(ThreadContext *tc) const { StatusReg status = tc->readMiscReg(MISCREG_STATUS); - if (status.bev) + if (!status.bev) return tc->readMiscReg(MISCREG_EBASE); else return 0xbfc00200; @@ -167,7 +169,7 @@ class CoprocessorUnusableFault : public MipsFault<CoprocessorUnusableFault> if (FullSystem) { CauseReg cause = tc->readMiscReg(MISCREG_CAUSE); cause.ce = coProcID; - tc->setMiscReg(MISCREG_CAUSE, cause); + tc->setMiscRegNoEffect(MISCREG_CAUSE, cause); } } }; @@ -179,7 +181,8 @@ class InterruptFault : public MipsFault<InterruptFault> offset(ThreadContext *tc) const { CauseReg cause = tc->readMiscRegNoEffect(MISCREG_CAUSE); - return cause.iv ? 0x200 : 0x000; + // offset 0x200 for release 2, 0x180 for release 1. + return cause.iv ? 
0x200 : 0x180; } }; @@ -251,9 +254,10 @@ class TlbFault : public AddressFault<T> StaticInstPtr inst = StaticInst::nullStaticInstPtr) { if (FullSystem) { - DPRINTF(MipsPRA, "Fault %s encountered.\n", name()); - tc->pcState(this->vect(tc)); + DPRINTF(MipsPRA, "Fault %s encountered.\n", this->name()); + Addr vect = this->vect(tc); setTlbExceptionState(tc, this->code()); + tc->pcState(vect); } else { AddressFault<T>::invoke(tc, inst); } diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index 193f050de..034133f96 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -1253,7 +1253,7 @@ decode OPCODE_HI default Unknown::unknown() { //When rs=L1 //Note: "1. Format type L is legal only if 64-bit //floating point operations are enabled." - 0x5: decode FUNCTION_HI { + 0x5: decode FUNCTION { format FloatConvertOp { 0x20: cvt_s_l({{ val = Fs_ud; }}, ToSingle); 0x21: cvt_d_l({{ val = Fs_ud; }}, ToDouble); diff --git a/src/arch/mips/linux/process.cc b/src/arch/mips/linux/process.cc index 156d4ea05..0982e05cb 100644 --- a/src/arch/mips/linux/process.cc +++ b/src/arch/mips/linux/process.cc @@ -55,7 +55,7 @@ unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process, strcpy(name->sysname, "Linux"); strcpy(name->nodename,"m5.eecs.umich.edu"); - strcpy(name->release, "2.4.20"); + strcpy(name->release, "2.6.35"); strcpy(name->version, "#1 Mon Aug 18 11:32:15 EDT 2003"); strcpy(name->machine, "mips"); diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh index dce7858bf..d3cf1650d 100644 --- a/src/arch/mips/registers.hh +++ b/src/arch/mips/registers.hh @@ -55,7 +55,7 @@ const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; //HI & LO Regs const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;// const uint32_t MIPS32_QNAN = 0x7fbfffff; -const uint64_t MIPS64_QNAN = ULL(0x7fbfffffffffffff); +const uint64_t MIPS64_QNAN = ULL(0x7ff7ffffffffffff); enum FPControlRegNums { FLOATREG_FIR = NumFloatArchRegs, diff --git a/src/arch/mips/tlb.cc b/src/arch/mips/tlb.cc index 057fb5e76..d28ef8231 100644 --- a/src/arch/mips/tlb.cc +++ b/src/arch/mips/tlb.cc @@ -29,6 +29,8 @@ * Authors: Nathan Binkert * Steve Reinhardt * Jaidev Patwardhan + * Zhengxing Li + * Deyuan Guo */ #include <string> @@ -310,18 +312,6 @@ Fault TLB::translateData(RequestPtr req, ThreadContext *tc, bool write) { if (!FullSystem) { - //@TODO: This should actually use TLB instead of going directly - // to the page table in syscall mode. 
- /** - * Check for alignment faults - */ - if (req->getVaddr() & (req->getSize() - 1)) { - DPRINTF(TLB, "Alignment Fault on %#x, size = %d", req->getVaddr(), - req->getSize()); - return new AddressErrorFault(req->getVaddr(), write); - } - - Process * p = tc->getProcessPtr(); Fault fault = p->pTable->translate(req); diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py b/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py index c034f8a48..0465b3447 100644 --- a/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py +++ b/src/arch/x86/isa/insts/general_purpose/input_output/general_io.py @@ -42,26 +42,34 @@ microcode = ''' def macroop IN_R_I { .adjust_imm trimImm(8) limm t1, imm, dataSize=asz + mfence ld reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \ nonSpec=True + mfence }; def macroop IN_R_R { zexti t2, regm, 15, dataSize=8 + mfence ld reg, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \ nonSpec=True + mfence }; def macroop OUT_I_R { .adjust_imm trimImm(8) limm t1, imm, dataSize=8 + mfence st reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \ nonSpec=True + mfence }; def macroop OUT_R_R { zexti t2, reg, 15, dataSize=8 + mfence st regm, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \ nonSpec=True + mfence }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py b/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py index 3c90ee7e7..044e57edc 100644 --- a/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py +++ b/src/arch/x86/isa/insts/general_purpose/input_output/string_io.py @@ -45,9 +45,11 @@ def macroop INS_M_R { zexti t2, reg, 15, dataSize=8 + mfence ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \ nonSpec=True st t6, es, [1, t0, rdi] + mfence add rdi, rdi, t3, dataSize=asz }; @@ -63,6 +65,7 @@ def macroop INS_E_M_R { zexti t2, reg, 15, dataSize=8 + mfence topOfLoop: ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \ nonSpec=True @@ -72,6 +75,7 @@ topOfLoop: add rdi, rdi, t3, dataSize=asz br label("topOfLoop"), flags=(nCEZF,) end: + mfence fault "NoFault" }; @@ -84,9 +88,11 @@ def macroop OUTS_R_M { zexti t2, reg, 15, dataSize=8 + mfence ld t6, ds, [1, t0, rsi] st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \ nonSpec=True + mfence add rsi, rsi, t3, dataSize=asz }; @@ -102,6 +108,7 @@ def macroop OUTS_E_R_M { zexti t2, reg, 15, dataSize=8 + mfence topOfLoop: ld t6, ds, [1, t0, rsi] st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \ @@ -111,6 +118,7 @@ topOfLoop: add rsi, rsi, t3, dataSize=asz br label("topOfLoop"), flags=(nCEZF,) end: + mfence fault "NoFault" }; ''' diff --git a/src/base/hostinfo.cc b/src/base/hostinfo.cc index 5ff34e603..857ccfa7f 100644 --- a/src/base/hostinfo.cc +++ b/src/base/hostinfo.cc @@ -30,6 +30,12 @@ #include <unistd.h> +#ifdef __APPLE__ +#include <mach/mach_init.h> +#include <mach/shared_region.h> +#include <mach/task.h> +#endif + #include <cctype> #include <cerrno> #include <cmath> @@ -82,7 +88,31 @@ procInfo(const char *filename, const char *target) } if (fp) - fclose(fp); + fclose(fp); return 0; } + +uint64_t +memUsage() +{ +// For the Mach-based Darwin kernel, use the task_info of the self task +#ifdef __APPLE__ + struct task_basic_info t_info; + mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; + + if (KERN_SUCCESS != task_info(mach_task_self(), + TASK_BASIC_INFO, (task_info_t)&t_info, + &t_info_count)) { + 
return 0; + } + + // Mimic Darwin's implementation of top and subtract + // SHARED_REGION_SIZE from the tasks virtual size to account for the + // shared memory submap that is incorporated into every process. + return (t_info.virtual_size - SHARED_REGION_SIZE) / 1024; +#else + // Linux implementation + return procInfo("/proc/self/status", "VmSize:"); +#endif +} diff --git a/src/base/hostinfo.hh b/src/base/hostinfo.hh index ac7d40f13..d9a30481a 100644 --- a/src/base/hostinfo.hh +++ b/src/base/hostinfo.hh @@ -39,7 +39,11 @@ std::string &hostname(); uint64_t procInfo(const char *filename, const char *target); -inline uint64_t memUsage() -{ return procInfo("/proc/self/status", "VmSize:"); } +/** + * Determine the simulator process' total virtual memory usage. + * + * @return virtual memory usage in kilobytes + */ +uint64_t memUsage(); #endif // __HOSTINFO_HH__ diff --git a/src/base/random.cc b/src/base/random.cc index 457b0c98b..cffeddec9 100644 --- a/src/base/random.cc +++ b/src/base/random.cc @@ -29,6 +29,7 @@ * Ali Saidi */ +#include <limits> #include "base/fenv.hh" #include "base/intmath.hh" #include "base/misc.hh" @@ -67,7 +68,10 @@ Random::genrand(uint32_t max) { if (max == 0) return 0; - int log = ceilLog2(max) + 1; + if (max == std::numeric_limits<uint32_t>::max()) + return genrand(); + + int log = ceilLog2(max + 1); int shift = (sizeof(uint32_t) * 8 - log); uint32_t random; @@ -83,7 +87,10 @@ Random::genrand(uint64_t max) { if (max == 0) return 0; - int log = ceilLog2(max) + 1; + if (max == std::numeric_limits<uint64_t>::max()) + return genrand(); + + int log = ceilLog2(max + 1); int shift = (sizeof(uint64_t) * 8 - log); uint64_t random; diff --git a/src/base/statistics.hh b/src/base/statistics.hh index d98c79414..1f8a59326 100644 --- a/src/base/statistics.hh +++ b/src/base/statistics.hh @@ -1477,6 +1477,8 @@ class HistStor /** The current sum. */ Counter sum; + /** The sum of logarithm of each sample, used to compute geometric mean. */ + Counter logs; /** The sum of squares. */ Counter squares; /** The number of samples. */ @@ -1528,6 +1530,7 @@ class HistStor sum += val * number; squares += val * val * number; + logs += log(val) * number; samples += number; } @@ -1567,6 +1570,7 @@ class HistStor data.cvec[i] = cvec[i]; data.sum = sum; + data.logs = logs; data.squares = squares; data.samples = samples; } @@ -1589,6 +1593,7 @@ class HistStor sum = Counter(); squares = Counter(); samples = Counter(); + logs = Counter(); } }; diff --git a/src/base/stats/info.hh b/src/base/stats/info.hh index 2c5b44a38..98e811747 100644 --- a/src/base/stats/info.hh +++ b/src/base/stats/info.hh @@ -183,6 +183,7 @@ struct DistData VCounter cvec; Counter sum; Counter squares; + Counter logs; Counter samples; }; diff --git a/src/base/stats/text.cc b/src/base/stats/text.cc index 683ba7fe4..8fb49dc59 100644 --- a/src/base/stats/text.cc +++ b/src/base/stats/text.cc @@ -367,6 +367,12 @@ DistPrint::operator()(ostream &stream) const print.value = data.samples ? data.sum / data.samples : NAN; print(stream); + if (data.type == Hist) { + print.name = base + "gmean"; + print.value = data.samples ? 
exp(data.logs / data.samples) : NAN; + print(stream); + } + Result stdev = NAN; if (data.samples) stdev = sqrt((data.samples * data.squares - data.sum * data.sum) / @@ -507,7 +513,14 @@ Text::visit(const Vector2dInfo &info) bool havesub = false; VectorPrint print; - print.subnames = info.y_subnames; + if (!info.y_subnames.empty()) { + for (off_type i = 0; i < info.y; ++i) { + if (!info.y_subnames[i].empty()) { + print.subnames = info.y_subnames; + } + break; + } + } print.flags = info.flags; print.separatorString = info.separatorString; print.descriptions = descriptions; diff --git a/src/cpu/base.hh b/src/cpu/base.hh index cf647daaa..a4ffb4716 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -284,17 +284,16 @@ class BaseCPU : public MemObject void enableFunctionTrace(); void traceFunctionsInternal(Addr pc); - protected: + private: + static std::vector<BaseCPU *> cpuList; //!< Static global cpu list + + public: void traceFunctions(Addr pc) { if (functionTracingEnabled) traceFunctionsInternal(pc); } - private: - static std::vector<BaseCPU *> cpuList; //!< Static global cpu list - - public: static int numSimulatedCPUs() { return cpuList.size(); } static Counter numSimulatedInstructions() { diff --git a/src/cpu/inorder/resources/mult_div_unit.cc b/src/cpu/inorder/resources/mult_div_unit.cc index 0ff05252c..ab0081787 100644 --- a/src/cpu/inorder/resources/mult_div_unit.cc +++ b/src/cpu/inorder/resources/mult_div_unit.cc @@ -299,6 +299,7 @@ MultDivUnit::exeMulDiv(int slot_num) } mult_div_req->setProcessing(false); + cpu->wakeCPU(); } void diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index c481d7b9c..b6a4c0387 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -51,6 +51,7 @@ #include "config/use_checker.hh" #include "cpu/o3/commit.hh" #include "cpu/o3/thread_state.hh" +#include "cpu/base.hh" #include "cpu/exetrace.hh" #include "cpu/timebuf.hh" #include "debug/Activity.hh" @@ -987,6 +988,8 @@ DefaultCommit<Impl>::commitInsts() // Updates misc. registers. head_inst->updateMiscRegs(); + cpu->traceFunctions(pc[tid].instAddr()); + TheISA::advancePC(pc[tid], head_inst->staticInst); // Keep track of the last sequence number commited diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 3def971e9..985e92826 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -446,10 +446,6 @@ void DefaultDecode<Impl>::sortInsts() { int insts_from_fetch = fromFetch->size; -#ifdef DEBUG - for (ThreadID tid = 0; tid < numThreads; tid++) - assert(insts[tid].empty()); -#endif for (int i = 0; i < insts_from_fetch; ++i) { insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]); } diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 9c4b1068d..92c8875e4 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1340,10 +1340,10 @@ DefaultIEW<Impl>::executeInsts() fetchRedirect[tid] = true; DPRINTF(IEW, "Execute: Branch mispredict detected.\n"); - DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n", - inst->predInstAddr(), inst->predNextInstAddr()); + DPRINTF(IEW, "Predicted target was PC: %s.\n", + inst->readPredTarg()); DPRINTF(IEW, "Execute: Redirecting fetch to PC: %s.\n", - inst->pcState(), inst->nextInstAddr()); + inst->pcState()); // If incorrect, then signal the ROB that it must be squashed. 
squashDueToBranch(inst, tid); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 4106bbef9..04935604e 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -766,10 +766,6 @@ void DefaultRename<Impl>::sortInsts() { int insts_from_decode = fromDecode->size; -#ifdef DEBUG - for (ThreadID tid = 0; tid < numThreads; tid++) - assert(insts[tid].empty()); -#endif for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; insts[inst->threadNumber].push_back(inst); diff --git a/src/cpu/pc_event.cc b/src/cpu/pc_event.cc index a07d787f0..9cf63f7d3 100644 --- a/src/cpu/pc_event.cc +++ b/src/cpu/pc_event.cc @@ -83,7 +83,9 @@ PCEventQueue::schedule(PCEvent *event) bool PCEventQueue::doService(ThreadContext *tc) { - Addr pc = tc->instAddr() & ~0x3; + // This will fail to break on Alpha PALcode addresses, but that is + // a rare use case. + Addr pc = tc->instAddr(); int serviced = 0; range_t range = equal_range(pc); for (iterator i = range.first; i != range.second; ++i) { @@ -91,7 +93,7 @@ PCEventQueue::doService(ThreadContext *tc) // another event. This for example, prevents two invocations // of the SkipFuncEvent. Maybe we should have separate PC // event queues for each processor? - if (pc != (tc->instAddr() & ~0x3)) + if (pc != tc->instAddr()) continue; DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n", diff --git a/src/cpu/testers/directedtest/DirectedGenerator.hh b/src/cpu/testers/directedtest/DirectedGenerator.hh index 904dcf399..c156efff0 100644 --- a/src/cpu/testers/directedtest/DirectedGenerator.hh +++ b/src/cpu/testers/directedtest/DirectedGenerator.hh @@ -43,7 +43,7 @@ class DirectedGenerator : public SimObject virtual ~DirectedGenerator() {} virtual bool initiate() = 0; - virtual void performCallback(uint proc, Addr address) = 0; + virtual void performCallback(uint32_t proc, Addr address) = 0; void setDirectedTester(RubyDirectedTester* directed_tester); diff --git a/src/cpu/testers/directedtest/InvalidateGenerator.cc b/src/cpu/testers/directedtest/InvalidateGenerator.cc index 902c6cc15..4d8271a05 100644 --- a/src/cpu/testers/directedtest/InvalidateGenerator.cc +++ b/src/cpu/testers/directedtest/InvalidateGenerator.cc @@ -103,7 +103,7 @@ InvalidateGenerator::initiate() } void -InvalidateGenerator::performCallback(uint proc, Addr address) +InvalidateGenerator::performCallback(uint32_t proc, Addr address) { assert(m_address == address); diff --git a/src/cpu/testers/directedtest/InvalidateGenerator.hh b/src/cpu/testers/directedtest/InvalidateGenerator.hh index 14c47b70b..50db180e3 100644 --- a/src/cpu/testers/directedtest/InvalidateGenerator.hh +++ b/src/cpu/testers/directedtest/InvalidateGenerator.hh @@ -49,14 +49,14 @@ class InvalidateGenerator : public DirectedGenerator ~InvalidateGenerator(); bool initiate(); - void performCallback(uint proc, Addr address); + void performCallback(uint32_t proc, Addr address); private: InvalidateGeneratorStatus m_status; Addr m_address; - uint m_active_read_node; - uint m_active_inv_node; - uint m_addr_increment_size; + uint32_t m_active_read_node; + uint32_t m_active_inv_node; + uint32_t m_addr_increment_size; }; #endif //__CPU_DIRECTEDTEST_INVALIDATEGENERATOR_HH__ diff --git a/src/cpu/testers/directedtest/RubyDirectedTester.hh b/src/cpu/testers/directedtest/RubyDirectedTester.hh index 163c206d8..53c389692 100644 --- a/src/cpu/testers/directedtest/RubyDirectedTester.hh +++ b/src/cpu/testers/directedtest/RubyDirectedTester.hh @@ -53,11 +53,11 @@ class RubyDirectedTester : public 
MemObject RubyDirectedTester *tester; public: - CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint _idx) + CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint32_t _idx) : SimpleTimingPort(_name, _tester), tester(_tester), idx(_idx) {} - uint idx; + uint32_t idx; protected: virtual bool recvTiming(PacketPtr pkt); diff --git a/src/cpu/testers/directedtest/SeriesRequestGenerator.cc b/src/cpu/testers/directedtest/SeriesRequestGenerator.cc index 43e140178..4cf9aed1c 100644 --- a/src/cpu/testers/directedtest/SeriesRequestGenerator.cc +++ b/src/cpu/testers/directedtest/SeriesRequestGenerator.cc @@ -89,7 +89,7 @@ SeriesRequestGenerator::initiate() } void -SeriesRequestGenerator::performCallback(uint proc, Addr address) +SeriesRequestGenerator::performCallback(uint32_t proc, Addr address) { assert(m_active_node == proc); assert(m_address == address); diff --git a/src/cpu/testers/directedtest/SeriesRequestGenerator.hh b/src/cpu/testers/directedtest/SeriesRequestGenerator.hh index 97b632a12..9b1c3e8ba 100644 --- a/src/cpu/testers/directedtest/SeriesRequestGenerator.hh +++ b/src/cpu/testers/directedtest/SeriesRequestGenerator.hh @@ -49,13 +49,13 @@ class SeriesRequestGenerator : public DirectedGenerator ~SeriesRequestGenerator(); bool initiate(); - void performCallback(uint proc, Addr address); + void performCallback(uint32_t proc, Addr address); private: SeriesRequestGeneratorStatus m_status; Addr m_address; - uint m_active_node; - uint m_addr_increment_size; + uint32_t m_active_node; + uint32_t m_addr_increment_size; bool m_issue_writes; }; diff --git a/src/dev/arm/pl111.cc b/src/dev/arm/pl111.cc index e13045338..958f07aa7 100644 --- a/src/dev/arm/pl111.cc +++ b/src/dev/arm/pl111.cc @@ -67,7 +67,7 @@ Pl111::Pl111(const Params *p) { pioSize = 0xFFFF; - pic = simout.create("framebuffer.bmp", true); + pic = simout.create(csprintf("%s.framebuffer.bmp", sys->name()), true); dmaBuffer = new uint8_t[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)]; diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index f124767ca..0ad53f09e 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -71,7 +71,7 @@ void PioDevice::init() { if (!pioPort) - panic("Pio port %s not connected to anything!", name()); + panic("Pio port of %s not connected to anything!", name()); pioPort->sendStatusChange(Port::RangeChange); } diff --git a/src/mem/SConscript b/src/mem/SConscript index 2aa7d0323..8418a4f51 100644 --- a/src/mem/SConscript +++ b/src/mem/SConscript @@ -59,6 +59,7 @@ DebugFlag('MemoryAccess') DebugFlag('ProtocolTrace') DebugFlag('RubyCache') +DebugFlag('RubyCacheTrace') DebugFlag('RubyDma') DebugFlag('RubyGenerated') DebugFlag('RubyMemory') @@ -67,9 +68,9 @@ DebugFlag('RubyPort') DebugFlag('RubyQueue') DebugFlag('RubySequencer') DebugFlag('RubySlicc') -DebugFlag('RubyStorebuffer') +DebugFlag('RubySystem') DebugFlag('RubyTester') CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester', - 'RubyGenerated', 'RubySlicc', 'RubyStorebuffer', 'RubyCache', - 'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer']) + 'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache', + 'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer', 'RubyCacheTrace']) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 69b14547e..db71b86b7 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -447,13 +447,6 @@ Bus::recvAtomic(PacketPtr pkt) void Bus::recvFunctional(PacketPtr pkt) { - if (!pkt->isPrint()) { - // don't do DPRINTFs on PrintReq as it clutters up the output - DPRINTF(Bus, - "recvFunctional: 
packet src %d dest %d addr 0x%x cmd %s\n", - pkt->getSrc(), pkt->getDest(), pkt->getAddr(), - pkt->cmdString()); - } assert(pkt->getDest() == Packet::Broadcast); int port_id = findPort(pkt->getAddr()); @@ -462,6 +455,14 @@ Bus::recvFunctional(PacketPtr pkt) // id after each int src_id = pkt->getSrc(); + if (!pkt->isPrint()) { + // don't do DPRINTFs on PrintReq as it clutters up the output + DPRINTF(Bus, + "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n", + src_id, port_id, pkt->getAddr(), + pkt->cmdString()); + } + assert(pkt->isRequest()); // hasn't already been satisfied SnoopIter s_end = snoopPorts.end(); diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 5ec977ed4..64f4fcd14 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2006 The Regents of The University of Michigan * Copyright (c) 2010 Advanced Micro Devices, Inc. * All rights reserved. @@ -192,14 +204,98 @@ Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data) memcpy(getPtr<uint8_t>(), data + offset, getSize()); return true; } else { - // In this case the timing packet only partially satisfies - // the request, so we would need more information to make - // this work. Like bytes valid in the packet or - // something, so the request could continue and get this - // bit of possibly newer data along with the older data - // not written to yet. - panic("Memory value only partially satisfies the functional " - "request. Now what?"); + // Offsets and sizes to copy in case of partial overlap + int func_offset; + int val_offset; + int overlap_size; + + // calculate offsets and copy sizes for the two byte arrays + if (val_start < func_start && val_end <= func_end) { + val_offset = func_start - val_start; + func_offset = 0; + overlap_size = val_end - func_start; + } else if (val_start >= func_start && val_end > func_end) { + val_offset = 0; + func_offset = val_start - func_start; + overlap_size = func_end - val_start; + } else if (val_start >= func_start && val_end <= func_end) { + val_offset = 0; + func_offset = val_start - func_start; + overlap_size = size; + } else { + panic("BUG: Missed a case for a partial functional request"); + } + + // Figure out how much of the partial overlap should be copied + // into the packet and not overwrite previously found bytes. + if (bytesValidStart == 0 && bytesValidEnd == 0) { + // No bytes have been copied yet, just set indices + // to found range + bytesValidStart = func_offset; + bytesValidEnd = func_offset + overlap_size; + } else { + // Some bytes have already been copied. Use bytesValid + // indices and offset values to figure out how much data + // to copy and where to copy it to. 
+ + // Indice overlap conditions to check + int a = func_offset - bytesValidStart; + int b = (func_offset + overlap_size) - bytesValidEnd; + int c = func_offset - bytesValidEnd; + int d = (func_offset + overlap_size) - bytesValidStart; + + if (a >= 0 && b <= 0) { + // bytes already in pkt data array are superset of + // found bytes, will not copy any bytes + overlap_size = 0; + } else if (a < 0 && d >= 0 && b <= 0) { + // found bytes will move bytesValidStart towards 0 + overlap_size = bytesValidStart - func_offset; + bytesValidStart = func_offset; + } else if (b > 0 && c <= 0 && a >= 0) { + // found bytes will move bytesValidEnd + // towards end of pkt data array + overlap_size = + (func_offset + overlap_size) - bytesValidEnd; + val_offset += bytesValidEnd - func_offset; + func_offset = bytesValidEnd; + bytesValidEnd += overlap_size; + } else if (a < 0 && b > 0) { + // Found bytes are superset of copied range. Will move + // bytesValidStart towards 0 and bytesValidEnd towards + // end of pkt data array. Need to break copy into two + // pieces so as to not overwrite previously found data. + + // copy the first half + uint8_t *dest = getPtr<uint8_t>() + func_offset; + uint8_t *src = data + val_offset; + memcpy(dest, src, (bytesValidStart - func_offset)); + + // re-calc the offsets and indices to do the copy + // required for the second half + val_offset += (bytesValidEnd - func_offset); + bytesValidStart = func_offset; + overlap_size = + (func_offset + overlap_size) - bytesValidEnd; + func_offset = bytesValidEnd; + bytesValidEnd += overlap_size; + } else if ((c > 0 && b > 0) + || (a < 0 && d < 0)) { + // region to be copied is discontiguous! Not supported. + panic("BUG: Discontiguous bytes found" + "for functional copying!"); + } + } + assert(bytesValidEnd <= getSize()); + + // copy partial data into the packet's data array + uint8_t *dest = getPtr<uint8_t>() + func_offset; + uint8_t *src = data + val_offset; + memcpy(dest, src, overlap_size); + + // check if we're done filling the functional access + bool done = (bytesValidStart == 0) && (bytesValidEnd == getSize()); + return done; } } else if (isWrite()) { if (offset >= 0) { diff --git a/src/mem/packet.hh b/src/mem/packet.hh index be0c20d42..6347c21ea 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -299,6 +299,13 @@ class Packet : public FastAlloc, public Printable */ MemCmd origCmd; + /** + * These values specify the range of bytes found that satisfy a + * functional read. + */ + uint16_t bytesValidStart; + uint16_t bytesValidEnd; + public: /// Used to calculate latencies for each packet. Tick time; @@ -507,7 +514,8 @@ class Packet : public FastAlloc, public Printable */ Packet(Request *_req, MemCmd _cmd, NodeID _dest) : flags(VALID_DST), cmd(_cmd), req(_req), data(NULL), - dest(_dest), time(curTick()), senderState(NULL) + dest(_dest), bytesValidStart(0), bytesValidEnd(0), + time(curTick()), senderState(NULL) { if (req->hasPaddr()) { addr = req->getPaddr(); @@ -526,7 +534,8 @@ class Packet : public FastAlloc, public Printable */ Packet(Request *_req, MemCmd _cmd, NodeID _dest, int _blkSize) : flags(VALID_DST), cmd(_cmd), req(_req), data(NULL), - dest(_dest), time(curTick()), senderState(NULL) + dest(_dest), bytesValidStart(0), bytesValidEnd(0), + time(curTick()), senderState(NULL) { if (req->hasPaddr()) { addr = req->getPaddr() & ~(_blkSize - 1); @@ -547,6 +556,7 @@ class Packet : public FastAlloc, public Printable : cmd(pkt->cmd), req(pkt->req), data(pkt->flags.isSet(STATIC_DATA) ? 
pkt->data : NULL), addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest), + bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd), time(curTick()), senderState(pkt->senderState) { if (!clearFlags) @@ -554,6 +564,7 @@ class Packet : public FastAlloc, public Printable flags.set(pkt->flags & (VALID_ADDR|VALID_SIZE|VALID_SRC|VALID_DST)); flags.set(pkt->flags & STATIC_DATA); + } /** diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index b9d355736..ce16a8777 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -1285,7 +1285,6 @@ machine(L1Cache, "AMD Hammer-like protocol") vv_allocateL2CacheBlock; hp_copyFromTBEToL2; s_deallocateTBE; - ka_wakeUpAllDependents; } transition(I, Trigger_L2_to_L1D, IT) { @@ -1566,7 +1565,7 @@ machine(L1Cache, "AMD Hammer-like protocol") k_popMandatoryQueue; } - transition({MM, M, MMR}, Flush_line, MM_F) { + transition({MM, M, MMR, MR}, Flush_line, MM_F) { i_allocateTBE; bf_issueGETF; p_decrementNumberOfMessagesByOne; diff --git a/src/mem/ruby/buffers/MessageBuffer.cc b/src/mem/ruby/buffers/MessageBuffer.cc index cab98cee9..9a7fdb61b 100644 --- a/src/mem/ruby/buffers/MessageBuffer.cc +++ b/src/mem/ruby/buffers/MessageBuffer.cc @@ -198,7 +198,11 @@ MessageBuffer::enqueue(MsgPtr message, Time delta) m_last_arrival_time * g_eventQueue_ptr->getClock()); } } - m_last_arrival_time = arrival_time; + + // If running a cache trace, don't worry about the last arrival checks + if (!g_system_ptr->m_warmup_enabled) { + m_last_arrival_time = arrival_time; + } // compute the delay cycles and set enqueue time Message* msg_ptr = message.get(); diff --git a/src/mem/ruby/eventqueue/RubyEventQueue.hh b/src/mem/ruby/eventqueue/RubyEventQueue.hh index 20b44362a..67fe6131b 100644 --- a/src/mem/ruby/eventqueue/RubyEventQueue.hh +++ b/src/mem/ruby/eventqueue/RubyEventQueue.hh @@ -58,7 +58,6 @@ #include <iostream> -#include "config/no_vector_bounds_checks.hh" #include "mem/ruby/common/TypeDefines.hh" #include "sim/eventq.hh" @@ -77,9 +76,6 @@ class RubyEventQueue : public EventManager void scheduleEventAbsolute(Consumer* consumer, Time timeAbs); void print(std::ostream& out) const; - void triggerEvents(Time t) { assert(0); } - void triggerAllEvents() { assert(0); } - private: // Private copy constructor and assignment operator RubyEventQueue(const RubyEventQueue& obj); diff --git a/src/mem/ruby/recorder/CacheRecorder.cc b/src/mem/ruby/recorder/CacheRecorder.cc index fc6ad0975..8b724859e 100644 --- a/src/mem/ruby/recorder/CacheRecorder.cc +++ b/src/mem/ruby/recorder/CacheRecorder.cc @@ -1,5 +1,6 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood + * Copyright (c) 2010 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,43 +27,154 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <algorithm> - -#include "mem/ruby/eventqueue/RubyEventQueue.hh" +#include "debug/RubyCacheTrace.hh" #include "mem/ruby/recorder/CacheRecorder.hh" -#include "gzstream.hh" +#include "mem/ruby/system/Sequencer.hh" +#include "mem/ruby/system/System.hh" using namespace std; void -CacheRecorder::addRecord(Sequencer* sequencer, const Address& data_addr, - const Address& pc_addr, RubyRequestType type, Time time) +TraceRecord::print(ostream& out) const +{ + out << "[TraceRecord: Node, " << m_cntrl_id << ", " + << m_data_address << ", " << m_pc_address << ", " + << m_type << ", Time: " << m_time << "]"; +} + +CacheRecorder::CacheRecorder() + : m_uncompressed_trace(NULL), + m_uncompressed_trace_size(0) { - TraceRecord rec(sequencer, data_addr, pc_addr, type, time); - m_records.push_back(rec); } -int -CacheRecorder::dumpRecords(string filename) +CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace, + uint64_t uncompressed_trace_size, + std::vector<Sequencer*>& seq_map) + : m_uncompressed_trace(uncompressed_trace), + m_uncompressed_trace_size(uncompressed_trace_size), + m_seq_map(seq_map), m_bytes_read(0), m_records_read(0), + m_records_flushed(0) { - ogzstream out(filename.c_str()); - if (out.fail()) { - cout << "Error: error opening file '" << filename << "'" << endl; - return 0; +} + +CacheRecorder::~CacheRecorder() +{ + if (m_uncompressed_trace != NULL) { + delete m_uncompressed_trace; + m_uncompressed_trace = NULL; } + m_seq_map.clear(); +} - std::sort(m_records.begin(), m_records.end(), greater<TraceRecord>()); +void +CacheRecorder::enqueueNextFlushRequest() +{ + if (m_records_flushed < m_records.size()) { + TraceRecord* rec = m_records[m_records_flushed]; + m_records_flushed++; + Request* req = new Request(rec->m_data_address, + RubySystem::getBlockSizeBytes(),0); + MemCmd::Command requestType = MemCmd::FlushReq; + Packet *pkt = new Packet(req, requestType, -1); - int size = m_records.size(); - for (int i = 0; i < size; ++i) - m_records[i].output(out); + Sequencer* m_sequencer_ptr = m_seq_map[rec->m_cntrl_id]; + assert(m_sequencer_ptr != NULL); + m_sequencer_ptr->makeRequest(pkt); - m_records.clear(); + DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec); + } +} + +void +CacheRecorder::enqueueNextFetchRequest() +{ + if (m_bytes_read < m_uncompressed_trace_size) { + TraceRecord* traceRecord = (TraceRecord*) (m_uncompressed_trace + + m_bytes_read); - return size; + DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord); + Request* req = new Request(); + MemCmd::Command requestType; + + if (traceRecord->m_type == RubyRequestType_LD) { + requestType = MemCmd::ReadReq; + req->setPhys(traceRecord->m_data_address, + RubySystem::getBlockSizeBytes(),0); + } else if (traceRecord->m_type == RubyRequestType_IFETCH) { + requestType = MemCmd::ReadReq; + req->setPhys(traceRecord->m_data_address, + RubySystem::getBlockSizeBytes(), + Request::INST_FETCH); + } else { + requestType = MemCmd::WriteReq; + req->setPhys(traceRecord->m_data_address, + RubySystem::getBlockSizeBytes(),0); + } + + Packet *pkt = new Packet(req, requestType, -1); + pkt->dataStatic(traceRecord->m_data); + + Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id]; + assert(m_sequencer_ptr != NULL); + m_sequencer_ptr->makeRequest(pkt); + + m_bytes_read += (sizeof(TraceRecord) + + RubySystem::getBlockSizeBytes()); + m_records_read++; + } } void -CacheRecorder::print(ostream& out) const +CacheRecorder::addRecord(int cntrl, const physical_address_t data_addr, + const physical_address_t pc_addr, + RubyRequestType type, Time 
time, DataBlock& data) +{ + TraceRecord* rec = (TraceRecord*)malloc(sizeof(TraceRecord) + + RubySystem::getBlockSizeBytes()); + rec->m_cntrl_id = cntrl; + rec->m_time = time; + rec->m_data_address = data_addr; + rec->m_pc_address = pc_addr; + rec->m_type = type; + memcpy(rec->m_data, data.getData(0, RubySystem::getBlockSizeBytes()), + RubySystem::getBlockSizeBytes()); + + m_records.push_back(rec); +} + +uint64 +CacheRecorder::aggregateRecords(uint8_t** buf, uint64 total_size) { + std::sort(m_records.begin(), m_records.end(), compareTraceRecords); + + int size = m_records.size(); + uint64 current_size = 0; + int record_size = sizeof(TraceRecord) + RubySystem::getBlockSizeBytes(); + + for (int i = 0; i < size; ++i) { + // Determine if we need to expand the buffer size + if (current_size + record_size > total_size) { + uint8_t* new_buf = new (nothrow) uint8_t[total_size * 2]; + if (new_buf == NULL) { + fatal("Unable to allocate buffer of size %s\n", + total_size * 2); + } + total_size = total_size * 2; + uint8_t* old_buf = *buf; + memcpy(new_buf, old_buf, current_size); + *buf = new_buf; + delete [] old_buf; + } + + // Copy the current record into the buffer + memcpy(&((*buf)[current_size]), m_records[i], record_size); + current_size += record_size; + + free(m_records[i]); + m_records[i] = NULL; + } + + m_records.clear(); + return current_size; } diff --git a/src/mem/ruby/recorder/CacheRecorder.hh b/src/mem/ruby/recorder/CacheRecorder.hh index 9f96f4fa0..839c4f6b1 100644 --- a/src/mem/ruby/recorder/CacheRecorder.hh +++ b/src/mem/ruby/recorder/CacheRecorder.hh @@ -1,5 +1,6 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood + * Copyright (c) 2010 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,37 +35,90 @@ #ifndef __MEM_RUBY_RECORDER_CACHERECORDER_HH__ #define __MEM_RUBY_RECORDER_CACHERECORDER_HH__ -#include <iostream> -#include <string> #include <vector> +#include "base/hashmap.hh" #include "mem/protocol/RubyRequestType.hh" -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/recorder/TraceRecord.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/TypeDefines.hh" -class Address; -class TraceRecord; class Sequencer; +/*! + * Class for recording cache contents. Note that the last element of the + * class is an array of length zero. It is used for creating variable + * length object, so that while writing the data to a file one does not + * need to copy the meta data and the actual data separately. + */ +class TraceRecord { + public: + int m_cntrl_id; + Time m_time; + physical_address_t m_data_address; + physical_address_t m_pc_address; + RubyRequestType m_type; + uint8_t m_data[0]; + + void print(std::ostream& out) const; +}; + class CacheRecorder { public: - void addRecord(Sequencer* sequencer, const Address& data_addr, - const Address& pc_addr, RubyRequestType type, Time time); - int dumpRecords(std::string filename); + CacheRecorder(); + ~CacheRecorder(); - void print(std::ostream& out) const; + CacheRecorder(uint8_t* uncompressed_trace, + uint64_t uncompressed_trace_size, + std::vector<Sequencer*>& SequencerMap); + void addRecord(int cntrl, const physical_address_t data_addr, + const physical_address_t pc_addr, RubyRequestType type, + Time time, DataBlock& data); + + uint64 aggregateRecords(uint8_t** data, uint64 size); + + /*! 
+ * Function for flushing the memory contents of the caches to the + * main memory. It goes through the recorded contents of the caches, + * and issues flush requests. Except for the first one, a flush request + * is issued only after the previous one has completed. This currently + * requires use of MOESI Hammer protocol since only that protocol + * supports flush requests. + */ + void enqueueNextFlushRequest(); + + /*! + * Function for fetching warming up the memory and the caches. It goes + * through the recorded contents of the caches, as available in the + * checkpoint and issues fetch requests. Except for the first one, a + * fetch request is issued only after the previous one has completed. + * It should be possible to use this with any protocol. + */ + void enqueueNextFetchRequest(); private: // Private copy constructor and assignment operator CacheRecorder(const CacheRecorder& obj); CacheRecorder& operator=(const CacheRecorder& obj); - std::vector<TraceRecord> m_records; + std::vector<TraceRecord*> m_records; + uint8_t* m_uncompressed_trace; + uint64_t m_uncompressed_trace_size; + std::vector<Sequencer*> m_seq_map; + uint64_t m_bytes_read; + uint64_t m_records_read; + uint64_t m_records_flushed; }; +inline bool +compareTraceRecords(const TraceRecord* n1, const TraceRecord* n2) +{ + return n1->m_time > n2->m_time; +} + inline std::ostream& -operator<<(std::ostream& out, const CacheRecorder& obj) +operator<<(std::ostream& out, const TraceRecord& obj) { obj.print(out); out << std::flush; diff --git a/src/mem/ruby/recorder/SConscript b/src/mem/ruby/recorder/SConscript index 035f896a4..e1b3d78b7 100644 --- a/src/mem/ruby/recorder/SConscript +++ b/src/mem/ruby/recorder/SConscript @@ -33,8 +33,4 @@ Import('*') if env['PROTOCOL'] == 'None': Return() -SimObject('Tracer.py') - Source('CacheRecorder.cc') -Source('Tracer.cc') -Source('TraceRecord.cc', Werror=False) diff --git a/src/mem/ruby/recorder/TraceRecord.cc b/src/mem/ruby/recorder/TraceRecord.cc deleted file mode 100644 index 79186d33b..000000000 --- a/src/mem/ruby/recorder/TraceRecord.cc +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "mem/protocol/RubyRequest.hh" -#include "mem/ruby/recorder/TraceRecord.hh" -#include "mem/ruby/system/Sequencer.hh" -#include "mem/ruby/system/System.hh" -#include "sim/sim_object.hh" - -using namespace std; - -TraceRecord::TraceRecord(Sequencer* _sequencer, const Address& data_addr, - const Address& pc_addr, RubyRequestType type, Time time) -{ - m_sequencer_ptr = _sequencer; - m_data_address = data_addr; - m_pc_address = pc_addr; - m_time = time; - m_type = type; - - // Don't differentiate between store misses and atomic requests in - // the trace - if (m_type == RubyRequestType_Load_Linked) { - m_type = RubyRequestType_ST; - } else if (m_type == RubyRequestType_Store_Conditional) { - m_type = RubyRequestType_ST; - } -} - -TraceRecord::TraceRecord(const TraceRecord& obj) -{ - // Call assignment operator - *this = obj; -} - -TraceRecord& -TraceRecord::operator=(const TraceRecord& obj) -{ - m_sequencer_ptr = obj.m_sequencer_ptr; - m_time = obj.m_time; - m_data_address = obj.m_data_address; - m_pc_address = obj.m_pc_address; - m_type = obj.m_type; - return *this; -} - -void -TraceRecord::issueRequest() const -{ - assert(m_sequencer_ptr != NULL); - Request req(m_data_address.getAddress(), 0, 0); - Packet *pkt = new Packet(&req, MemCmd(MemCmd::InvalidCmd), -1); - - // Clear out the sequencer - while (!m_sequencer_ptr->empty()) { - g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100); - } - - m_sequencer_ptr->makeRequest(pkt); - - // Clear out the sequencer - while (!m_sequencer_ptr->empty()) { - g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100); - } -} - -void -TraceRecord::print(ostream& out) const -{ - out << "[TraceRecord: Node, " << m_sequencer_ptr->name() << ", " - << m_data_address << ", " << m_pc_address << ", " - << m_type << ", Time: " << m_time << "]"; -} - -void -TraceRecord::output(ostream& out) const -{ - out << m_sequencer_ptr->name() << " "; - m_data_address.output(out); - out << " "; - m_pc_address.output(out); - out << " "; - out << m_type; - out << endl; -} - -bool -TraceRecord::input(istream& in) -{ - string sequencer_name; - in >> sequencer_name; - - // The SimObject find function is slow and iterates through the - // simObjectList to find the sequencer pointer. Therefore, expect - // trace playback to be slow. - m_sequencer_ptr = (Sequencer*)SimObject::find(sequencer_name.c_str()); - - m_data_address.input(in); - m_pc_address.input(in); - if (in.eof()) - return false; - - string type; - in >> type; - m_type = string_to_RubyRequestType(type); - - // Ignore the rest of the line - char c = '\0'; - while ((!in.eof()) && (c != '\n')) { - in.get(c); - } - - return true; -} diff --git a/src/mem/ruby/recorder/TraceRecord.hh b/src/mem/ruby/recorder/TraceRecord.hh deleted file mode 100644 index 42f213564..000000000 --- a/src/mem/ruby/recorder/TraceRecord.hh +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * A entry in the cache request record. It is aware of the ruby time - * and can issue the request back to the cache. - */ - -#ifndef __MEM_RUBY_RECORDER_TRACERECORD_HH__ -#define __MEM_RUBY_RECORDER_TRACERECORD_HH__ - -#include <iostream> - -#include "mem/ruby/common/Address.hh" -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/Sequencer.hh" - -class CacheMsg; - -class TraceRecord -{ - public: - TraceRecord(Sequencer* _sequencer, const Address& data_addr, - const Address& pc_addr, RubyRequestType type, Time time); - - TraceRecord() - { - m_sequencer_ptr = NULL; - m_time = 0; - m_type = RubyRequestType_NULL; - } - - TraceRecord(const TraceRecord& obj); - TraceRecord& operator=(const TraceRecord& obj); - - void issueRequest() const; - - void print(std::ostream& out) const; - void output(std::ostream& out) const; - bool input(std::istream& in); - - private: - friend bool operator>(const TraceRecord& n1, const TraceRecord& n2); - - Sequencer* m_sequencer_ptr; - Time m_time; - Address m_data_address; - Address m_pc_address; - RubyRequestType m_type; -}; - -inline bool -operator>(const TraceRecord& n1, const TraceRecord& n2) -{ - return n1.m_time > n2.m_time; -} - -inline std::ostream& -operator<<(std::ostream& out, const TraceRecord& obj) -{ - obj.print(out); - out << std::flush; - return out; -} - -#endif // __MEM_RUBY_RECORDER_TRACERECORD_HH__ diff --git a/src/mem/ruby/recorder/Tracer.cc b/src/mem/ruby/recorder/Tracer.cc deleted file mode 100644 index fcfe5338c..000000000 --- a/src/mem/ruby/recorder/Tracer.cc +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "base/cprintf.hh" -#include "mem/ruby/eventqueue/RubyEventQueue.hh" -#include "mem/ruby/recorder/TraceRecord.hh" -#include "mem/ruby/recorder/Tracer.hh" -#include "mem/ruby/system/System.hh" - -using namespace std; - -Tracer::Tracer(const Params *p) - : SimObject(p) -{ - m_enabled = false; - m_warmup_length = p->warmup_length; - assert(m_warmup_length > 0); - p->ruby_system->registerTracer(this); -} - -void -Tracer::startTrace(string filename) -{ - if (m_enabled) - stopTrace(); - - if (filename != "") { - m_trace_file.open(filename.c_str()); - if (m_trace_file.fail()) { - cprintf("Error: error opening file '%s'\n", filename); - cprintf("Trace not enabled.\n"); - return; - } - cprintf("Request trace enabled to output file '%s'\n", filename); - m_enabled = true; - } -} - -void -Tracer::stopTrace() -{ - if (m_enabled) { - m_trace_file.close(); - cout << "Request trace file closed." 
<< endl; - m_enabled = false; - } -} - -void -Tracer::traceRequest(Sequencer* sequencer, const Address& data_addr, - const Address& pc_addr, RubyRequestType type, Time time) -{ - assert(m_enabled); - TraceRecord tr(sequencer, data_addr, pc_addr, type, time); - tr.output(m_trace_file); -} - -int -Tracer::playbackTrace(string filename) -{ - igzstream in(filename.c_str()); - if (in.fail()) { - cprintf("Error: error opening file '%s'\n", filename); - return 0; - } - - time_t start_time = time(NULL); - - TraceRecord record; - int counter = 0; - // Read in the next TraceRecord - bool ok = record.input(in); - while (ok) { - // Put it in the right cache - record.issueRequest(); - counter++; - - // Read in the next TraceRecord - ok = record.input(in); - - // Clear the statistics after warmup - if (counter == m_warmup_length) { - cprintf("Clearing stats after warmup of length %s\n", - m_warmup_length); - g_system_ptr->clearStats(); - } - } - - // Flush the prefetches through the system - // FIXME - should be smarter - g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 1000); - - time_t stop_time = time(NULL); - double seconds = difftime(stop_time, start_time); - double minutes = seconds / 60.0; - cout << "playbackTrace: " << minutes << " minutes" << endl; - - return counter; -} - -void -Tracer::print(ostream& out) const -{ -} - -Tracer * -RubyTracerParams::create() -{ - return new Tracer(this); -} diff --git a/src/mem/ruby/recorder/Tracer.hh b/src/mem/ruby/recorder/Tracer.hh deleted file mode 100644 index cad47b28c..000000000 --- a/src/mem/ruby/recorder/Tracer.hh +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Controller class of the tracer. Can stop/start/playback the ruby - * cache requests trace. 
- */ - -#ifndef __MEM_RUBY_RECORDER_TRACER_HH__ -#define __MEM_RUBY_RECORDER_TRACER_HH__ - -#include <iostream> -#include <string> - -#include "mem/protocol/RubyRequestType.hh" -#include "mem/ruby/common/Global.hh" -#include "params/RubyTracer.hh" -#include "sim/sim_object.hh" -#include "gzstream.hh" - -class Address; -class TraceRecord; -class Sequencer; - -class Tracer : public SimObject -{ - public: - typedef RubyTracerParams Params; - Tracer(const Params *p); - - void startTrace(std::string filename); - void stopTrace(); - bool traceEnabled() { return m_enabled; } - void traceRequest(Sequencer* sequencer, const Address& data_addr, - const Address& pc_addr, RubyRequestType type, Time time); - - void print(std::ostream& out) const; - - int playbackTrace(std::string filename); - - private: - // Private copy constructor and assignment operator - Tracer(const Tracer& obj); - Tracer& operator=(const Tracer& obj); - - ogzstream m_trace_file; - bool m_enabled; - - //added by SS - int m_warmup_length; -}; - -inline std::ostream& -operator<<(std::ostream& out, const Tracer& obj) -{ - obj.print(out); - out << std::flush; - return out; -} - -#endif // __MEM_RUBY_RECORDER_TRACER_HH__ diff --git a/src/mem/ruby/recorder/Tracer.py b/src/mem/ruby/recorder/Tracer.py deleted file mode 100644 index 7a689f9f7..000000000 --- a/src/mem/ruby/recorder/Tracer.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2009 Advanced Micro Devices, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# -# Authors: Steve Reinhardt -# Brad Beckmann - -from m5.params import * -from m5.SimObject import SimObject - -class RubyTracer(SimObject): - type = 'RubyTracer' - cxx_class = 'Tracer' - warmup_length = Param.Int(100000, "") - ruby_system = Param.RubySystem("") diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index ca37a90de..a0e3b3fbb 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -33,12 +33,11 @@ #include <string> #include "mem/protocol/AccessPermission.hh" -#include "mem/protocol/MachineType.hh" #include "mem/ruby/common/Address.hh" #include "mem/ruby/common/Consumer.hh" #include "mem/ruby/common/DataBlock.hh" #include "mem/ruby/network/Network.hh" -#include "mem/ruby/system/System.hh" +#include "mem/ruby/recorder/CacheRecorder.hh" #include "params/RubyController.hh" #include "sim/sim_object.hh" @@ -68,6 +67,8 @@ class AbstractController : public SimObject, public Consumer virtual void wakeup() = 0; // virtual void dumpStats(std::ostream & out) = 0; virtual void clearStats() = 0; + virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0; + virtual Sequencer* getSequencer() const = 0; }; #endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__ diff --git a/src/mem/ruby/system/CacheMemory.cc b/src/mem/ruby/system/CacheMemory.cc index 1564128d3..9f1fe6320 100644 --- a/src/mem/ruby/system/CacheMemory.cc +++ b/src/mem/ruby/system/CacheMemory.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,7 +28,9 @@ #include "base/intmath.hh" #include "debug/RubyCache.hh" +#include "mem/protocol/AccessPermission.hh" #include "mem/ruby/system/CacheMemory.hh" +#include "mem/ruby/system/System.hh" using namespace std; @@ -364,31 +366,42 @@ CacheMemory::profileGenericRequest(GenericRequestType requestType, } void -CacheMemory::recordCacheContents(CacheRecorder& tr) const +CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const { + uint64 warmedUpBlocks = 0; + uint64 totalBlocks M5_VAR_USED = (uint64)m_cache_num_sets + * (uint64)m_cache_assoc; + for (int i = 0; i < m_cache_num_sets; i++) { for (int j = 0; j < m_cache_assoc; j++) { - AccessPermission perm = m_cache[i][j]->m_Permission; - RubyRequestType request_type = RubyRequestType_NULL; - if (perm == AccessPermission_Read_Only) { - if (m_is_instruction_only_cache) { - request_type = RubyRequestType_IFETCH; - } else { - request_type = RubyRequestType_LD; + if (m_cache[i][j] != NULL) { + AccessPermission perm = m_cache[i][j]->m_Permission; + RubyRequestType request_type = RubyRequestType_NULL; + if (perm == AccessPermission_Read_Only) { + if (m_is_instruction_only_cache) { + request_type = RubyRequestType_IFETCH; + } else { + request_type = RubyRequestType_LD; + } + } else if (perm == AccessPermission_Read_Write) { + request_type = RubyRequestType_ST; } - } else if (perm == AccessPermission_Read_Write) { - request_type = RubyRequestType_ST; - } - if (request_type != RubyRequestType_NULL) { -#if 0 - tr.addRecord(m_chip_ptr->getID(), m_cache[i][j].m_Address, - Address(0), request_type, - m_replacementPolicy_ptr->getLastAccess(i, j)); -#endif + if (request_type != RubyRequestType_NULL) { + tr->addRecord(cntrl, m_cache[i][j]->m_Address.getAddress(), + 0, request_type, + 
m_replacementPolicy_ptr->getLastAccess(i, j), + m_cache[i][j]->getDataBlk()); + warmedUpBlocks++; + } } } } + + DPRINTF(RubyCache, "%s: %lli blocks of %lli total blocks" + "recorded %.2f%% \n", name().c_str(), warmedUpBlocks, + (uint64)m_cache_num_sets * (uint64)m_cache_assoc, + (float(warmedUpBlocks)/float(totalBlocks))*100.0); } void diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh index f0acba9cb..f270e88cd 100644 --- a/src/mem/ruby/system/CacheMemory.hh +++ b/src/mem/ruby/system/CacheMemory.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,21 +34,15 @@ #include <vector> #include "base/hashmap.hh" -#include "mem/protocol/AccessPermission.hh" #include "mem/protocol/GenericRequestType.hh" #include "mem/protocol/RubyRequest.hh" -#include "mem/protocol/RubyRequestType.hh" -#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/DataBlock.hh" -#include "mem/ruby/common/Global.hh" #include "mem/ruby/profiler/CacheProfiler.hh" #include "mem/ruby/recorder/CacheRecorder.hh" #include "mem/ruby/slicc_interface/AbstractCacheEntry.hh" -#include "mem/ruby/slicc_interface/AbstractController.hh" #include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh" #include "mem/ruby/system/LRUPolicy.hh" #include "mem/ruby/system/PseudoLRUPolicy.hh" -#include "mem/ruby/system/System.hh" #include "params/RubyCache.hh" #include "sim/sim_object.hh" @@ -100,12 +94,7 @@ class CacheMemory : public SimObject int getLatency() const { return m_latency; } // Hook for checkpointing the contents of the cache - void recordCacheContents(CacheRecorder& tr) const; - void - setAsInstructionCache(bool is_icache) - { - m_is_instruction_only_cache = is_icache; - } + void recordCacheContents(int cntrl, CacheRecorder* tr) const; // Set this address to most recently used void setMRU(const Address& address); @@ -146,7 +135,6 @@ class CacheMemory : public SimObject // Data Members (m_prefix) bool m_is_instruction_only_cache; - bool m_is_data_only_cache; // The first index is the # of cache lines. // The second index is the the amount associativity. diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh index 5f6b9f100..099c1d991 100644 --- a/src/mem/ruby/system/DMASequencer.hh +++ b/src/mem/ruby/system/DMASequencer.hh @@ -55,6 +55,9 @@ class DMASequencer : public RubyPort /* external interface */ RequestStatus makeRequest(PacketPtr pkt); bool busy() { return m_is_busy;} + int outstandingCount() const { return (m_is_busy ? 
1 : 0); } + bool isDeadlockEventScheduled() const { return false; } + void descheduleDeadlockEvent() {} /* SLICC callback */ void dataCallback(const DataBlock & dblk); diff --git a/src/mem/ruby/system/DirectoryMemory.cc b/src/mem/ruby/system/DirectoryMemory.cc index 03aa68919..d2e00ab3b 100644 --- a/src/mem/ruby/system/DirectoryMemory.cc +++ b/src/mem/ruby/system/DirectoryMemory.cc @@ -58,6 +58,7 @@ DirectoryMemory::init() if (m_use_map) { m_sparseMemory = new SparseMemory(m_map_levels); + g_system_ptr->registerSparseMemory(m_sparseMemory); } else { m_entries = new AbstractEntry*[m_num_entries]; for (int i = 0; i < m_num_entries; i++) diff --git a/src/mem/ruby/system/MemoryVector.hh b/src/mem/ruby/system/MemoryVector.hh index 6719b9fb6..9bd3516c2 100644 --- a/src/mem/ruby/system/MemoryVector.hh +++ b/src/mem/ruby/system/MemoryVector.hh @@ -29,6 +29,7 @@ #ifndef __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__ #define __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__ +#include "base/trace.hh" #include "mem/ruby/common/Address.hh" class DirectoryMemory; @@ -48,6 +49,8 @@ class MemoryVector void write(const Address & paddr, uint8* data, int len); uint8* read(const Address & paddr, uint8* data, int len); + uint32 collatePages(uint8* &raw_data); + void populatePages(uint8* raw_data); private: uint8* getBlockPtr(const PhysAddress & addr); @@ -56,6 +59,7 @@ class MemoryVector uint8** m_pages; uint32 m_num_pages; const uint32 m_page_offset_mask; + static const uint32 PAGE_SIZE = 4096; }; inline @@ -97,7 +101,7 @@ MemoryVector::resize(uint32 size) delete [] m_pages; } m_size = size; - assert(size%4096 == 0); + assert(size%PAGE_SIZE == 0); m_num_pages = size >> 12; m_pages = new uint8*[m_num_pages]; memset(m_pages, 0, m_num_pages * sizeof(uint8*)); @@ -118,8 +122,8 @@ MemoryVector::write(const Address & paddr, uint8* data, int len) } if (all_zeros) return; - m_pages[page_num] = new uint8[4096]; - memset(m_pages[page_num], 0, 4096); + m_pages[page_num] = new uint8[PAGE_SIZE]; + memset(m_pages[page_num], 0, PAGE_SIZE); uint32 offset = paddr.getAddress() & m_page_offset_mask; memcpy(&m_pages[page_num][offset], data, len); } else { @@ -147,10 +151,82 @@ MemoryVector::getBlockPtr(const PhysAddress & paddr) { uint32 page_num = paddr.getAddress() >> 12; if (m_pages[page_num] == 0) { - m_pages[page_num] = new uint8[4096]; - memset(m_pages[page_num], 0, 4096); + m_pages[page_num] = new uint8[PAGE_SIZE]; + memset(m_pages[page_num], 0, PAGE_SIZE); } return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask]; } +/*! + * Function for collating all the pages of the physical memory together. + * In case a pointer for a page is NULL, this page needs only a single byte + * to represent that the pointer is NULL. Otherwise, it needs 1 + PAGE_SIZE + * bytes. The first represents that the page pointer is not NULL, and rest of + * the bytes represent the data on the page. + */ + +inline uint32 +MemoryVector::collatePages(uint8* &raw_data) +{ + uint32 num_zero_pages = 0; + uint32 data_size = 0; + + for (uint32 i = 0;i < m_num_pages; ++i) + { + if (m_pages[i] == 0) num_zero_pages++; + } + + raw_data = new uint8[ sizeof(uint32) /* number of pages*/ + + m_num_pages /* whether the page is all zeros */ + + PAGE_SIZE * (m_num_pages - num_zero_pages)]; + + /* Write the number of pages to be stored. 
*/ + memcpy(raw_data, &m_num_pages, sizeof(uint32)); + data_size = sizeof(uint32); + + for (uint32 i = 0;i < m_num_pages; ++i) + { + if (m_pages[i] == 0) { + raw_data[data_size] = 0; + } else { + raw_data[data_size] = 1; + memcpy(raw_data + data_size + 1, m_pages[i], PAGE_SIZE); + data_size += PAGE_SIZE; + } + data_size += 1; + } + + return data_size; +} + +/*! + * Function for populating the pages of the memory using the available raw + * data. Each page has a byte associate with it, which represents whether the + * page was NULL or not, when all the pages were collated. The function assumes + * that the number of pages in the memory are same as those that were recorded + * in the checkpoint. + */ +inline void +MemoryVector::populatePages(uint8* raw_data) +{ + uint32 data_size = 0; + uint32 num_pages = 0; + + /* Read the number of pages that were stored. */ + memcpy(&num_pages, raw_data, sizeof(uint32)); + data_size = sizeof(uint32); + assert(num_pages == m_num_pages); + + for (uint32 i = 0;i < m_num_pages; ++i) + { + assert(m_pages[i] == 0); + if (raw_data[data_size] != 0) { + m_pages[i] = new uint8[PAGE_SIZE]; + memcpy(m_pages[i], raw_data + data_size + 1, PAGE_SIZE); + data_size += PAGE_SIZE; + } + data_size += 1; + } +} + #endif // __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__ diff --git a/src/mem/ruby/system/PerfectCacheMemory.hh b/src/mem/ruby/system/PerfectCacheMemory.hh index 772b3d1f9..b880b6434 100644 --- a/src/mem/ruby/system/PerfectCacheMemory.hh +++ b/src/mem/ruby/system/PerfectCacheMemory.hh @@ -32,7 +32,6 @@ #include "base/hashmap.hh" #include "mem/protocol/AccessPermission.hh" #include "mem/ruby/common/Address.hh" -#include "mem/ruby/common/Global.hh" template<class ENTRY> struct PerfectCacheLineState @@ -57,10 +56,6 @@ class PerfectCacheMemory static void printConfig(std::ostream& out); - // perform a cache access and see if we hit or not. Return true - // on a hit. 
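Stepping back to the MemoryVector change above: collatePages() and populatePages() define a simple buffer layout for the checkpointed pages, a 4-byte page count followed, for each page, by one flag byte and then PAGE_SIZE bytes of data only when that page was actually allocated. A rough standalone sketch of the same layout (hypothetical names, std::vector in place of gem5's raw uint8 arrays, and the same host-endianness assumption for the stored count) might look like:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    static const uint32_t kPageSize = 4096;  // stand-in for MemoryVector's PAGE_SIZE

    // Pack a sparse page array: [uint32 page count] then, per page,
    // [1 flag byte][kPageSize data bytes, present only if the page exists].
    uint32_t collate(const std::vector<uint8_t*> &pages, std::vector<uint8_t> &out)
    {
        out.clear();
        uint32_t num_pages = pages.size();
        const uint8_t *count = reinterpret_cast<const uint8_t*>(&num_pages);
        out.insert(out.end(), count, count + sizeof(uint32_t));
        for (uint32_t i = 0; i < num_pages; ++i) {
            if (pages[i] == NULL) {
                out.push_back(0);                  // absent page: flag byte only
            } else {
                out.push_back(1);                  // present page: flag byte + data
                out.insert(out.end(), pages[i], pages[i] + kPageSize);
            }
        }
        return static_cast<uint32_t>(out.size());
    }

    // Unpack the buffer produced above into a page array of the same size.
    void populate(const std::vector<uint8_t> &in, std::vector<uint8_t*> &pages)
    {
        uint32_t num_pages = 0;
        std::memcpy(&num_pages, &in[0], sizeof(uint32_t));
        assert(num_pages == pages.size());
        size_t pos = sizeof(uint32_t);
        for (uint32_t i = 0; i < num_pages; ++i) {
            if (in[pos++] != 0) {                  // flag says page data follows
                pages[i] = new uint8_t[kPageSize];
                std::memcpy(pages[i], &in[pos], kPageSize);
                pos += kPageSize;
            }
        }
    }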
- bool tryCacheAccess(const CacheMsg& msg, bool& block_stc, ENTRY*& entry); - // tests to see if an address is present in the cache bool isTagPresent(const Address& address) const; @@ -118,15 +113,6 @@ PerfectCacheMemory<ENTRY>::printConfig(std::ostream& out) { } -template<class ENTRY> -inline bool -PerfectCacheMemory<ENTRY>::tryCacheAccess(const CacheMsg& msg, - bool& block_stc, ENTRY*& entry) -{ - panic("not implemented"); - return true; -} - // tests to see if an address is present in the cache template<class ENTRY> inline bool diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index f7bde739e..64faf6aed 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -27,11 +27,11 @@ */ #include "cpu/testers/rubytest/RubyTester.hh" +#include "debug/Config.hh" #include "debug/Ruby.hh" #include "mem/protocol/AccessPermission.hh" #include "mem/ruby/slicc_interface/AbstractController.hh" #include "mem/ruby/system/RubyPort.hh" -#include "mem/physical.hh" RubyPort::RubyPort(const Params *p) : MemObject(p) @@ -51,6 +51,8 @@ RubyPort::RubyPort(const Params *p) m_usingRubyTester = p->using_ruby_tester; access_phys_mem = p->access_phys_mem; + drainEvent = NULL; + ruby_system = p->ruby_system; waitingOnSequencer = false; } @@ -66,8 +68,10 @@ Port * RubyPort::getPort(const std::string &if_name, int idx) { if (if_name == "port") { - return new M5Port(csprintf("%s-port%d", name(), idx), this, - ruby_system, access_phys_mem); + M5Port* cpuPort = new M5Port(csprintf("%s-port%d", name(), idx), + this, ruby_system, access_phys_mem); + cpu_ports.push_back(cpuPort); + return cpuPort; } if (if_name == "pio_port") { @@ -508,6 +512,82 @@ RubyPort::ruby_hit_callback(PacketPtr pkt) (*i)->sendRetry(); } } + + testDrainComplete(); +} + +void +RubyPort::testDrainComplete() +{ + //If we weren't able to drain before, we might be able to now. + if (drainEvent != NULL) { + unsigned int drainCount = getDrainCount(drainEvent); + DPRINTF(Config, "Drain count: %u\n", drainCount); + if (drainCount == 0) { + drainEvent->process(); + // Clear the drain event once we're done with it. + drainEvent = NULL; + } + } +} + +unsigned int +RubyPort::getDrainCount(Event *de) +{ + int count = 0; + // + // If the sequencer is not empty, then requests need to drain. + // The outstandingCount is the number of requests outstanding and thus the + // number of times M5's timing port will process the drain event. + // + count += outstandingCount(); + + DPRINTF(Config, "outstanding count %d\n", outstandingCount()); + + // To simplify the draining process, the sequencer's deadlock detection + // event should have been descheduled. 
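    // getDrainCount() totals the events that must retire before the drain
    // completes: the sequencer requests still outstanding (counted above),
    // plus whatever the pio port, the physmem port, and each attached
    // CPU-side M5Port report from their drain(de) calls below. drain()
    // stores the Event* and switches to Draining while this total is
    // non-zero; testDrainComplete(), called from ruby_hit_callback(),
    // re-checks the total as requests finish and processes the stored
    // event once it reaches zero.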
+ assert(isDeadlockEventScheduled() == false); + + if (pio_port != NULL) { + count += pio_port->drain(de); + DPRINTF(Config, "count after pio check %d\n", count); + } + if (physMemPort != NULL) { + count += physMemPort->drain(de); + DPRINTF(Config, "count after physmem check %d\n", count); + } + + for (CpuPortIter p_iter = cpu_ports.begin(); p_iter != cpu_ports.end(); + p_iter++) { + M5Port* cpu_port = *p_iter; + count += cpu_port->drain(de); + DPRINTF(Config, "count after cpu port check %d\n", count); + } + + DPRINTF(Config, "final count %d\n", count); + + return count; +} + +unsigned int +RubyPort::drain(Event *de) +{ + if (isDeadlockEventScheduled()) { + descheduleDeadlockEvent(); + } + + int count = getDrainCount(de); + + // Set status + if (count != 0) { + drainEvent = de; + + changeState(SimObject::Draining); + return count; + } + + changeState(SimObject::Drained); + return 0; } void diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 88e865766..d8dbe0cda 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -33,7 +33,6 @@ #include <string> #include "mem/protocol/RequestStatus.hh" -#include "mem/ruby/slicc_interface/RubyRequest.hh" #include "mem/ruby/system/System.hh" #include "mem/mem_object.hh" #include "mem/physical.hh" @@ -115,17 +114,23 @@ class RubyPort : public MemObject Port *getPort(const std::string &if_name, int idx); virtual RequestStatus makeRequest(PacketPtr pkt) = 0; + virtual int outstandingCount() const = 0; + virtual bool isDeadlockEventScheduled() const = 0; + virtual void descheduleDeadlockEvent() = 0; // // Called by the controller to give the sequencer a pointer. // A pointer to the controller is needed for atomic support. // void setController(AbstractController* _cntrl) { m_controller = _cntrl; } + int getId() { return m_version; } + unsigned int drain(Event *de); protected: const std::string m_name; void ruby_hit_callback(PacketPtr pkt); void hit(PacketPtr pkt); + void testDrainComplete(); int m_version; AbstractController* m_controller; @@ -143,11 +148,19 @@ class RubyPort : public MemObject } } + unsigned int getDrainCount(Event *de); + uint16_t m_port_id; uint64_t m_request_cnt; M5Port* physMemPort; + /*! Vector of CPU Port attached to this Ruby port. 
*/ + typedef std::vector<M5Port*>::iterator CpuPortIter; + std::vector<M5Port*> cpu_ports; + + Event *drainEvent; + PhysicalMemory* physmem; RubySystem* ruby_system; diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 7137dcc28..3f9ceb34d 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -40,9 +40,7 @@ #include "mem/protocol/RubyAccessMode.hh" #include "mem/ruby/buffers/MessageBuffer.hh" #include "mem/ruby/common/Global.hh" -#include "mem/ruby/common/SubBlock.hh" #include "mem/ruby/profiler/Profiler.hh" -#include "mem/ruby/recorder/Tracer.hh" #include "mem/ruby/slicc_interface/RubyRequest.hh" #include "mem/ruby/system/CacheMemory.hh" #include "mem/ruby/system/Sequencer.hh" @@ -521,7 +519,11 @@ Sequencer::hitCallback(SequencerRequest* srequest, } // update the data - if (pkt->getPtr<uint8_t>(true) != NULL) { + if (g_system_ptr->m_warmup_enabled) { + assert(pkt->getPtr<uint8_t>(false) != NULL); + data.setData(pkt->getPtr<uint8_t>(false), + request_address.getOffset(), pkt->getSize()); + } else if (pkt->getPtr<uint8_t>(true) != NULL) { if ((type == RubyRequestType_LD) || (type == RubyRequestType_IFETCH) || (type == RubyRequestType_RMW_Read) || @@ -553,8 +555,17 @@ Sequencer::hitCallback(SequencerRequest* srequest, testerSenderState->subBlock->mergeFrom(data); } - ruby_hit_callback(pkt); delete srequest; + + if (g_system_ptr->m_warmup_enabled) { + delete pkt; + g_system_ptr->m_cache_recorder->enqueueNextFetchRequest(); + } else if (g_system_ptr->m_cooldown_enabled) { + delete pkt; + g_system_ptr->m_cache_recorder->enqueueNextFlushRequest(); + } else { + ruby_hit_callback(pkt); + } } bool diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 7c2d0af13..4a6d46c01 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -39,8 +39,6 @@ #include "mem/ruby/system/RubyPort.hh" class DataBlock; -class CacheMsg; -class MachineID; class CacheMemory; class RubySequencerParams; @@ -100,6 +98,18 @@ class Sequencer : public RubyPort, public Consumer RequestStatus makeRequest(PacketPtr pkt); bool empty() const; + int outstandingCount() const { return m_outstanding_count; } + bool + isDeadlockEventScheduled() const + { + return deadlockCheckEvent.scheduled(); + } + + void + descheduleDeadlockEvent() + { + deschedule(deadlockCheckEvent); + } void print(std::ostream& out) const; void printStats(std::ostream& out) const; diff --git a/src/mem/ruby/system/SparseMemory.cc b/src/mem/ruby/system/SparseMemory.cc index 8e4f37c46..db8d494f8 100644 --- a/src/mem/ruby/system/SparseMemory.cc +++ b/src/mem/ruby/system/SparseMemory.cc @@ -1,5 +1,6 @@ /* * Copyright (c) 2009 Advanced Micro Devices, Inc. + * Copyright (c) 2012 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,6 +27,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <queue> + #include "debug/RubyCache.hh" #include "mem/ruby/system/SparseMemory.hh" #include "mem/ruby/system/System.hh" @@ -82,19 +85,19 @@ SparseMemory::recursivelyRemoveTables(SparseMapType* curTable, int curLevel) SparseMapType::iterator iter; for (iter = curTable->begin(); iter != curTable->end(); iter++) { - SparseMemEntry* entryStruct = &((*iter).second); + SparseMemEntry entry = (*iter).second; if (curLevel != (m_number_of_levels - 1)) { // If the not at the last level, analyze those lower level // tables first, then delete those next tables - SparseMapType* nextTable = (SparseMapType*)(entryStruct->entry); + SparseMapType* nextTable = (SparseMapType*)(entry); recursivelyRemoveTables(nextTable, (curLevel + 1)); delete nextTable; } else { // If at the last level, delete the directory entry - delete (AbstractEntry*)(entryStruct->entry); + delete (AbstractEntry*)(entry); } - entryStruct->entry = NULL; + entry = NULL; } // Once all entries have been deleted, erase the entries @@ -134,7 +137,7 @@ SparseMemory::exist(const Address& address) const // If the address is found, move on to the next level. // Otherwise, return not found if (curTable->count(curAddress) != 0) { - curTable = (SparseMapType*)(((*curTable)[curAddress]).entry); + curTable = (SparseMapType*)((*curTable)[curAddress]); } else { DPRINTF(RubyCache, "Not found\n"); return false; @@ -156,7 +159,6 @@ SparseMemory::add(const Address& address, AbstractEntry* entry) Address curAddress; SparseMapType* curTable = m_map_head; - SparseMemEntry* entryStruct = NULL; // Initiallize the high bit to be the total number of bits plus // the block offset. However the highest bit index is one less @@ -179,7 +181,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry) // if the address exists in the cur table, move on. Otherwise // create a new table. if (curTable->count(curAddress) != 0) { - curTable = (SparseMapType*)(((*curTable)[curAddress]).entry); + curTable = (SparseMapType*)((*curTable)[curAddress]); } else { m_adds_per_level[level]++; @@ -194,9 +196,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry) // Create the pointer container SparseMemEntry and add it // to the table. 
- entryStruct = new SparseMemEntry; - entryStruct->entry = newEntry; - (*curTable)[curAddress] = *entryStruct; + (*curTable)[curAddress] = newEntry; // Move to the next level of the heirarchy curTable = (SparseMapType*)newEntry; @@ -215,7 +215,7 @@ SparseMemory::recursivelyRemoveLevels(const Address& address, { Address curAddress; CurNextInfo nextInfo; - SparseMemEntry* entryStruct; + SparseMemEntry entry; // create the appropriate address for this level // Note: that set Address is inclusive of the specified range, @@ -231,11 +231,11 @@ SparseMemory::recursivelyRemoveLevels(const Address& address, assert(curInfo.curTable->count(curAddress) != 0); - entryStruct = &((*(curInfo.curTable))[curAddress]); + entry = (*(curInfo.curTable))[curAddress]; if (curInfo.level < (m_number_of_levels - 1)) { // set up next level's info - nextInfo.curTable = (SparseMapType*)(entryStruct->entry); + nextInfo.curTable = (SparseMapType*)(entry); nextInfo.level = curInfo.level + 1; nextInfo.highBit = curInfo.highBit - @@ -252,15 +252,15 @@ SparseMemory::recursivelyRemoveLevels(const Address& address, if (tableSize == 0) { m_removes_per_level[curInfo.level]++; delete nextInfo.curTable; - entryStruct->entry = NULL; + entry = NULL; curInfo.curTable->erase(curAddress); } } else { // if this is the last level, we have reached the Directory // Entry and thus we should delete it including the // SparseMemEntry container struct. - delete (AbstractEntry*)(entryStruct->entry); - entryStruct->entry = NULL; + delete (AbstractEntry*)(entry); + entry = NULL; curInfo.curTable->erase(curAddress); m_removes_per_level[curInfo.level]++; } @@ -331,7 +331,7 @@ SparseMemory::lookup(const Address& address) // If the address is found, move on to the next level. // Otherwise, return not found if (curTable->count(curAddress) != 0) { - curTable = (SparseMapType*)(((*curTable)[curAddress]).entry); + curTable = (SparseMapType*)((*curTable)[curAddress]); } else { DPRINTF(RubyCache, "Not found\n"); return NULL; @@ -345,6 +345,70 @@ SparseMemory::lookup(const Address& address) } void +SparseMemory::recordBlocks(int cntrl_id, CacheRecorder* tr) const +{ + queue<SparseMapType*> unexplored_nodes[2]; + queue<physical_address_t> address_of_nodes[2]; + + unexplored_nodes[0].push(m_map_head); + address_of_nodes[0].push(0); + + int parity_of_level = 0; + physical_address_t address, temp_address; + Address curAddress; + + // Initiallize the high bit to be the total number of bits plus + // the block offset. However the highest bit index is one less + // than this value. + int highBit = m_total_number_of_bits + RubySystem::getBlockSizeBits(); + int lowBit; + + for (int cur_level = 0; cur_level < m_number_of_levels; cur_level++) { + + // create the appropriate address for this level + // Note: that set Address is inclusive of the specified range, + // thus the high bit is one less than the total number of bits + // used to create the address. 
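    // The walk below keeps two queues and ping-pongs between them via
    // parity_of_level: one holds the tables of the current level, the other
    // is filled with the tables of the next level. Each node's partial
    // physical address travels with it and is extended by shifting the
    // per-level key into the [lowBit, highBit) field; at the leaf level the
    // completed block address and the entry's DataBlock are passed to the
    // CacheRecorder as an ST record.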
+ lowBit = highBit - m_number_of_bits_per_level[cur_level]; + + while (!unexplored_nodes[parity_of_level].empty()) { + + SparseMapType* node = unexplored_nodes[parity_of_level].front(); + unexplored_nodes[parity_of_level].pop(); + + address = address_of_nodes[parity_of_level].front(); + address_of_nodes[parity_of_level].pop(); + + SparseMapType::iterator iter; + + for (iter = node->begin(); iter != node->end(); iter++) { + SparseMemEntry entry = (*iter).second; + curAddress = (*iter).first; + + if (cur_level != (m_number_of_levels - 1)) { + // If not at the last level, put this node in the queue + unexplored_nodes[1 - parity_of_level].push( + (SparseMapType*)(entry)); + address_of_nodes[1 - parity_of_level].push(address | + (curAddress.getAddress() << lowBit)); + } else { + // If at the last level, add a trace record + temp_address = address | (curAddress.getAddress() + << lowBit); + DataBlock block = ((AbstractEntry*)entry)->getDataBlk(); + tr->addRecord(cntrl_id, temp_address, 0, RubyRequestType_ST, 0, + block); + } + } + } + + // Adjust the highBit value for the next level + highBit -= m_number_of_bits_per_level[cur_level]; + parity_of_level = 1 - parity_of_level; + } +} + +void SparseMemory::print(ostream& out) const { } diff --git a/src/mem/ruby/system/SparseMemory.hh b/src/mem/ruby/system/SparseMemory.hh index f6937ef54..e4237dbcd 100644 --- a/src/mem/ruby/system/SparseMemory.hh +++ b/src/mem/ruby/system/SparseMemory.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2009 Advanced Micro Devices, Inc. + * Copyright (c) 2012 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,15 +33,11 @@ #include <iostream> #include "base/hashmap.hh" -#include "mem/ruby/slicc_interface/AbstractEntry.hh" #include "mem/ruby/common/Address.hh" -#include "mem/ruby/common/Global.hh" - -struct SparseMemEntry -{ - void* entry; -}; +#include "mem/ruby/recorder/CacheRecorder.hh" +#include "mem/ruby/slicc_interface/AbstractEntry.hh" +typedef void* SparseMemEntry; typedef m5::hash_map<Address, SparseMemEntry> SparseMapType; struct CurNextInfo @@ -63,6 +60,14 @@ class SparseMemory void add(const Address& address, AbstractEntry*); void remove(const Address& address); + /*! + * Function for recording the contents of memory. This function walks + * through all the levels of the sparse memory in a breadth first + * fashion. This might need more memory than a depth first approach. + * But breadth first seems easier to me than a depth first approach. + */ + void recordBlocks(int cntrl_id, CacheRecorder *) const; + AbstractEntry* lookup(const Address& address); // Print cache contents @@ -95,12 +100,4 @@ class SparseMemory uint64_t* m_removes_per_level; }; -inline std::ostream& -operator<<(std::ostream& out, const SparseMemEntry& obj) -{ - out << "SparseMemEntry"; - out << std::flush; - return out; -} - #endif // __MEM_RUBY_SYSTEM_SPARSEMEMORY_HH__ diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc index 81824b9b7..6f191819b 100644 --- a/src/mem/ruby/system/System.cc +++ b/src/mem/ruby/system/System.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,16 +26,19 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <fcntl.h> +#include <zlib.h> + +#include <cstdio> + #include "base/intmath.hh" #include "base/output.hh" -#include "mem/ruby/buffers/MessageBuffer.hh" +#include "debug/RubySystem.hh" #include "mem/ruby/common/Address.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/profiler/Profiler.hh" -#include "mem/ruby/recorder/Tracer.hh" -#include "mem/ruby/slicc_interface/AbstractController.hh" -#include "mem/ruby/system/MemoryVector.hh" #include "mem/ruby/system/System.hh" +#include "sim/simulate.hh" using namespace std; @@ -49,7 +52,6 @@ int RubySystem::m_memory_size_bits; Network* RubySystem::m_network_ptr; Profiler* RubySystem::m_profiler_ptr; -Tracer* RubySystem::m_tracer_ptr; MemoryVector* RubySystem::m_mem_vec_ptr; RubySystem::RubySystem(const Params *p) @@ -88,6 +90,8 @@ RubySystem::RubySystem(const Params *p) // RubyExitCallback* rubyExitCB = new RubyExitCallback(p->stats_filename); registerExitCallback(rubyExitCB); + m_warmup_enabled = false; + m_cooldown_enabled = false; } void @@ -109,22 +113,21 @@ RubySystem::registerProfiler(Profiler* profiler_ptr) } void -RubySystem::registerTracer(Tracer* tracer_ptr) +RubySystem::registerAbstractController(AbstractController* cntrl) { - m_tracer_ptr = tracer_ptr; + m_abs_cntrl_vec.push_back(cntrl); } void -RubySystem::registerAbstractController(AbstractController* cntrl) +RubySystem::registerSparseMemory(SparseMemory* s) { - m_abs_cntrl_vec.push_back(cntrl); + m_sparse_memory_vector.push_back(s); } RubySystem::~RubySystem() { delete m_network_ptr; delete m_profiler_ptr; - delete m_tracer_ptr; if (m_mem_vec_ptr) delete m_mem_vec_ptr; } @@ -167,9 +170,143 @@ RubySystem::printStats(ostream& out) } void +RubySystem::writeCompressedTrace(uint8* raw_data, string filename, + uint64 uncompressed_trace_size) +{ + // Create the checkpoint file for the memory + string thefile = Checkpoint::dir() + "/" + filename.c_str(); + + int fd = creat(thefile.c_str(), 0664); + if (fd < 0) { + perror("creat"); + fatal("Can't open memory trace file '%s'\n", filename); + } + + gzFile compressedMemory = gzdopen(fd, "wb"); + if (compressedMemory == NULL) + fatal("Insufficient memory to allocate compression state for %s\n", + filename); + + if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != + uncompressed_trace_size) { + fatal("Write failed on memory trace file '%s'\n", filename); + } + + if (gzclose(compressedMemory)) { + fatal("Close failed on memory trace file '%s'\n", filename); + } + delete raw_data; +} + +void RubySystem::serialize(std::ostream &os) { + m_cooldown_enabled = true; + + vector<Sequencer*> sequencer_map; + Sequencer* sequencer_ptr = NULL; + int cntrl_id = -1; + + + for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { + sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); + if (sequencer_ptr == NULL) { + sequencer_ptr = sequencer_map[cntrl]; + cntrl_id = cntrl; + } + } + + assert(sequencer_ptr != NULL); + + for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { + if (sequencer_map[cntrl] == NULL) { + sequencer_map[cntrl] = sequencer_ptr; + } + } + + // Create the CacheRecorder and record the cache trace + m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map); + + for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { + m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); + } + + // save the current tick value + Tick curtick_original = curTick(); + // save the event queue head + Event* eventq_head = eventq->replaceHead(NULL); + + // Schedule an event to start 
cache cooldown + RubyEvent* e = new RubyEvent(this); + schedule(e,curTick()); + simulate(); + + // Restore eventq head + eventq_head = eventq->replaceHead(eventq_head); + // Restore curTick + curTick(curtick_original); + + uint8* raw_data = NULL; + + if (m_mem_vec_ptr != NULL) { + uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data); + + string memory_trace_file = name() + ".memory.gz"; + writeCompressedTrace(raw_data, memory_trace_file, + memory_trace_size); + + SERIALIZE_SCALAR(memory_trace_file); + SERIALIZE_SCALAR(memory_trace_size); + + } else { + for (int i = 0; i < m_sparse_memory_vector.size(); ++i) { + m_sparse_memory_vector[i]->recordBlocks(cntrl_id, + m_cache_recorder); + } + } + + // Aggergate the trace entries together into a single array + raw_data = new uint8_t[4096]; + uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, + 4096); + string cache_trace_file = name() + ".cache.gz"; + writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); + + SERIALIZE_SCALAR(cache_trace_file); + SERIALIZE_SCALAR(cache_trace_size); + m_cooldown_enabled = false; +} + +void +RubySystem::readCompressedTrace(string filename, uint8*& raw_data, + uint64& uncompressed_trace_size) +{ + // Read the trace file + gzFile compressedTrace; + + // trace file + int fd = open(filename.c_str(), O_RDONLY); + if (fd < 0) { + perror("open"); + fatal("Unable to open trace file %s", filename); + } + + compressedTrace = gzdopen(fd, "rb"); + if (compressedTrace == NULL) { + fatal("Insufficient memory to allocate compression state for %s\n", + filename); + } + + raw_data = new uint8_t[uncompressed_trace_size]; + if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < + uncompressed_trace_size) { + fatal("Unable to read complete trace from file %s\n", filename); + } + + if (gzclose(compressedTrace)) { + fatal("Failed to close cache trace file '%s'\n", filename); + } } void @@ -181,18 +318,95 @@ RubySystem::unserialize(Checkpoint *cp, const string §ion) // value of curTick() // clearStats(); + uint8* uncompressed_trace = NULL; + + if (m_mem_vec_ptr != NULL) { + string memory_trace_file; + uint64 memory_trace_size = 0; + + UNSERIALIZE_SCALAR(memory_trace_file); + UNSERIALIZE_SCALAR(memory_trace_size); + memory_trace_file = cp->cptDir + "/" + memory_trace_file; + + readCompressedTrace(memory_trace_file, uncompressed_trace, + memory_trace_size); + m_mem_vec_ptr->populatePages(uncompressed_trace); + + delete uncompressed_trace; + uncompressed_trace = NULL; + } + + string cache_trace_file; + uint64 cache_trace_size = 0; + + UNSERIALIZE_SCALAR(cache_trace_file); + UNSERIALIZE_SCALAR(cache_trace_size); + cache_trace_file = cp->cptDir + "/" + cache_trace_file; + + readCompressedTrace(cache_trace_file, uncompressed_trace, + cache_trace_size); + m_warmup_enabled = true; + + vector<Sequencer*> sequencer_map; + Sequencer* t = NULL; + for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { + sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); + if(t == NULL) t = sequencer_map[cntrl]; + } + + assert(t != NULL); + + for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { + if (sequencer_map[cntrl] == NULL) { + sequencer_map[cntrl] = t; + } + } + + m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, + sequencer_map); } void -RubySystem::clearStats() const +RubySystem::startup() { - m_profiler_ptr->clearStats(); - m_network_ptr->clearStats(); + if (m_warmup_enabled) { + // save the current tick value + Tick curtick_original = curTick(); 
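    // The warm-up replay here mirrors the cooldown sequence in serialize()
    // above: the main event queue head is parked with replaceHead(NULL),
    // curTick() is rewound to 0, and a RubyEvent seeds the first fetch from
    // the CacheRecorder. simulate() then drives the recorded fetches to
    // completion, because while m_warmup_enabled is set each
    // Sequencer::hitCallback() enqueues the next fetch instead of replying
    // to a CPU port. The saved queue head and tick are restored afterwards,
    // so the detour is invisible to the restored simulation.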
+ // save the event queue head + Event* eventq_head = eventq->replaceHead(NULL); + // set curTick to 0 + curTick(0); + + // Schedule an event to start cache warmup + RubyEvent* e = new RubyEvent(this); + schedule(e,curTick()); + simulate(); + + delete m_cache_recorder; + m_cache_recorder = NULL; + m_warmup_enabled = false; + // Restore eventq head + eventq_head = eventq->replaceHead(eventq_head); + // Restore curTick + curTick(curtick_original); + } +} + +void +RubySystem::RubyEvent::process() +{ + if (ruby_system->m_warmup_enabled) { + ruby_system->m_cache_recorder->enqueueNextFetchRequest(); + } else if (ruby_system->m_cooldown_enabled) { + ruby_system->m_cache_recorder->enqueueNextFlushRequest(); + } } void -RubySystem::recordCacheContents(CacheRecorder& tr) const +RubySystem::clearStats() const { + m_profiler_ptr->clearStats(); + m_network_ptr->clearStats(); } #ifdef CHECK_COHERENCE diff --git a/src/mem/ruby/system/System.hh b/src/mem/ruby/system/System.hh index 704cc3b27..461abffe2 100644 --- a/src/mem/ruby/system/System.hh +++ b/src/mem/ruby/system/System.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,21 +38,34 @@ #include "base/callback.hh" #include "mem/ruby/common/Global.hh" #include "mem/ruby/eventqueue/RubyEventQueue.hh" -#include "mem/ruby/system/RubyPort.hh" +#include "mem/ruby/recorder/CacheRecorder.hh" #include "mem/ruby/slicc_interface/AbstractController.hh" +#include "mem/ruby/system/MemoryVector.hh" +#include "mem/ruby/system/SparseMemory.hh" #include "params/RubySystem.hh" #include "sim/sim_object.hh" -class AbstractController; -class CacheRecorder; -class MemoryVector; class Network; class Profiler; -class Tracer; class RubySystem : public SimObject { public: + class RubyEvent : public Event + { + public: + RubyEvent(RubySystem* _ruby_system) + { + ruby_system = _ruby_system; + } + private: + void process(); + + RubySystem* ruby_system; + }; + + friend class RubyEvent; + typedef RubySystemParams Params; RubySystem(const Params *p); ~RubySystem(); @@ -86,13 +99,6 @@ class RubySystem : public SimObject return m_profiler_ptr; } - static Tracer* - getTracer() - { - assert(m_tracer_ptr != NULL); - return m_tracer_ptr; - } - static MemoryVector* getMemoryVector() { @@ -100,7 +106,6 @@ class RubySystem : public SimObject return m_mem_vec_ptr; } - void recordCacheContents(CacheRecorder& tr) const; static void printConfig(std::ostream& out); static void printStats(std::ostream& out); void clearStats() const; @@ -114,13 +119,15 @@ class RubySystem : public SimObject void print(std::ostream& out) const; - virtual void serialize(std::ostream &os); - virtual void unserialize(Checkpoint *cp, const std::string §ion); + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string §ion); + void process(); + void startup(); void registerNetwork(Network*); void registerProfiler(Profiler*); - void registerTracer(Tracer*); void registerAbstractController(AbstractController*); + void registerSparseMemory(SparseMemory*); private: // Private copy constructor and assignment operator @@ -130,6 +137,11 @@ class RubySystem : public SimObject void init(); static void printSystemConfig(std::ostream& out); + void readCompressedTrace(std::string filename, + uint8*& raw_data, + uint64& uncompressed_trace_size); + void writeCompressedTrace(uint8* raw_data, std::string file, + uint64 
uncompressed_trace_size); private: // configuration parameters @@ -140,14 +152,16 @@ class RubySystem : public SimObject static int m_block_size_bits; static uint64 m_memory_size_bytes; static int m_memory_size_bits; - static Network* m_network_ptr; public: static Profiler* m_profiler_ptr; - static Tracer* m_tracer_ptr; static MemoryVector* m_mem_vec_ptr; std::vector<AbstractController*> m_abs_cntrl_vec; + bool m_warmup_enabled; + bool m_cooldown_enabled; + CacheRecorder* m_cache_recorder; + std::vector<SparseMemory*> m_sparse_memory_vector; }; inline std::ostream& diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index a3ea1ca8a..85df3f9e8 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -264,6 +264,8 @@ public: void clearStats(); void blockOnQueue(Address addr, MessageBuffer* port); void unblock(Address addr); + void recordCacheTrace(int cntrl, CacheRecorder* tr); + Sequencer* getSequencer() const; private: ''') @@ -674,6 +676,12 @@ $vid->setDescription("[Version " + to_string(m_version) + ", ${ident}, name=${{v else: mq_ident = "NULL" + seq_ident = "NULL" + for param in self.config_parameters: + if param.name == "sequencer": + assert(param.pointer) + seq_ident = "m_%s_ptr" % param.name + code(''' int $c_ident::getNumControllers() @@ -687,6 +695,12 @@ $c_ident::getMandatoryQueue() const return $mq_ident; } +Sequencer* +$c_ident::getSequencer() const +{ + return $seq_ident; +} + const int & $c_ident::getVersion() const { @@ -875,6 +889,23 @@ $c_ident::unset_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr) code(''' +void +$c_ident::recordCacheTrace(int cntrl, CacheRecorder* tr) +{ +''') + # + # Record cache contents for all associated caches. + # + code.indent() + for param in self.config_parameters: + if param.type_ast.type.ident == "CacheMemory": + assert(param.pointer) + code('m_${{param.ident}}_ptr->recordCacheContents(cntrl, tr);') + + code.dedent() + code(''' +} + // Actions ''') if self.TBEType != None and self.EntryType != None: diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index 60693758c..47ca32af2 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -874,29 +874,62 @@ class SimObject(object): if hasattr(self, 'type'): print >>ini_file, 'type=%s' % self.type - child_names = self._children.keys() - child_names.sort() - if len(child_names): + if len(self._children.keys()): print >>ini_file, 'children=%s' % \ - ' '.join(self._children[n].get_name() for n in child_names) + ' '.join(self._children[n].get_name() \ + for n in sorted(self._children.keys())) - param_names = self._params.keys() - param_names.sort() - for param in param_names: + for param in sorted(self._params.keys()): value = self._values.get(param) if value != None: print >>ini_file, '%s=%s' % (param, self._values[param].ini_str()) - port_names = self._ports.keys() - port_names.sort() - for port_name in port_names: + for port_name in sorted(self._ports.keys()): port = self._port_refs.get(port_name, None) if port != None: print >>ini_file, '%s=%s' % (port_name, port.ini_str()) print >>ini_file # blank line between objects + # generate a tree of dictionaries expressing all the parameters in the + # instantiated system for use by scripts that want to do power, thermal + # visualization, and other similar tasks + def get_config_as_dict(self): + d = attrdict() + if hasattr(self, 'type'): + d.type = self.type + if hasattr(self, 'cxx_class'): + d.cxx_class = self.cxx_class + + for param in 
sorted(self._params.keys()): + value = self._values.get(param) + try: + # Use native type for those supported by JSON and + # strings for everything else. skipkeys=True seems + # to not work as well as one would hope + if type(self._values[param].value) in \ + [str, unicode, int, long, float, bool, None]: + d[param] = self._values[param].value + else: + d[param] = str(self._values[param]) + + except AttributeError: + pass + + for n in sorted(self._children.keys()): + d[self._children[n].get_name()] = self._children[n].get_config_as_dict() + + for port_name in sorted(self._ports.keys()): + port = self._port_refs.get(port_name, None) + if port != None: + # Might want to actually make this reference the object + # in the future, although execing the string problem would + # get some of the way there + d[port_name] = port.ini_str() + + return d + def getCCParams(self): if self._ccParams: return self._ccParams diff --git a/src/python/m5/main.py b/src/python/m5/main.py index 58de62cc3..910cb6ce6 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -87,6 +87,8 @@ def parse_options(): group("Configuration Options") option("--dump-config", metavar="FILE", default="config.ini", help="Dump configuration output file [Default: %default]") + option("--json-config", metavar="FILE", default="config.json", + help="Create JSON output of the configuration [Default: %default]") # Debugging options group("Debugging Options") @@ -121,7 +123,6 @@ def parse_options(): execfile(options_file, scope) arguments = options.parse_args() - return options,arguments def interact(scope): diff --git a/src/python/m5/params.py b/src/python/m5/params.py index ee3678dc9..05fe9b774 100644 --- a/src/python/m5/params.py +++ b/src/python/m5/params.py @@ -228,6 +228,12 @@ class SimObjectVector(VectorParamValue): for obj in v.descendants(): yield obj + def get_config_as_dict(self): + a = [] + for v in self: + a.append(v.get_config_as_dict()) + return a + class VectorParamDesc(ParamDesc): # Convert assigned value to appropriate type. If the RHS is not a # list or tuple, it generates a single-element list. @@ -256,6 +262,9 @@ class VectorParamDesc(ParamDesc): self.ptype.cxx_predecls(code) code('%}') code() + # Make sure the SWIGPY_SLICE_ARG is defined through this inclusion + code('%include "std_container.i"') + code() self.ptype.swig_predecls(code) code() code('%include "std_vector.i"') @@ -961,6 +970,9 @@ class Time(ParamValue): def ini_str(self): return str(self) + def get_config_as_dict(self): + return str(self) + # Enumerated types are a little more complex. The user specifies the # type as Enum(foo) where foo is either a list or dictionary of # alternatives (typically strings, but not necessarily so). 
(In the diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py index b4ccf82c1..38129592c 100644 --- a/src/python/m5/simulate.py +++ b/src/python/m5/simulate.py @@ -40,6 +40,7 @@ import SimObject import ticks import objects from util import fatal +from util import attrdict # define a MaxTick parameter MaxTick = 2**63 - 1 @@ -71,6 +72,17 @@ def instantiate(ckpt_dir=None): obj.print_ini(ini_file) ini_file.close() + if options.json_config: + try: + import json + json_file = file(os.path.join(options.outdir, options.json_config), 'w') + d = root.get_config_as_dict() + json.dump(d, json_file, indent=4) + json_file.close() + except ImportError: + pass + + # Initialize the global statistics stats.initSimStats() diff --git a/src/sim/System.py b/src/sim/System.py index db214abc0..39505c01a 100644 --- a/src/sim/System.py +++ b/src/sim/System.py @@ -54,8 +54,8 @@ class System(SimObject): physmem = Param.PhysicalMemory("Physical Memory") mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in") memories = VectorParam.PhysicalMemory(Self.all, "All memories is the system") - work_item_id = Param.Int(-1, "specific work item id") + num_work_ids = Param.Int(16, "Number of distinct work item types") work_begin_cpu_id_exit = Param.Int(-1, "work started on specific id, now exit simulation") work_begin_ckpt_count = Param.Counter(0, diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index 749afeb3b..d5bc8fa0e 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -417,6 +417,7 @@ workbegin(ThreadContext *tc, uint64_t workid, uint64_t threadid) tc->getCpuPtr()->workItemBegin(); System *sys = tc->getSystemPtr(); const System::Params *params = sys->params(); + sys->workItemBegin(threadid, workid); DPRINTF(WorkItems, "Work Begin workid: %d, threadid %d\n", workid, threadid); @@ -473,6 +474,7 @@ workend(ThreadContext *tc, uint64_t workid, uint64_t threadid) tc->getCpuPtr()->workItemEnd(); System *sys = tc->getSystemPtr(); const System::Params *params = sys->params(); + sys->workItemEnd(threadid, workid); DPRINTF(WorkItems, "Work End workid: %d, threadid %d\n", workid, threadid); diff --git a/src/sim/system.cc b/src/sim/system.cc index 3051cb64b..d3bee1ad1 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2006 The Regents of The University of Michigan * Copyright (c) 2011 Regents of the University of California * All rights reserved. 
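The pseudo-instruction hooks above and the System changes in the hunks below time each work item: workItemBegin() stamps curTick() keyed by the (threadid, workid) pair, and workItemEnd() samples the elapsed ticks into a per-workid histogram registered in regStats(). A minimal sketch of that bookkeeping pattern, with a hypothetical currentTick()/recordSample() standing in for gem5's curTick() and Stats::Histogram, might be:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <utility>

    typedef uint64_t Tick;

    static Tick g_now = 0;                       // toy clock for the example
    static Tick currentTick() { return g_now; }

    // Stand-in for sampling a per-workid Stats::Histogram.
    static void recordSample(uint32_t workid, Tick elapsed)
    {
        std::printf("work item type %u took %llu ticks\n",
                    workid, (unsigned long long)elapsed);
    }

    static std::map<std::pair<uint32_t, uint32_t>, Tick> lastStarted;

    void workItemBegin(uint32_t tid, uint32_t workid)
    {
        // Remember when this (thread, work item) pair started.
        lastStarted[std::make_pair(tid, workid)] = currentTick();
    }

    void workItemEnd(uint32_t tid, uint32_t workid)
    {
        std::map<std::pair<uint32_t, uint32_t>, Tick>::iterator it =
            lastStarted.find(std::make_pair(tid, workid));
        if (it == lastStarted.end())
            return;                              // end without a begin: ignore

        recordSample(workid, currentTick() - it->second);
        lastStarted.erase(it);
    }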
@@ -43,6 +55,7 @@ #include "config/the_isa.hh" #include "cpu/thread_context.hh" #include "debug/Loader.hh" +#include "debug/WorkItems.hh" #include "kern/kernel_stats.hh" #include "mem/mem_object.hh" #include "mem/physical.hh" @@ -68,8 +81,9 @@ System::System(Params *p) memoryMode(p->mem_mode), workItemsBegin(0), workItemsEnd(0), + numWorkIds(p->num_work_ids), _params(p), - totalNumInsts(0), + totalNumInsts(0), instEventQueue("system instruction-based event queue") { // add self to global system list @@ -158,6 +172,9 @@ System::~System() { delete kernelSymtab; delete kernel; + + for (uint32_t j = 0; j < numWorkIds; j++) + delete workItemStats[j]; } void @@ -320,6 +337,37 @@ System::unserialize(Checkpoint *cp, const string §ion) } void +System::regStats() +{ + for (uint32_t j = 0; j < numWorkIds ; j++) { + workItemStats[j] = new Stats::Histogram(); + stringstream namestr; + ccprintf(namestr, "work_item_type%d", j); + workItemStats[j]->init(20) + .name(name() + "." + namestr.str()) + .desc("Run time stat for" + namestr.str()) + .prereq(*workItemStats[j]); + } +} + +void +System::workItemEnd(uint32_t tid, uint32_t workid) +{ + std::pair<uint32_t,uint32_t> p(tid, workid); + if (!lastWorkItemStarted.count(p)) + return; + + Tick samp = curTick() - lastWorkItemStarted[p]; + DPRINTF(WorkItems, "Work item end: %d\t%d\t%lld\n", tid, workid, samp); + + if (workid >= numWorkIds) + fatal("Got workid greater than specified in system configuration\n"); + + workItemStats[workid]->sample(samp); + lastWorkItemStarted.erase(p); +} + +void System::printSystems() { vector<System *>::iterator i = systemList.begin(); diff --git a/src/sim/system.hh b/src/sim/system.hh index 00d8360e0..d675eb727 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -157,14 +157,16 @@ class System : public SimObject Enums::MemoryMode memoryMode; uint64_t workItemsBegin; uint64_t workItemsEnd; + uint32_t numWorkIds; std::vector<bool> activeCpus; public: + virtual void regStats(); /** * Called by pseudo_inst to track the number of work items started by this * system. */ - uint64_t + uint64_t incWorkItemsBegin() { return ++workItemsBegin; @@ -198,6 +200,14 @@ class System : public SimObject return count; } + inline void workItemBegin(uint32_t tid, uint32_t workid) + { + std::pair<uint32_t,uint32_t> p(tid, workid); + lastWorkItemStarted[p] = curTick(); + } + + void workItemEnd(uint32_t tid, uint32_t workid); + /** * Fix up an address used to match PCs for hooking simulator * events on to target function executions. See comment in @@ -285,6 +295,8 @@ class System : public SimObject public: Counter totalNumInsts; EventQueue instEventQueue; + std::map<std::pair<uint32_t,uint32_t>, Tick> lastWorkItemStarted; + std::map<uint32_t, Stats::Histogram*> workItemStats; //////////////////////////////////////////// // |