diff options
36 files changed, 845 insertions, 803 deletions
diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 3a705258d..0ec4c9861 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -324,12 +324,7 @@ InOrderCPU::InOrderCPU(Params *params) tid, asid[tid]); - dummyReq[tid] = new ResourceRequest(resPool->getResource(0), - dummyInst[tid], - 0, - 0, - 0, - 0); + dummyReq[tid] = new ResourceRequest(resPool->getResource(0)); } dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0); @@ -361,6 +356,17 @@ InOrderCPU::InOrderCPU(Params *params) InOrderCPU::~InOrderCPU() { delete resPool; + + std::map<SkedID, ThePipeline::RSkedPtr>::iterator sked_it = + skedCache.begin(); + std::map<SkedID, ThePipeline::RSkedPtr>::iterator sked_end = + skedCache.end(); + + while (sked_it != sked_end) { + delete (*sked_it).second; + sked_it++; + } + skedCache.clear(); } std::map<InOrderCPU::SkedID, ThePipeline::RSkedPtr> InOrderCPU::skedCache; @@ -459,13 +465,13 @@ InOrderCPU::createBackEndSked(DynInstPtr inst) W.needs(ExecUnit, ExecutionUnit::ExecuteInst); } + W.needs(Grad, GraduationUnit::GraduateInst); + for (int idx=0; idx < inst->numDestRegs(); idx++) { W.needs(RegManager, UseDefUnit::WriteDestReg, idx); } - W.needs(Grad, GraduationUnit::GraduateInst); - - // Insert Front Schedule into our cache of + // Insert Back Schedule into our cache of // resource schedules addToSkedCache(inst, res_sked); @@ -636,8 +642,7 @@ InOrderCPU::tick() } activityRec.advance(); - // Any squashed requests, events, or insts then remove them now - cleanUpRemovedReqs(); + // Any squashed events, or insts then remove them now cleanUpRemovedEvents(); cleanUpRemovedInsts(); @@ -1436,28 +1441,6 @@ InOrderCPU::cleanUpRemovedInsts() } void -InOrderCPU::cleanUpRemovedReqs() -{ - while (!reqRemoveList.empty()) { - ResourceRequest *res_req = reqRemoveList.front(); - - DPRINTF(RefCount, "[tid:%i] [sn:%lli]: Removing Request " - "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n", - res_req->inst->threadNumber, - res_req->inst->seqNum, - 
res_req->getStageNum(), - res_req->res->name(), - (res_req->isCompleted()) ? - res_req->getComplSlot() : res_req->getSlot(), - res_req->isCompleted()); - - reqRemoveList.pop(); - - delete res_req; - } -} - -void InOrderCPU::cleanUpRemovedEvents() { while (!cpuEventRemoveList.empty()) { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 2a5c815e1..2fa6bdc59 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -315,6 +315,7 @@ class InOrderCPU : public BaseCPU void addToSkedCache(DynInstPtr inst, ThePipeline::RSkedPtr inst_sked) { SkedID sked_id = genSkedID(inst); + assert(skedCache.find(sked_id) == skedCache.end()); skedCache[sked_id] = inst_sked; } @@ -593,10 +594,7 @@ class InOrderCPU : public BaseCPU /** Cleans up all instructions on the instruction remove list. */ void cleanUpRemovedInsts(); - /** Cleans up all instructions on the request remove list. */ - void cleanUpRemovedReqs(); - - /** Cleans up all instructions on the CPU event remove list. */ + /** Cleans up all events on the CPU event remove list. */ void cleanUpRemovedEvents(); /** Debug function to print all instructions on the list. */ @@ -626,11 +624,6 @@ class InOrderCPU : public BaseCPU */ std::queue<ListIt> removeList; - /** List of all the resource requests that will be removed at the end - * of this cycle. - */ - std::queue<ResourceRequest*> reqRemoveList; - /** List of all the cpu event requests that will be removed at the end of * the current cycle. 
*/ diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index bc31a8537..b267ac00e 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -44,12 +44,17 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num) stageBufferMax(params->stageWidth), prevStageValid(false), nextStageValid(false), idle(false) { - switchedOutBuffer.resize(ThePipeline::MaxThreads); - switchedOutValid.resize(ThePipeline::MaxThreads); - init(params); } +PipelineStage::~PipelineStage() +{ + for(ThreadID tid = 0; tid < numThreads; tid++) { + skidBuffer[tid].clear(); + stalls[tid].resources.clear(); + } +} + void PipelineStage::init(Params *params) { @@ -66,6 +71,12 @@ PipelineStage::init(Params *params) else lastStallingStage[tid] = NumStages - 1; } + + if ((InOrderCPU::ThreadModel) params->threadModel == + InOrderCPU::SwitchOnCacheMiss) { + switchedOutBuffer.resize(ThePipeline::MaxThreads); + switchedOutValid.resize(ThePipeline::MaxThreads); + } } @@ -190,9 +201,6 @@ PipelineStage::takeOverFrom() stalls[tid].resources.clear(); - while (!insts[tid].empty()) - insts[tid].pop(); - skidBuffer[tid].clear(); } wroteToTimeBuffer = false; @@ -938,8 +946,11 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); ResReqPtr req = cpu->resPool->request(res_num, inst); + assert(req->valid); - if (req->isCompleted()) { + bool req_completed = req->isCompleted(); + bool done_in_pipeline = false; + if (req_completed) { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s " "completed.\n", tid, inst->seqNum, cpu->resPool->name(res_num)); @@ -948,11 +959,10 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) req->stagePasses++; - bool done_in_pipeline = inst->finishSkedEntry(); + done_in_pipeline = inst->finishSkedEntry(); if (done_in_pipeline) { DPRINTF(InOrderDynInst, "[tid:%i]: [sn:%i] finished " "in pipeline.\n", tid, inst->seqNum); - 
break; } } else { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." @@ -989,21 +999,18 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) "thread due to cache miss.\n"); cpu->activateNextReadyContext(); } - - // Mark request for deletion - // if it isnt currently being used by a resource - if (!req->hasSlot()) { - DPRINTF(InOrderStage, "[sn:%i] Deleting Request, has no " - "slot in resource.\n", inst->seqNum); - - cpu->reqRemoveList.push(req); - } else { - DPRINTF(InOrderStage, "[sn:%i] Ignoring Request Deletion, " - "in resource [slot:%i].\n", inst->seqNum, - req->getSlot()); - } - - + } + + // If this request no longer needs to take up bandwidth in the + // resource, go ahead and free that bandwidth up + if (req->doneInResource) { + req->freeSlot(); + } + + // No longer need to process this instruction if the last + // request it had wasn't completed or if there is nothing + // else for it to do in the pipeline + if (done_in_pipeline || !req_completed) { break; } diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index dfa88de87..ec70fefc5 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -91,10 +91,7 @@ class PipelineStage public: PipelineStage(Params *params, unsigned stage_num); - /** MUST use init() function if this constructor is used. */ - PipelineStage() { } - - virtual ~PipelineStage() { } + virtual ~PipelineStage(); /** PipelineStage initialization. */ void init(Params *params); @@ -268,16 +265,6 @@ class PipelineStage */ unsigned instsProcessed; - /** Queue of all instructions coming from previous stage on this cycle. */ - std::queue<DynInstPtr> insts[ThePipeline::MaxThreads]; - - /** Queue of instructions that are finished processing and ready to go - * next stage. This is used to prevent from processing an instrution more - * than once on any stage. 
NOTE: It is up to the PROGRAMMER must manage - * this as a queue - */ - std::list<DynInstPtr> instsToNextStage; - /** Skid buffer between previous stage and this one. */ std::list<DynInstPtr> skidBuffer[ThePipeline::MaxThreads]; diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index 75f01adb1..573c0200a 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -51,7 +51,7 @@ class ResourceSked; namespace ThePipeline { // Pipeline Constants const unsigned NumStages = 5; - const ThreadID MaxThreads = 8; + const ThreadID MaxThreads = 1; const unsigned BackEndStartStage = 2; // List of Resources The Pipeline Uses diff --git a/src/cpu/inorder/reg_dep_map.cc b/src/cpu/inorder/reg_dep_map.cc index 98a0727a9..48820b50e 100644 --- a/src/cpu/inorder/reg_dep_map.cc +++ b/src/cpu/inorder/reg_dep_map.cc @@ -45,6 +45,14 @@ RegDepMap::RegDepMap(int size) regMap.resize(size); } +RegDepMap::~RegDepMap() +{ + for (int i = 0; i < regMap.size(); i++) { + regMap[i].clear(); + } + regMap.clear(); +} + string RegDepMap::name() { diff --git a/src/cpu/inorder/reg_dep_map.hh b/src/cpu/inorder/reg_dep_map.hh index fa4fe45f3..047e4d129 100644 --- a/src/cpu/inorder/reg_dep_map.hh +++ b/src/cpu/inorder/reg_dep_map.hh @@ -48,7 +48,7 @@ class RegDepMap public: RegDepMap(int size = TheISA::TotalNumRegs); - ~RegDepMap() { } + ~RegDepMap(); std::string name(); diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 72b45dda8..24211532e 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -31,6 +31,8 @@ #include <vector> #include <list> + +#include "base/str.hh" #include "cpu/inorder/resource.hh" #include "cpu/inorder/cpu.hh" using namespace std; @@ -40,22 +42,42 @@ Resource::Resource(string res_name, int res_id, int res_width, : resName(res_name), id(res_id), width(res_width), latency(res_latency), cpu(_cpu) { + reqs.resize(width); + // Use to deny a instruction a resource. 
- deniedReq = new ResourceRequest(this, NULL, 0, 0, 0, 0); + deniedReq = new ResourceRequest(this); + deniedReq->valid = true; } Resource::~Resource() { - delete [] resourceEvent; - delete deniedReq; + if (resourceEvent) { + delete [] resourceEvent; + } + + delete deniedReq; + + for (int i = 0; i < width; i++) { + delete reqs[i]; + } } void Resource::init() { - // Set Up Resource Events to Appropriate Resource BandWidth - resourceEvent = new ResourceEvent[width]; + // If the resource has a zero-cycle (no latency) + // function, then no reason to have events + // that will process them for the right tick + if (latency > 0) { + resourceEvent = new ResourceEvent[width]; + } else { + resourceEvent = NULL; + } + + for (int i = 0; i < width; i++) { + reqs[i] = new ResourceRequest(this); + } initSlots(); } @@ -66,7 +88,10 @@ Resource::initSlots() // Add available slot numbers for resource for (int slot_idx = 0; slot_idx < width; slot_idx++) { availSlots.push_back(slot_idx); - resourceEvent[slot_idx].init(this, slot_idx); + + if (resourceEvent) { + resourceEvent[slot_idx].init(this, slot_idx); + } } } @@ -91,42 +116,34 @@ Resource::slotsInUse() void Resource::freeSlot(int slot_idx) { + DPRINTF(Resource, "Deallocating [slot:%i].\n", + slot_idx); + // Put slot number on this resource's free list availSlots.push_back(slot_idx); - // Erase Request Pointer From Request Map - std::map<int, ResReqPtr>::iterator req_it = reqMap.find(slot_idx); - - assert(req_it != reqMap.end()); - reqMap.erase(req_it); - + // Invalidate Request & Reset its flags + reqs[slot_idx]->clearRequest(); } -// TODO: More efficiently search for instruction's slot within -// resource. 
int Resource::findSlot(DynInstPtr inst) { - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); - int slot_num = -1; - while (map_it != map_end) { - if ((*map_it).second->getInst()->seqNum == - inst->seqNum) { - slot_num = (*map_it).second->getSlot(); + for (int i = 0; i < width; i++) { + if (reqs[i]->valid && + reqs[i]->getInst()->seqNum == inst->seqNum) { + slot_num = reqs[i]->getSlot(); } - map_it++; } - return slot_num; } int Resource::getSlot(DynInstPtr inst) { - int slot_num; + int slot_num = -1; if (slotsAvail() != 0) { slot_num = availSlots[0]; @@ -136,24 +153,6 @@ Resource::getSlot(DynInstPtr inst) assert(slot_num == *vect_it); availSlots.erase(vect_it); - } else { - DPRINTF(Resource, "[tid:%i]: No slots in resource " - "available to service [sn:%i].\n", inst->readTid(), - inst->seqNum); - slot_num = -1; - - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - if ((*map_it).second) { - DPRINTF(Resource, "Currently Serving request from: " - "[tid:%i] [sn:%i].\n", - (*map_it).second->getInst()->readTid(), - (*map_it).second->getInst()->seqNum); - } - map_it++; - } } return slot_num; @@ -183,6 +182,9 @@ Resource::request(DynInstPtr inst) slot_num = getSlot(inst); if (slot_num != -1) { + DPRINTF(Resource, "Allocating [slot:%i] for [tid:%i]: [sn:%i]\n", + slot_num, inst->readTid(), inst->seqNum); + // Get Stage # from Schedule Entry stage_num = inst->curSkedEntry->stageNum; unsigned cmd = inst->curSkedEntry->cmd; @@ -200,10 +202,12 @@ Resource::request(DynInstPtr inst) inst->readTid()); } - reqMap[slot_num] = inst_req; - try_request = true; + } else { + DPRINTF(Resource, "No slot available for [tid:%i]: [sn:%i]\n", + inst->readTid(), inst->seqNum); } + } if (try_request) { @@ -236,32 +240,21 @@ ResReqPtr Resource::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { - return new 
ResourceRequest(this, inst, stage_num, id, slot_num, - cmd); + reqs[slot_num]->setRequest(inst, stage_num, id, slot_num, cmd); + return reqs[slot_num]; } ResReqPtr Resource::findRequest(DynInstPtr inst) { - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); - - bool found = false; - ResReqPtr req = NULL; - - while (map_it != map_end) { - if ((*map_it).second && - (*map_it).second->getInst() == inst) { - req = (*map_it).second; - //return (*map_it).second; - assert(found == false); - found = true; + for (int i = 0; i < width; i++) { + if (reqs[i]->valid && + reqs[i]->getInst() == inst) { + return reqs[i]; } - map_it++; } - return req; - //return NULL; + return NULL; } void @@ -275,9 +268,9 @@ void Resource::execute(int slot_idx) { DPRINTF(Resource, "[tid:%i]: Executing %s resource.\n", - reqMap[slot_idx]->getTid(), name()); - reqMap[slot_idx]->setCompleted(true); - reqMap[slot_idx]->done(); + reqs[slot_idx]->getTid(), name()); + reqs[slot_idx]->setCompleted(true); + reqs[slot_idx]->done(); } void @@ -293,15 +286,10 @@ void Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) { - std::vector<int> slot_remove_list; + for (int i = 0; i < width; i++) { + ResReqPtr req_ptr = reqs[i]; - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - ResReqPtr req_ptr = (*map_it).second; - - if (req_ptr && + if (req_ptr->valid && req_ptr->getInst()->readTid() == tid && req_ptr->getInst()->seqNum > squash_seq_num) { @@ -316,19 +304,8 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, if (resourceEvent[req_slot_num].scheduled()) unscheduleEvent(req_slot_num); - // Mark request for later removal - cpu->reqRemoveList.push(req_ptr); - - // Mark slot for removal from resource - slot_remove_list.push_back(req_ptr->getSlot()); + freeSlot(req_slot_num); } - - map_it++; - } - - // 
Now Delete Slot Entry from Req. Map - for (int i = 0; i < slot_remove_list.size(); i++) { - freeSlot(slot_remove_list[i]); } } @@ -350,10 +327,8 @@ Resource::ticks(int num_cycles) void Resource::scheduleExecution(int slot_num) { - int res_latency = getLatency(slot_num); - - if (res_latency >= 1) { - scheduleEvent(slot_num, res_latency); + if (latency >= 1) { + scheduleEvent(slot_num, latency); } else { execute(slot_num); } @@ -363,8 +338,8 @@ void Resource::scheduleEvent(int slot_idx, int delay) { DPRINTF(Resource, "[tid:%i]: Scheduling event for [sn:%i] on tick %i.\n", - reqMap[slot_idx]->inst->readTid(), - reqMap[slot_idx]->inst->seqNum, + reqs[slot_idx]->inst->readTid(), + reqs[slot_idx]->inst->seqNum, cpu->ticks(delay) + curTick()); resourceEvent[slot_idx].scheduleEvent(delay); } @@ -401,32 +376,11 @@ int ResourceRequest::resReqID = 0; int ResourceRequest::maxReqCount = 0; -ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, - int stage_num, int res_idx, int slot_num, - unsigned _cmd) - : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), - resIdx(res_idx), slotNum(slot_num), completed(false), - squashed(false), processing(false), memStall(false) +ResourceRequest::ResourceRequest(Resource *_res) + : res(_res), inst(NULL), stagePasses(0), valid(false), doneInResource(false), + completed(false), squashed(false), processing(false), + memStall(false) { -#ifdef DEBUG - reqID = resReqID++; - res->cpu->resReqCount++; - DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, - res->cpu->resReqCount); - - if (res->cpu->resReqCount > 100) { - fatal("Too many undeleted resource requests. Memory leak?\n"); - } - - if (res->cpu->resReqCount > maxReqCount) { - maxReqCount = res->cpu->resReqCount; - } - -#endif - - stagePasses = 0; - complSlotNum = -1; - } ResourceRequest::~ResourceRequest() @@ -436,6 +390,46 @@ ResourceRequest::~ResourceRequest() DPRINTF(ResReqCount, "Res. Req %i deleted. 
resReqCount=%i.\n", reqID, res->cpu->resReqCount); #endif + inst = NULL; +} + +std::string +ResourceRequest::name() +{ + return res->name() + "." + to_string(slotNum); +} + +void +ResourceRequest::setRequest(DynInstPtr _inst, int stage_num, + int res_idx, int slot_num, unsigned _cmd) +{ + valid = true; + inst = _inst; + stageNum = stage_num; + resIdx = res_idx; + slotNum = slot_num; + cmd = _cmd; +} + +void +ResourceRequest::clearRequest() +{ + valid = false; + inst = NULL; + stagePasses = 0; + completed = false; + doneInResource = false; + squashed = false; + memStall = false; +} + +void +ResourceRequest::freeSlot() +{ + assert(res); + + // Free Slot So Another Instruction Can Use This Resource + res->freeSlot(slotNum); } void @@ -447,25 +441,7 @@ ResourceRequest::done(bool completed) setCompleted(completed); - // Used for debugging purposes - if (completed) { - complSlotNum = slotNum; - - // Would like to start a convention such as all requests deleted in - // resources/pipeline - // but a little more complex then it seems... - // For now, all COMPLETED requests deleted in resource.. 
- // all FAILED requests deleted in pipeline stage - // *all SQUASHED requests deleted in resource - res->cpu->reqRemoveList.push(res->reqMap[slotNum]); - } - - // Free Slot So Another Instruction Can Use This Resource - res->freeSlot(slotNum); - - // change slot # to -1, since we check slotNum to see if request is - // still valid - slotNum = -1; + doneInResource = true; } ResourceEvent::ResourceEvent() @@ -493,7 +469,8 @@ ResourceEvent::process() const char * ResourceEvent::description() { - string desc = resource->name() + " event"; + string desc = resource->name() + "-event:slot[" + to_string(slotIdx) + + "]"; return desc.c_str(); } diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index bd9ec48ca..7899a215f 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -221,8 +221,10 @@ class Resource { const int latency; public: - /** Mapping of slot-numbers to the resource-request pointers */ - std::map<int, ResReqPtr> reqMap; + /** List of all Requests the Resource is Servicing. Each request + represents part of the resource's bandwidth + */ + std::vector<ResReqPtr> reqs; /** A list of all the available execution slots for this resource. * This correlates with the actual resource event idx. @@ -245,7 +247,7 @@ class Resource { class ResourceEvent : public Event { public: - /** Pointer to the CPU. */ + /** Pointer to the Resource this is an event for */ Resource *resource; @@ -297,21 +299,29 @@ class ResourceRequest static int maxReqCount; + friend class Resource; + public: - ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num, - int res_idx, int slot_num, unsigned _cmd); + ResourceRequest(Resource *_res); virtual ~ResourceRequest(); + + std::string name(); int reqID; + virtual void setRequest(DynInstPtr _inst, int stage_num, + int res_idx, int slot_num, unsigned _cmd); + + virtual void clearRequest(); + /** Acknowledge that this is a request is done and remove * from resource. 
*/ void done(bool completed = true); - - short stagePasses; + void freeSlot(); + ///////////////////////////////////////////// // // GET RESOURCE REQUEST IDENTIFICATION / INFO @@ -319,11 +329,9 @@ class ResourceRequest ///////////////////////////////////////////// /** Get Resource Index */ int getResIdx() { return resIdx; } - /** Get Slot Number */ int getSlot() { return slotNum; } - int getComplSlot() { return complSlotNum; } bool hasSlot() { return slotNum >= 0; } /** Get Stage Number */ @@ -353,6 +361,12 @@ class ResourceRequest /** Command For This Resource */ unsigned cmd; + short stagePasses; + + bool valid; + + bool doneInResource; + //////////////////////////////////////// // // GET RESOURCE REQUEST STATUS FROM VARIABLES @@ -380,7 +394,6 @@ class ResourceRequest int stageNum; int resIdx; int slotNum; - int complSlotNum; /** Resource Request Status */ bool completed; diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index e1914623a..4e2f930ab 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -55,7 +55,7 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) memObjects.push_back(ICache); resources.push_back(new FetchUnit("icache_port", ICache, - stage_width * MaxThreads, 0, _cpu, + stage_width * 2 + MaxThreads, 0, _cpu, params)); resources.push_back(new DecodeUnit("Decode-Unit", Decode, @@ -68,7 +68,7 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) 0, _cpu, params)); resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, - stage_width * MaxThreads, 0, _cpu, + stage_width * 3, 0, _cpu, params)); resources.push_back(new AGENUnit("AGEN-Unit", AGEN, @@ -77,16 +77,16 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, stage_width, 0, _cpu, params)); - resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, - params)); + 
resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, + stage_width * 2, 0, _cpu, params)); memObjects.push_back(DCache); resources.push_back(new CacheUnit("dcache_port", DCache, - stage_width * MaxThreads, 0, _cpu, + stage_width * 2 + MaxThreads, 0, _cpu, params)); resources.push_back(new GraduationUnit("Graduation-Unit", Grad, - stage_width * MaxThreads, 0, _cpu, + stage_width, 0, _cpu, params)); resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, diff --git a/src/cpu/inorder/resources/agen_unit.cc b/src/cpu/inorder/resources/agen_unit.cc index f1862b94a..764cd9446 100644 --- a/src/cpu/inorder/resources/agen_unit.cc +++ b/src/cpu/inorder/resources/agen_unit.cc @@ -50,8 +50,8 @@ AGENUnit::regStats() void AGENUnit::execute(int slot_num) { - ResourceRequest* agen_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* agen_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; #if TRACING_ON ThreadID tid = inst->readTid(); #endif diff --git a/src/cpu/inorder/resources/branch_predictor.cc b/src/cpu/inorder/resources/branch_predictor.cc index 8ca5a9718..5a22e40eb 100644 --- a/src/cpu/inorder/resources/branch_predictor.cc +++ b/src/cpu/inorder/resources/branch_predictor.cc @@ -66,7 +66,7 @@ BranchPredictor::execute(int slot_num) { // After this is working, change this to a reinterpret cast // for performance considerations - ResourceRequest* bpred_req = reqMap[slot_num]; + ResourceRequest* bpred_req = reqs[slot_num]; DynInstPtr inst = bpred_req->inst; ThreadID tid = inst->readTid(); int seq_num = inst->seqNum; diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 8cd105493..b17e5b3da 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -133,6 +133,10 @@ CacheUnit::getPort(const string &if_name, int idx) void CacheUnit::init() { + for (int i = 0; i < width; i++) { + reqs[i] = new CacheRequest(this); + } + // 
Currently Used to Model TLB Latency. Eventually // Switch to Timing TLB translations. resourceEvent = new CacheUnitEvent[width]; @@ -250,20 +254,16 @@ CacheUnit::removeAddrDependency(DynInstPtr inst) ResReqPtr CacheUnit::findRequest(DynInstPtr inst) { - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); - - while (map_it != map_end) { + for (int i = 0; i < width; i++) { CacheRequest* cache_req = - dynamic_cast<CacheRequest*>((*map_it).second); + dynamic_cast<CacheRequest*>(reqs[i]); assert(cache_req); - if (cache_req && + if (cache_req->valid && cache_req->getInst() == inst && cache_req->instIdx == inst->curSkedEntry->idx) { return cache_req; } - map_it++; } return NULL; @@ -272,20 +272,16 @@ CacheUnit::findRequest(DynInstPtr inst) ResReqPtr CacheUnit::findRequest(DynInstPtr inst, int idx) { - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); - - while (map_it != map_end) { + for (int i = 0; i < width; i++) { CacheRequest* cache_req = - dynamic_cast<CacheRequest*>((*map_it).second); + dynamic_cast<CacheRequest*>(reqs[i]); assert(cache_req); - if (cache_req && + if (cache_req->valid && cache_req->getInst() == inst && cache_req->instIdx == idx) { return cache_req; } - map_it++; } return NULL; @@ -297,6 +293,7 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { ScheduleEntry* sched_entry = *inst->curSkedEntry; + CacheRequest* cache_req = dynamic_cast<CacheRequest*>(reqs[slot_num]); if (!inst->validMemAddr()) { panic("Mem. Addr. 
must be set before requesting cache access\n"); @@ -343,10 +340,10 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, sched_entry->cmd, name()); } - return new CacheRequest(this, inst, stage_num, id, slot_num, - sched_entry->cmd, 0, pkt_cmd, - 0/*flags*/, this->cpu->readCpuId(), - inst->curSkedEntry->idx); + cache_req->setRequest(inst, stage_num, id, slot_num, + sched_entry->cmd, pkt_cmd, + inst->curSkedEntry->idx); + return cache_req; } void @@ -651,8 +648,6 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size, if (inst->fault == NoFault) { if (!cache_req->splitAccess) { - // Remove this line since storeData is saved in INST? - cache_req->reqData = new uint8_t[size]; doCacheAccess(inst, write_res); } else { doCacheAccess(inst, write_res, cache_req); @@ -667,16 +662,19 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size, void CacheUnit::execute(int slot_num) { - CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqMap[slot_num]); + CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqs[slot_num]); assert(cache_req); - if (cachePortBlocked) { + if (cachePortBlocked && + (cache_req->cmd == InitiateReadData || + cache_req->cmd == InitiateWriteData || + cache_req->cmd == InitSecondSplitRead || + cache_req->cmd == InitSecondSplitWrite)) { DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); - cache_req->setCompleted(false); + cache_req->done(false); return; } - DynInstPtr inst = cache_req->inst; #if TRACING_ON ThreadID tid = inst->readTid(); @@ -693,7 +691,12 @@ CacheUnit::execute(int slot_num) acc_type = "read"; #endif case InitiateWriteData: - + if (cachePortBlocked) { + DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); + cache_req->done(false); + return; + } + DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i] Initiating data %s access to %s for " "addr. 
%08p\n", tid, inst->seqNum, acc_type, name(), @@ -808,7 +811,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr cache_req; if (split_req == NULL) { - cache_req = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]); + cache_req = dynamic_cast<CacheReqPtr>(reqs[inst->getCurResSlot()]); } else{ cache_req = split_req; } @@ -867,7 +870,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, "[tid:%i] [sn:%i] cannot access cache, because port " "is blocked. now waiting to retry request\n", tid, inst->seqNum); - cache_req->setCompleted(false); + cache_req->done(false); cachePortBlocked = true; } else { DPRINTF(InOrderCachePort, @@ -891,7 +894,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, // Make cache request again since access due to // inability to access DPRINTF(InOrderStall, "STALL: \n"); - cache_req->setCompleted(false); + cache_req->done(false); } } @@ -914,7 +917,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) cache_pkt->cacheReq->getTid(), cache_pkt->cacheReq->seqNum); - cache_pkt->cacheReq->done(); + cache_pkt->cacheReq->freeSlot(); delete cache_pkt; cpu->wakeCPU(); @@ -1059,10 +1062,10 @@ CacheUnitEvent::CacheUnitEvent() void CacheUnitEvent::process() { - DynInstPtr inst = resource->reqMap[slotIdx]->inst; - int stage_num = resource->reqMap[slotIdx]->getStageNum(); + DynInstPtr inst = resource->reqs[slotIdx]->inst; + int stage_num = resource->reqs[slotIdx]->getStageNum(); ThreadID tid = inst->threadNumber; - CacheReqPtr req_ptr = dynamic_cast<CacheReqPtr>(resource->reqMap[slotIdx]); + CacheReqPtr req_ptr = dynamic_cast<CacheReqPtr>(resource->reqs[slotIdx]); DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n", inst->seqNum); @@ -1073,13 +1076,15 @@ CacheUnitEvent::process() tlb_res->tlbBlocked[tid] = false; tlb_res->cpu->pipelineStage[stage_num]-> - unsetResStall(tlb_res->reqMap[slotIdx], tid); + unsetResStall(tlb_res->reqs[slotIdx], tid); req_ptr->tlbStall = false; if 
(req_ptr->isSquashed()) { - req_ptr->done(); + req_ptr->freeSlot(); } + + tlb_res->cpu->wakeCPU(); } void @@ -1124,15 +1129,10 @@ void CacheUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) { - vector<int> slot_remove_list; - - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); + for (int i = 0; i < width; i++) { + ResReqPtr req_ptr = reqs[i]; - while (map_it != map_end) { - ResReqPtr req_ptr = (*map_it).second; - - if (req_ptr && + if (req_ptr->valid && req_ptr->getInst()->readTid() == tid && req_ptr->getInst()->seqNum > squash_seq_num) { @@ -1145,7 +1145,6 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, "squashed, ignoring squash process.\n", req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); - map_it++; continue; } @@ -1159,18 +1158,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, if (cache_req->tlbStall) { tlbBlocked[tid] = false; - int stall_stage = reqMap[req_slot_num]->getStageNum(); + int stall_stage = reqs[req_slot_num]->getStageNum(); cpu->pipelineStage[stall_stage]-> - unsetResStall(reqMap[req_slot_num], tid); + unsetResStall(reqs[req_slot_num], tid); } if (!cache_req->tlbStall && !cache_req->isMemAccPending()) { - // Mark request for later removal - cpu->reqRemoveList.push(req_ptr); - - // Mark slot for removal from resource - slot_remove_list.push_back(req_ptr->getSlot()); + freeSlot(req_slot_num); } else { DPRINTF(InOrderCachePort, "[tid:%i] Request from [sn:%i] squashed, but still " @@ -1182,14 +1177,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum, req_ptr->getInst()->splitInst); } - } - - map_it++; } - // Now Delete Slot Entry from Req. 
Map - for (int i = 0; i < slot_remove_list.size(); i++) - freeSlot(slot_remove_list[i]); } diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index afcb36a24..097b6fa7a 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -219,20 +219,18 @@ class CacheUnitEvent : public ResourceEvent { void process(); }; +//@todo: Move into CacheUnit Class for private access to "valid" field class CacheRequest : public ResourceRequest { public: - CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx, - int slot_num, unsigned cmd, int req_size, - MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx) - : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd), - pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL), - retryPkt(NULL), memAccComplete(false), memAccPending(false), - tlbStall(false), splitAccess(false), splitAccessNum(-1), - split2ndAccess(false), instIdx(idx), fetchBufferFill(false) + CacheRequest(CacheUnit *cres) + : ResourceRequest(cres), memReq(NULL), reqData(NULL), + dataPkt(NULL), retryPkt(NULL), memAccComplete(false), + memAccPending(false), tlbStall(false), splitAccess(false), + splitAccessNum(-1), split2ndAccess(false), + fetchBufferFill(false) { } - virtual ~CacheRequest() { if (reqData && !splitAccess) { @@ -240,6 +238,37 @@ class CacheRequest : public ResourceRequest } } + void setRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, + unsigned _cmd, MemCmd::Command pkt_cmd, int idx) + { + pktCmd = pkt_cmd; + instIdx = idx; + + ResourceRequest::setRequest(_inst, stage_num, res_idx, slot_num, _cmd); + } + + void clearRequest() + { + if (reqData && !splitAccess) { + delete [] reqData; + } + + memReq = NULL; + reqData = NULL; + dataPkt = NULL; + retryPkt = NULL; + memAccComplete = false; + memAccPending = false; + tlbStall = false; + splitAccess = false; + splitAccessNum = -1; + split2ndAccess = false; + instIdx = 0; 
+ fetchBufferFill = false; + + ResourceRequest::clearRequest(); + } + virtual PacketDataPtr getData() { return reqData; } diff --git a/src/cpu/inorder/resources/decode_unit.cc b/src/cpu/inorder/resources/decode_unit.cc index 42857c783..71d33ab90 100644 --- a/src/cpu/inorder/resources/decode_unit.cc +++ b/src/cpu/inorder/resources/decode_unit.cc @@ -49,8 +49,8 @@ DecodeUnit::DecodeUnit(std::string res_name, int res_id, int res_width, void DecodeUnit::execute(int slot_num) { - ResourceRequest* decode_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* decode_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; ThreadID tid = inst->readTid(); switch (decode_req->cmd) diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index 36bf2a4dc..b2540cff8 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -42,7 +42,7 @@ ExecutionUnit::ExecutionUnit(string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - lastExecuteTick(0), lastControlTick(0) + lastExecuteTick(0), lastControlTick(0), serializeTick(0) { } void @@ -82,27 +82,52 @@ ExecutionUnit::regStats() void ExecutionUnit::execute(int slot_num) { - ResourceRequest* exec_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* exec_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; Fault fault = NoFault; int seq_num = inst->seqNum; + Tick cur_tick = curTick(); + + if (cur_tick == serializeTick) { + DPRINTF(InOrderExecute, "Can not execute [tid:%i][sn:%i][PC:%s] %s. 
" + "All instructions are being serialized this cycle\n", + inst->readTid(), seq_num, inst->pcState(), inst->instName()); + exec_req->done(false); + return; + } - DPRINTF(InOrderExecute, "[tid:%i] Executing [sn:%i] [PC:%s] %s.\n", - inst->readTid(), seq_num, inst->pcState(), inst->instName()); switch (exec_req->cmd) { case ExecuteInst: { - if (curTick() != lastExecuteTick) { - lastExecuteTick = curTick(); + if (inst->isNop()) { + DPRINTF(InOrderExecute, "[tid:%i] [sn:%i] [PC:%s] Ignoring execution" + "of %s.\n", inst->readTid(), seq_num, inst->pcState(), + inst->instName()); + inst->setExecuted(); + exec_req->done(); + return; + } else { + DPRINTF(InOrderExecute, "[tid:%i] Executing [sn:%i] [PC:%s] %s.\n", + inst->readTid(), seq_num, inst->pcState(), inst->instName()); } + if (cur_tick != lastExecuteTick) { + lastExecuteTick = cur_tick; + } + + assert(!inst->isMemRef()); + + if (inst->isSerializeAfter()) { + serializeTick = cur_tick; + DPRINTF(InOrderExecute, "Serializing execution after [tid:%i] " + "[sn:%i] [PC:%s] %s.\n", inst->readTid(), seq_num, + inst->pcState(), inst->instName()); + } - if (inst->isMemRef()) { - panic("%s not configured to handle memory ops.\n", resName); - } else if (inst->isControl()) { - if (lastControlTick == curTick()) { + if (inst->isControl()) { + if (lastControlTick == cur_tick) { DPRINTF(InOrderExecute, "Can not Execute More than One Control " "Inst Per Cycle. 
Blocking Request.\n"); exec_req->done(false); diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh index a6694ddb5..b03a6655e 100644 --- a/src/cpu/inorder/resources/execution_unit.hh +++ b/src/cpu/inorder/resources/execution_unit.hh @@ -76,6 +76,7 @@ class ExecutionUnit : public Resource { Stats::Scalar executions; Tick lastExecuteTick; Tick lastControlTick; + Tick serializeTick; }; diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index 6f84a333d..d23ea0a82 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -62,13 +62,17 @@ FetchSeqUnit::init() { resourceEvent = new FetchSeqEvent[width]; + for (int i = 0; i < width; i++) { + reqs[i] = new ResourceRequest(this); + } + initSlots(); } void FetchSeqUnit::execute(int slot_num) { - ResourceRequest* fs_req = reqMap[slot_num]; + ResourceRequest* fs_req = reqs[slot_num]; DynInstPtr inst = fs_req->inst; ThreadID tid = inst->readTid(); int stage_num = fs_req->getStageNum(); @@ -96,7 +100,7 @@ FetchSeqUnit::execute(int slot_num) fs_req->done(); } else { DPRINTF(InOrderStall, "STALL: [tid:%i]: NPC not valid\n", tid); - fs_req->setCompleted(false); + fs_req->done(false); } } break; diff --git a/src/cpu/inorder/resources/fetch_unit.cc b/src/cpu/inorder/resources/fetch_unit.cc index 0a5483aff..a0d830ecf 100644 --- a/src/cpu/inorder/resources/fetch_unit.cc +++ b/src/cpu/inorder/resources/fetch_unit.cc @@ -56,6 +56,31 @@ FetchUnit::FetchUnit(string res_name, int res_id, int res_width, predecoder(NULL) { } +FetchUnit::~FetchUnit() +{ + std::list<FetchBlock*>::iterator fetch_it = fetchBuffer.begin(); + std::list<FetchBlock*>::iterator end_it = fetchBuffer.end(); + while (fetch_it != end_it) { + delete (*fetch_it)->block; + delete *fetch_it; + fetch_it++; + } + fetchBuffer.clear(); + + + std::list<FetchBlock*>::iterator pend_it = pendingFetch.begin(); + 
std::list<FetchBlock*>::iterator pend_end = pendingFetch.end(); + while (pend_it != pend_end) { + if ((*pend_it)->block) { + delete (*pend_it)->block; + } + + delete *pend_it; + pend_it++; + } + pendingFetch.clear(); +} + void FetchUnit::createMachInst(std::list<FetchBlock*>::iterator fetch_it, DynInstPtr inst) @@ -119,32 +144,23 @@ FetchUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { ScheduleEntry* sched_entry = *inst->curSkedEntry; + CacheRequest* cache_req = dynamic_cast<CacheRequest*>(reqs[slot_num]); if (!inst->validMemAddr()) { panic("Mem. Addr. must be set before requesting cache access\n"); } - MemCmd::Command pkt_cmd; - - switch (sched_entry->cmd) - { - case InitiateFetch: - pkt_cmd = MemCmd::ReadReq; + assert(sched_entry->cmd == InitiateFetch); - DPRINTF(InOrderCachePort, - "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n", - inst->readTid(), inst->seqNum, inst->getMemAddr()); - break; + DPRINTF(InOrderCachePort, + "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->getMemAddr()); - default: - panic("%i: Unexpected request type (%i) to %s", curTick(), - sched_entry->cmd, name()); - } + cache_req->setRequest(inst, stage_num, id, slot_num, + sched_entry->cmd, MemCmd::ReadReq, + inst->curSkedEntry->idx); - return new CacheRequest(this, inst, stage_num, id, slot_num, - sched_entry->cmd, 0, pkt_cmd, - 0/*flags*/, this->cpu->readCpuId(), - inst->curSkedEntry->idx); + return cache_req; } void @@ -214,12 +230,12 @@ FetchUnit::markBlockUsed(std::list<FetchBlock*>::iterator block_it) void FetchUnit::execute(int slot_num) { - CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqMap[slot_num]); + CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqs[slot_num]); assert(cache_req); - if (cachePortBlocked) { + if (cachePortBlocked && cache_req->cmd == InitiateFetch) { DPRINTF(InOrderCachePort, "Cache Port Blocked. 
Cannot Access\n"); - cache_req->setCompleted(false); + cache_req->done(false); return; } @@ -270,7 +286,7 @@ FetchUnit::execute(int slot_num) // If not, block this request. if (pendingFetch.size() >= fetchBuffSize) { DPRINTF(InOrderCachePort, "No room available in fetch buffer.\n"); - cache_req->setCompleted(false); + cache_req->done(); return; } @@ -337,6 +353,8 @@ FetchUnit::execute(int slot_num) return; } + delete [] (*repl_it)->block; + delete *repl_it; fetchBuffer.erase(repl_it); } @@ -414,6 +432,7 @@ FetchUnit::processCacheCompletion(PacketPtr pkt) cache_pkt->cacheReq->seqNum); cache_pkt->cacheReq->done(); + cache_pkt->cacheReq->freeSlot(); delete cache_pkt; cpu->wakeCPU(); @@ -514,6 +533,10 @@ FetchUnit::squashCacheRequest(CacheReqPtr req_ptr) DPRINTF(InOrderCachePort, "[sn:%i] Removing Pending Fetch " "for block %08p (cnt=%i)\n", inst->seqNum, block_addr, (*block_it)->cnt); + if ((*block_it)->block) { + delete [] (*block_it)->block; + } + delete *block_it; pendingFetch.erase(block_it); } } diff --git a/src/cpu/inorder/resources/fetch_unit.hh b/src/cpu/inorder/resources/fetch_unit.hh index 035f3f4a1..fa133b9eb 100644 --- a/src/cpu/inorder/resources/fetch_unit.hh +++ b/src/cpu/inorder/resources/fetch_unit.hh @@ -55,6 +55,8 @@ class FetchUnit : public CacheUnit FetchUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); + virtual ~FetchUnit(); + typedef ThePipeline::DynInstPtr DynInstPtr; typedef TheISA::ExtMachInst ExtMachInst; diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc index 362641b54..edc2fb3ff 100644 --- a/src/cpu/inorder/resources/graduation_unit.cc +++ b/src/cpu/inorder/resources/graduation_unit.cc @@ -37,8 +37,7 @@ GraduationUnit::GraduationUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - 
lastCycleGrad(0), numCycleGrad(0) - + lastNonSpecTick(0) { for (ThreadID tid = 0; tid < ThePipeline::MaxThreads; tid++) { nonSpecInstActive[tid] = &cpu->nonSpecInstActive[tid]; @@ -49,8 +48,8 @@ GraduationUnit::GraduationUnit(std::string res_name, int res_id, int res_width, void GraduationUnit::execute(int slot_num) { - ResourceRequest* grad_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* grad_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; ThreadID tid = inst->readTid(); int stage_num = inst->curSkedEntry->stageNum; @@ -58,15 +57,18 @@ GraduationUnit::execute(int slot_num) { case GraduateInst: { - // Make sure this is the last thing on the resource schedule - // @todo: replace this check - // assert(inst->resSched.size() == 1); + if (lastNonSpecTick == curTick()) { + DPRINTF(InOrderGraduation, "Unable to graduate [sn:%i]. " + "Only 1 nonspec inst. per cycle can graduate.\n"); + grad_req->done(false); + return; + } - // Handle Any Faults Before Graduating Instruction + // Handle Any Faults Before Graduating Instruction if (inst->fault != NoFault) { cpu->trap(inst->fault, tid, inst); grad_req->setCompleted(false); - return; + return; } DPRINTF(InOrderGraduation, @@ -81,6 +83,7 @@ GraduationUnit::execute(int slot_num) DPRINTF(InOrderGraduation, "[tid:%i] Non-speculative inst [sn:%i] graduated\n", tid, inst->seqNum); + lastNonSpecTick = curTick(); } if (inst->traceData) { diff --git a/src/cpu/inorder/resources/graduation_unit.hh b/src/cpu/inorder/resources/graduation_unit.hh index aae41993f..59631bfcb 100644 --- a/src/cpu/inorder/resources/graduation_unit.hh +++ b/src/cpu/inorder/resources/graduation_unit.hh @@ -57,9 +57,7 @@ class GraduationUnit : public Resource { void execute(int slot_num); protected: - Tick lastCycleGrad; - int numCycleGrad; - + Tick lastNonSpecTick; bool *nonSpecInstActive[ThePipeline::MaxThreads]; InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads]; diff --git 
a/src/cpu/inorder/resources/inst_buffer.cc b/src/cpu/inorder/resources/inst_buffer.cc index 988fcd4da..46f5cce72 100644 --- a/src/cpu/inorder/resources/inst_buffer.cc +++ b/src/cpu/inorder/resources/inst_buffer.cc @@ -62,7 +62,7 @@ InstBuffer::regStats() void InstBuffer::execute(int slot_idx) { - ResReqPtr ib_req = reqMap[slot_idx]; + ResReqPtr ib_req = reqs[slot_idx]; DynInstPtr inst = ib_req->inst; ThreadID tid = inst->readTid(); int stage_num = ib_req->getStageNum(); diff --git a/src/cpu/inorder/resources/mult_div_unit.cc b/src/cpu/inorder/resources/mult_div_unit.cc index 042fb590b..ad8b2b47b 100644 --- a/src/cpu/inorder/resources/mult_div_unit.cc +++ b/src/cpu/inorder/resources/mult_div_unit.cc @@ -76,6 +76,10 @@ MultDivUnit::init() // Set Up Resource Events to Appropriate Resource BandWidth resourceEvent = new MDUEvent[width]; + for (int i = 0; i < width; i++) { + reqs[i] = new ResourceRequest(this); + } + initSlots(); } @@ -92,7 +96,7 @@ void MultDivUnit::freeSlot(int slot_idx) { DPRINTF(InOrderMDU, "Freeing slot for inst:%i\n | slots-free:%i | " - "slots-used:%i\n", reqMap[slot_idx]->getInst()->seqNum, + "slots-used:%i\n", reqs[slot_idx]->getInst()->seqNum, slotsAvail(), slotsInUse()); Resource::freeSlot(slot_idx); @@ -132,7 +136,7 @@ MultDivUnit::getSlot(DynInstPtr inst) // If we have this instruction's request already then return if (slot_num != -1 && - inst->curSkedEntry->cmd == reqMap[slot_num]->cmd) + inst->curSkedEntry->cmd == reqs[slot_num]->cmd) return slot_num; unsigned repeat_rate = 0; @@ -202,8 +206,8 @@ MultDivUnit::getDivOpSize(DynInstPtr inst) void MultDivUnit::execute(int slot_num) { - ResourceRequest* mult_div_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* mult_div_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; switch (mult_div_req->cmd) { @@ -275,8 +279,8 @@ MultDivUnit::execute(int slot_num) void MultDivUnit::exeMulDiv(int slot_num) { - ResourceRequest* mult_div_req = 
reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* mult_div_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; inst->fault = inst->execute(); @@ -310,7 +314,7 @@ MDUEvent::process() mdu_res->exeMulDiv(slotIdx); - ResourceRequest* mult_div_req = resource->reqMap[slotIdx]; + ResourceRequest* mult_div_req = resource->reqs[slotIdx]; mult_div_req->done(); } diff --git a/src/cpu/inorder/resources/tlb_unit.cc b/src/cpu/inorder/resources/tlb_unit.cc index 2e19ea928..37aec2209 100644 --- a/src/cpu/inorder/resources/tlb_unit.cc +++ b/src/cpu/inorder/resources/tlb_unit.cc @@ -72,6 +72,10 @@ TLBUnit::init() { resourceEvent = new TLBUnitEvent[width]; + for (int i = 0; i < width; i++) { + reqs[i] = new TLBUnitRequest(this); + } + initSlots(); } @@ -90,8 +94,9 @@ TLBUnit::getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { - return new TLBUnitRequest(this, _inst, stage_num, res_idx, slot_num, - cmd); + TLBUnitRequest *tlb_req = dynamic_cast<TLBUnitRequest*>(reqs[slot_num]); + tlb_req->setRequest(inst, stage_num, id, slot_num, cmd); + return ud_req; } void @@ -99,7 +104,7 @@ TLBUnit::execute(int slot_idx) { // After this is working, change this to a reinterpret cast // for performance considerations - TLBUnitRequest* tlb_req = dynamic_cast<TLBUnitRequest*>(reqMap[slot_idx]); + TLBUnitRequest* tlb_req = dynamic_cast<TLBUnitRequest*>(reqs[slot_idx]); assert(tlb_req != 0x0); DynInstPtr inst = tlb_req->inst; @@ -200,8 +205,8 @@ TLBUnitEvent::TLBUnitEvent() void TLBUnitEvent::process() { - DynInstPtr inst = resource->reqMap[slotIdx]->inst; - int stage_num = resource->reqMap[slotIdx]->getStageNum(); + DynInstPtr inst = resource->reqs[slotIdx]->inst; + int stage_num = resource->reqs[slotIdx]->getStageNum(); ThreadID tid = inst->threadNumber; DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n", @@ -212,31 +217,18 @@ TLBUnitEvent::process() tlb_res->tlbBlocked[tid] = false; - 
tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid); - - // Effectively NOP the instruction but still allow it - // to commit - //while (!inst->resSched.empty() && - // inst->curSkedEntry->stageNum != ThePipeline::NumStages - 1) { - //inst->resSched.pop(); - //} + tlb_res->cpu->pipelineStage[stage_num]-> + unsetResStall(tlb_res->reqs[slotIdx], tid); } void TLBUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) { - //@TODO: Figure out a way to consolidate common parts - // of this squash code - std::vector<int> slot_remove_list; - - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - ResReqPtr req_ptr = (*map_it).second; + for (int i = 0; i < width; i++) { + ResReqPtr req_ptr = reqs[i]; - if (req_ptr && + if (req_ptr->valid && req_ptr->getInst()->readTid() == tid && req_ptr->getInst()->seqNum > squash_seq_num) { @@ -250,26 +242,16 @@ TLBUnit::squash(DynInstPtr inst, int stage_num, tlbBlocked[tid] = false; - int stall_stage = reqMap[req_slot_num]->getStageNum(); + int stall_stage = reqs[req_slot_num]->getStageNum(); - cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid); + cpu->pipelineStage[stall_stage]-> + unsetResStall(reqs[req_slot_num], tid); if (resourceEvent[req_slot_num].scheduled()) unscheduleEvent(req_slot_num); - // Mark request for later removal - cpu->reqRemoveList.push(req_ptr); - - // Mark slot for removal from resource - slot_remove_list.push_back(req_ptr->getSlot()); + freeSlot(req_slot_num); } - - map_it++; - } - - // Now Delete Slot Entry from Req. 
Map - for (int i = 0; i < slot_remove_list.size(); i++) { - freeSlot(slot_remove_list[i]); } } diff --git a/src/cpu/inorder/resources/tlb_unit.hh b/src/cpu/inorder/resources/tlb_unit.hh index eb1bf55f0..904ac3eba 100644 --- a/src/cpu/inorder/resources/tlb_unit.hh +++ b/src/cpu/inorder/resources/tlb_unit.hh @@ -99,9 +99,15 @@ class TLBUnitRequest : public ResourceRequest { typedef ThePipeline::DynInstPtr DynInstPtr; public: - TLBUnitRequest(TLBUnit *res, DynInstPtr inst, int stage_num, int res_idx, int slot_num, - unsigned _cmd) - : ResourceRequest(res, inst, stage_num, res_idx, slot_num, _cmd) + TLBUnitRequest(TLBUnit *res) + : ResourceRequest(res), memReq(NULL) + { + } + + RequestPtr memReq; + + void setRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, + unsigned _cmd) { Addr aligned_addr; int req_size; @@ -131,9 +137,10 @@ class TLBUnitRequest : public ResourceRequest { inst->readTid()); memReq = inst->dataMemReq; } + + ResourceRequest::setRequest(inst, stage_num, res_idx, slot_num, _cmd); } - RequestPtr memReq; }; diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index 538b20246..19246a30b 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -88,33 +88,48 @@ UseDefUnit::regStats() Resource::regStats(); } +void +UseDefUnit::init() +{ + // Set Up Resource Events to Appropriate Resource BandWidth + if (latency > 0) { + resourceEvent = new ResourceEvent[width]; + } else { + resourceEvent = NULL; + } + + for (int i = 0; i < width; i++) { + reqs[i] = new UseDefRequest(this); + } + + initSlots(); +} + ResReqPtr UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { - return new UseDefRequest(this, inst, stage_num, id, slot_num, cmd, - inst->curSkedEntry->idx); + UseDefRequest *ud_req = dynamic_cast<UseDefRequest*>(reqs[slot_num]); + ud_req->setRequest(inst, stage_num, id, slot_num, cmd, + inst->curSkedEntry->idx); + return 
ud_req; } ResReqPtr UseDefUnit::findRequest(DynInstPtr inst) { - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - UseDefRequest* ud_req = - dynamic_cast<UseDefRequest*>((*map_it).second); + for (int i = 0; i < width; i++) { + UseDefRequest* ud_req = + dynamic_cast<UseDefRequest*>(reqs[i]); assert(ud_req); - if (ud_req && + if (ud_req->valid && ud_req->getInst() == inst && ud_req->cmd == inst->curSkedEntry->cmd && ud_req->useDefIdx == inst->curSkedEntry->idx) { return ud_req; } - map_it++; } return NULL; @@ -125,7 +140,7 @@ UseDefUnit::execute(int slot_idx) { // After this is working, change this to a reinterpret cast // for performance considerations - UseDefRequest* ud_req = dynamic_cast<UseDefRequest*>(reqMap[slot_idx]); + UseDefRequest* ud_req = dynamic_cast<UseDefRequest*>(reqs[slot_idx]); assert(ud_req); DynInstPtr inst = ud_req->inst; @@ -408,15 +423,10 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, DPRINTF(InOrderUseDef, "[tid:%i]: Updating Due To Squash After [sn:%i].\n", tid, squash_seq_num); - std::vector<int> slot_remove_list; - - map<int, ResReqPtr>::iterator map_it = reqMap.begin(); - map<int, ResReqPtr>::iterator map_end = reqMap.end(); + for (int i = 0; i < width; i++) { + ResReqPtr req_ptr = reqs[i]; - while (map_it != map_end) { - ResReqPtr req_ptr = (*map_it).second; - - if (req_ptr && + if (req_ptr->valid && req_ptr->getInst()->readTid() == tid && req_ptr->getInst()->seqNum > squash_seq_num) { @@ -431,20 +441,9 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, unscheduleEvent(req_slot_num); } - - // Mark request for later removal - cpu->reqRemoveList.push(req_ptr); - // Mark slot for removal from resource - slot_remove_list.push_back(req_ptr->getSlot()); + freeSlot(req_slot_num); } - - map_it++; - } - - // Now Delete Slot Entry from Req. 
Map - for (int i = 0; i < slot_remove_list.size(); i++) { - freeSlot(slot_remove_list[i]); } if (outReadSeqNum[tid] >= squash_seq_num) { diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh index d2cc55315..21770cec6 100644 --- a/src/cpu/inorder/resources/use_def.hh +++ b/src/cpu/inorder/resources/use_def.hh @@ -56,6 +56,8 @@ class UseDefUnit : public Resource { UseDefUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); + void init(); + ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned cmd); @@ -96,14 +98,20 @@ class UseDefUnit : public Resource { typedef ThePipeline::DynInstPtr DynInstPtr; public: - UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, - int res_idx, int slot_num, unsigned cmd, - int use_def_idx) - : ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd), - useDefIdx(use_def_idx) + UseDefRequest(UseDefUnit *res) + : ResourceRequest(res) { } int useDefIdx; + + void setRequest(DynInstPtr _inst, int stage_num, int res_idx, + int slot_num, unsigned _cmd, int idx) + { + useDefIdx = idx; + + ResourceRequest::setRequest(_inst, stage_num, res_idx, slot_num, + _cmd); + } }; protected: diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout index 1ec8b66f1..55fcb321a 100755 --- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout +++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout @@ -5,12 +5,12 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Feb 7 2011 01:47:18 -M5 revision 4b4b02c5553c 7929 default qtip reupdatestats.patch tip -M5 started Feb 7 2011 01:47:38 -M5 executing on burrito +M5 compiled Feb 18 2011 15:40:30 +M5 revision Unknown +M5 started Feb 18 2011 18:53:22 +M5 executing on m55-001.pool command line: build/ALPHA_SE/m5.fast -d 
build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. -Exiting @ tick 43686968500 because halt instruction encountered +Exiting @ tick 43687852500 because target called exit() diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt index 5d00e7290..883ec05af 100644 --- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt +++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt @@ -1,25 +1,25 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 106274 # Simulator instruction rate (inst/s) -host_mem_usage 1642336 # Number of bytes of host memory used -host_seconds 831.26 # Real time elapsed on the host -host_tick_rate 52555245 # Simulator tick rate (ticks/s) +host_inst_rate 140237 # Simulator instruction rate (inst/s) +host_mem_usage 237028 # Number of bytes of host memory used +host_seconds 629.94 # Real time elapsed on the host +host_tick_rate 69352666 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 88340674 # Number of instructions simulated -sim_seconds 0.043687 # Number of seconds simulated -sim_ticks 43686968500 # Number of ticks simulated +sim_insts 88340673 # Number of instructions simulated +sim_seconds 0.043688 # Number of seconds simulated +sim_ticks 43687852500 # Number of ticks simulated system.cpu.AGEN-Unit.agens 35033051 # Number of Address Generations -system.cpu.Branch-Predictor.BTBHitPct 40.125175 # BTB Hit Percentage -system.cpu.Branch-Predictor.BTBHits 4678518 # Number of BTB hits -system.cpu.Branch-Predictor.BTBLookups 11659807 # Number of BTB lookups +system.cpu.Branch-Predictor.BTBHitPct 40.125186 # BTB Hit Percentage 
+system.cpu.Branch-Predictor.BTBHits 4678520 # Number of BTB hits +system.cpu.Branch-Predictor.BTBLookups 11659809 # Number of BTB lookups system.cpu.Branch-Predictor.RASInCorrect 1539 # Number of incorrect RAS predictions. system.cpu.Branch-Predictor.condIncorrect 753993 # Number of conditional branches incorrect -system.cpu.Branch-Predictor.condPredicted 9173158 # Number of conditional branches predicted -system.cpu.Branch-Predictor.lookups 14237669 # Number of BP lookups +system.cpu.Branch-Predictor.condPredicted 9173160 # Number of conditional branches predicted +system.cpu.Branch-Predictor.lookups 14237671 # Number of BP lookups system.cpu.Branch-Predictor.predictedNotTaken 6139595 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 8098074 # Number of Branches Predicted As Taken (True). +system.cpu.Branch-Predictor.predictedTaken 8098076 # Number of Branches Predicted As Taken (True). system.cpu.Branch-Predictor.usedRAS 1660495 # Number of times the RAS was used to get a target. -system.cpu.Execution-Unit.executions 53620617 # Number of Instructions Executed. +system.cpu.Execution-Unit.executions 44841137 # Number of Instructions Executed. system.cpu.Execution-Unit.mispredictPct 5.481801 # Percentage of Incorrect Branches Predicts system.cpu.Execution-Unit.mispredicted 753993 # Number of Branches Incorrectly Predicted system.cpu.Execution-Unit.predicted 13000484 # Number of Branches Incorrectly Predicted @@ -27,43 +27,43 @@ system.cpu.Execution-Unit.predictedNotTakenIncorrect 550902 system.cpu.Execution-Unit.predictedTakenIncorrect 203091 # Number of Branches Incorrectly Predicted As Taken. 
system.cpu.Mult-Div-Unit.divides 0 # Number of Divide Operations Executed system.cpu.Mult-Div-Unit.multiplies 41101 # Number of Multipy Operations Executed -system.cpu.RegFile-Manager.regFileAccesses 145605016 # Number of Total Accesses (Read+Write) to the Register File -system.cpu.RegFile-Manager.regFileReads 93058135 # Number of Reads from Register File +system.cpu.RegFile-Manager.regFileAccesses 145605009 # Number of Total Accesses (Read+Write) to the Register File +system.cpu.RegFile-Manager.regFileReads 93058128 # Number of Reads from Register File system.cpu.RegFile-Manager.regFileWrites 52546881 # Number of Writes to Register File -system.cpu.RegFile-Manager.regForwards 13517269 # Number of Registers Read Through Forwarding Logic -system.cpu.activity 70.714707 # Percentage of cycles cpu is active +system.cpu.RegFile-Manager.regForwards 13517276 # Number of Registers Read Through Forwarding Logic +system.cpu.activity 70.715162 # Percentage of cycles cpu is active system.cpu.comBranches 13754477 # Number of Branches instructions committed system.cpu.comFloats 151453 # Number of Floating Point instructions committed system.cpu.comInts 30791227 # Number of Integer instructions committed system.cpu.comLoads 20276638 # Number of Load instructions committed -system.cpu.comNonSpec 4584 # Number of Non-Speculative instructions committed +system.cpu.comNonSpec 4583 # Number of Non-Speculative instructions committed system.cpu.comNops 8748916 # Number of Nop instructions committed system.cpu.comStores 14613377 # Number of Store instructions committed -system.cpu.committedInsts 88340674 # Number of Instructions Simulated (Per-Thread) -system.cpu.committedInsts_total 88340674 # Number of Instructions Simulated (Total) +system.cpu.committedInsts 88340673 # Number of Instructions Simulated (Per-Thread) +system.cpu.committedInsts_total 88340673 # Number of Instructions Simulated (Total) system.cpu.contextSwitches 1 # Number of context switches -system.cpu.cpi 0.989057 # 
CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 0.989057 # CPI: Total CPI of All Threads +system.cpu.cpi 0.989077 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 0.989077 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 20276638 # number of ReadReq accesses(hits+misses) system.cpu.dcache.ReadReq_avg_miss_latency 43413.349504 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 34421.543297 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 34421.526841 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 20182230 # number of ReadReq hits system.cpu.dcache.ReadReq_miss_latency 4098567500 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.004656 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 94408 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 33642 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 2091659500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 2091658500 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.002997 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 60766 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 14613377 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 50157.670646 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 49503.458051 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 50157.576620 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 49503.360543 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 14405989 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 10402099000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 10402079500 # number 
of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.014192 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 207388 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_hits 63810 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 7107607500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 7107593500 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.009825 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 143578 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked @@ -75,31 +75,31 @@ system.cpu.dcache.blocked_cycles::no_mshrs 0 # system.cpu.dcache.blocked_cycles::no_targets 2727000 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 34890015 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 48047.908190 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 45018.532475 # average overall mshr miss latency +system.cpu.dcache.demand_avg_miss_latency 48047.843576 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 45018.459069 # average overall mshr miss latency system.cpu.dcache.demand_hits 34588219 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 14500666500 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 14500647000 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.008650 # miss rate for demand accesses system.cpu.dcache.demand_misses 301796 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 97452 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 9199267000 # number of demand (read+write) MSHR miss cycles 
+system.cpu.dcache.demand_mshr_miss_latency 9199252000 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.005857 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 204344 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.occ_%::0 0.994103 # Average percentage of cache occupancy -system.cpu.dcache.occ_blocks::0 4071.844776 # Average occupied blocks per context +system.cpu.dcache.occ_blocks::0 4071.844772 # Average occupied blocks per context system.cpu.dcache.overall_accesses 34890015 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 48047.908190 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 45018.532475 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 48047.843576 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 45018.459069 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 34588219 # number of overall hits -system.cpu.dcache.overall_miss_latency 14500666500 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 14500647000 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.008650 # miss rate for overall accesses system.cpu.dcache.overall_misses 301796 # number of overall misses system.cpu.dcache.overall_mshr_hits 97452 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 9199267000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 9199252000 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.005857 # 
mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 204344 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -107,9 +107,9 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 200248 # number of replacements system.cpu.dcache.sampled_refs 204344 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 4071.844776 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 4071.844772 # Cycle average of tags in use system.cpu.dcache.total_refs 34588219 # Total number of references to valid blocks. -system.cpu.dcache.warmup_cycle 497786000 # Cycle when the warmup percentage was hit. +system.cpu.dcache.warmup_cycle 497796000 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 161214 # number of writebacks system.cpu.dtb.data_accesses 34987415 # DTB accesses system.cpu.dtb.data_acv 0 # DTB access violations @@ -127,51 +127,51 @@ system.cpu.dtb.write_accesses 14620629 # DT system.cpu.dtb.write_acv 0 # DTB write access violations system.cpu.dtb.write_hits 14613377 # DTB write hits system.cpu.dtb.write_misses 7252 # DTB write misses -system.cpu.icache.ReadReq_accesses 11384473 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 18619.899316 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 15557.624423 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 11286741 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1819760000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_accesses 11384439 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 18620.927639 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 15557.720286 # average ReadReq 
mshr miss latency +system.cpu.icache.ReadReq_hits 11286707 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1819860500 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.008585 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 97732 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 9063 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 1379479000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency 1379487500 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.007789 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 88669 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets 18115.384615 # average number of cycles each access was blocked -system.cpu.icache.avg_refs 127.292157 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 127.291774 # Average number of references to valid blocks. 
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 39 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 706500 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 11384473 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 18619.899316 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 15557.624423 # average overall mshr miss latency -system.cpu.icache.demand_hits 11286741 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1819760000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_accesses 11384439 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 18620.927639 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 15557.720286 # average overall mshr miss latency +system.cpu.icache.demand_hits 11286707 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1819860500 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.008585 # miss rate for demand accesses system.cpu.icache.demand_misses 97732 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 9063 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 1379479000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 1379487500 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0.007789 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 88669 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # 
number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.occ_%::0 0.918761 # Average percentage of cache occupancy -system.cpu.icache.occ_blocks::0 1881.622790 # Average occupied blocks per context -system.cpu.icache.overall_accesses 11384473 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 18619.899316 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 15557.624423 # average overall mshr miss latency +system.cpu.icache.occ_%::0 0.918759 # Average percentage of cache occupancy +system.cpu.icache.occ_blocks::0 1881.619179 # Average occupied blocks per context +system.cpu.icache.overall_accesses 11384439 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 18620.927639 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 15557.720286 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 11286741 # number of overall hits -system.cpu.icache.overall_miss_latency 1819760000 # number of overall miss cycles +system.cpu.icache.overall_hits 11286707 # number of overall hits +system.cpu.icache.overall_miss_latency 1819860500 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.008585 # miss rate for overall accesses system.cpu.icache.overall_misses 97732 # number of overall misses system.cpu.icache.overall_mshr_hits 9063 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 1379479000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 1379487500 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0.007789 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 88669 # number of overall MSHR misses 
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -179,20 +179,20 @@ system.cpu.icache.overall_mshr_uncacheable_misses 0 system.cpu.icache.replacements 86622 # number of replacements system.cpu.icache.sampled_refs 88668 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 1881.622790 # Cycle average of tags in use -system.cpu.icache.total_refs 11286741 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 1881.619179 # Cycle average of tags in use +system.cpu.icache.total_refs 11286707 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 25587714 # Number of cycles cpu's stages were not processed -system.cpu.ipc 1.011064 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 1.011064 # IPC: Total IPC of All Threads +system.cpu.idleCycles 25587834 # Number of cycles cpu's stages were not processed +system.cpu.ipc 1.011044 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 1.011044 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits system.cpu.itb.data_misses 0 # DTB misses -system.cpu.itb.fetch_accesses 11389750 # ITB accesses +system.cpu.itb.fetch_accesses 11389716 # ITB accesses system.cpu.itb.fetch_acv 0 # ITB acv -system.cpu.itb.fetch_hits 11384494 # ITB hits +system.cpu.itb.fetch_hits 11384460 # ITB hits system.cpu.itb.fetch_misses 5256 # ITB misses system.cpu.itb.read_accesses 0 # DTB read accesses system.cpu.itb.read_acv 0 # DTB read access violations @@ -203,23 +203,23 @@ system.cpu.itb.write_acv 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB 
write misses system.cpu.l2cache.ReadExReq_accesses 143582 # number of ReadExReq accesses(hits+misses) -system.cpu.l2cache.ReadExReq_avg_miss_latency 52040.936228 # average ReadExReq miss latency -system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.851808 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_avg_miss_latency 52040.829752 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.848005 # average ReadExReq mshr miss latency system.cpu.l2cache.ReadExReq_hits 12097 # number of ReadExReq hits -system.cpu.l2cache.ReadExReq_miss_latency 6842602500 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_latency 6842588500 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 0.915748 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 131485 # number of ReadExReq misses -system.cpu.l2cache.ReadExReq_mshr_miss_latency 5259512000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_latency 5259511500 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 0.915748 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 131485 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 149430 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52294.227145 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40025.874305 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52294.157340 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40025.851037 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 106453 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 2247449000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 2247446000 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 
0.287606 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 42977 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 1720192000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 1720191000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 0.287606 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 42977 # number of ReadReq MSHR misses system.cpu.l2cache.Writeback_accesses 161214 # number of Writeback accesses(hits+misses) @@ -233,33 +233,33 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 293012 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52103.331958 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40007.015854 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52103.234515 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40007.007257 # average overall mshr miss latency system.cpu.l2cache.demand_hits 118550 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 9090051500 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 9090034500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.595409 # miss rate for demand accesses system.cpu.l2cache.demand_misses 174462 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 6979704000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 6979702500 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 0.595409 # mshr 
miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 174462 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.occ_%::0 0.093045 # Average percentage of cache occupancy +system.cpu.l2cache.occ_%::0 0.093044 # Average percentage of cache occupancy system.cpu.l2cache.occ_%::1 0.476016 # Average percentage of cache occupancy -system.cpu.l2cache.occ_blocks::0 3048.903015 # Average occupied blocks per context -system.cpu.l2cache.occ_blocks::1 15598.107451 # Average occupied blocks per context +system.cpu.l2cache.occ_blocks::0 3048.873160 # Average occupied blocks per context +system.cpu.l2cache.occ_blocks::1 15598.097053 # Average occupied blocks per context system.cpu.l2cache.overall_accesses 293012 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52103.331958 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40007.015854 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52103.234515 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40007.007257 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 118550 # number of overall hits -system.cpu.l2cache.overall_miss_latency 9090051500 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 9090034500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.595409 # miss rate for overall accesses system.cpu.l2cache.overall_misses 174462 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 6979704000 # number 
of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 6979702500 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.595409 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 174462 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -267,35 +267,35 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 148090 # number of replacements system.cpu.l2cache.sampled_refs 173435 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 18647.010465 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 18646.970214 # Cycle average of tags in use system.cpu.l2cache.total_refs 134496 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 120516 # number of writebacks -system.cpu.numCycles 87373938 # number of cpu cycles simulated +system.cpu.numCycles 87375706 # number of cpu cycles simulated system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed system.cpu.numWorkItemsStarted 0 # number of work items this cpu started -system.cpu.runCycles 61786224 # Number of cycles cpu stages are processed. +system.cpu.runCycles 61787872 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.stage-0.idleCycles 42492197 # Number of cycles 0 instructions are processed. -system.cpu.stage-0.runCycles 44881741 # Number of cycles 1+ instructions are processed. 
-system.cpu.stage-0.utilization 51.367424 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-1.idleCycles 48180975 # Number of cycles 0 instructions are processed. -system.cpu.stage-1.runCycles 39192963 # Number of cycles 1+ instructions are processed. -system.cpu.stage-1.utilization 44.856583 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-2.idleCycles 46081271 # Number of cycles 0 instructions are processed. -system.cpu.stage-2.runCycles 41292667 # Number of cycles 1+ instructions are processed. -system.cpu.stage-2.utilization 47.259707 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-3.idleCycles 63475501 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.idleCycles 42493951 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 44881755 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 51.366400 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 48181868 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 39193838 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 44.856677 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 46079607 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 41296099 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 47.262678 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 63477269 # Number of cycles 0 instructions are processed. system.cpu.stage-3.runCycles 23898437 # Number of cycles 1+ instructions are processed. -system.cpu.stage-3.utilization 27.351906 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-4.idleCycles 39335442 # Number of cycles 0 instructions are processed. 
-system.cpu.stage-4.runCycles 48038496 # Number of cycles 1+ instructions are processed. -system.cpu.stage-4.utilization 54.980349 # Percentage of cycles stage was utilized (processing insts). -system.cpu.threadCycles 69006043 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) -system.cpu.timesIdled 289198 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.stage-3.utilization 27.351352 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 39338499 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 48037207 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 54.977761 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 69007682 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.timesIdled 289197 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 4583 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout index 2bd9f8140..d80de6314 100755 --- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout +++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout @@ -5,10 +5,10 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Feb 7 2011 01:47:18 -M5 revision 4b4b02c5553c 7929 default qtip reupdatestats.patch tip -M5 started Feb 7 2011 01:47:37 -M5 executing on burrito +M5 compiled Feb 18 2011 15:40:30 +M5 revision Unknown +M5 started Feb 18 2011 19:04:15 +M5 executing on m55-001.pool command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing Couldn't unlink 
build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sav Couldn't unlink build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sv2 @@ -28,4 +28,4 @@ Authors: Carl Sechen, Bill Swartz 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 -122 123 124 Exiting @ tick 40531473000 because halt instruction encountered +122 123 124 Exiting @ tick 40531279000 because target called exit() diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt index e197ea18e..b78683303 100644 --- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt +++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt @@ -1,25 +1,25 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 105929 # Simulator instruction rate (inst/s) -host_mem_usage 1434716 # Number of bytes of host memory used -host_seconds 867.59 # Real time elapsed on the host -host_tick_rate 46717114 # Simulator tick rate (ticks/s) +host_inst_rate 137731 # Simulator instruction rate (inst/s) +host_mem_usage 254052 # Number of bytes of host memory used +host_seconds 667.27 # Real time elapsed on the host +host_tick_rate 60742348 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 91903057 # Number of instructions simulated +sim_insts 91903056 # Number of instructions simulated sim_seconds 0.040531 # Number of seconds simulated -sim_ticks 40531473000 # Number of ticks simulated +sim_ticks 40531279000 # Number of ticks simulated system.cpu.AGEN-Unit.agens 27308571 # Number of Address Generations -system.cpu.Branch-Predictor.BTBHitPct 59.146475 # BTB Hit Percentage +system.cpu.Branch-Predictor.BTBHitPct 59.146483 # BTB Hit Percentage system.cpu.Branch-Predictor.BTBHits 4489525 # Number of BTB hits -system.cpu.Branch-Predictor.BTBLookups 7590520 
# Number of BTB lookups +system.cpu.Branch-Predictor.BTBLookups 7590519 # Number of BTB lookups system.cpu.Branch-Predictor.RASInCorrect 138 # Number of incorrect RAS predictions. system.cpu.Branch-Predictor.condIncorrect 2806970 # Number of conditional branches incorrect system.cpu.Branch-Predictor.condPredicted 7883251 # Number of conditional branches predicted -system.cpu.Branch-Predictor.lookups 11539981 # Number of BP lookups +system.cpu.Branch-Predictor.lookups 11539980 # Number of BP lookups system.cpu.Branch-Predictor.predictedNotTaken 4913265 # Number of Branches Predicted As Not Taken (False). -system.cpu.Branch-Predictor.predictedTaken 6626716 # Number of Branches Predicted As Taken (True). +system.cpu.Branch-Predictor.predictedTaken 6626715 # Number of Branches Predicted As Taken (True). system.cpu.Branch-Predictor.usedRAS 1029619 # Number of times the RAS was used to get a target. -system.cpu.Execution-Unit.executions 66407277 # Number of Instructions Executed. +system.cpu.Execution-Unit.executions 57928840 # Number of Instructions Executed. system.cpu.Execution-Unit.mispredictPct 27.409983 # Percentage of Incorrect Branches Predicts system.cpu.Execution-Unit.mispredicted 2806970 # Number of Branches Incorrectly Predicted system.cpu.Execution-Unit.predicted 7433715 # Number of Branches Incorrectly Predicted @@ -27,43 +27,43 @@ system.cpu.Execution-Unit.predictedNotTakenIncorrect 1384945 system.cpu.Execution-Unit.predictedTakenIncorrect 1422025 # Number of Branches Incorrectly Predicted As Taken. 
system.cpu.Mult-Div-Unit.divides 0 # Number of Divide Operations Executed system.cpu.Mult-Div-Unit.multiplies 458252 # Number of Multipy Operations Executed -system.cpu.RegFile-Manager.regFileAccesses 152685933 # Number of Total Accesses (Read+Write) to the Register File -system.cpu.RegFile-Manager.regFileReads 84258572 # Number of Reads from Register File +system.cpu.RegFile-Manager.regFileAccesses 152685930 # Number of Total Accesses (Read+Write) to the Register File +system.cpu.RegFile-Manager.regFileReads 84258569 # Number of Reads from Register File system.cpu.RegFile-Manager.regFileWrites 68427361 # Number of Writes to Register File -system.cpu.RegFile-Manager.regForwards 38185925 # Number of Registers Read Through Forwarding Logic -system.cpu.activity 91.670105 # Percentage of cycles cpu is active +system.cpu.RegFile-Manager.regForwards 38185928 # Number of Registers Read Through Forwarding Logic +system.cpu.activity 91.670040 # Percentage of cycles cpu is active system.cpu.comBranches 10240685 # Number of Branches instructions committed system.cpu.comFloats 3775974 # Number of Floating Point instructions committed system.cpu.comInts 43665352 # Number of Integer instructions committed system.cpu.comLoads 19996198 # Number of Load instructions committed -system.cpu.comNonSpec 390 # Number of Non-Speculative instructions committed +system.cpu.comNonSpec 389 # Number of Non-Speculative instructions committed system.cpu.comNops 7723346 # Number of Nop instructions committed system.cpu.comStores 6501103 # Number of Store instructions committed -system.cpu.committedInsts 91903057 # Number of Instructions Simulated (Per-Thread) -system.cpu.committedInsts_total 91903057 # Number of Instructions Simulated (Total) +system.cpu.committedInsts 91903056 # Number of Instructions Simulated (Per-Thread) +system.cpu.committedInsts_total 91903056 # Number of Instructions Simulated (Total) system.cpu.contextSwitches 1 # Number of context switches -system.cpu.cpi 0.882048 # CPI: 
Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 0.882048 # CPI: Total CPI of All Threads +system.cpu.cpi 0.882044 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 0.882044 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 19996198 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 51751.953125 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48809.473684 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_miss_latency 51752.929688 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48810.526316 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 19995686 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 26497000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 26497500 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.000026 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 512 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 37 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 23184500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 23185000 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.000024 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 475 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 6501103 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 55922.090261 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 52793.478261 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_miss_latency 55921.258907 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 52792.620137 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 6496893 # number of WriteReq 
hits -system.cpu.dcache.WriteReq_miss_latency 235432000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency 235428500 # number of WriteReq miss cycles system.cpu.dcache.WriteReq_miss_rate 0.000648 # miss rate for WriteReq accesses system.cpu.dcache.WriteReq_misses 4210 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_hits 2462 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 92283000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency 92281500 # number of WriteReq MSHR miss cycles system.cpu.dcache.WriteReq_mshr_miss_rate 0.000269 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 1748 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked @@ -75,31 +75,31 @@ system.cpu.dcache.blocked_cycles::no_mshrs 0 # system.cpu.dcache.blocked_cycles::no_targets 1373500 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 26497301 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 55469.927997 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 51942.195232 # average overall mshr miss latency +system.cpu.dcache.demand_avg_miss_latency 55469.292673 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 51941.745389 # average overall mshr miss latency system.cpu.dcache.demand_hits 26492579 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 261929000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 261926000 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.000178 # miss rate for demand accesses system.cpu.dcache.demand_misses 4722 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 2499 # number of 
demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 115467500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 115466500 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.000084 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 2223 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.occ_%::0 0.351931 # Average percentage of cache occupancy -system.cpu.dcache.occ_blocks::0 1441.507978 # Average occupied blocks per context +system.cpu.dcache.occ_blocks::0 1441.508051 # Average occupied blocks per context system.cpu.dcache.overall_accesses 26497301 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 55469.927997 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 51942.195232 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 55469.292673 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 51941.745389 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 26492579 # number of overall hits -system.cpu.dcache.overall_miss_latency 261929000 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 261926000 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.000178 # miss rate for overall accesses system.cpu.dcache.overall_misses 4722 # number of overall misses system.cpu.dcache.overall_mshr_hits 2499 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 115467500 # number of overall MSHR miss cycles 
+system.cpu.dcache.overall_mshr_miss_latency 115466500 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.000084 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 2223 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -107,7 +107,7 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 157 # number of replacements system.cpu.dcache.sampled_refs 2223 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 1441.507978 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 1441.508051 # Cycle average of tags in use system.cpu.dcache.total_refs 26492579 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 107 # number of writebacks @@ -127,51 +127,51 @@ system.cpu.dtb.write_accesses 6501126 # DT system.cpu.dtb.write_acv 0 # DTB write access violations system.cpu.dtb.write_hits 6501103 # DTB write hits system.cpu.dtb.write_misses 23 # DTB write misses -system.cpu.icache.ReadReq_accesses 9759566 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 26777.900606 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 23139.891881 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 9749163 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 278570500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_accesses 9759564 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 26779.967317 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 23139.993880 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 9749161 # number of 
ReadReq hits +system.cpu.icache.ReadReq_miss_latency 278592000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.001066 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 10403 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 599 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 226863500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency 226864500 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.001005 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 9804 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles::no_targets 18409.090909 # average number of cycles each access was blocked -system.cpu.icache.avg_refs 994.406671 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 994.406467 # Average number of references to valid blocks. 
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked::no_targets 11 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles::no_targets 202500 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 9759566 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 26777.900606 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 23139.891881 # average overall mshr miss latency -system.cpu.icache.demand_hits 9749163 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 278570500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_accesses 9759564 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 26779.967317 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 23139.993880 # average overall mshr miss latency +system.cpu.icache.demand_hits 9749161 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 278592000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.001066 # miss rate for demand accesses system.cpu.icache.demand_misses 10403 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 599 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 226863500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 226864500 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0.001005 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 9804 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of 
times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.icache.occ_%::0 0.729171 # Average percentage of cache occupancy -system.cpu.icache.occ_blocks::0 1493.341258 # Average occupied blocks per context -system.cpu.icache.overall_accesses 9759566 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 26777.900606 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 23139.891881 # average overall mshr miss latency +system.cpu.icache.occ_blocks::0 1493.341252 # Average occupied blocks per context +system.cpu.icache.overall_accesses 9759564 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 26779.967317 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 23139.993880 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 9749163 # number of overall hits -system.cpu.icache.overall_miss_latency 278570500 # number of overall miss cycles +system.cpu.icache.overall_hits 9749161 # number of overall hits +system.cpu.icache.overall_miss_latency 278592000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.001066 # miss rate for overall accesses system.cpu.icache.overall_misses 10403 # number of overall misses system.cpu.icache.overall_mshr_hits 599 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 226863500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 226864500 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0.001005 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 9804 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -179,20 +179,20 @@ 
system.cpu.icache.overall_mshr_uncacheable_misses 0 system.cpu.icache.replacements 7919 # number of replacements system.cpu.icache.sampled_refs 9804 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 1493.341258 # Cycle average of tags in use -system.cpu.icache.total_refs 9749163 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 1493.341252 # Cycle average of tags in use +system.cpu.icache.total_refs 9749161 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 6752458 # Number of cycles cpu's stages were not processed -system.cpu.ipc 1.133725 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 1.133725 # IPC: Total IPC of All Threads +system.cpu.idleCycles 6752479 # Number of cycles cpu's stages were not processed +system.cpu.ipc 1.133730 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 1.133730 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits system.cpu.itb.data_misses 0 # DTB misses -system.cpu.itb.fetch_accesses 9759621 # ITB accesses +system.cpu.itb.fetch_accesses 9759619 # ITB accesses system.cpu.itb.fetch_acv 0 # ITB acv -system.cpu.itb.fetch_hits 9759574 # ITB hits +system.cpu.itb.fetch_hits 9759572 # ITB hits system.cpu.itb.fetch_misses 47 # ITB misses system.cpu.itb.read_accesses 0 # DTB read accesses system.cpu.itb.read_acv 0 # DTB read access violations @@ -203,23 +203,23 @@ system.cpu.itb.write_acv 0 # DT system.cpu.itb.write_hits 0 # DTB write hits system.cpu.itb.write_misses 0 # DTB write misses system.cpu.l2cache.ReadExReq_accesses 1748 # number of ReadExReq accesses(hits+misses) 
-system.cpu.l2cache.ReadExReq_avg_miss_latency 52356.562137 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_miss_latency 52355.691057 # average ReadExReq miss latency system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40114.401858 # average ReadExReq mshr miss latency system.cpu.l2cache.ReadExReq_hits 26 # number of ReadExReq hits -system.cpu.l2cache.ReadExReq_miss_latency 90158000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_latency 90156500 # number of ReadExReq miss cycles system.cpu.l2cache.ReadExReq_miss_rate 0.985126 # miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_misses 1722 # number of ReadExReq misses system.cpu.l2cache.ReadExReq_mshr_miss_latency 69077000 # number of ReadExReq MSHR miss cycles system.cpu.l2cache.ReadExReq_mshr_miss_rate 0.985126 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 1722 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 10279 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52322.450249 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40125.777363 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52322.761194 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40125.621891 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 7063 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 168269000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 168270000 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 0.312871 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 3216 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 129044500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 129044000 # number of ReadReq MSHR miss cycles 
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.312871 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 3216 # number of ReadReq MSHR misses system.cpu.l2cache.Writeback_accesses 107 # number of Writeback accesses(hits+misses) @@ -233,14 +233,14 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 12027 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52334.345889 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40121.810450 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52334.244633 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40121.709194 # average overall mshr miss latency system.cpu.l2cache.demand_hits 7089 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 258427000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 258426500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.410576 # miss rate for demand accesses system.cpu.l2cache.demand_misses 4938 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 198121500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 198121000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 0.410576 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 4938 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed @@ -248,18 +248,18 @@ system.cpu.l2cache.mshr_cap_events 0 # nu system.cpu.l2cache.no_allocate_misses 0 # Number of misses 
that were no-allocate system.cpu.l2cache.occ_%::0 0.066327 # Average percentage of cache occupancy system.cpu.l2cache.occ_%::1 0.000542 # Average percentage of cache occupancy -system.cpu.l2cache.occ_blocks::0 2173.408404 # Average occupied blocks per context -system.cpu.l2cache.occ_blocks::1 17.762794 # Average occupied blocks per context +system.cpu.l2cache.occ_blocks::0 2173.408531 # Average occupied blocks per context +system.cpu.l2cache.occ_blocks::1 17.762817 # Average occupied blocks per context system.cpu.l2cache.overall_accesses 12027 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52334.345889 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40121.810450 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52334.244633 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40121.709194 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 7089 # number of overall hits -system.cpu.l2cache.overall_miss_latency 258427000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 258426500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.410576 # miss rate for overall accesses system.cpu.l2cache.overall_misses 4938 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 198121500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 198121000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.410576 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 4938 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -267,35 +267,35 @@ 
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 3282 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 2191.171198 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 2191.171348 # Cycle average of tags in use system.cpu.l2cache.total_refs 7072 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 81062947 # number of cpu cycles simulated +system.cpu.numCycles 81062559 # number of cpu cycles simulated system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed system.cpu.numWorkItemsStarted 0 # number of work items this cpu started -system.cpu.runCycles 74310489 # Number of cycles cpu stages are processed. +system.cpu.runCycles 74310080 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.stage-0.idleCycles 27951481 # Number of cycles 0 instructions are processed. -system.cpu.stage-0.runCycles 53111466 # Number of cycles 1+ instructions are processed. -system.cpu.stage-0.utilization 65.518795 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-1.idleCycles 33263015 # Number of cycles 0 instructions are processed. -system.cpu.stage-1.runCycles 47799932 # Number of cycles 1+ instructions are processed. -system.cpu.stage-1.utilization 58.966438 # Percentage of cycles stage was utilized (processing insts). 
-system.cpu.stage-2.idleCycles 32674388 # Number of cycles 0 instructions are processed. -system.cpu.stage-2.runCycles 48388559 # Number of cycles 1+ instructions are processed. -system.cpu.stage-2.utilization 59.692573 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-3.idleCycles 63236669 # Number of cycles 0 instructions are processed. -system.cpu.stage-3.runCycles 17826278 # Number of cycles 1+ instructions are processed. -system.cpu.stage-3.utilization 21.990661 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-4.idleCycles 26883449 # Number of cycles 0 instructions are processed. -system.cpu.stage-4.runCycles 54179498 # Number of cycles 1+ instructions are processed. -system.cpu.stage-4.utilization 66.836329 # Percentage of cycles stage was utilized (processing insts). -system.cpu.threadCycles 80608290 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) -system.cpu.timesIdled 10787 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.stage-0.idleCycles 27951091 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 53111468 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 65.519111 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 33262621 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 47799938 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 58.966727 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 32674404 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 48388155 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 59.692361 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 63236282 # Number of cycles 0 instructions are processed. 
+system.cpu.stage-3.runCycles 17826277 # Number of cycles 1+ instructions are processed. +system.cpu.stage-3.utilization 21.990765 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 26883065 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 54179494 # Number of cycles 1+ instructions are processed. +system.cpu.stage-4.utilization 66.836644 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 80607865 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.timesIdled 10786 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 389 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout index 254c4b8b1..fa50fea55 100755 --- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout +++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Feb 7 2011 01:47:18 -M5 revision 4b4b02c5553c 7929 default qtip reupdatestats.patch tip -M5 started Feb 7 2011 01:47:37 -M5 executing on burrito +M5 compiled Feb 18 2011 15:40:30 +M5 revision Unknown +M5 started Feb 18 2011 18:52:59 +M5 executing on m55-001.pool command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello world! 
-Exiting @ tick 22288500 because target called exit() +Exiting @ tick 22294500 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt index 4b7effb4d..bb298d30a 100644 --- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt +++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt @@ -1,37 +1,37 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 76381 # Simulator instruction rate (inst/s) -host_mem_usage 190468 # Number of bytes of host memory used -host_seconds 0.08 # Real time elapsed on the host -host_tick_rate 264969940 # Simulator tick rate (ticks/s) +host_inst_rate 97475 # Simulator instruction rate (inst/s) +host_mem_usage 190320 # Number of bytes of host memory used +host_seconds 0.07 # Real time elapsed on the host +host_tick_rate 337940129 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 6404 # Number of instructions simulated sim_seconds 0.000022 # Number of seconds simulated -sim_ticks 22288500 # Number of ticks simulated -system.cpu.AGEN-Unit.agens 2187 # Number of Address Generations +sim_ticks 22294500 # Number of ticks simulated +system.cpu.AGEN-Unit.agens 2186 # Number of Address Generations system.cpu.Branch-Predictor.BTBHitPct 23.015873 # BTB Hit Percentage system.cpu.Branch-Predictor.BTBHits 87 # Number of BTB hits system.cpu.Branch-Predictor.BTBLookups 378 # Number of BTB lookups system.cpu.Branch-Predictor.RASInCorrect 0 # Number of incorrect RAS predictions. 
-system.cpu.Branch-Predictor.condIncorrect 543 # Number of conditional branches incorrect +system.cpu.Branch-Predictor.condIncorrect 542 # Number of conditional branches incorrect system.cpu.Branch-Predictor.condPredicted 995 # Number of conditional branches predicted system.cpu.Branch-Predictor.lookups 1423 # Number of BP lookups system.cpu.Branch-Predictor.predictedNotTaken 1183 # Number of Branches Predicted As Not Taken (False). system.cpu.Branch-Predictor.predictedTaken 240 # Number of Branches Predicted As Taken (True). system.cpu.Branch-Predictor.usedRAS 125 # Number of times the RAS was used to get a target. -system.cpu.Execution-Unit.executions 4617 # Number of Instructions Executed. -system.cpu.Execution-Unit.mispredictPct 51.615970 # Percentage of Incorrect Branches Predicts -system.cpu.Execution-Unit.mispredicted 543 # Number of Branches Incorrectly Predicted +system.cpu.Execution-Unit.executions 4596 # Number of Instructions Executed. +system.cpu.Execution-Unit.mispredictPct 51.569933 # Percentage of Incorrect Branches Predicts +system.cpu.Execution-Unit.mispredicted 542 # Number of Branches Incorrectly Predicted system.cpu.Execution-Unit.predicted 509 # Number of Branches Incorrectly Predicted -system.cpu.Execution-Unit.predictedNotTakenIncorrect 538 # Number of Branches Incorrectly Predicted As Not Taken). +system.cpu.Execution-Unit.predictedNotTakenIncorrect 537 # Number of Branches Incorrectly Predicted As Not Taken). system.cpu.Execution-Unit.predictedTakenIncorrect 5 # Number of Branches Incorrectly Predicted As Taken. 
system.cpu.Mult-Div-Unit.divides 0 # Number of Divide Operations Executed system.cpu.Mult-Div-Unit.multiplies 1 # Number of Multipy Operations Executed -system.cpu.RegFile-Manager.regFileAccesses 10532 # Number of Total Accesses (Read+Write) to the Register File -system.cpu.RegFile-Manager.regFileReads 5949 # Number of Reads from Register File +system.cpu.RegFile-Manager.regFileAccesses 10530 # Number of Total Accesses (Read+Write) to the Register File +system.cpu.RegFile-Manager.regFileReads 5947 # Number of Reads from Register File system.cpu.RegFile-Manager.regFileWrites 4583 # Number of Writes to Register File system.cpu.RegFile-Manager.regForwards 2845 # Number of Registers Read Through Forwarding Logic -system.cpu.activity 16.048275 # Percentage of cycles cpu is active +system.cpu.activity 16.075353 # Percentage of cycles cpu is active system.cpu.comBranches 1051 # Number of Branches instructions committed system.cpu.comFloats 2 # Number of Floating Point instructions committed system.cpu.comInts 3265 # Number of Integer instructions committed @@ -42,17 +42,17 @@ system.cpu.comStores 865 # Nu system.cpu.committedInsts 6404 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 6404 # Number of Instructions Simulated (Total) system.cpu.contextSwitches 1 # Number of context switches -system.cpu.cpi 6.960962 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 6.960962 # CPI: Total CPI of All Threads +system.cpu.cpi 6.962836 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 6.962836 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 1185 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 56781.250000 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53784.210526 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_miss_latency 56786.458333 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 
53789.473684 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 1089 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 5451000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 5451500 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.081013 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 96 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 1 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 5109500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 5110000 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.080169 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 95 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 865 # number of WriteReq accesses(hits+misses) @@ -75,31 +75,31 @@ system.cpu.dcache.blocked_cycles::no_mshrs 0 # system.cpu.dcache.blocked_cycles::no_targets 162000 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 2050 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 56661.157025 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 53687.500000 # average overall mshr miss latency +system.cpu.dcache.demand_avg_miss_latency 56663.223140 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 53690.476190 # average overall mshr miss latency system.cpu.dcache.demand_hits 1808 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 13712000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 13712500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.118049 # miss rate for demand accesses system.cpu.dcache.demand_misses 242 # number of demand (read+write) misses 
system.cpu.dcache.demand_mshr_hits 74 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 9019500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 9020000 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.081951 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 168 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.occ_%::0 0.024901 # Average percentage of cache occupancy -system.cpu.dcache.occ_blocks::0 101.993452 # Average occupied blocks per context +system.cpu.dcache.occ_%::0 0.024898 # Average percentage of cache occupancy +system.cpu.dcache.occ_blocks::0 101.981030 # Average occupied blocks per context system.cpu.dcache.overall_accesses 2050 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 56661.157025 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 53687.500000 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 56663.223140 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 53690.476190 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 1808 # number of overall hits -system.cpu.dcache.overall_miss_latency 13712000 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 13712500 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.118049 # miss rate for overall accesses system.cpu.dcache.overall_misses 242 # number of overall misses system.cpu.dcache.overall_mshr_hits 74 # number of overall MSHR hits 
-system.cpu.dcache.overall_mshr_miss_latency 9019500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 9020000 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.081951 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 168 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -107,7 +107,7 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 168 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 101.993452 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 101.981030 # Cycle average of tags in use system.cpu.dcache.total_refs 1808 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. 
system.cpu.dcache.writebacks 0 # number of writebacks @@ -128,10 +128,10 @@ system.cpu.dtb.write_acv 0 # DT system.cpu.dtb.write_hits 865 # DTB write hits system.cpu.dtb.write_misses 3 # DTB write misses system.cpu.icache.ReadReq_accesses 955 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 55326.979472 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_miss_latency 55322.580645 # average ReadReq miss latency system.cpu.icache.ReadReq_avg_mshr_miss_latency 53094.684385 # average ReadReq mshr miss latency system.cpu.icache.ReadReq_hits 614 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 18866500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency 18865000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.357068 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 341 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 40 # number of ReadReq MSHR hits @@ -147,10 +147,10 @@ system.cpu.icache.blocked_cycles::no_mshrs 0 # system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed system.cpu.icache.demand_accesses 955 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 55326.979472 # average overall miss latency +system.cpu.icache.demand_avg_miss_latency 55322.580645 # average overall miss latency system.cpu.icache.demand_avg_mshr_miss_latency 53094.684385 # average overall mshr miss latency system.cpu.icache.demand_hits 614 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 18866500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 18865000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.357068 # miss rate for demand accesses system.cpu.icache.demand_misses 341 # number of demand (read+write) misses 
system.cpu.icache.demand_mshr_hits 40 # number of demand (read+write) MSHR hits @@ -160,14 +160,14 @@ system.cpu.icache.demand_mshr_misses 301 # nu system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.occ_%::0 0.066887 # Average percentage of cache occupancy -system.cpu.icache.occ_blocks::0 136.984147 # Average occupied blocks per context +system.cpu.icache.occ_%::0 0.066877 # Average percentage of cache occupancy +system.cpu.icache.occ_blocks::0 136.964505 # Average occupied blocks per context system.cpu.icache.overall_accesses 955 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 55326.979472 # average overall miss latency +system.cpu.icache.overall_avg_miss_latency 55322.580645 # average overall miss latency system.cpu.icache.overall_avg_mshr_miss_latency 53094.684385 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.icache.overall_hits 614 # number of overall hits -system.cpu.icache.overall_miss_latency 18866500 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 18865000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.357068 # miss rate for overall accesses system.cpu.icache.overall_misses 341 # number of overall misses system.cpu.icache.overall_mshr_hits 40 # number of overall MSHR hits @@ -179,13 +179,13 @@ system.cpu.icache.overall_mshr_uncacheable_misses 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 300 # Sample count of references to valid blocks. 
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 136.984147 # Cycle average of tags in use +system.cpu.icache.tagsinuse 136.964505 # Cycle average of tags in use system.cpu.icache.total_refs 614 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 37424 # Number of cycles cpu's stages were not processed -system.cpu.ipc 0.143658 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.143658 # IPC: Total IPC of All Threads +system.cpu.idleCycles 37422 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.143620 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.143620 # IPC: Total IPC of All Threads system.cpu.itb.data_accesses 0 # DTB accesses system.cpu.itb.data_acv 0 # DTB access violations system.cpu.itb.data_hits 0 # DTB hits @@ -243,8 +243,8 @@ system.cpu.l2cache.demand_mshr_misses 468 # nu system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.occ_%::0 0.005889 # Average percentage of cache occupancy -system.cpu.l2cache.occ_blocks::0 192.975400 # Average occupied blocks per context +system.cpu.l2cache.occ_%::0 0.005888 # Average percentage of cache occupancy +system.cpu.l2cache.occ_blocks::0 192.950109 # Average occupied blocks per context system.cpu.l2cache.overall_accesses 469 # number of overall (read+write) accesses system.cpu.l2cache.overall_avg_miss_latency 52243.589744 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency 40087.606838 # average overall mshr miss latency @@ -262,34 +262,34 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 
system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 394 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 192.975400 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 192.950109 # Cycle average of tags in use system.cpu.l2cache.total_refs 1 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 44578 # number of cpu cycles simulated +system.cpu.numCycles 44590 # number of cpu cycles simulated system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed system.cpu.numWorkItemsStarted 0 # number of work items this cpu started -system.cpu.runCycles 7154 # Number of cycles cpu stages are processed. +system.cpu.runCycles 7168 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.stage-0.idleCycles 39836 # Number of cycles 0 instructions are processed. -system.cpu.stage-0.runCycles 4742 # Number of cycles 1+ instructions are processed. -system.cpu.stage-0.utilization 10.637534 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-1.idleCycles 40747 # Number of cycles 0 instructions are processed. -system.cpu.stage-1.runCycles 3831 # Number of cycles 1+ instructions are processed. -system.cpu.stage-1.utilization 8.593925 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-2.idleCycles 40491 # Number of cycles 0 instructions are processed. -system.cpu.stage-2.runCycles 4087 # Number of cycles 1+ instructions are processed. 
-system.cpu.stage-2.utilization 9.168200 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-3.idleCycles 43168 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.idleCycles 39847 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 4743 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 10.636914 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 40758 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 3832 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 8.593855 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 40488 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 4102 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 9.199372 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 43180 # Number of cycles 0 instructions are processed. system.cpu.stage-3.runCycles 1410 # Number of cycles 1+ instructions are processed. -system.cpu.stage-3.utilization 3.162995 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-4.idleCycles 40170 # Number of cycles 0 instructions are processed. -system.cpu.stage-4.runCycles 4408 # Number of cycles 1+ instructions are processed. -system.cpu.stage-4.utilization 9.888286 # Percentage of cycles stage was utilized (processing insts). -system.cpu.threadCycles 11304 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-3.utilization 3.162144 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-4.idleCycles 40181 # Number of cycles 0 instructions are processed. +system.cpu.stage-4.runCycles 4409 # Number of cycles 1+ instructions are processed. 
+system.cpu.stage-4.utilization 9.887867 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 11319 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.timesIdled 425 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 17 # Number of system calls diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout index 2ad70ea48..41a76071a 100755 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout @@ -5,13 +5,13 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Feb 7 2011 01:55:51 -M5 revision 4b4b02c5553c 7929 default qtip reupdatestats.patch tip -M5 started Feb 7 2011 01:56:02 -M5 executing on burrito +M5 compiled Feb 18 2011 18:35:15 +M5 revision Unknown +M5 started Feb 18 2011 18:52:36 +M5 executing on m55-001.pool command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing Global frequency set at 1000000000000 ticks per second info: Entering event queue @ 0. Starting simulation... info: Increasing stack size by one page. Hello World! 
-Exiting @ tick 21534000 because target called exit() +Exiting @ tick 21538000 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt index 87307e90b..ac0fe4aec 100644 --- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt @@ -1,37 +1,37 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 1339 # Simulator instruction rate (inst/s) -host_mem_usage 191872 # Number of bytes of host memory used -host_seconds 4.35 # Real time elapsed on the host -host_tick_rate 4946645 # Simulator tick rate (ticks/s) +host_inst_rate 94112 # Simulator instruction rate (inst/s) +host_mem_usage 191540 # Number of bytes of host memory used +host_seconds 0.06 # Real time elapsed on the host +host_tick_rate 346291258 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5827 # Number of instructions simulated sim_seconds 0.000022 # Number of seconds simulated -sim_ticks 21534000 # Number of ticks simulated +sim_ticks 21538000 # Number of ticks simulated system.cpu.AGEN-Unit.agens 2404 # Number of Address Generations system.cpu.Branch-Predictor.BTBHitPct 14.054054 # BTB Hit Percentage system.cpu.Branch-Predictor.BTBHits 26 # Number of BTB hits system.cpu.Branch-Predictor.BTBLookups 185 # Number of BTB lookups system.cpu.Branch-Predictor.RASInCorrect 30 # Number of incorrect RAS predictions. -system.cpu.Branch-Predictor.condIncorrect 845 # Number of conditional branches incorrect +system.cpu.Branch-Predictor.condIncorrect 844 # Number of conditional branches incorrect system.cpu.Branch-Predictor.condPredicted 778 # Number of conditional branches predicted system.cpu.Branch-Predictor.lookups 1066 # Number of BP lookups system.cpu.Branch-Predictor.predictedNotTaken 949 # Number of Branches Predicted As Not Taken (False). 
system.cpu.Branch-Predictor.predictedTaken 117 # Number of Branches Predicted As Taken (True). system.cpu.Branch-Predictor.usedRAS 86 # Number of times the RAS was used to get a target. -system.cpu.Execution-Unit.executions 3963 # Number of Instructions Executed. -system.cpu.Execution-Unit.mispredictPct 92.148310 # Percentage of Incorrect Branches Predicts -system.cpu.Execution-Unit.mispredicted 845 # Number of Branches Incorrectly Predicted +system.cpu.Execution-Unit.executions 3261 # Number of Instructions Executed. +system.cpu.Execution-Unit.mispredictPct 92.139738 # Percentage of Incorrect Branches Predicts +system.cpu.Execution-Unit.mispredicted 844 # Number of Branches Incorrectly Predicted system.cpu.Execution-Unit.predicted 72 # Number of Branches Incorrectly Predicted -system.cpu.Execution-Unit.predictedNotTakenIncorrect 813 # Number of Branches Incorrectly Predicted As Not Taken). +system.cpu.Execution-Unit.predictedNotTakenIncorrect 812 # Number of Branches Incorrectly Predicted As Not Taken). system.cpu.Execution-Unit.predictedTakenIncorrect 32 # Number of Branches Incorrectly Predicted As Taken. 
system.cpu.Mult-Div-Unit.divides 1 # Number of Divide Operations Executed system.cpu.Mult-Div-Unit.multiplies 3 # Number of Multipy Operations Executed -system.cpu.RegFile-Manager.regFileAccesses 10006 # Number of Total Accesses (Read+Write) to the Register File -system.cpu.RegFile-Manager.regFileReads 6596 # Number of Reads from Register File +system.cpu.RegFile-Manager.regFileAccesses 10004 # Number of Total Accesses (Read+Write) to the Register File +system.cpu.RegFile-Manager.regFileReads 6594 # Number of Reads from Register File system.cpu.RegFile-Manager.regFileWrites 3410 # Number of Writes to Register File system.cpu.RegFile-Manager.regForwards 1378 # Number of Registers Read Through Forwarding Logic -system.cpu.activity 13.935777 # Percentage of cycles cpu is active +system.cpu.activity 13.954082 # Percentage of cycles cpu is active system.cpu.comBranches 916 # Number of Branches instructions committed system.cpu.comFloats 0 # Number of Floating Point instructions committed system.cpu.comInts 2155 # Number of Integer instructions committed @@ -42,17 +42,17 @@ system.cpu.comStores 925 # Nu system.cpu.committedInsts 5827 # Number of Instructions Simulated (Per-Thread) system.cpu.committedInsts_total 5827 # Number of Instructions Simulated (Total) system.cpu.contextSwitches 1 # Number of context switches -system.cpu.cpi 7.391282 # CPI: Cycles Per Instruction (Per-Thread) -system.cpu.cpi_total 7.391282 # CPI: Total CPI of All Threads +system.cpu.cpi 7.392655 # CPI: Cycles Per Instruction (Per-Thread) +system.cpu.cpi_total 7.392655 # CPI: Total CPI of All Threads system.cpu.dcache.ReadReq_accesses 1164 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 56681.818182 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53683.908046 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_miss_latency 56676.136364 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 
53678.160920 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 1076 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 4988000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 4987500 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.075601 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 88 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 1 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 4670500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 4670000 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.074742 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 87 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 925 # number of WriteReq accesses(hits+misses) @@ -75,31 +75,31 @@ system.cpu.dcache.blocked_cycles::no_mshrs 0 # system.cpu.dcache.blocked_cycles::no_targets 265500 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 2089 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 56298.342541 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 53666.666667 # average overall mshr miss latency +system.cpu.dcache.demand_avg_miss_latency 56295.580110 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 53663.043478 # average overall mshr miss latency system.cpu.dcache.demand_hits 1908 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 10190000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 10189500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate 0.086644 # miss rate for demand accesses system.cpu.dcache.demand_misses 181 # number of demand (read+write) misses 
system.cpu.dcache.demand_mshr_hits 43 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 7406000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 7405500 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0.066060 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 138 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.occ_%::0 0.021745 # Average percentage of cache occupancy -system.cpu.dcache.occ_blocks::0 89.066455 # Average occupied blocks per context +system.cpu.dcache.occ_blocks::0 89.067186 # Average occupied blocks per context system.cpu.dcache.overall_accesses 2089 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 56298.342541 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 53666.666667 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 56295.580110 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 53663.043478 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 1908 # number of overall hits -system.cpu.dcache.overall_miss_latency 10190000 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 10189500 # number of overall miss cycles system.cpu.dcache.overall_miss_rate 0.086644 # miss rate for overall accesses system.cpu.dcache.overall_misses 181 # number of overall misses system.cpu.dcache.overall_mshr_hits 43 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 7406000 # number of overall MSHR miss cycles 
+system.cpu.dcache.overall_mshr_miss_latency 7405500 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0.066060 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 138 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -107,7 +107,7 @@ system.cpu.dcache.overall_mshr_uncacheable_misses 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 138 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 89.066455 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 89.067186 # Cycle average of tags in use system.cpu.dcache.total_refs 1908 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks @@ -121,14 +121,14 @@ system.cpu.dtb.write_accesses 0 # DT system.cpu.dtb.write_hits 0 # DTB write hits system.cpu.dtb.write_misses 0 # DTB write misses system.cpu.icache.ReadReq_accesses 853 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 55526.246719 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 53153.605016 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_avg_miss_latency 55527.559055 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 53156.739812 # average ReadReq mshr miss latency system.cpu.icache.ReadReq_hits 472 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 21155500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency 21156000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.446659 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 381 # number of ReadReq misses 
system.cpu.icache.ReadReq_mshr_hits 62 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 16956000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency 16957000 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.373974 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 319 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked @@ -140,31 +140,31 @@ system.cpu.icache.blocked_cycles::no_mshrs 0 # system.cpu.icache.blocked_cycles::no_targets 62000 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed system.cpu.icache.demand_accesses 853 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 55526.246719 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 53153.605016 # average overall mshr miss latency +system.cpu.icache.demand_avg_miss_latency 55527.559055 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 53156.739812 # average overall mshr miss latency system.cpu.icache.demand_hits 472 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 21155500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 21156000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.446659 # miss rate for demand accesses system.cpu.icache.demand_misses 381 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 62 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 16956000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 16957000 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0.373974 # mshr miss rate for demand accesses 
system.cpu.icache.demand_mshr_misses 319 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.occ_%::0 0.070944 # Average percentage of cache occupancy -system.cpu.icache.occ_blocks::0 145.293265 # Average occupied blocks per context +system.cpu.icache.occ_%::0 0.070945 # Average percentage of cache occupancy +system.cpu.icache.occ_blocks::0 145.295903 # Average occupied blocks per context system.cpu.icache.overall_accesses 853 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 55526.246719 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 53153.605016 # average overall mshr miss latency +system.cpu.icache.overall_avg_miss_latency 55527.559055 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 53156.739812 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.icache.overall_hits 472 # number of overall hits -system.cpu.icache.overall_miss_latency 21155500 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 21156000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.446659 # miss rate for overall accesses system.cpu.icache.overall_misses 381 # number of overall misses system.cpu.icache.overall_mshr_hits 62 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 16956000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 16957000 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0.373974 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 319 # number of overall MSHR misses 
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -172,13 +172,13 @@ system.cpu.icache.overall_mshr_uncacheable_misses 0 system.cpu.icache.replacements 13 # number of replacements system.cpu.icache.sampled_refs 319 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 145.293265 # Cycle average of tags in use +system.cpu.icache.tagsinuse 145.295903 # Cycle average of tags in use system.cpu.icache.total_refs 472 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 37067 # Number of cycles cpu's stages were not processed -system.cpu.ipc 0.135295 # IPC: Instructions Per Cycle (Per-Thread) -system.cpu.ipc_total 0.135295 # IPC: Total IPC of All Threads +system.cpu.idleCycles 37066 # Number of cycles cpu's stages were not processed +system.cpu.ipc 0.135269 # IPC: Instructions Per Cycle (Per-Thread) +system.cpu.ipc_total 0.135269 # IPC: Total IPC of All Threads system.cpu.itb.accesses 0 # DTB accesses system.cpu.itb.hits 0 # DTB hits system.cpu.itb.misses 0 # DTB misses @@ -198,13 +198,13 @@ system.cpu.l2cache.ReadExReq_mshr_miss_latency 2052000 system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses system.cpu.l2cache.ReadExReq_mshr_misses 51 # number of ReadExReq MSHR misses system.cpu.l2cache.ReadReq_accesses 406 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 52355.198020 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40152.227723 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_avg_miss_latency 52357.673267 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40153.465347 # average ReadReq mshr miss latency 
system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 21151500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 21152500 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 0.995074 # miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_misses 404 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 16221500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency 16222000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 0.995074 # mshr miss rate for ReadReq accesses system.cpu.l2cache.ReadReq_mshr_misses 404 # number of ReadReq MSHR misses system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked @@ -216,31 +216,31 @@ system.cpu.l2cache.blocked_cycles::no_mshrs 0 # system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 457 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 52368.131868 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 40161.538462 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_miss_latency 52370.329670 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 40162.637363 # average overall mshr miss latency system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 23827500 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 23828500 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.995624 # miss rate for demand accesses system.cpu.l2cache.demand_misses 455 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits 
-system.cpu.l2cache.demand_mshr_miss_latency 18273500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 18274000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 0.995624 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 455 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.occ_%::0 0.006169 # Average percentage of cache occupancy -system.cpu.l2cache.occ_blocks::0 202.148379 # Average occupied blocks per context +system.cpu.l2cache.occ_blocks::0 202.151439 # Average occupied blocks per context system.cpu.l2cache.overall_accesses 457 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 52368.131868 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 40161.538462 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_miss_latency 52370.329670 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 40162.637363 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 2 # number of overall hits -system.cpu.l2cache.overall_miss_latency 23827500 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 23828500 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.995624 # miss rate for overall accesses system.cpu.l2cache.overall_misses 455 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 18273500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 18274000 # 
number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 0.995624 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 455 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -248,34 +248,34 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.sampled_refs 404 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 202.148379 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 202.151439 # Cycle average of tags in use system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 43069 # number of cpu cycles simulated +system.cpu.numCycles 43077 # number of cpu cycles simulated system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed system.cpu.numWorkItemsStarted 0 # number of work items this cpu started -system.cpu.runCycles 6002 # Number of cycles cpu stages are processed. +system.cpu.runCycles 6011 # Number of cycles cpu stages are processed. system.cpu.smtCommittedInsts 0 # Number of SMT Instructions Simulated (Per-Thread) system.cpu.smtCycles 0 # Total number of cycles that the CPU was in SMT-mode system.cpu.smt_cpi no_value # CPI: Total SMT-CPI system.cpu.smt_ipc no_value # IPC: Total SMT-IPC -system.cpu.stage-0.idleCycles 39196 # Number of cycles 0 instructions are processed. -system.cpu.stage-0.runCycles 3873 # Number of cycles 1+ instructions are processed. -system.cpu.stage-0.utilization 8.992547 # Percentage of cycles stage was utilized (processing insts). 
-system.cpu.stage-1.idleCycles 40152 # Number of cycles 0 instructions are processed. -system.cpu.stage-1.runCycles 2917 # Number of cycles 1+ instructions are processed. -system.cpu.stage-1.utilization 6.772853 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-2.idleCycles 40243 # Number of cycles 0 instructions are processed. -system.cpu.stage-2.runCycles 2826 # Number of cycles 1+ instructions are processed. -system.cpu.stage-2.utilization 6.561564 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-3.idleCycles 41749 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.idleCycles 39203 # Number of cycles 0 instructions are processed. +system.cpu.stage-0.runCycles 3874 # Number of cycles 1+ instructions are processed. +system.cpu.stage-0.utilization 8.993198 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-1.idleCycles 40159 # Number of cycles 0 instructions are processed. +system.cpu.stage-1.runCycles 2918 # Number of cycles 1+ instructions are processed. +system.cpu.stage-1.utilization 6.773916 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-2.idleCycles 40245 # Number of cycles 0 instructions are processed. +system.cpu.stage-2.runCycles 2832 # Number of cycles 1+ instructions are processed. +system.cpu.stage-2.utilization 6.574274 # Percentage of cycles stage was utilized (processing insts). +system.cpu.stage-3.idleCycles 41757 # Number of cycles 0 instructions are processed. system.cpu.stage-3.runCycles 1320 # Number of cycles 1+ instructions are processed. -system.cpu.stage-3.utilization 3.064849 # Percentage of cycles stage was utilized (processing insts). -system.cpu.stage-4.idleCycles 39866 # Number of cycles 0 instructions are processed. +system.cpu.stage-3.utilization 3.064280 # Percentage of cycles stage was utilized (processing insts). 
+system.cpu.stage-4.idleCycles 39874 # Number of cycles 0 instructions are processed. system.cpu.stage-4.runCycles 3203 # Number of cycles 1+ instructions are processed. -system.cpu.stage-4.utilization 7.436904 # Percentage of cycles stage was utilized (processing insts). -system.cpu.threadCycles 10184 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) +system.cpu.stage-4.utilization 7.435522 # Percentage of cycles stage was utilized (processing insts). +system.cpu.threadCycles 10193 # Total Number of Cycles A Thread Was Active in CPU (Per-Thread) system.cpu.timesIdled 427 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 8 # Number of system calls |