From 59bf0e7eb41494b7de033aa4737da026adddc215 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: Timesync: Make sure timesync event is setup after curTick is unserialized Setup initial timesync event in initState or loadState so that curTick has been updated to the new value, otherwise the event is scheduled in the past. --- src/sim/root.cc | 13 ++++++++++++- src/sim/root.hh | 17 ++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/sim/root.cc b/src/sim/root.cc index 1dc9b6058..d51fcbda6 100644 --- a/src/sim/root.cc +++ b/src/sim/root.cc @@ -108,7 +108,18 @@ Root::Root(RootParams *p) : SimObject(p), _enabled(false), assert(_root == NULL); _root = this; lastTime.setTimer(); - timeSyncEnable(p->time_sync_enable); +} + +void +Root::initState() +{ + timeSyncEnable(params()->time_sync_enable); +} + +void +Root::loadState(Checkpoint *cp) +{ + timeSyncEnable(params()->time_sync_enable); } Root * diff --git a/src/sim/root.hh b/src/sim/root.hh index 2beced9d4..76a508c19 100644 --- a/src/sim/root.hh +++ b/src/sim/root.hh @@ -95,7 +95,22 @@ class Root : public SimObject /// Set the threshold for time remaining to spin wait. void timeSyncSpinThreshold(Time newThreshold); - Root(RootParams *p); + typedef RootParams Params; + const Params * + params() const + { + return dynamic_cast(_params); + } + + Root(Params *p); + + /** Schedule the timesync event at loadState() so that curTick is correct + */ + void loadState(Checkpoint *cp); + + /** Schedule the timesync event at initState() when not unserializing + */ + void initState(); }; #endif // __SIM_ROOT_HH__ -- cgit v1.2.3 From 453dbc772dba92dbceb44eaeef3c617d17d63e84 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: ARM: Fix timer calculations. The timer calculations were a bit off so time would run faster than it otherwise should --- src/dev/arm/timer_sp804.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/dev/arm/timer_sp804.cc b/src/dev/arm/timer_sp804.cc index 04668d268..e6d2657ea 100644 --- a/src/dev/arm/timer_sp804.cc +++ b/src/dev/arm/timer_sp804.cc @@ -178,11 +178,11 @@ Sp804::Timer::restartCounter(uint32_t val) if (!control.timerEnable) return; - Tick time = clock << power(16, control.timerPrescale); + Tick time = clock * power(16, control.timerPrescale); if (control.timerSize) - time *= bits(val,15,0); - else time *= val; + else + time *= bits(val,15,0); if (zeroEvent.scheduled()) { DPRINTF(Timer, "-- Event was already schedule, de-scheduling\n"); -- cgit v1.2.3 From e2507407b17188dca802082434cfe0230d9bfa61 Mon Sep 17 00:00:00 2001 From: Giacomo Gabrielli Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: O3: Enhance data address translation by supporting hardware page table walkers. Some ISAs (like ARM) relies on hardware page table walkers. For those ISAs, when a TLB miss occurs, initiateTranslation() can return with NoFault but with the translation unfinished. Instructions experiencing a delayed translation due to a hardware page table walk are deferred until the translation completes and kept into the IQ. In order to keep track of them, the IQ has been augmented with a queue of the outstanding delayed memory instructions. When their translation completes, instructions are re-executed (only their initiateAccess() was already executed; their DTB translation is now skipped). The IEW stage has been modified to support such a 2-pass execution. --- src/arch/arm/tlb.cc | 2 + src/cpu/base_dyn_inst.hh | 125 ++++++++++++++++++++++++++++++++---------- src/cpu/base_dyn_inst_impl.hh | 15 +++++ src/cpu/o3/fetch.hh | 4 ++ src/cpu/o3/iew_impl.hh | 21 +++++++ src/cpu/o3/inst_queue.hh | 28 ++++++++++ src/cpu/o3/inst_queue_impl.hh | 53 +++++++++++++++++- src/cpu/o3/lsq_unit_impl.hh | 10 +++- src/cpu/simple/timing.hh | 4 ++ src/cpu/translation.hh | 32 +++++++++-- src/sim/tlb.hh | 18 ++++++ 11 files changed, 276 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc index e5f5b36f6..230c56200 100644 --- a/src/arch/arm/tlb.cc +++ b/src/arch/arm/tlb.cc @@ -696,6 +696,8 @@ TLB::translateTiming(RequestPtr req, ThreadContext *tc, #endif if (!delay) translation->finish(fault, req, tc, mode); + else + translation->markDelayed(); return fault; } diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 0c566ec65..8b6662d70 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * Copyright (c) 2009 The University of Edinburgh * All rights reserved. @@ -150,6 +162,29 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Finish a DTB address translation. */ void finishTranslation(WholeTranslationState *state); + /** True if the DTB address translation has started. */ + bool translationStarted; + + /** True if the DTB address translation has completed. */ + bool translationCompleted; + + /** + * Returns true if the DTB address translation is being delayed due to a hw + * page table walk. + */ + bool isTranslationDelayed() const + { + return (translationStarted && !translationCompleted); + } + + /** + * Saved memory requests (needed when the DTB address translation is + * delayed due to a hw page table walk). + */ + RequestPtr savedReq; + RequestPtr savedSreqLow; + RequestPtr savedSreqHigh; + /** @todo: Consider making this private. */ public: /** The sequence number of the instruction. */ @@ -835,33 +870,42 @@ BaseDynInst::readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags) { reqMade = true; - Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(), - thread->contextId(), threadNumber); - + Request *req = NULL; Request *sreqLow = NULL; Request *sreqHigh = NULL; - // Only split the request if the ISA supports unaligned accesses. - if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); - } - initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); - - if (fault == NoFault) { - effAddr = req->getVaddr(); - effAddrValid = true; - fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); + if (reqMade && translationStarted) { + req = savedReq; + sreqLow = savedSreqLow; + sreqHigh = savedSreqHigh; } else { - // Commit will have to clean up whatever happened. Set this - // instruction as executed. - this->setExecuted(); + req = new Request(asid, addr, size, flags, this->pc.instAddr(), + thread->contextId(), threadNumber); + + // Only split the request if the ISA supports unaligned accesses. + if (TheISA::HasUnalignedMemAcc) { + splitRequest(req, sreqLow, sreqHigh); + } + initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); } - if (fault != NoFault) { - // Return a fixed value to keep simulation deterministic even - // along misspeculated paths. - if (data) - bzero(data, size); + if (translationCompleted) { + if (fault == NoFault) { + effAddr = req->getVaddr(); + effAddrValid = true; + fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); + } else { + // Commit will have to clean up whatever happened. Set this + // instruction as executed. + this->setExecuted(); + } + + if (fault != NoFault) { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. + if (data) + bzero(data, size); + } } if (traceData) { @@ -897,19 +941,26 @@ BaseDynInst::writeBytes(uint8_t *data, unsigned size, } reqMade = true; - Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(), - thread->contextId(), threadNumber); - + Request *req = NULL; Request *sreqLow = NULL; Request *sreqHigh = NULL; - // Only split the request if the ISA supports unaligned accesses. - if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); + if (reqMade && translationStarted) { + req = savedReq; + sreqLow = savedSreqLow; + sreqHigh = savedSreqHigh; + } else { + req = new Request(asid, addr, size, flags, this->pc.instAddr(), + thread->contextId(), threadNumber); + + // Only split the request if the ISA supports unaligned accesses. + if (TheISA::HasUnalignedMemAcc) { + splitRequest(req, sreqLow, sreqHigh); + } + initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); } - initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); - if (fault == NoFault) { + if (fault == NoFault && translationCompleted) { effAddr = req->getVaddr(); effAddrValid = true; fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx); @@ -953,6 +1004,8 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, uint64_t *res, BaseTLB::Mode mode) { + translationStarted = true; + if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) { WholeTranslationState *state = new WholeTranslationState(req, NULL, res, mode); @@ -961,6 +1014,12 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, DataTranslation > *trans = new DataTranslation >(this, state); cpu->dtb->translateTiming(req, thread->getTC(), trans, mode); + if (!translationCompleted) { + // Save memory requests. + savedReq = state->mainReq; + savedSreqLow = state->sreqLow; + savedSreqHigh = state->sreqHigh; + } } else { WholeTranslationState *state = new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode); @@ -973,6 +1032,12 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode); cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode); + if (!translationCompleted) { + // Save memory requests. + savedReq = state->mainReq; + savedSreqLow = state->sreqLow; + savedSreqHigh = state->sreqHigh; + } } } @@ -998,6 +1063,8 @@ BaseDynInst::finishTranslation(WholeTranslationState *state) state->deleteReqs(); } delete state; + + translationCompleted = true; } #endif // __CPU_BASE_DYN_INST_HH__ diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index 74f199d5f..7e4d25322 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -107,6 +119,9 @@ BaseDynInst::initVars() effAddrValid = false; physEffAddr = 0; + translationStarted = false; + translationCompleted = false; + isUncacheable = false; reqMade = false; readyRegs = 0; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 92691720b..647c48a76 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -136,6 +136,10 @@ class DefaultFetch : fetch(_fetch) {} + void + markDelayed() + {} + void finish(Fault fault, RequestPtr req, ThreadContext *tc, BaseTLB::Mode mode) diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 3f3761ff3..03f73c798 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1241,12 +1241,33 @@ DefaultIEW::executeInsts() // Loads will mark themselves as executed, and their writeback // event adds the instruction to the queue to commit fault = ldstQueue.executeLoad(inst); + + if (inst->isTranslationDelayed() && + fault == NoFault) { + // A hw page table walk is currently going on; the + // instruction must be deferred. + DPRINTF(IEW, "Execute: Delayed translation, deferring " + "load.\n"); + instQueue.deferMemInst(inst); + continue; + } + if (inst->isDataPrefetch() || inst->isInstPrefetch()) { fault = NoFault; } } else if (inst->isStore()) { fault = ldstQueue.executeStore(inst); + if (inst->isTranslationDelayed() && + fault == NoFault) { + // A hw page table walk is currently going on; the + // instruction must be deferred. + DPRINTF(IEW, "Execute: Delayed translation, deferring " + "store.\n"); + instQueue.deferMemInst(inst); + continue; + } + // If the store had a fault then it may not have a mem req if (fault != NoFault || inst->readPredicate() == false || !inst->isStoreConditional()) { diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index be936e204..64df35743 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -180,6 +192,11 @@ class InstructionQueue */ DynInstPtr getInstToExecute(); + /** Returns a memory instruction that was referred due to a delayed DTB + * translation if it is now ready to execute. + */ + DynInstPtr getDeferredMemInstToExecute(); + /** * Records the instruction as the producer of a register without * adding it to the rest of the IQ. @@ -223,6 +240,12 @@ class InstructionQueue /** Completes a memory operation. */ void completeMemInst(DynInstPtr &completed_inst); + /** + * Defers a memory instruction when its DTB translation incurs a hw + * page table walk. + */ + void deferMemInst(DynInstPtr &deferred_inst); + /** Indicates an ordering violation between a store and a load. */ void violation(DynInstPtr &store, DynInstPtr &faulting_load); @@ -284,6 +307,11 @@ class InstructionQueue /** List of instructions that are ready to be executed. */ std::list instsToExecute; + /** List of instructions waiting for their DTB translation to + * complete (hw page table walk in progress). + */ + std::list deferredMemInsts; + /** * Struct for comparing entries to be added to the priority queue. * This gives reverse ordering to the instructions in terms of diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 91cb2f0c8..d6da4b818 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -397,6 +409,7 @@ InstructionQueue::resetState() } nonSpecInsts.clear(); listOrder.clear(); + deferredMemInsts.clear(); } template @@ -733,6 +746,15 @@ InstructionQueue::scheduleReadyInsts() IssueStruct *i2e_info = issueToExecuteQueue->access(0); + DynInstPtr deferred_mem_inst; + int total_deferred_mem_issued = 0; + while (total_deferred_mem_issued < totalWidth && + (deferred_mem_inst = getDeferredMemInstToExecute()) != NULL) { + issueToExecuteQueue->access(0)->size++; + instsToExecute.push_back(deferred_mem_inst); + total_deferred_mem_issued++; + } + // Have iterator to head of the list // While I haven't exceeded bandwidth or reached the end of the list, // Try to get a FU that can do what this op needs. @@ -745,7 +767,7 @@ InstructionQueue::scheduleReadyInsts() ListOrderIt order_end_it = listOrder.end(); int total_issued = 0; - while (total_issued < totalWidth && + while (total_issued < (totalWidth - total_deferred_mem_issued) && iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -858,7 +880,7 @@ InstructionQueue::scheduleReadyInsts() iqInstsIssued+= total_issued; // If we issued any instructions, tell the CPU we had activity. - if (total_issued) { + if (total_issued || total_deferred_mem_issued) { cpu->activityThisCycle(); } else { DPRINTF(IQ, "Not able to schedule any instructions.\n"); @@ -1021,6 +1043,11 @@ void InstructionQueue::rescheduleMemInst(DynInstPtr &resched_inst) { DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum); + + // Reset DTB translation state + resched_inst->translationStarted = false; + resched_inst->translationCompleted = false; + resched_inst->clearCanIssue(); memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); } @@ -1049,6 +1076,28 @@ InstructionQueue::completeMemInst(DynInstPtr &completed_inst) count[tid]--; } +template +void +InstructionQueue::deferMemInst(DynInstPtr &deferred_inst) +{ + deferredMemInsts.push_back(deferred_inst); +} + +template +typename Impl::DynInstPtr +InstructionQueue::getDeferredMemInstToExecute() +{ + for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end(); + ++it) { + if ((*it)->translationCompleted) { + DynInstPtr ret = *it; + deferredMemInsts.erase(it); + return ret; + } + } + return NULL; +} + template void InstructionQueue::violation(DynInstPtr &store, diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index dd3604ffe..b5d337935 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -445,12 +445,16 @@ LSQUnit::executeLoad(DynInstPtr &inst) Fault load_fault = NoFault; DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", - inst->pcState(),inst->seqNum); + inst->pcState(), inst->seqNum); assert(!inst->isSquashed()); load_fault = inst->initiateAcc(); + if (inst->isTranslationDelayed() && + load_fault == NoFault) + return load_fault; + // If the instruction faulted or predicated false, then we need to send it // along to commit without the instruction completing. if (load_fault != NoFault || inst->readPredicate() == false) { @@ -532,6 +536,10 @@ LSQUnit::executeStore(DynInstPtr &store_inst) Fault store_fault = store_inst->initiateAcc(); + if (store_inst->isTranslationDelayed() && + store_fault == NoFault) + return store_fault; + if (store_inst->readPredicate() == false) store_inst->forwardOldRegs(); diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 2b0c8942a..098db5f5a 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -107,6 +107,10 @@ class TimingSimpleCPU : public BaseSimpleCPU : cpu(_cpu) {} + void + markDelayed() + {} + void finish(Fault fault, RequestPtr req, ThreadContext *tc, BaseTLB::Mode mode) diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh index 7db7c381a..60953540f 100644 --- a/src/cpu/translation.hh +++ b/src/cpu/translation.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2002-2005 The Regents of The University of Michigan * Copyright (c) 2009 The University of Edinburgh * All rights reserved. @@ -53,6 +65,7 @@ class WholeTranslationState Fault faults[2]; public: + bool delay; bool isSplit; RequestPtr mainReq; RequestPtr sreqLow; @@ -67,8 +80,8 @@ class WholeTranslationState */ WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) - : outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL), - sreqHigh(NULL), data(_data), res(_res), mode(_mode) + : outstanding(1), delay(false), isSplit(false), mainReq(_req), + sreqLow(NULL), sreqHigh(NULL), data(_data), res(_res), mode(_mode) { faults[0] = faults[1] = NoFault; assert(mode == BaseTLB::Read || mode == BaseTLB::Write); @@ -82,8 +95,9 @@ class WholeTranslationState WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow, RequestPtr _sreqHigh, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) - : outstanding(2), isSplit(true), mainReq(_req), sreqLow(_sreqLow), - sreqHigh(_sreqHigh), data(_data), res(_res), mode(_mode) + : outstanding(2), delay(false), isSplit(true), mainReq(_req), + sreqLow(_sreqLow), sreqHigh(_sreqHigh), data(_data), res(_res), + mode(_mode) { faults[0] = faults[1] = NoFault; assert(mode == BaseTLB::Read || mode == BaseTLB::Write); @@ -220,6 +234,16 @@ class DataTranslation : public BaseTLB::Translation { } + /** + * Signal the translation state that the translation has been delayed due + * to a hw page table walk. Split requests are transparently handled. + */ + void + markDelayed() + { + state->delay = true; + } + /** * Finish this part of the translation and indicate that the whole * translation is complete if the state says so. diff --git a/src/sim/tlb.hh b/src/sim/tlb.hh index 1512bc0fa..253f12072 100644 --- a/src/sim/tlb.hh +++ b/src/sim/tlb.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * @@ -64,6 +76,12 @@ class BaseTLB : public SimObject virtual ~Translation() {} + /** + * Signal that the translation has been delayed due to a hw page table + * walk. + */ + virtual void markDelayed() = 0; + /* * The memory for this object may be dynamically allocated, and it may * be responsible for cleaning itself up which will happen in this -- cgit v1.2.3 From 1411cb0b0f01577c74d0f181404138cb43ce8ac8 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: SimpleCPU: Fix a case where a DTLB fault redirects fetch and an I-side walk occurs. This change fixes an issue where a DTLB fault occurs and redirects fetch to handle the fault and the ITLB requires a walk which delays translation. In this case the status of the cpu isn't updated appropriately, and an additional instruction fetch occurs. Eventually this hits an assert as multiple instruction fetches are occuring in the system and when the second one returns the processor is in the wrong state. Some asserts below are removed because it was always true (typo) and the state after the initiateAcc() the processor could be in any valid state when a d-side fault occurs. --- src/cpu/simple/timing.cc | 12 ++++++------ src/cpu/simple/timing.hh | 5 ++++- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 453699f84..ab1ff91e8 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -752,6 +752,7 @@ TimingSimpleCPU::sendFetch(Fault fault, RequestPtr req, ThreadContext *tc) } else { delete req; // fetch fault: advance directly to next instruction (fault handler) + _status = Running; advanceInst(fault); } @@ -805,12 +806,11 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) if (curStaticInst && curStaticInst->isMemRef()) { // load or store: just send to dcache Fault fault = curStaticInst->initiateAcc(this, traceData); - if (_status != Running) { - // instruction will complete in dcache response callback - assert(_status == DcacheWaitResponse || - _status == DcacheRetry || DTBWaitResponse); - assert(fault == NoFault); - } else { + + // If we're not running now the instruction will complete in a dcache + // response callback or the instruction faulted and has started an + // ifetch + if (_status == Running) { if (fault != NoFault && traceData) { // If there was a fault, we shouldn't trace this instruction. delete traceData; diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 098db5f5a..a7a3eb7c3 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -109,7 +109,10 @@ class TimingSimpleCPU : public BaseSimpleCPU void markDelayed() - {} + { + assert(cpu->_status == Running); + cpu->_status = ITBWaitResponse; + } void finish(Fault fault, RequestPtr req, ThreadContext *tc, -- cgit v1.2.3 From 74eff1b71b7f2075e72a06e611d07ea37638e0b6 Mon Sep 17 00:00:00 2001 From: Giacomo Gabrielli Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: O3: Fix a few bugs in the TableWalker object. Uncacheable requests were set as such only in atomic mode. currState->delayed is checked in place of currState->timing for resetting currState in atomic mode. --- src/arch/arm/table_walker.cc | 17 +++++++++-------- src/arch/arm/table_walker.hh | 20 ++++++++++---------- 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc index 6b2113639..e6dd728dd 100644 --- a/src/arch/arm/table_walker.cc +++ b/src/arch/arm/table_walker.cc @@ -208,19 +208,20 @@ TableWalker::processWalk() return f; } + Request::Flags flag = 0; + if (currState->sctlr.c == 0) { + flag = Request::UNCACHEABLE; + } + if (currState->timing) { port->dmaAction(MemCmd::ReadReq, l1desc_addr, sizeof(uint32_t), &doL1DescEvent, (uint8_t*)&currState->l1Desc.data, - currState->tc->getCpuPtr()->ticks(1)); + currState->tc->getCpuPtr()->ticks(1), flag); DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before adding: %d\n", stateQueueL1.size()); stateQueueL1.push_back(currState); currState = NULL; } else { - Request::Flags flag = 0; - if (currState->sctlr.c == 0){ - flag = Request::UNCACHEABLE; - } port->dmaAction(MemCmd::ReadReq, l1desc_addr, sizeof(uint32_t), NULL, (uint8_t*)&currState->l1Desc.data, currState->tc->getCpuPtr()->ticks(1), flag); @@ -472,7 +473,7 @@ TableWalker::doL1Descriptor() switch (currState->l1Desc.type()) { case L1Descriptor::Ignore: case L1Descriptor::Reserved: - if (!currState->delayed) { + if (!currState->timing) { currState->tc = NULL; currState->req = NULL; } @@ -577,7 +578,7 @@ TableWalker::doL2Descriptor() if (currState->l2Desc.invalid()) { DPRINTF(TLB, "L2 descriptor invalid, causing fault\n"); - if (!currState->delayed) { + if (!currState->timing) { currState->tc = NULL; currState->req = NULL; } @@ -622,7 +623,7 @@ TableWalker::doL2Descriptor() memAttrs(currState->tc, te, currState->sctlr, currState->l2Desc.texcb(), currState->l2Desc.shareable()); - if (!currState->delayed) { + if (!currState->timing) { currState->tc = NULL; currState->req = NULL; } diff --git a/src/arch/arm/table_walker.hh b/src/arch/arm/table_walker.hh index 267a7ad26..96a39cc61 100644 --- a/src/arch/arm/table_walker.hh +++ b/src/arch/arm/table_walker.hh @@ -93,14 +93,14 @@ class TableWalker : public MemObject { if (supersection()) panic("Super sections not implemented\n"); - return mbits(data, 31,20); + return mbits(data, 31, 20); } /** Return the physcal address of the entry, bits in position*/ Addr paddr(Addr va) const { if (supersection()) panic("Super sections not implemented\n"); - return mbits(data, 31,20) | mbits(va, 20, 0); + return mbits(data, 31, 20) | mbits(va, 19, 0); } @@ -109,7 +109,7 @@ class TableWalker : public MemObject { if (supersection()) panic("Super sections not implemented\n"); - return bits(data, 31,20); + return bits(data, 31, 20); } /** Is the translation global (no asid used)? */ @@ -127,19 +127,19 @@ class TableWalker : public MemObject /** Three bit access protection flags */ uint8_t ap() const { - return (bits(data, 15) << 2) | bits(data,11,10); + return (bits(data, 15) << 2) | bits(data, 11, 10); } /** Domain Client/Manager: ARM DDI 0406B: B3-31 */ uint8_t domain() const { - return bits(data,8,5); + return bits(data, 8, 5); } /** Address of L2 descriptor if it exists */ Addr l2Addr() const { - return mbits(data, 31,10); + return mbits(data, 31, 10); } /** Memory region attributes: ARM DDI 0406B: B3-32. @@ -149,7 +149,7 @@ class TableWalker : public MemObject */ uint8_t texcb() const { - return bits(data, 2) | bits(data,3) << 1 | bits(data, 14, 12) << 2; + return bits(data, 2) | bits(data, 3) << 1 | bits(data, 14, 12) << 2; } /** If the section is shareable. See texcb() comment. */ @@ -187,7 +187,7 @@ class TableWalker : public MemObject /** Is the entry invalid */ bool invalid() const { - return bits(data, 1,0) == 0;; + return bits(data, 1, 0) == 0; } /** What is the size of the mapping? */ @@ -218,8 +218,8 @@ class TableWalker : public MemObject uint8_t texcb() const { return large() ? - (bits(data, 2) | (bits(data,3) << 1) | (bits(data, 14, 12) << 2)) : - (bits(data, 2) | (bits(data,3) << 1) | (bits(data, 8, 6) << 2)); + (bits(data, 2) | (bits(data, 3) << 1) | (bits(data, 14, 12) << 2)) : + (bits(data, 2) | (bits(data, 3) << 1) | (bits(data, 8, 6) << 2)); } /** Return the physical frame, bits shifted right */ -- cgit v1.2.3 From a05032f4df8be28c0ceaefba03114f3e7a5f6ab4 Mon Sep 17 00:00:00 2001 From: Giacomo Gabrielli Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: O3: Fix pipeline restart when a table walk completes in the fetch stage. When a table walk is initiated by the fetch stage, the CPU can potentially move to the idle state and never wake up. The fetch stage must call cpu->wakeCPU() when a translation completes (in finishTranslation()). --- src/cpu/o3/fetch_impl.hh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index d0c83d586..2e4e4819e 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -604,6 +604,9 @@ DefaultFetch::finishTranslation(Fault fault, RequestPtr mem_req) ThreadID tid = mem_req->threadId(); Addr block_PC = mem_req->getVaddr(); + // Wake up CPU if it was idle + cpu->wakeCPU(); + // If translation was successful, attempt to read the icache block. if (fault == NoFault) { // Build packet here. @@ -654,6 +657,9 @@ DefaultFetch::finishTranslation(Fault fault, RequestPtr mem_req) instruction->fault = fault; wroteToTimeBuffer = true; + DPRINTF(Activity, "Activity this cycle.\n"); + cpu->activityThisCycle(); + fetchStatus[tid] = TrapPending; DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid); -- cgit v1.2.3 From ded4d319f275aa6e1518f760d67b9e0519b31565 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: Serialization: Allow serialization of stl lists --- src/sim/serialize.cc | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/sim/serialize.hh | 8 ++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index d28f335be..44fe7b2e7 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -201,6 +201,23 @@ arrayParamOut(ostream &os, const string &name, const vector ¶m) os << "\n"; } +template +void +arrayParamOut(ostream &os, const string &name, const list ¶m) +{ + typename list::const_iterator it = param.begin(); + + os << name << "="; + if (param.size() > 0) + showParam(os, *it); + it++; + while (it != param.end()) { + os << " "; + showParam(os, *it); + it++; + } + os << "\n"; +} template void @@ -326,6 +343,37 @@ arrayParamIn(Checkpoint *cp, const string §ion, } } +template +void +arrayParamIn(Checkpoint *cp, const string §ion, + const string &name, list ¶m) +{ + string str; + if (!cp->find(section, name, str)) { + fatal("Can't unserialize '%s:%s'\n", section, name); + } + param.clear(); + + vector tokens; + tokenize(tokens, str, ' '); + + for (vector::size_type i = 0; i < tokens.size(); i++) { + T scalar_value = 0; + if (!parseParam(tokens[i], scalar_value)) { + string err("could not parse \""); + + err += str; + err += "\""; + + fatal(err); + } + + // assign parsed value to vector + param.push_back(scalar_value); + } +} + + void objParamIn(Checkpoint *cp, const string §ion, const string &name, SimObject * ¶m) @@ -356,7 +404,13 @@ arrayParamOut(ostream &os, const string &name, \ const vector ¶m); \ template void \ arrayParamIn(Checkpoint *cp, const string §ion, \ - const string &name, vector ¶m); + const string &name, vector ¶m); \ +template void \ +arrayParamOut(ostream &os, const string &name, \ + const list ¶m); \ +template void \ +arrayParamIn(Checkpoint *cp, const string §ion, \ + const string &name, list ¶m); INSTANTIATE_PARAM_TEMPLATES(char) INSTANTIATE_PARAM_TEMPLATES(signed char) diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 5ea632ea4..6be8ce3b6 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -69,6 +69,10 @@ template void arrayParamOut(std::ostream &os, const std::string &name, const std::vector ¶m); +template +void arrayParamOut(std::ostream &os, const std::string &name, + const std::list ¶m); + template void arrayParamIn(Checkpoint *cp, const std::string §ion, const std::string &name, T *param, unsigned size); @@ -77,6 +81,10 @@ template void arrayParamIn(Checkpoint *cp, const std::string §ion, const std::string &name, std::vector ¶m); +template +void arrayParamIn(Checkpoint *cp, const std::string §ion, + const std::string &name, std::list ¶m); + void objParamIn(Checkpoint *cp, const std::string §ion, const std::string &name, SimObject * ¶m); -- cgit v1.2.3 From d33c1d95929356682fb06083d1da2d66605649f4 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: VNC: Add VNC server to M5 --- src/base/SConscript | 1 + src/base/bitmap.cc | 82 ++++++ src/base/bitmap.hh | 114 ++++++++ src/base/compiler.hh | 2 + src/base/vnc/SConscript | 48 ++++ src/base/vnc/VncServer.py | 45 +++ src/base/vnc/convert.cc | 139 +++++++++ src/base/vnc/convert.hh | 141 ++++++++++ src/base/vnc/vncserver.cc | 703 ++++++++++++++++++++++++++++++++++++++++++++++ src/base/vnc/vncserver.hh | 475 +++++++++++++++++++++++++++++++ 10 files changed, 1750 insertions(+) create mode 100644 src/base/bitmap.cc create mode 100644 src/base/bitmap.hh create mode 100644 src/base/vnc/SConscript create mode 100644 src/base/vnc/VncServer.py create mode 100644 src/base/vnc/convert.cc create mode 100644 src/base/vnc/convert.hh create mode 100644 src/base/vnc/vncserver.cc create mode 100644 src/base/vnc/vncserver.hh (limited to 'src') diff --git a/src/base/SConscript b/src/base/SConscript index 2bb6b13ab..3f069bf9e 100644 --- a/src/base/SConscript +++ b/src/base/SConscript @@ -35,6 +35,7 @@ if env['CP_ANNOTATE']: Source('cp_annotate.cc') Source('atomicio.cc') Source('bigint.cc') +Source('bitmap.cc') Source('callback.cc') Source('circlebuf.cc') Source('cprintf.cc') diff --git a/src/base/bitmap.cc b/src/base/bitmap.cc new file mode 100644 index 000000000..0d2a9302b --- /dev/null +++ b/src/base/bitmap.cc @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: William Wang + * Ali Saidi + */ + +#include + +#include "base/bitmap.hh" +#include "base/misc.hh" + +// bitmap class ctor +Bitmap::Bitmap(VideoConvert::Mode _mode, uint16_t w, uint16_t h, uint8_t *d) + : mode(_mode), height(h), width(w), data(d), + vc(mode, VideoConvert::rgb8888, width, height) +{ +} + +void +Bitmap::write(std::ostream *bmp) +{ + assert(data); + + // For further information see: http://en.wikipedia.org/wiki/BMP_file_format + Magic magic = {{'B','M'}}; + Header header = {sizeof(VideoConvert::Rgb8888) * width * height , 0, 0, 54}; + Info info = {sizeof(Info), width, height, 1, + sizeof(VideoConvert::Rgb8888) * 8, 0, + sizeof(VideoConvert::Rgb8888) * width * height, 1, 1, 0, 0}; + + bmp->write(reinterpret_cast(&magic), sizeof(magic)); + bmp->write(reinterpret_cast(&header), sizeof(header)); + bmp->write(reinterpret_cast(&info), sizeof(info)); + + uint8_t *tmp = vc.convert(data); + uint32_t *tmp32 = (uint32_t*)tmp; + + // BMP start store data left to right starting with the bottom row + // so we need to do some creative flipping + for (int i = height - 1; i >= 0; i--) + for (int j = 0; j < width; j++) + bmp->write((char*)&tmp32[i * width + j], sizeof(uint32_t)); + + bmp->flush(); + + delete [] tmp; +} + diff --git a/src/base/bitmap.hh b/src/base/bitmap.hh new file mode 100644 index 000000000..9dfaa87a1 --- /dev/null +++ b/src/base/bitmap.hh @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: William Wang + * Ali Saidi + */ +#ifndef __BASE_BITMAP_HH__ +#define __BASE_BITMAP_HH__ + +#include + +#include "base/vnc/convert.hh" + +/** + * @file Declaration of a class that writes a frame buffer to a bitmap + */ + + +// write frame buffer into a bitmap picture +class Bitmap +{ + public: + /** Create a Bitmap creator that takes data in the given mode & size + * and outputs to an fstream + * @param mode the type of data that is being provided + * @param h the hight of the image + * @param w the width of the image + * @param d the data for the image in mode + */ + Bitmap(VideoConvert::Mode mode, uint16_t w, uint16_t h, uint8_t *d); + + /** Provide the converter with the data that should be output. It will be + * converted into rgb8888 and write out when write() is called. + * @param d the data + */ + void rawData(uint8_t* d) { data = d; } + + /** Write the provided data into the fstream provided + * @param bmp stream to write to + */ + void write(std::ostream *bmp); + + private: + VideoConvert::Mode mode; + uint16_t height; + uint16_t width; + uint8_t *data; + + VideoConvert vc; + + struct Magic + { + unsigned char magic_number[2]; + }; + + struct Header + { + uint32_t size; + uint16_t reserved1; + uint16_t reserved2; + uint32_t offset; + }; + + struct Info + { + uint32_t Size; + uint32_t Width; + uint32_t Height; + uint16_t Planes; + uint16_t BitCount; + uint32_t Compression; + uint32_t SizeImage; + uint32_t XPelsPerMeter; + uint32_t YPelsPerMeter; + uint32_t ClrUsed; + uint32_t ClrImportant; + }; +}; + +#endif // __BASE_BITMAP_HH__ + diff --git a/src/base/compiler.hh b/src/base/compiler.hh index 2c655af60..3315fb2f7 100644 --- a/src/base/compiler.hh +++ b/src/base/compiler.hh @@ -41,6 +41,7 @@ #define M5_PRAGMA_NORETURN(x) #define M5_DUMMY_RETURN #define M5_VAR_USED __attribute__((unused)) +#define M5_ATTR_PACKED __attribute__ ((__packed__)) #elif defined(__SUNPRO_CC) // this doesn't do anything with sun cc, but why not #define M5_ATTR_NORETURN __sun_attr__((__noreturn__)) @@ -48,6 +49,7 @@ #define DO_PRAGMA(x) _Pragma(#x) #define M5_VAR_USED #define M5_PRAGMA_NORETURN(x) DO_PRAGMA(does_not_return(x)) +#define M5_ATTR_PACKED __attribute__ ((__packed__)) #else #error "Need to define compiler options in base/compiler.hh" #endif diff --git a/src/base/vnc/SConscript b/src/base/vnc/SConscript new file mode 100644 index 000000000..c92676555 --- /dev/null +++ b/src/base/vnc/SConscript @@ -0,0 +1,48 @@ +# -*- mode:python -*- + +# Copyright (c) 2010 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: William Wang + +Import('*') + +if env['FULL_SYSTEM']: + SimObject('VncServer.py') + Source('vncserver.cc') + TraceFlag('VNC') + +Source('convert.cc') + diff --git a/src/base/vnc/VncServer.py b/src/base/vnc/VncServer.py new file mode 100644 index 000000000..21eb3ed28 --- /dev/null +++ b/src/base/vnc/VncServer.py @@ -0,0 +1,45 @@ +# Copyright (c) 2010 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: William Wang + +from m5.SimObject import SimObject +from m5.params import * +from m5.proxy import * + +class VncServer(SimObject): + type = 'VncServer' + port = Param.TcpPort(5900, "listen port") + number = Param.Int(0, "vnc client number") diff --git a/src/base/vnc/convert.cc b/src/base/vnc/convert.cc new file mode 100644 index 000000000..ea7a9b1c5 --- /dev/null +++ b/src/base/vnc/convert.cc @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + * William Wang + */ + +#include + +#include "base/misc.hh" +#include "base/vnc/convert.hh" + +/** @file + * This file provides conversion functions for a variety of video modes + */ + +VideoConvert::VideoConvert(Mode input_mode, Mode output_mode, int _width, + int _height) + : inputMode(input_mode), outputMode(output_mode), width(_width), + height(_height) +{ + if (inputMode != bgr565 && inputMode != rgb565 && inputMode != bgr8888) + fatal("Only support converting from bgr565, rdb565, and bgr8888\n"); + + if (outputMode != rgb8888) + fatal("Only support converting to rgb8888\n"); + + assert(0 < height && height < 4000); + assert(0 < width && width < 4000); +} + +VideoConvert::~VideoConvert() +{ +} + +uint8_t* +VideoConvert::convert(uint8_t *fb) +{ + switch (inputMode) { + case bgr565: + return m565rgb8888(fb, true); + case rgb565: + return m565rgb8888(fb, false); + case bgr8888: + return bgr8888rgb8888(fb); + default: + panic("Unimplemented Mode\n"); + } +} + +uint8_t* +VideoConvert::m565rgb8888(uint8_t *fb, bool bgr) +{ + uint8_t *out = new uint8_t[area() * sizeof(uint32_t)]; + uint32_t *out32 = (uint32_t*)out; + + uint16_t *in16 = (uint16_t*)fb; + + for (int x = 0; x < area(); x++) { + Bgr565 inpx; + Rgb8888 outpx = 0; + + inpx = in16[x]; + + if (bgr) { + outpx.red = inpx.blue << 3; + outpx.green = inpx.green << 2; + outpx.blue = inpx.red << 3; + } else { + outpx.blue = inpx.blue << 3; + outpx.green = inpx.green << 2; + outpx.red = inpx.red << 3; + } + + out32[x] = outpx; + } + + return out; +} + + +uint8_t* +VideoConvert::bgr8888rgb8888(uint8_t *fb) +{ + uint8_t *out = new uint8_t[area() * sizeof(uint32_t)]; + uint32_t *out32 = (uint32_t*)out; + + uint32_t *in32 = (uint32_t*)fb; + + for (int x = 0; x < area(); x++) { + Rgb8888 outpx = 0; + Bgr8888 inpx; + + + inpx = in32[x]; + + outpx.red = inpx.blue; + outpx.green = inpx.green; + outpx.blue = inpx.red; + + out32[x] = outpx; + } + + return out; +} + diff --git a/src/base/vnc/convert.hh b/src/base/vnc/convert.hh new file mode 100644 index 000000000..68a21d677 --- /dev/null +++ b/src/base/vnc/convert.hh @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + */ + +/** @file + * This file provides conversion functions for a variety of video modes + */ + +#ifndef __BASE_VNC_CONVERT_HH__ +#define __BASE_VNC_CONVERT_HH__ + +#include "base/bitunion.hh" + +class VideoConvert +{ + public: + enum Mode { + UnknownMode, + bgr565, + rgb565, + bgr8888, + rgb8888, + rgb888, + bgr888, + bgr444, + bgr4444, + rgb444, + rgb4444, + }; + + // supports bpp32 RGB (bmp) and bpp16 5:6:5 mode BGR (linux) + BitUnion32(Rgb8888) + Bitfield<7,0> blue; + Bitfield<15,8> green; + Bitfield<23,16> red; + Bitfield<31,24> alpha; + EndBitUnion(Rgb8888) + + BitUnion32(Bgr8888) + Bitfield<7,0> red; + Bitfield<15,8> green; + Bitfield<23,16> blue; + Bitfield<31,24> alpha; + EndBitUnion(Bgr8888) + + BitUnion16(Bgr565) + Bitfield<4,0> red; + Bitfield<10,5> green; + Bitfield<15,11> blue; + EndBitUnion(Bgr565) + + BitUnion16(Rgb565) + Bitfield<4,0> red; + Bitfield<10,5> green; + Bitfield<15,11> blue; + EndBitUnion(Rgb565) + + /** Setup the converter with the given parameters + * @param input_mode type of data that will be provided + * @param output_mode type of data that should be output + * @param _width width of the frame buffer + * @param _height height of the frame buffer + */ + VideoConvert(Mode input_mode, Mode output_mode, int _width, int _height); + + /** Destructor + */ + ~VideoConvert(); + + /** Convert the provided frame buffer data into the format specified in the + * constructor. + * @param fb the frame buffer to convert + * @return the converted data (user must free) + */ + uint8_t* convert(uint8_t *fb); + + /** Return the number of pixels that this buffer specifies + * @return number of pixels + */ + int area() { return width * height; } + + private: + + /** + * Convert a bgr8888 input to rgb8888. + * @param fb the data to convert + * @return converted data + */ + uint8_t* bgr8888rgb8888(uint8_t *fb); + + /** + * Convert a bgr565 or rgb565 input to rgb8888. + * @param fb the data to convert + * @param bgr true if the input data is bgr565 + * @return converted data + */ + uint8_t* m565rgb8888(uint8_t *fb, bool bgr); + + Mode inputMode; + Mode outputMode; + int width; + int height; +}; + +#endif // __BASE_VNC_CONVERT_HH__ + diff --git a/src/base/vnc/vncserver.cc b/src/base/vnc/vncserver.cc new file mode 100644 index 000000000..8936fa67b --- /dev/null +++ b/src/base/vnc/vncserver.cc @@ -0,0 +1,703 @@ +/* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + * William Wang + */ + +/** @file + * Implementiation of a VNC server + */ + +#include + +#include +#include +#include +#include +#include + +#include "base/atomicio.hh" +#include "base/misc.hh" +#include "base/socket.hh" +#include "base/trace.hh" +#include "base/vnc/vncserver.hh" +#include "sim/byteswap.hh" + +using namespace std; + +/** + * Poll event for the listen socket + */ +VncServer::ListenEvent::ListenEvent(VncServer *vs, int fd, int e) + : PollEvent(fd, e), vncserver(vs) +{ +} + +void +VncServer::ListenEvent::process(int revent) +{ + vncserver->accept(); +} + +/** + * Poll event for the data socket + */ +VncServer::DataEvent::DataEvent(VncServer *vs, int fd, int e) + : PollEvent(fd, e), vncserver(vs) +{ +} + +void +VncServer::DataEvent::process(int revent) +{ + if (revent & POLLIN) + vncserver->data(); + else if (revent & POLLNVAL) + vncserver->detach(); +} + +/** + * VncServer + */ +VncServer::VncServer(const Params *p) + : SimObject(p), listenEvent(NULL), dataEvent(NULL), number(p->number), + dataFd(-1), _videoWidth(1), _videoHeight(1), clientRfb(0), keyboard(NULL), + mouse(NULL), sendUpdate(false), videoMode(VideoConvert::UnknownMode), + vc(NULL) +{ + if (p->port) + listen(p->port); + + curState = WaitForProtocolVersion; + + + // currently we only support this one pixel format + // unpacked 32bit rgb (rgb888 + 8 bits of nothing/alpha) + // keep it around for telling the client and making + // sure the client cooperates + pixelFormat.bpp = 32; + pixelFormat.depth = 24; + pixelFormat.bigendian = 0; + pixelFormat.truecolor = 1; + pixelFormat.redmax = 0xff; + pixelFormat.greenmax = 0xff; + pixelFormat.bluemax = 0xff; + pixelFormat.redshift = 16; + pixelFormat.greenshift = 8; + pixelFormat.blueshift = 0; + + + DPRINTF(VNC, "Vnc server created at port %d\n", p->port); +} + +VncServer::~VncServer() +{ + if (dataFd != -1) + ::close(dataFd); + + if (listenEvent) + delete listenEvent; + + if (dataEvent) + delete dataEvent; +} + + +//socket creation and vnc client attach +void +VncServer::listen(int port) +{ + if (ListenSocket::allDisabled()) { + warn_once("Sockets disabled, not accepting vnc client connections"); + return; + } + + while (!listener.listen(port, true)) { + DPRINTF(VNC, + "can't bind address vnc server port %d in use PID %d\n", + port, getpid()); + port++; + } + + int p1, p2; + p2 = name().rfind('.') - 1; + p1 = name().rfind('.', p2); + ccprintf(cerr, "Listening for %s connection on port %d\n", + name().substr(p1 + 1, p2 - p1), port); + + listenEvent = new ListenEvent(this, listener.getfd(), POLLIN); + pollQueue.schedule(listenEvent); +} + +// attach a vnc client +void +VncServer::accept() +{ + if (!listener.islistening()) + panic("%s: cannot accept a connection if not listening!", name()); + + int fd = listener.accept(true); + if (dataFd != -1) { + char message[] = "vnc server already attached!\n"; + atomic_write(fd, message, sizeof(message)); + ::close(fd); + return; + } + + dataFd = fd; + + // Send our version number to the client + write((uint8_t*)vncVersion(), strlen(vncVersion())); + + // read the client response + dataEvent = new DataEvent(this, dataFd, POLLIN); + pollQueue.schedule(dataEvent); + + inform("VNC client attached\n"); +} + +// data called by data event +void +VncServer::data() +{ + // We have new data, see if we can handle it + size_t len; + DPRINTF(VNC, "Vnc client message recieved\n"); + + switch (curState) { + case WaitForProtocolVersion: + checkProtocolVersion(); + break; + case WaitForSecurityResponse: + checkSecurity(); + break; + case WaitForClientInit: + // Don't care about shared, just need to read it out of the socket + uint8_t shared; + len = read(&shared); + assert(len == 1); + + // Send our idea of the frame buffer + sendServerInit(); + + break; + case NormalPhase: + uint8_t message_type; + len = read(&message_type); + if (!len) { + detach(); + return; + } + assert(len == 1); + + switch (message_type) { + case ClientSetPixelFormat: + setPixelFormat(); + break; + case ClientSetEncodings: + setEncodings(); + break; + case ClientFrameBufferUpdate: + requestFbUpdate(); + break; + case ClientKeyEvent: + recvKeyboardInput(); + break; + case ClientPointerEvent: + recvPointerInput(); + break; + case ClientCutText: + recvCutText(); + break; + default: + panic("Unimplemented message type recv from client: %d\n", + message_type); + break; + } + break; + default: + panic("Unknown vnc server state\n"); + } +} + + +// read from socket +size_t +VncServer::read(uint8_t *buf, size_t len) +{ + if (dataFd < 0) + panic("vnc not properly attached.\n"); + + size_t ret; + do { + ret = ::read(dataFd, buf, len); + } while (ret == -1 && errno == EINTR); + + + if (ret <= 0){ + DPRINTF(VNC, "Read failed.\n"); + detach(); + return 0; + } + + return ret; +} + +size_t +VncServer::read1(uint8_t *buf, size_t len) +{ + size_t read_len M5_VAR_USED; + read_len = read(buf + 1, len - 1); + assert(read_len == len - 1); + return read_len; +} + + +template +size_t +VncServer::read(T* val) +{ + return read((uint8_t*)val, sizeof(T)); +} + +// write to socket +size_t +VncServer::write(const uint8_t *buf, size_t len) +{ + if (dataFd < 0) + panic("Vnc client not properly attached.\n"); + + ssize_t ret; + ret = atomic_write(dataFd, buf, len); + + if (ret < len) + detach(); + + return ret; +} + +template +size_t +VncServer::write(T* val) +{ + return write((uint8_t*)val, sizeof(T)); +} + +size_t +VncServer::write(const char* str) +{ + return write((uint8_t*)str, strlen(str)); +} + +// detach a vnc client +void +VncServer::detach() +{ + if (dataFd != -1) { + ::close(dataFd); + dataFd = -1; + } + + if (!dataEvent || !dataEvent->queued()) + return; + + pollQueue.remove(dataEvent); + delete dataEvent; + dataEvent = NULL; + curState = WaitForProtocolVersion; + + inform("VNC client detached\n"); + DPRINTF(VNC, "detach vnc client %d\n", number); +} + +void +VncServer::sendError(const char* error_msg) +{ + uint32_t len = strlen(error_msg); + write(&len); + write(error_msg); +} + +void +VncServer::checkProtocolVersion() +{ + assert(curState == WaitForProtocolVersion); + + size_t len M5_VAR_USED; + char version_string[13]; + + // Null terminate the message so it's easier to work with + version_string[12] = 0; + + len = read((uint8_t*)version_string, 12); + assert(len == 12); + + uint32_t major, minor; + + // Figure out the major/minor numbers + if (sscanf(version_string, "RFB %03d.%03d\n", &major, &minor) != 2) { + warn(" Malformed protocol version %s\n", version_string); + sendError("Malformed protocol version\n"); + detach(); + } + + DPRINTF(VNC, "Client request protocol version %d.%d\n", major, minor); + + // If it's not 3.X we don't support it + if (major != 3 || minor < 2) { + warn("Unsupported VNC client version... disconnecting\n"); + uint8_t err = AuthInvalid; + write(&err); + detach(); + } + // Auth is different based on version number + if (minor < 7) { + uint32_t sec_type = htobe((uint32_t)AuthNone); + write(&sec_type); + } else { + uint8_t sec_cnt = 1; + uint8_t sec_type = htobe((uint8_t)AuthNone); + write(&sec_cnt); + write(&sec_type); + } + + // Wait for client to respond + curState = WaitForSecurityResponse; +} + +void +VncServer::checkSecurity() +{ + assert(curState == WaitForSecurityResponse); + + uint8_t security_type; + size_t len M5_VAR_USED = read(&security_type); + + assert(len == 1); + + if (security_type != AuthNone) { + warn("Unknown VNC security type\n"); + sendError("Unknown security type\n"); + } + + DPRINTF(VNC, "Sending security auth OK\n"); + + uint32_t success = htobe(VncOK); + write(&success); + curState = WaitForClientInit; +} + +void +VncServer::sendServerInit() +{ + ServerInitMsg msg; + + DPRINTF(VNC, "Sending server init message to client\n"); + + msg.fbWidth = htobe(videoWidth()); + msg.fbHeight = htobe(videoHeight()); + + msg.px.bpp = htobe(pixelFormat.bpp); + msg.px.depth = htobe(pixelFormat.depth); + msg.px.bigendian = htobe(pixelFormat.bigendian); + msg.px.truecolor = htobe(pixelFormat.truecolor); + msg.px.redmax = htobe(pixelFormat.redmax); + msg.px.greenmax = htobe(pixelFormat.greenmax); + msg.px.bluemax = htobe(pixelFormat.bluemax); + msg.px.redshift = htobe(pixelFormat.redshift); + msg.px.greenshift = htobe(pixelFormat.greenshift); + msg.px.blueshift = htobe(pixelFormat.blueshift); + memset(msg.px.padding, 0, 3); + msg.namelen = 2; + msg.namelen = htobe(msg.namelen); + memcpy(msg.name, "M5", 2); + + write(&msg); + curState = NormalPhase; +} + + +void +VncServer::setPixelFormat() +{ + DPRINTF(VNC, "Received pixel format from client message\n"); + + PixelFormatMessage pfm; + read1((uint8_t*)&pfm, sizeof(PixelFormatMessage)); + + DPRINTF(VNC, " -- bpp = %d; depth = %d; be = %d\n", pfm.px.bpp, + pfm.px.depth, pfm.px.bigendian); + DPRINTF(VNC, " -- true color = %d red,green,blue max = %d,%d,%d\n", + pfm.px.truecolor, betoh(pfm.px.redmax), betoh(pfm.px.greenmax), + betoh(pfm.px.bluemax)); + DPRINTF(VNC, " -- red,green,blue shift = %d,%d,%d\n", pfm.px.redshift, + pfm.px.greenshift, pfm.px.blueshift); + + if (betoh(pfm.px.bpp) != pixelFormat.bpp || + betoh(pfm.px.depth) != pixelFormat.depth || + betoh(pfm.px.bigendian) != pixelFormat.bigendian || + betoh(pfm.px.truecolor) != pixelFormat.truecolor || + betoh(pfm.px.redmax) != pixelFormat.redmax || + betoh(pfm.px.greenmax) != pixelFormat.greenmax || + betoh(pfm.px.bluemax) != pixelFormat.bluemax || + betoh(pfm.px.redshift) != pixelFormat.redshift || + betoh(pfm.px.greenshift) != pixelFormat.greenshift || + betoh(pfm.px.blueshift) != pixelFormat.blueshift) + fatal("VNC client doesn't support true color raw encoding\n"); +} + +void +VncServer::setEncodings() +{ + DPRINTF(VNC, "Received supported encodings from client\n"); + + PixelEncodingsMessage pem; + read1((uint8_t*)&pem, sizeof(PixelEncodingsMessage)); + + pem.num_encodings = betoh(pem.num_encodings); + + DPRINTF(VNC, " -- %d encoding present\n", pem.num_encodings); + supportsRawEnc = supportsResizeEnc = false; + + for (int x = 0; x < pem.num_encodings; x++) { + int32_t encoding; + size_t len M5_VAR_USED; + len = read(&encoding); + assert(len == sizeof(encoding)); + DPRINTF(VNC, " -- supports %d\n", betoh(encoding)); + + switch (betoh(encoding)) { + case EncodingRaw: + supportsRawEnc = true; + break; + case EncodingDesktopSize: + supportsResizeEnc = true; + break; + } + } + + if (!supportsRawEnc) + fatal("VNC clients must always support raw encoding\n"); +} + +void +VncServer::requestFbUpdate() +{ + DPRINTF(VNC, "Received frame buffer update request from client\n"); + + FrameBufferUpdateReq fbr; + read1((uint8_t*)&fbr, sizeof(FrameBufferUpdateReq)); + + fbr.x = betoh(fbr.x); + fbr.y = betoh(fbr.y); + fbr.width = betoh(fbr.width); + fbr.height = betoh(fbr.height); + + DPRINTF(VNC, " -- x = %d y = %d w = %d h = %d\n", fbr.x, fbr.y, fbr.width, + fbr.height); + + sendFrameBufferUpdate(); +} + +void +VncServer::recvKeyboardInput() +{ + DPRINTF(VNC, "Received keyboard input from client\n"); + KeyEventMessage kem; + read1((uint8_t*)&kem, sizeof(KeyEventMessage)); + + kem.key = betoh(kem.key); + DPRINTF(VNC, " -- received key code %d (%s)\n", kem.key, kem.down_flag ? + "down" : "up"); + + if (keyboard) + keyboard->keyPress(kem.key, kem.down_flag); +} + +void +VncServer::recvPointerInput() +{ + DPRINTF(VNC, "Received pointer input from client\n"); + PointerEventMessage pem; + + read1((uint8_t*)&pem, sizeof(PointerEventMessage));; + + pem.x = betoh(pem.x); + pem.y = betoh(pem.y); + DPRINTF(VNC, " -- pointer at x = %d y = %d buttons = %#x\n", pem.x, pem.y, + pem.button_mask); + + if (mouse) + mouse->mouseAt(pem.x, pem.y, pem.button_mask); +} + +void +VncServer::recvCutText() +{ + DPRINTF(VNC, "Received client copy buffer message\n"); + + ClientCutTextMessage cct; + read1((uint8_t*)&cct, sizeof(ClientCutTextMessage)); + + char str[1025]; + size_t data_len = betoh(cct.length); + DPRINTF(VNC, "String length %d\n", data_len); + while (data_len > 0) { + size_t len; + size_t bytes_to_read = data_len > 1024 ? 1024 : data_len; + len = read((uint8_t*)&str, bytes_to_read); + str[bytes_to_read] = 0; + data_len -= len; + assert(data_len >= 0); + DPRINTF(VNC, "Buffer: %s\n", str); + } + +} + + +void +VncServer::sendFrameBufferUpdate() +{ + + if (!clientRfb || dataFd <= 0 || curState != NormalPhase || !sendUpdate) { + DPRINTF(VNC, "NOT sending framebuffer update\n"); + return; + } + + assert(vc); + + // The client will request data constantly, unless we throttle it + sendUpdate = false; + + DPRINTF(VNC, "Sending framebuffer update\n"); + + FrameBufferUpdate fbu; + FrameBufferRect fbr; + + fbu.type = ServerFrameBufferUpdate; + fbu.num_rects = 1; + fbr.x = 0; + fbr.y = 0; + fbr.width = videoWidth(); + fbr.height = videoHeight(); + fbr.encoding = EncodingRaw; + + // fix up endian + fbu.num_rects = htobe(fbu.num_rects); + fbr.x = htobe(fbr.x); + fbr.y = htobe(fbr.y); + fbr.width = htobe(fbr.width); + fbr.height = htobe(fbr.height); + fbr.encoding = htobe(fbr.encoding); + + // send headers to client + write(&fbu); + write(&fbr); + + assert(clientRfb); + + uint8_t *tmp = vc->convert(clientRfb); + write(tmp, videoWidth() * videoHeight() * sizeof(uint32_t)); + delete [] tmp; + +} + +void +VncServer::sendFrameBufferResized() +{ + assert(clientRfb && dataFd > 0 && curState == NormalPhase); + DPRINTF(VNC, "Sending framebuffer resize\n"); + + FrameBufferUpdate fbu; + FrameBufferRect fbr; + + fbu.type = ServerFrameBufferUpdate; + fbu.num_rects = 1; + fbr.x = 0; + fbr.y = 0; + fbr.width = videoWidth(); + fbr.height = videoHeight(); + fbr.encoding = EncodingDesktopSize; + + // fix up endian + fbu.num_rects = htobe(fbu.num_rects); + fbr.x = htobe(fbr.x); + fbr.y = htobe(fbr.y); + fbr.width = htobe(fbr.width); + fbr.height = htobe(fbr.height); + fbr.encoding = htobe(fbr.encoding); + + // send headers to client + write(&fbu); + write(&fbr); + + // No actual data is sent in this message +} + +void +VncServer::setFrameBufferParams(VideoConvert::Mode mode, int width, int height) +{ + DPRINTF(VNC, "Updating video params: mode: %d width: %d height: %d\n", mode, + width, height); + + if (mode != videoMode || width != videoWidth() || height != videoHeight()) { + videoMode = mode; + _videoWidth = width; + _videoHeight = height; + + if (vc) + delete vc; + + vc = new VideoConvert(mode, VideoConvert::rgb8888, videoWidth(), + videoHeight()); + + if (dataFd > 0 && clientRfb && curState == NormalPhase) { + if (supportsResizeEnc) + sendFrameBufferResized(); + else + // The frame buffer changed size and we can't update the client + detach(); + } + } +} + +// create the VNC server object +VncServer * +VncServerParams::create() +{ + return new VncServer(this); +} diff --git a/src/base/vnc/vncserver.hh b/src/base/vnc/vncserver.hh new file mode 100644 index 000000000..23b097b11 --- /dev/null +++ b/src/base/vnc/vncserver.hh @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + * William Wang + */ + +/** @file + * Declaration of a VNC server + */ + +#ifndef __DEV_VNC_SERVER_HH__ +#define __DEV_VNC_SERVER_HH__ + +#include + +#include "base/circlebuf.hh" +#include "base/pollevent.hh" +#include "base/socket.hh" +#include "base/vnc/convert.hh" +#include "cpu/intr_control.hh" +#include "sim/sim_object.hh" +#include "params/VncServer.hh" + +/** + * A device that expects to receive input from the vnc server should derrive + * (through mulitple inheritence if necessary from VncKeyboard or VncMouse + * and call setKeyboard() or setMouse() respectively on the vnc server. + */ +class VncKeyboard +{ + public: + /** + * Called when the vnc server receives a key press event from the + * client. + * @param key the key passed is an x11 keysym + * @param down is the key now down or up? + */ + virtual void keyPress(uint32_t key, bool down) = 0; +}; + +class VncMouse +{ + public: + /** + * called whenever the mouse moves or it's button state changes + * buttons is a simple mask with each button (0-8) corresponding to + * a bit position in the byte with 1 being down and 0 being up + * @param x the x position of the mouse + * @param y the y position of the mouse + * @param buttos the button state as described above + */ + virtual void mouseAt(uint16_t x, uint16_t y, uint8_t buttons) = 0; +}; + +class VncServer : public SimObject +{ + public: + + /** + * \defgroup VncConstants A set of constants and structs from the VNC spec + * @{ + */ + /** Authentication modes */ + const static uint32_t AuthInvalid = 0; + const static uint32_t AuthNone = 1; + + /** Error conditions */ + const static uint32_t VncOK = 0; + + /** Client -> Server message IDs */ + enum ClientMessages { + ClientSetPixelFormat = 0, + ClientSetEncodings = 2, + ClientFrameBufferUpdate = 3, + ClientKeyEvent = 4, + ClientPointerEvent = 5, + ClientCutText = 6 + }; + + /** Server -> Client message IDs */ + enum ServerMessages { + ServerFrameBufferUpdate = 0, + ServerSetColorMapEntries = 1, + ServerBell = 2, + ServerCutText = 3 + }; + + /** Encoding types */ + enum EncodingTypes { + EncodingRaw = 0, + EncodingCopyRect = 1, + EncodingHextile = 5, + EncodingDesktopSize = -223 + }; + + /** keyboard/mouse support */ + enum MouseEvents { + MouseLeftButton = 0x1, + MouseRightButton = 0x2, + MouseMiddleButton = 0x4 + }; + + const char* vncVersion() const + { + return "RFB 003.008\n"; + } + + enum ConnectionState { + WaitForProtocolVersion, + WaitForSecurityResponse, + WaitForClientInit, + InitializationPhase, + NormalPhase + }; + + struct PixelFormat { + uint8_t bpp; + uint8_t depth; + uint8_t bigendian; + uint8_t truecolor; + uint16_t redmax; + uint16_t greenmax; + uint16_t bluemax; + uint8_t redshift; + uint8_t greenshift; + uint8_t blueshift; + uint8_t padding[3]; + } M5_ATTR_PACKED; + + struct ServerInitMsg { + uint16_t fbWidth; + uint16_t fbHeight; + PixelFormat px; + uint32_t namelen; + char name[2]; // just to put M5 in here + } M5_ATTR_PACKED; + + struct PixelFormatMessage { + uint8_t type; + uint8_t padding[3]; + PixelFormat px; + } M5_ATTR_PACKED; + + struct PixelEncodingsMessage { + uint8_t type; + uint8_t padding; + uint16_t num_encodings; + } M5_ATTR_PACKED; + + struct FrameBufferUpdateReq { + uint8_t type; + uint8_t incremental; + uint16_t x; + uint16_t y; + uint16_t width; + uint16_t height; + } M5_ATTR_PACKED; + + struct KeyEventMessage { + uint8_t type; + uint8_t down_flag; + uint8_t padding[2]; + uint32_t key; + } M5_ATTR_PACKED; + + struct PointerEventMessage { + uint8_t type; + uint8_t button_mask; + uint16_t x; + uint16_t y; + } M5_ATTR_PACKED; + + struct ClientCutTextMessage { + uint8_t type; + uint8_t padding[3]; + uint32_t length; + } M5_ATTR_PACKED; + + struct FrameBufferUpdate { + uint8_t type; + uint8_t padding; + uint16_t num_rects; + } M5_ATTR_PACKED; + + struct FrameBufferRect { + uint16_t x; + uint16_t y; + uint16_t width; + uint16_t height; + int32_t encoding; + } M5_ATTR_PACKED; + + struct ServerCutText { + uint8_t type; + uint8_t padding[3]; + uint32_t length; + } M5_ATTR_PACKED; + + /** @} */ + + protected: + /** ListenEvent to accept a vnc client connection */ + class ListenEvent: public PollEvent + { + protected: + VncServer *vncserver; + + public: + ListenEvent(VncServer *vs, int fd, int e); + void process(int revent); + }; + + friend class ListenEvent; + ListenEvent *listenEvent; + + /** DataEvent to read data from vnc */ + class DataEvent: public PollEvent + { + protected: + VncServer *vncserver; + + public: + DataEvent(VncServer *vs, int fd, int e); + void process(int revent); + }; + + friend class DataEvent; + DataEvent *dataEvent; + + int number; + int dataFd; // data stream file describer + + ListenSocket listener; + + void listen(int port); + void accept(); + void data(); + void detach(); + + public: + typedef VncServerParams Params; + VncServer(const Params *p); + ~VncServer(); + + // RFB + protected: + + /** The rfb prototol state the connection is in */ + ConnectionState curState; + + /** the width of the frame buffer we are sending to the client */ + uint16_t _videoWidth; + + /** the height of the frame buffer we are sending to the client */ + uint16_t _videoHeight; + + /** pointer to the actual data that is stored in the frame buffer device */ + uint8_t* clientRfb; + + /** The device to notify when we get key events */ + VncKeyboard *keyboard; + + /** The device to notify when we get mouse events */ + VncMouse *mouse; + + /** An update needs to be sent to the client. Without doing this the + * client will constantly request data that is pointless */ + bool sendUpdate; + + /** The one and only pixel format we support */ + PixelFormat pixelFormat; + + /** If the vnc client supports receiving raw data. It always should */ + bool supportsRawEnc; + + /** If the vnc client supports the desktop resize command */ + bool supportsResizeEnc; + + /** The mode of data we're getting frame buffer in */ + VideoConvert::Mode videoMode; + + /** The video converter that transforms data for us */ + VideoConvert *vc; + + protected: + /** + * vnc client Interface + */ + + /** Send an error message to the client + * @param error_msg text to send describing the error + */ + void sendError(const char* error_msg); + + /** Read some data from the client + * @param buf the data to read + * @param len the amount of data to read + * @return length read + */ + size_t read(uint8_t *buf, size_t len); + + /** Read len -1 bytes from the client into the buffer provided + 1 + * assert that we read enough bytes. This function exists to handle + * reading all of the protocol structs above when we've already read + * the first byte which describes which one we're reading + * @param buf the address of the buffer to add one to and read data into + * @param len the amount of data + 1 to read + * @return length read + */ + size_t read1(uint8_t *buf, size_t len); + + + /** Templated version of the read function above to + * read simple data to the client + * @param val data to recv from the client + */ + template size_t read(T* val); + + + /** Write a buffer to the client. + * @param buf buffer to send + * @param len length of the buffer + * @return number of bytes sent + */ + size_t write(const uint8_t *buf, size_t len); + + /** Templated version of the write function above to + * write simple data to the client + * @param val data to send to the client + */ + template size_t write(T* val); + + /** Send a string to the client + * @param str string to transmit + */ + size_t write(const char* str); + + /** Check the client's protocol verion for compatibility and send + * the security types we support + */ + void checkProtocolVersion(); + + /** Check that the security exchange was successful + */ + void checkSecurity(); + + /** Send client our idea about what the frame buffer looks like */ + void sendServerInit(); + + /** Send an error message to the client when something goes wrong + * @param error_msg error to send + */ + void sendError(std::string error_msg); + + /** Send a updated frame buffer to the client. + * @todo this doesn't do anything smart and just sends the entire image + */ + void sendFrameBufferUpdate(); + + /** Receive pixel foramt message from client and process it. */ + void setPixelFormat(); + + /** Receive encodings message from client and process it. */ + void setEncodings(); + + /** Receive message from client asking for updated frame buffer */ + void requestFbUpdate(); + + /** Receive message from client providing new keyboard input */ + void recvKeyboardInput(); + + /** Recv message from client providing new mouse movement or button click */ + void recvPointerInput(); + + /** Receive message from client that there is text in it's paste buffer. + * This is a no-op at the moment, but perhaps we would want to be able to + * paste it at some point. + */ + void recvCutText(); + + /** Tell the client that the frame buffer resized. This happens when the + * simulated system changes video modes (E.g. X11 starts). + */ + void sendFrameBufferResized(); + + public: + /** Set the address of the frame buffer we are going to show. + * To avoid copying, just have the display controller + * tell us where the data is instead of constanly copying it around + * @param rfb frame buffer that we're going to use + */ + void + setFramebufferAddr(uint8_t* rfb) + { + clientRfb = rfb; + } + + /** Set up the device that would like to receive notifications when keys are + * pressed in the vnc client keyboard + * @param _keyboard an object that derrives from VncKeyboard + */ + void setKeyboard(VncKeyboard *_keyboard) { keyboard = _keyboard; } + + /** Setup the device that would like to receive notifications when mouse + * movements or button presses are received from the vnc client. + * @param _mouse an object that derrives from VncMouse + */ + void setMouse(VncMouse *_mouse) { mouse = _mouse; } + + /** The frame buffer uses this call to notify the vnc server that + * the frame buffer has been updated and a new image needs to be sent to the + * client + */ + void + setDirty() + { + sendUpdate = true; + sendFrameBufferUpdate(); + } + + /** What is the width of the screen we're displaying. + * This is used for pointer/tablet devices that need to know to calculate + * the correct value to send to the device driver. + * @return the width of the simulated screen + */ + uint16_t videoWidth() { return _videoWidth; } + + /** What is the height of the screen we're displaying. + * This is used for pointer/tablet devices that need to know to calculate + * the correct value to send to the device driver. + * @return the height of the simulated screen + */ + uint16_t videoHeight() { return _videoHeight; } + + /** Set the mode of the data the frame buffer will be sending us + * @param mode the mode + */ + void setFrameBufferParams(VideoConvert::Mode mode, int width, int height); +}; + +#endif -- cgit v1.2.3 From d4df9e763ce9cb540af9d5bd59498833dfa1726e Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Fri, 11 Feb 2011 18:29:36 -0600 Subject: VNC/ARM: Use VNC server and add support to boot into X11 --- src/dev/SConscript | 1 + src/dev/arm/RealView.py | 24 +- src/dev/arm/amba_device.cc | 8 + src/dev/arm/amba_device.hh | 13 + src/dev/arm/kmi.cc | 266 ++++++++++++++++---- src/dev/arm/kmi.hh | 103 +++++--- src/dev/arm/pl111.cc | 612 ++++++++++++++++++++++++--------------------- src/dev/arm/pl111.hh | 178 ++++++------- src/dev/arm/rv_ctrl.cc | 30 +++ src/dev/arm/rv_ctrl.hh | 10 +- src/dev/ps2.cc | 200 +++++++++++++++ src/dev/ps2.hh | 94 +++++++ 12 files changed, 1071 insertions(+), 468 deletions(-) create mode 100644 src/dev/ps2.cc create mode 100644 src/dev/ps2.hh (limited to 'src') diff --git a/src/dev/SConscript b/src/dev/SConscript index 7cdea7961..5243da683 100644 --- a/src/dev/SConscript +++ b/src/dev/SConscript @@ -69,6 +69,7 @@ if env['FULL_SYSTEM']: Source('pcidev.cc') Source('pktfifo.cc') Source('platform.cc') + Source('ps2.cc') Source('simple_disk.cc') Source('sinic.cc') Source('terminal.cc') diff --git a/src/dev/arm/RealView.py b/src/dev/arm/RealView.py index cdc06e4ef..ef3f68a88 100644 --- a/src/dev/arm/RealView.py +++ b/src/dev/arm/RealView.py @@ -52,6 +52,14 @@ class AmbaDevice(BasicPioDevice): abstract = True amba_id = Param.UInt32("ID of AMBA device for kernel detection") +class AmbaIntDevice(AmbaDevice): + type = 'AmbaIntDevice' + abstract = True + gic = Param.Gic(Parent.any, "Gic to use for interrupting") + int_num = Param.UInt32("Interrupt number that connects to GIC") + int_delay = Param.Latency("100ns", + "Time between action and interrupt generation by device") + class AmbaDmaDevice(DmaDevice): type = 'AmbaDmaDevice' abstract = True @@ -94,16 +102,17 @@ class Sp804(AmbaDevice): clock1 = Param.Clock('1MHz', "Clock speed of the input") amba_id = 0x00141804 -class Pl050(AmbaDevice): +class Pl050(AmbaIntDevice): type = 'Pl050' - gic = Param.Gic(Parent.any, "Gic to use for interrupting") - int_num = Param.UInt32("Interrupt number that connects to GIC") - int_delay = Param.Latency("100ns", "Time between action and interrupt generation by UART") + vnc = Param.VncServer(Parent.any, "Vnc server for remote frame buffer display") + is_mouse = Param.Bool(False, "Is this interface a mouse, if not a keyboard") + int_delay = '1us' amba_id = 0x00141050 class Pl111(AmbaDmaDevice): type = 'Pl111' clock = Param.Clock('24MHz', "Clock speed of the input") + vnc = Param.VncServer(Parent.any, "Vnc server for remote frame buffer display") amba_id = 0x00141111 class RealView(Platform): @@ -121,7 +130,7 @@ class RealViewPBX(RealView): timer1 = Sp804(int_num0=37, int_num1=37, pio_addr=0x10012000) clcd = Pl111(pio_addr=0x10020000, int_num=55) kmi0 = Pl050(pio_addr=0x10006000, int_num=52) - kmi1 = Pl050(pio_addr=0x10007000, int_num=53) + kmi1 = Pl050(pio_addr=0x10007000, int_num=53, is_mouse=True) l2x0_fake = IsaFake(pio_addr=0x1f002000, pio_size=0xfff) flash_fake = IsaFake(pio_addr=0x40000000, pio_size=0x4000000) @@ -140,7 +149,7 @@ class RealViewPBX(RealView): aaci_fake = AmbaFake(pio_addr=0x10004000) mmc_fake = AmbaFake(pio_addr=0x10005000) rtc_fake = AmbaFake(pio_addr=0x10017000, amba_id=0x41031) - + cf0_fake = IsaFake(pio_addr=0x18000000, pio_size=0xfff) # Attach I/O devices that are on chip @@ -175,6 +184,7 @@ class RealViewPBX(RealView): self.mmc_fake.pio = bus.port self.rtc_fake.pio = bus.port self.flash_fake.pio = bus.port + self.cf0_fake.pio = bus.port # Reference for memory map and interrupt number # RealView Emulation Baseboard User Guide (ARM DUI 0143B) @@ -187,7 +197,7 @@ class RealViewEB(RealView): timer1 = Sp804(int_num0=37, int_num1=37, pio_addr=0x10012000) clcd = Pl111(pio_addr=0x10020000, int_num=23) kmi0 = Pl050(pio_addr=0x10006000, int_num=20) - kmi1 = Pl050(pio_addr=0x10007000, int_num=21) + kmi1 = Pl050(pio_addr=0x10007000, int_num=21, is_mouse=True) l2x0_fake = IsaFake(pio_addr=0x1f002000, pio_size=0xfff, warn_access="1") dmac_fake = AmbaFake(pio_addr=0x10030000) diff --git a/src/dev/arm/amba_device.cc b/src/dev/arm/amba_device.cc index e5d53d6a3..37eb77ae1 100644 --- a/src/dev/arm/amba_device.cc +++ b/src/dev/arm/amba_device.cc @@ -47,11 +47,19 @@ #include "mem/packet_access.hh" const uint64_t AmbaVendor = ULL(0xb105f00d00000000); + AmbaDevice::AmbaDevice(const Params *p) : BasicPioDevice(p), ambaId(AmbaVendor | p->amba_id) { } +AmbaIntDevice::AmbaIntDevice(const Params *p) + : AmbaDevice(p), intNum(p->int_num), gic(p->gic), intDelay(p->int_delay) +{ +} + + + AmbaDmaDevice::AmbaDmaDevice(const Params *p) : DmaDevice(p), ambaId(AmbaVendor | p->amba_id), pioAddr(p->pio_addr), pioSize(0), diff --git a/src/dev/arm/amba_device.hh b/src/dev/arm/amba_device.hh index 1782fb003..297a78f82 100644 --- a/src/dev/arm/amba_device.hh +++ b/src/dev/arm/amba_device.hh @@ -55,6 +55,7 @@ #include "mem/packet.hh" #include "mem/packet_access.hh" #include "params/AmbaDevice.hh" +#include "params/AmbaIntDevice.hh" #include "params/AmbaDmaDevice.hh" namespace AmbaDev { @@ -81,6 +82,18 @@ class AmbaDevice : public BasicPioDevice AmbaDevice(const Params *p); }; +class AmbaIntDevice : public AmbaDevice +{ + protected: + int intNum; + Gic *gic; + Tick intDelay; + + public: + typedef AmbaIntDeviceParams Params; + AmbaIntDevice(const Params *p); +}; + class AmbaDmaDevice : public DmaDevice { protected: diff --git a/src/dev/arm/kmi.cc b/src/dev/arm/kmi.cc index 6cd61fd09..adf1439b3 100644 --- a/src/dev/arm/kmi.cc +++ b/src/dev/arm/kmi.cc @@ -37,21 +37,31 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: William Wang + * Authors: Ali Saidi + * William Wang */ #include "base/trace.hh" +#include "base/vnc/vncserver.hh" #include "dev/arm/amba_device.hh" #include "dev/arm/kmi.hh" +#include "dev/ps2.hh" #include "mem/packet.hh" #include "mem/packet_access.hh" Pl050::Pl050(const Params *p) - : AmbaDevice(p), control(0x00), status(0x43), kmidata(0x00), clkdiv(0x00), - intreg(0x00), intNum(p->int_num), gic(p->gic), intDelay(p->int_delay), - intEvent(this) + : AmbaIntDevice(p), control(0), status(0x43), clkdiv(0), interrupts(0), + rawInterrupts(0), ackNext(false), shiftDown(false), vnc(p->vnc), + driverInitialized(false), intEvent(this) { pioSize = 0xfff; + + if (vnc) { + if (!p->is_mouse) + vnc->setKeyboard(this); + else + vnc->setMouse(this); + } } Tick @@ -62,28 +72,39 @@ Pl050::read(PacketPtr pkt) Addr daddr = pkt->getAddr() - pioAddr; pkt->allocate(); - DPRINTF(Pl050, " read register %#x size=%d\n", daddr, pkt->getSize()); - // use a temporary data since the KMI registers are read/written with - // different size operations - // uint32_t data = 0; switch (daddr) { case kmiCr: + DPRINTF(Pl050, "Read Commmand: %#x\n", (uint32_t)control); data = control; break; case kmiStat: + if (rxQueue.empty()) + status.rxfull = 0; + else + status.rxfull = 1; + + DPRINTF(Pl050, "Read Status: %#x\n", (uint32_t)status); data = status; break; case kmiData: - data = kmidata; + if (rxQueue.empty()) { + data = 0; + } else { + data = rxQueue.front(); + rxQueue.pop_front(); + } + DPRINTF(Pl050, "Read Data: %#x\n", (uint32_t)data); + updateIntStatus(); break; case kmiClkDiv: data = clkdiv; break; case kmiISR: - data = intreg; + data = interrupts; + DPRINTF(Pl050, "Read Interrupts: %#x\n", (uint32_t)interrupts); break; default: if (AmbaDev::readId(pkt, ambaId, pioAddr)) { @@ -123,65 +144,224 @@ Pl050::write(PacketPtr pkt) Addr daddr = pkt->getAddr() - pioAddr; - DPRINTF(Pl050, " write register %#x value %#x size=%d\n", daddr, - pkt->get(), pkt->getSize()); + assert(pkt->getSize() == sizeof(uint8_t)); - // use a temporary data since the KMI registers are read/written with - // different size operations - // - uint32_t data = 0; - switch (pkt->getSize()) { - case 1: - data = pkt->get(); + switch (daddr) { + case kmiCr: + DPRINTF(Pl050, "Write Commmand: %#x\n", (uint32_t)pkt->get()); + control = pkt->get(); + updateIntStatus(); break; - case 2: - data = pkt->get(); + case kmiData: + DPRINTF(Pl050, "Write Data: %#x\n", (uint32_t)pkt->get()); + processCommand(pkt->get()); + updateIntStatus(); break; - case 4: - data = pkt->get(); + case kmiClkDiv: + clkdiv = pkt->get(); break; default: - panic("KMI write size too big?\n"); + warn("Tried to write PL050 at offset %#x that doesn't exist\n", daddr); break; } + pkt->makeAtomicResponse(); + return pioDelay; +} +void +Pl050::processCommand(uint8_t byte) +{ + using namespace Ps2; - switch (daddr) { - case kmiCr: - control = data; + if (ackNext) { + ackNext--; + rxQueue.push_back(Ack); + updateIntStatus(); + return; + } + + switch (byte) { + case Ps2Reset: + rxQueue.push_back(Ack); + rxQueue.push_back(SelfTestPass); break; - case kmiStat: - panic("Tried to write PL050 register(read only) at offset %#x\n", - daddr); + case SetResolution: + case SetRate: + case SetStatusLed: + case SetScaling1_1: + case SetScaling1_2: + rxQueue.push_back(Ack); + ackNext = 1; break; - case kmiData: - kmidata = data; + case ReadId: + rxQueue.push_back(Ack); + if (params()->is_mouse) + rxQueue.push_back(MouseId); + else + rxQueue.push_back(KeyboardId); break; - case kmiClkDiv: - clkdiv = data; + case TpReadId: + if (!params()->is_mouse) + break; + // We're not a trackpoint device, this should make the probe go away + rxQueue.push_back(Ack); + rxQueue.push_back(0); + rxQueue.push_back(0); + // fall through + case Disable: + case Enable: + rxQueue.push_back(Ack); break; - case kmiISR: - panic("Tried to write PL050 register(read only) at offset %#x\n", - daddr); + case StatusRequest: + rxQueue.push_back(Ack); + rxQueue.push_back(0); + rxQueue.push_back(2); // default resolution + rxQueue.push_back(100); // default sample rate break; - default: - warn("Tried to write PL050 at offset %#x that doesn't exist\n", daddr); + case TouchKitId: + ackNext = 2; + rxQueue.push_back(Ack); + rxQueue.push_back(TouchKitId); + rxQueue.push_back(1); + rxQueue.push_back('A'); + + driverInitialized = true; break; + default: + panic("Unknown byte received: %d\n", byte); } - pkt->makeAtomicResponse(); - return pioDelay; + + updateIntStatus(); +} + + +void +Pl050::updateIntStatus() +{ + if (!rxQueue.empty()) + rawInterrupts.rx = 1; + else + rawInterrupts.rx = 0; + + interrupts.tx = rawInterrupts.tx & control.txint_enable; + interrupts.rx = rawInterrupts.rx & control.rxint_enable; + + DPRINTF(Pl050, "rawInterupts=%#x control=%#x interrupts=%#x\n", + (uint32_t)rawInterrupts, (uint32_t)control, (uint32_t)interrupts); + + if (interrupts && !intEvent.scheduled()) + schedule(intEvent, curTick() + intDelay); } void Pl050::generateInterrupt() { - if (intreg.rxintr || intreg.txintr) { + + if (interrupts) { gic->sendInt(intNum); - DPRINTF(Pl050, " -- Generated\n"); + DPRINTF(Pl050, "Generated interrupt\n"); } } +void +Pl050::mouseAt(uint16_t x, uint16_t y, uint8_t buttons) +{ + using namespace Ps2; + + // If the driver hasn't initialized the device yet, no need to try and send + // it anything. Similarly we can get vnc mouse events orders of maginture + // faster than m5 can process them. Only queue up two sets mouse movements + // and don't add more until those are processed. + if (!driverInitialized || rxQueue.size() > 10) + return; + + // We shouldn't be here unless a vnc server called us in which case + // we should have a pointer to it + assert(vnc); + + // Convert screen coordinates to touchpad coordinates + uint16_t _x = (2047.0/vnc->videoWidth()) * x; + uint16_t _y = (2047.0/vnc->videoHeight()) * y; + + rxQueue.push_back(buttons); + rxQueue.push_back(_x >> 7); + rxQueue.push_back(_x & 0x7f); + rxQueue.push_back(_y >> 7); + rxQueue.push_back(_y & 0x7f); + + updateIntStatus(); +} + + +void +Pl050::keyPress(uint32_t key, bool down) +{ + using namespace Ps2; + + std::list keys; + + // convert the X11 keysym into ps2 codes + keySymToPs2(key, down, shiftDown, keys); + + // Insert into our queue of charecters + rxQueue.splice(rxQueue.end(), keys); + updateIntStatus(); +} + +void +Pl050::serialize(std::ostream &os) +{ + uint8_t ctrlreg = control; + SERIALIZE_SCALAR(ctrlreg); + + uint8_t stsreg = status; + SERIALIZE_SCALAR(stsreg); + SERIALIZE_SCALAR(clkdiv); + + uint8_t ints = interrupts; + SERIALIZE_SCALAR(ints); + + uint8_t raw_ints = rawInterrupts; + SERIALIZE_SCALAR(raw_ints); + + SERIALIZE_SCALAR(ackNext); + SERIALIZE_SCALAR(shiftDown); + SERIALIZE_SCALAR(driverInitialized); + + arrayParamOut(os, "rxQueue", rxQueue); +} + +void +Pl050::unserialize(Checkpoint *cp, const std::string §ion) +{ + uint8_t ctrlreg; + UNSERIALIZE_SCALAR(ctrlreg); + control = ctrlreg; + + uint8_t stsreg; + UNSERIALIZE_SCALAR(stsreg); + status = stsreg; + + UNSERIALIZE_SCALAR(clkdiv); + + uint8_t ints; + UNSERIALIZE_SCALAR(ints); + interrupts = ints; + + uint8_t raw_ints; + UNSERIALIZE_SCALAR(raw_ints); + rawInterrupts = raw_ints; + + UNSERIALIZE_SCALAR(ackNext); + UNSERIALIZE_SCALAR(shiftDown); + UNSERIALIZE_SCALAR(driverInitialized); + + arrayParamIn(cp, section, "rxQueue", rxQueue); +} + + + Pl050 * Pl050Params::create() { diff --git a/src/dev/arm/kmi.hh b/src/dev/arm/kmi.hh index c96dd55a9..1e25f8974 100644 --- a/src/dev/arm/kmi.hh +++ b/src/dev/arm/kmi.hh @@ -48,13 +48,16 @@ #ifndef __DEV_ARM_PL050_HH__ #define __DEV_ARM_PL050_HH__ +#include + #include "base/range.hh" -#include "dev/io_device.hh" +#include "base/vnc/vncserver.hh" +#include "dev/arm/amba_device.hh" #include "params/Pl050.hh" class Gic; -class Pl050 : public AmbaDevice +class Pl050 : public AmbaIntDevice, public VncKeyboard, public VncMouse { protected: static const int kmiCr = 0x000; @@ -63,34 +66,68 @@ class Pl050 : public AmbaDevice static const int kmiClkDiv = 0x00C; static const int kmiISR = 0x010; - // control register - uint8_t control; + BitUnion8(ControlReg) + Bitfield<0> force_clock_low; + Bitfield<1> force_data_low; + Bitfield<2> enable; + Bitfield<3> txint_enable; + Bitfield<4> rxint_enable; + Bitfield<5> type; + EndBitUnion(ControlReg) - // status register - uint8_t status; + /** control register + */ + ControlReg control; + + /** KMI status register */ + BitUnion8(StatusReg) + Bitfield<0> data_in; + Bitfield<1> clk_in; + Bitfield<2> rxparity; + Bitfield<3> rxbusy; + Bitfield<4> rxfull; + Bitfield<5> txbusy; + Bitfield<6> txempty; + EndBitUnion(StatusReg) + + StatusReg status; + + /** clock divisor register + * This register is just kept around to satisfy reads after driver does + * writes. The divsor does nothing, as we're not actually signaling ps2 + * serial commands to anything. + */ + uint8_t clkdiv; - // received data (read) or data to be transmitted (write) - uint8_t kmidata; + BitUnion8(InterruptReg) + Bitfield<0> rx; + Bitfield<1> tx; + EndBitUnion(InterruptReg) - // clock divisor register - uint8_t clkdiv; + /** interrupt status register. */ + InterruptReg interrupts; + + /** raw interrupt register (unmasked) */ + InterruptReg rawInterrupts; - BitUnion8(IntReg) - Bitfield<0> txintr; - Bitfield<1> rxintr; - EndBitUnion(IntReg) + /** If the controller should ignore the next data byte and acknowledge it. + * The driver is attempting to setup some feature we don't care about + */ + int ackNext; - /** interrupt mask register. */ - IntReg intreg; + /** is the shift key currently down */ + bool shiftDown; - /** Interrupt number to generate */ - int intNum; + /** The vnc server we're connected to (if any) */ + VncServer *vnc; - /** Gic to use for interrupting */ - Gic *gic; + /** If the linux driver has initialized the device yet and thus can we send + * mouse data */ + bool driverInitialized; - /** Delay before interrupting */ - Tick intDelay; + /** Update the status of the interrupt registers and schedule an interrupt + * if required */ + void updateIntStatus(); /** Function to generate interrupt */ void generateInterrupt(); @@ -98,6 +135,15 @@ class Pl050 : public AmbaDevice /** Wrapper to create an event out of the thing */ EventWrapper intEvent; + /** Receive queue. This list contains all the pending commands that + * need to be sent to the driver + */ + std::list rxQueue; + + /** Handle a command sent to the kmi and respond appropriately + */ + void processCommand(uint8_t byte); + public: typedef Pl050Params Params; const Params * @@ -111,12 +157,11 @@ class Pl050 : public AmbaDevice virtual Tick read(PacketPtr pkt); virtual Tick write(PacketPtr pkt); - /** - * Return if we have an interrupt pending - * @return interrupt status - * @todo fix me when implementation improves - */ - virtual bool intStatus() { return false; } + virtual void mouseAt(uint16_t x, uint16_t y, uint8_t buttons); + virtual void keyPress(uint32_t key, bool down); + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); }; -#endif +#endif // __DEV_ARM_PL050_HH__ diff --git a/src/dev/arm/pl111.cc b/src/dev/arm/pl111.cc index e597bf272..e884d9b58 100644 --- a/src/dev/arm/pl111.cc +++ b/src/dev/arm/pl111.cc @@ -35,9 +35,13 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: William Wang + * Ali Saidi */ +#include "base/bitmap.hh" +#include "base/output.hh" #include "base/trace.hh" +#include "base/vnc/vncserver.hh" #include "dev/arm/amba_device.hh" #include "dev/arm/gic.hh" #include "dev/arm/pl111.hh" @@ -50,20 +54,27 @@ using namespace AmbaDev; Pl111::Pl111(const Params *p) : AmbaDmaDevice(p), lcdTiming0(0), lcdTiming1(0), lcdTiming2(0), lcdTiming3(0), lcdUpbase(0), lcdLpbase(0), lcdControl(0), lcdImsc(0), - lcdRis(0), lcdMis(0), lcdIcr(0), lcdUpcurr(0), lcdLpcurr(0), + lcdRis(0), lcdMis(0), clcdCrsrCtrl(0), clcdCrsrConfig(0), clcdCrsrPalette0(0), clcdCrsrPalette1(0), clcdCrsrXY(0), clcdCrsrClip(0), clcdCrsrImsc(0), clcdCrsrIcr(0), clcdCrsrRis(0), clcdCrsrMis(0), clock(p->clock), - height(0), width(0), startTime(0), startAddr(0), maxAddr(0), curAddr(0), + vncserver(p->vnc), bmp(NULL), width(LcdMaxWidth), height(LcdMaxHeight), + bytesPerPixel(4), startTime(0), startAddr(0), maxAddr(0), curAddr(0), waterMark(0), dmaPendingNum(0), readEvent(this), fillFifoEvent(this), dmaDoneEvent(maxOutstandingDma, this), intEvent(this) { pioSize = 0xFFFF; + pic = simout.create("framebuffer.bmp", true); + + dmaBuffer = new uint8_t[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)]; + memset(lcdPalette, 0, sizeof(lcdPalette)); memset(cursorImage, 0, sizeof(cursorImage)); memset(dmaBuffer, 0, sizeof(dmaBuffer)); - memset(frameBuffer, 0, sizeof(frameBuffer)); + + if (vncserver) + vncserver->setFramebufferAddr(dmaBuffer); } // read registers and frame buffer @@ -75,111 +86,105 @@ Pl111::read(PacketPtr pkt) uint32_t data = 0; - if ((pkt->getAddr()& 0xffff0000) == pioAddr) { + assert(pkt->getAddr() >= pioAddr && + pkt->getAddr() < pioAddr + pioSize); - assert(pkt->getAddr() >= pioAddr && - pkt->getAddr() < pioAddr + pioSize); + Addr daddr = pkt->getAddr() - pioAddr; + pkt->allocate(); - Addr daddr = pkt->getAddr()&0xFFFF; - pkt->allocate(); + DPRINTF(PL111, " read register %#x size=%d\n", daddr, pkt->getSize()); - DPRINTF(PL111, " read register %#x size=%d\n", daddr, pkt->getSize()); - - switch (daddr) { - case LcdTiming0: - data = lcdTiming0; - break; - case LcdTiming1: - data = lcdTiming1; - break; - case LcdTiming2: - data = lcdTiming2; - break; - case LcdTiming3: - data = lcdTiming3; - break; - case LcdUpBase: - data = lcdUpbase; - break; - case LcdLpBase: - data = lcdLpbase; - break; - case LcdControl: - data = lcdControl; - break; - case LcdImsc: - warn("LCD interrupt set/clear function not supported\n"); - data = lcdImsc; - break; - case LcdRis: - warn("LCD Raw interrupt status function not supported\n"); - data = lcdRis; - break; - case LcdMis: - warn("LCD Masked interrupt status function not supported\n"); - data = lcdMis; - break; - case LcdIcr: - panic("LCD register at offset %#x is Write-Only\n", daddr); - break; - case LcdUpCurr: - data = lcdUpcurr; - break; - case LcdLpCurr: - data = lcdLpcurr; - break; - case ClcdCrsrCtrl: - data = clcdCrsrCtrl; - break; - case ClcdCrsrConfig: - data = clcdCrsrConfig; - break; - case ClcdCrsrPalette0: - data = clcdCrsrPalette0; - break; - case ClcdCrsrPalette1: - data = clcdCrsrPalette1; - break; - case ClcdCrsrXY: - data = clcdCrsrXY; - break; - case ClcdCrsrClip: - data = clcdCrsrClip; - break; - case ClcdCrsrImsc: - data = clcdCrsrImsc; - break; - case ClcdCrsrIcr: - panic("CLCD register at offset %#x is Write-Only\n", daddr); - break; - case ClcdCrsrRis: - data = clcdCrsrRis; - break; - case ClcdCrsrMis: - data = clcdCrsrMis; - break; - default: - if (AmbaDev::readId(pkt, AMBA_ID, pioAddr)) { - // Hack for variable size accesses - data = pkt->get(); - break; - } else if (daddr >= CrsrImage && daddr <= 0xBFC) { - // CURSOR IMAGE - int index; - index = (daddr - CrsrImage) >> 2; - data= cursorImage[index]; - break; - } else if (daddr >= LcdPalette && daddr <= 0x3FC) { - // LCD Palette - int index; - index = (daddr - LcdPalette) >> 2; - data = lcdPalette[index]; - break; - } else { - panic("Tried to read CLCD register at offset %#x that \ + switch (daddr) { + case LcdTiming0: + data = lcdTiming0; + break; + case LcdTiming1: + data = lcdTiming1; + break; + case LcdTiming2: + data = lcdTiming2; + break; + case LcdTiming3: + data = lcdTiming3; + break; + case LcdUpBase: + data = lcdUpbase; + break; + case LcdLpBase: + data = lcdLpbase; + break; + case LcdControl: + data = lcdControl; + break; + case LcdImsc: + data = lcdImsc; + break; + case LcdRis: + data = lcdRis; + break; + case LcdMis: + data = lcdMis; + break; + case LcdIcr: + panic("LCD register at offset %#x is Write-Only\n", daddr); + break; + case LcdUpCurr: + data = curAddr; + break; + case LcdLpCurr: + data = curAddr; + break; + case ClcdCrsrCtrl: + data = clcdCrsrCtrl; + break; + case ClcdCrsrConfig: + data = clcdCrsrConfig; + break; + case ClcdCrsrPalette0: + data = clcdCrsrPalette0; + break; + case ClcdCrsrPalette1: + data = clcdCrsrPalette1; + break; + case ClcdCrsrXY: + data = clcdCrsrXY; + break; + case ClcdCrsrClip: + data = clcdCrsrClip; + break; + case ClcdCrsrImsc: + data = clcdCrsrImsc; + break; + case ClcdCrsrIcr: + panic("CLCD register at offset %#x is Write-Only\n", daddr); + break; + case ClcdCrsrRis: + data = clcdCrsrRis; + break; + case ClcdCrsrMis: + data = clcdCrsrMis; + break; + default: + if (AmbaDev::readId(pkt, AMBA_ID, pioAddr)) { + // Hack for variable size accesses + data = pkt->get(); + break; + } else if (daddr >= CrsrImage && daddr <= 0xBFC) { + // CURSOR IMAGE + int index; + index = (daddr - CrsrImage) >> 2; + data= cursorImage[index]; + break; + } else if (daddr >= LcdPalette && daddr <= 0x3FC) { + // LCD Palette + int index; + index = (daddr - LcdPalette) >> 2; + data = lcdPalette[index]; + break; + } else { + panic("Tried to read CLCD register at offset %#x that \ doesn't exist\n", daddr); - break; - } + break; } } @@ -226,119 +231,133 @@ Pl111::write(PacketPtr pkt) break; } - if ((pkt->getAddr()& 0xffff0000) == pioAddr) { + assert(pkt->getAddr() >= pioAddr && + pkt->getAddr() < pioAddr + pioSize); - assert(pkt->getAddr() >= pioAddr && - pkt->getAddr() < pioAddr + pioSize); + Addr daddr = pkt->getAddr() - pioAddr; - Addr daddr = pkt->getAddr() - pioAddr; + DPRINTF(PL111, " write register %#x value %#x size=%d\n", daddr, + pkt->get(), pkt->getSize()); - DPRINTF(PL111, " write register %#x value %#x size=%d\n", daddr, - pkt->get(), pkt->getSize()); + switch (daddr) { + case LcdTiming0: + lcdTiming0 = data; + // width = 16 * (PPL+1) + width = (lcdTiming0.ppl + 1) << 4; + break; + case LcdTiming1: + lcdTiming1 = data; + // height = LPP + 1 + height = (lcdTiming1.lpp) + 1; + break; + case LcdTiming2: + lcdTiming2 = data; + break; + case LcdTiming3: + lcdTiming3 = data; + break; + case LcdUpBase: + lcdUpbase = data; + DPRINTF(PL111, "####### Upper panel base set to: %#x #######\n", lcdUpbase); + break; + case LcdLpBase: + warn("LCD dual screen mode not supported\n"); + lcdLpbase = data; + DPRINTF(PL111, "###### Lower panel base set to: %#x #######\n", lcdLpbase); + break; + case LcdControl: + int old_lcdpwr; + old_lcdpwr = lcdControl.lcdpwr; + lcdControl = data; + + DPRINTF(PL111, "LCD power is:%d\n", lcdControl.lcdpwr); + + // LCD power enable + if (lcdControl.lcdpwr && !old_lcdpwr) { + updateVideoParams(); + DPRINTF(PL111, " lcd size: height %d width %d\n", height, width); + waterMark = lcdControl.watermark ? 8 : 4; + startDma(); + } + break; + case LcdImsc: + lcdImsc = data; + if (lcdImsc.vcomp) + panic("Interrupting on vcomp not supported\n"); - switch (daddr) { - case LcdTiming0: - lcdTiming0 = data; - // width = 16 * (PPL+1) - width = (lcdTiming0.ppl + 1) << 4; - break; - case LcdTiming1: - lcdTiming1 = data; - // height = LPP + 1 - height = (lcdTiming1.lpp) + 1; - break; - case LcdTiming2: - lcdTiming2 = data; - break; - case LcdTiming3: - lcdTiming3 = data; - break; - case LcdUpBase: - lcdUpbase = data; - break; - case LcdLpBase: - warn("LCD dual screen mode not supported\n"); - lcdLpbase = data; - break; - case LcdControl: - int old_lcdpwr; - old_lcdpwr = lcdControl.lcdpwr; - lcdControl = data; - // LCD power enable - if (lcdControl.lcdpwr&&!old_lcdpwr) { - DPRINTF(PL111, " lcd size: height %d width %d\n", height, width); - waterMark = lcdControl.watermark ? 8 : 4; - readFramebuffer(); - } - break; - case LcdImsc: - warn("LCD interrupt mask set/clear not supported\n"); - lcdImsc = data; - break; - case LcdRis: - warn("LCD register at offset %#x is Read-Only\n", daddr); - break; - case LcdMis: - warn("LCD register at offset %#x is Read-Only\n", daddr); - break; - case LcdIcr: - warn("LCD interrupt clear not supported\n"); - lcdIcr = data; - break; - case LcdUpCurr: - warn("LCD register at offset %#x is Read-Only\n", daddr); - break; - case LcdLpCurr: - warn("LCD register at offset %#x is Read-Only\n", daddr); - break; - case ClcdCrsrCtrl: - clcdCrsrCtrl = data; - break; - case ClcdCrsrConfig: - clcdCrsrConfig = data; - break; - case ClcdCrsrPalette0: - clcdCrsrPalette0 = data; - break; - case ClcdCrsrPalette1: - clcdCrsrPalette1 = data; - break; - case ClcdCrsrXY: - clcdCrsrXY = data; - break; - case ClcdCrsrClip: - clcdCrsrClip = data; - break; - case ClcdCrsrImsc: - clcdCrsrImsc = data; - break; - case ClcdCrsrIcr: - clcdCrsrIcr = data; - break; - case ClcdCrsrRis: - warn("CLCD register at offset %#x is Read-Only\n", daddr); - break; - case ClcdCrsrMis: - warn("CLCD register at offset %#x is Read-Only\n", daddr); - break; - default: - if (daddr >= CrsrImage && daddr <= 0xBFC) { - // CURSOR IMAGE - int index; - index = (daddr - CrsrImage) >> 2; - cursorImage[index] = data; - break; - } else if (daddr >= LcdPalette && daddr <= 0x3FC) { - // LCD Palette - int index; - index = (daddr - LcdPalette) >> 2; - lcdPalette[index] = data; - break; - } else { - panic("Tried to write PL111 register at offset %#x that \ + lcdMis = lcdImsc & lcdRis; + + if (!lcdMis) + gic->clearInt(intNum); + + break; + case LcdRis: + panic("LCD register at offset %#x is Read-Only\n", daddr); + break; + case LcdMis: + panic("LCD register at offset %#x is Read-Only\n", daddr); + break; + case LcdIcr: + lcdRis = lcdRis & ~data; + lcdMis = lcdImsc & lcdRis; + + if (!lcdMis) + gic->clearInt(intNum); + + break; + case LcdUpCurr: + panic("LCD register at offset %#x is Read-Only\n", daddr); + break; + case LcdLpCurr: + panic("LCD register at offset %#x is Read-Only\n", daddr); + break; + case ClcdCrsrCtrl: + clcdCrsrCtrl = data; + break; + case ClcdCrsrConfig: + clcdCrsrConfig = data; + break; + case ClcdCrsrPalette0: + clcdCrsrPalette0 = data; + break; + case ClcdCrsrPalette1: + clcdCrsrPalette1 = data; + break; + case ClcdCrsrXY: + clcdCrsrXY = data; + break; + case ClcdCrsrClip: + clcdCrsrClip = data; + break; + case ClcdCrsrImsc: + clcdCrsrImsc = data; + break; + case ClcdCrsrIcr: + clcdCrsrIcr = data; + break; + case ClcdCrsrRis: + panic("CLCD register at offset %#x is Read-Only\n", daddr); + break; + case ClcdCrsrMis: + panic("CLCD register at offset %#x is Read-Only\n", daddr); + break; + default: + if (daddr >= CrsrImage && daddr <= 0xBFC) { + // CURSOR IMAGE + int index; + index = (daddr - CrsrImage) >> 2; + cursorImage[index] = data; + break; + } else if (daddr >= LcdPalette && daddr <= 0x3FC) { + // LCD Palette + int index; + index = (daddr - LcdPalette) >> 2; + lcdPalette[index] = data; + break; + } else { + panic("Tried to write PL111 register at offset %#x that \ doesn't exist\n", daddr); - break; - } + break; } } @@ -346,18 +365,76 @@ Pl111::write(PacketPtr pkt) return pioDelay; } +void +Pl111::updateVideoParams() +{ + if (lcdControl.lcdbpp == bpp24) { + bytesPerPixel = 4; + } else if (lcdControl.lcdbpp == bpp16m565) { + bytesPerPixel = 2; + } + + if (vncserver) { + if (lcdControl.lcdbpp == bpp24 && lcdControl.bgr) + vncserver->setFrameBufferParams(VideoConvert::bgr8888, width, + height); + else if (lcdControl.lcdbpp == bpp24 && !lcdControl.bgr) + vncserver->setFrameBufferParams(VideoConvert::rgb8888, width, + height); + else if (lcdControl.lcdbpp == bpp16m565 && lcdControl.bgr) + vncserver->setFrameBufferParams(VideoConvert::bgr565, width, + height); + else if (lcdControl.lcdbpp == bpp16m565 && !lcdControl.bgr) + vncserver->setFrameBufferParams(VideoConvert::rgb565, width, + height); + else + panic("Unimplemented video mode\n"); + } + + if (bmp) + delete bmp; + + if (lcdControl.lcdbpp == bpp24 && lcdControl.bgr) + bmp = new Bitmap(VideoConvert::bgr8888, width, height, dmaBuffer); + else if (lcdControl.lcdbpp == bpp24 && !lcdControl.bgr) + bmp = new Bitmap(VideoConvert::rgb8888, width, height, dmaBuffer); + else if (lcdControl.lcdbpp == bpp16m565 && lcdControl.bgr) + bmp = new Bitmap(VideoConvert::bgr565, width, height, dmaBuffer); + else if (lcdControl.lcdbpp == bpp16m565 && !lcdControl.bgr) + bmp = new Bitmap(VideoConvert::rgb565, width, height, dmaBuffer); + else + panic("Unimplemented video mode\n"); +} + +void +Pl111::startDma() +{ + if (dmaPendingNum != 0 || readEvent.scheduled()) + return; + readFramebuffer(); +} + void Pl111::readFramebuffer() { // initialization for dma read from frame buffer to dma buffer - uint32_t length = height*width; - if (startAddr != lcdUpbase) { + uint32_t length = height * width; + if (startAddr != lcdUpbase) startAddr = lcdUpbase; - } + + // Updating base address, interrupt if we're supposed to + lcdRis.baseaddr = 1; + if (!intEvent.scheduled()) + schedule(intEvent, nextCycle()); + curAddr = 0; startTime = curTick(); - maxAddr = static_cast(length*sizeof(uint32_t)); - dmaPendingNum =0 ; + + maxAddr = static_cast(length * bytesPerPixel); + + DPRINTF(PL111, " lcd frame buffer size of %d bytes \n", maxAddr); + + dmaPendingNum = 0; fillFifo(); } @@ -369,11 +446,16 @@ Pl111::fillFifo() // concurrent dma reads need different dma done events // due to assertion in scheduling state ++dmaPendingNum; - DPRINTF(PL111, " ++ DMA pending number %d read addr %#x\n", - dmaPendingNum, curAddr); + assert(!dmaDoneEvent[dmaPendingNum-1].scheduled()); - dmaRead(curAddr + startAddr, dmaSize, &dmaDoneEvent[dmaPendingNum-1], - curAddr + dmaBuffer); + + // We use a uncachable request here because the requests from the CPU + // will be uncacheable as well. If we have uncacheable and cacheable + // requests in the memory system for the same address it won't be + // pleased + dmaPort->dmaAction(MemCmd::ReadReq, curAddr + startAddr, dmaSize, + &dmaDoneEvent[dmaPendingNum-1], curAddr + dmaBuffer, 0, + Request::UNCACHEABLE); curAddr += dmaSize; } } @@ -381,27 +463,34 @@ Pl111::fillFifo() void Pl111::dmaDone() { - Tick maxFrameTime = lcdTiming2.cpl*height*clock; + Tick maxFrameTime = lcdTiming2.cpl * height * clock; --dmaPendingNum; - DPRINTF(PL111, " -- DMA pending number %d\n", dmaPendingNum); - if (maxAddr == curAddr && !dmaPendingNum) { - if ((curTick() - startTime) > maxFrameTime) + if ((curTick() - startTime) > maxFrameTime) { warn("CLCD controller buffer underrun, took %d cycles when should" " have taken %d\n", curTick() - startTime, maxFrameTime); + lcdRis.underflow = 1; + if (!intEvent.scheduled()) + schedule(intEvent, nextCycle()); + } - // double buffering so the vnc server doesn't see a tear in the screen - memcpy(frameBuffer, dmaBuffer, maxAddr); assert(!readEvent.scheduled()); + if (vncserver) + vncserver->setDirty(); DPRINTF(PL111, "-- write out frame buffer into bmp\n"); - writeBMP(frameBuffer); + + assert(bmp); + pic->seekp(0); + bmp->write(pic); DPRINTF(PL111, "-- schedule next dma read event at %d tick \n", maxFrameTime + curTick()); - schedule(readEvent, nextCycle(startTime + maxFrameTime)); + + if (lcdControl.lcden) + schedule(readEvent, nextCycle(startTime + maxFrameTime)); } if (dmaPendingNum > (maxOutstandingDma - waterMark)) @@ -409,9 +498,9 @@ Pl111::dmaDone() if (!fillFifoEvent.scheduled()) schedule(fillFifoEvent, nextCycle()); - } + Tick Pl111::nextCycle() { @@ -431,33 +520,6 @@ Pl111::nextCycle(Tick beginTick) return nextTick; } -// write out the frame buffer into a bitmap file -void -Pl111::writeBMP(uint32_t* frameBuffer) -{ - fstream pic; - - // write out bmp head - std::string filename = "./m5out/frameBuffer.bmp"; - pic.open(filename.c_str(), ios::out|ios::binary); - Bitmap bm(pic, height, width); - - DPRINTF(PL111, "-- write out data into bmp\n"); - - // write out frame buffer data - for (int i = height -1; i >= 0; --i) { - for (int j = 0; j< width; ++j) { - uint32_t pixel = frameBuffer[i*width + j]; - pic.write(reinterpret_cast(&pixel), - sizeof(uint32_t)); - DPRINTF(PL111, " write pixel data %#x at addr %#x\n", - pixel, i*width + j); - } - } - - pic.close(); -} - void Pl111::serialize(std::ostream &os) { @@ -490,9 +552,6 @@ Pl111::serialize(std::ostream &os) uint8_t lcdMis_serial = lcdMis; SERIALIZE_SCALAR(lcdMis_serial); - uint8_t lcdIcr_serial = lcdIcr; - SERIALIZE_SCALAR(lcdIcr_serial); - SERIALIZE_ARRAY(lcdPalette, LcdPaletteSize); SERIALIZE_ARRAY(cursorImage, CrsrImageSize); @@ -518,9 +577,9 @@ Pl111::serialize(std::ostream &os) SERIALIZE_SCALAR(clock); SERIALIZE_SCALAR(height); SERIALIZE_SCALAR(width); + SERIALIZE_SCALAR(bytesPerPixel); - SERIALIZE_ARRAY(dmaBuffer, height*width); - SERIALIZE_ARRAY(frameBuffer, height*width); + SERIALIZE_ARRAY(dmaBuffer, height * width); SERIALIZE_SCALAR(startTime); SERIALIZE_SCALAR(startAddr); SERIALIZE_SCALAR(maxAddr); @@ -569,10 +628,6 @@ Pl111::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_SCALAR(lcdMis_serial); lcdMis = lcdMis_serial; - uint8_t lcdIcr_serial; - UNSERIALIZE_SCALAR(lcdIcr_serial); - lcdIcr = lcdIcr_serial; - UNSERIALIZE_ARRAY(lcdPalette, LcdPaletteSize); UNSERIALIZE_ARRAY(cursorImage, CrsrImageSize); @@ -602,25 +657,29 @@ Pl111::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_SCALAR(clock); UNSERIALIZE_SCALAR(height); UNSERIALIZE_SCALAR(width); + UNSERIALIZE_SCALAR(bytesPerPixel); - UNSERIALIZE_ARRAY(dmaBuffer, height*width); - UNSERIALIZE_ARRAY(frameBuffer, height*width); + UNSERIALIZE_ARRAY(dmaBuffer, height * width); UNSERIALIZE_SCALAR(startTime); UNSERIALIZE_SCALAR(startAddr); UNSERIALIZE_SCALAR(maxAddr); UNSERIALIZE_SCALAR(curAddr); UNSERIALIZE_SCALAR(waterMark); UNSERIALIZE_SCALAR(dmaPendingNum); + + updateVideoParams(); + if (vncserver) + vncserver->setDirty(); } void Pl111::generateInterrupt() { DPRINTF(PL111, "Generate Interrupt: lcdImsc=0x%x lcdRis=0x%x lcdMis=0x%x\n", - lcdImsc, lcdRis, lcdMis); + (uint32_t)lcdImsc, (uint32_t)lcdRis, (uint32_t)lcdMis); lcdMis = lcdImsc & lcdRis; - if (lcdMis.ffufie || lcdMis.nbupie || lcdMis.vtcpie || lcdMis.ahmeie) { + if (lcdMis.underflow || lcdMis.baseaddr || lcdMis.vcomp || lcdMis.ahbmaster) { gic->sendInt(intNum); DPRINTF(PL111, " -- Generated\n"); } @@ -639,15 +698,4 @@ Pl111Params::create() return new Pl111(this); } -// bitmap class ctor -Bitmap::Bitmap(std::fstream& bmp, uint16_t h, uint16_t w) -{ - Magic magic = {{'B','M'}}; - Header header = {sizeof(Color)*w*h , 0, 0, 54}; - Info info = {sizeof(Info), w, h, 1, sizeof(Color)*8, 0, - ( sizeof(Color) *(w*h) ), 1, 1, 0, 0}; - - bmp.write(reinterpret_cast(&magic), sizeof(magic)); - bmp.write(reinterpret_cast(&header), sizeof(header)); - bmp.write(reinterpret_cast(&info), sizeof(info)); -} + diff --git a/src/dev/arm/pl111.hh b/src/dev/arm/pl111.hh index 4e75af4e8..f36dc6810 100644 --- a/src/dev/arm/pl111.hh +++ b/src/dev/arm/pl111.hh @@ -35,6 +35,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: William Wang + * Ali Saidi */ @@ -55,6 +56,8 @@ using namespace std; class Gic; +class VncServer; +class Bitmap; class Pl111: public AmbaDmaDevice { @@ -96,58 +99,69 @@ class Pl111: public AmbaDmaDevice static const int dmaSize = 8; // 64 bits static const int maxOutstandingDma = 16; // 16 deep FIFO of 64 bits + enum LcdMode { + bpp1 = 0, + bpp2, + bpp4, + bpp8, + bpp16, + bpp24, + bpp16m565, + bpp12 + }; + BitUnion8(InterruptReg) - Bitfield<1> ffufie; - Bitfield<2> nbupie; - Bitfield<3> vtcpie; - Bitfield<4> ahmeie; + Bitfield<1> underflow; + Bitfield<2> baseaddr; + Bitfield<3> vcomp; + Bitfield<4> ahbmaster; EndBitUnion(InterruptReg) BitUnion32(TimingReg0) - Bitfield<7,2> ppl; - Bitfield<15,8> hsw; - Bitfield<23,16> hfp; - Bitfield<31,24> hbp; + Bitfield<7,2> ppl; + Bitfield<15,8> hsw; + Bitfield<23,16> hfp; + Bitfield<31,24> hbp; EndBitUnion(TimingReg0) BitUnion32(TimingReg1) - Bitfield<9,0> lpp; - Bitfield<15,10> vsw; - Bitfield<23,16> vfp; - Bitfield<31,24> vbp; + Bitfield<9,0> lpp; + Bitfield<15,10> vsw; + Bitfield<23,16> vfp; + Bitfield<31,24> vbp; EndBitUnion(TimingReg1) BitUnion32(TimingReg2) - Bitfield<4,0> pcdlo; - Bitfield<5> clksel; - Bitfield<10,6> acb; - Bitfield<11> avs; - Bitfield<12> ihs; - Bitfield<13> ipc; - Bitfield<14> ioe; - Bitfield<25,16> cpl; - Bitfield<26> bcd; - Bitfield<31,27> pcdhi; + Bitfield<4,0> pcdlo; + Bitfield<5> clksel; + Bitfield<10,6> acb; + Bitfield<11> avs; + Bitfield<12> ihs; + Bitfield<13> ipc; + Bitfield<14> ioe; + Bitfield<25,16> cpl; + Bitfield<26> bcd; + Bitfield<31,27> pcdhi; EndBitUnion(TimingReg2) BitUnion32(TimingReg3) - Bitfield<6,0> led; - Bitfield<16> lee; + Bitfield<6,0> led; + Bitfield<16> lee; EndBitUnion(TimingReg3) BitUnion32(ControlReg) - Bitfield<0> lcden; - Bitfield<3,1> lcdbpp; - Bitfield<4> lcdbw; - Bitfield<5> lcdtft; - Bitfield<6> lcdmono8; - Bitfield<7> lcddual; - Bitfield<8> bgr; - Bitfield<9> bebo; - Bitfield<10> bepo; - Bitfield<11> lcdpwr; - Bitfield<13,12> lcdvcomp; - Bitfield<16> watermark; + Bitfield<0> lcden; + Bitfield<3,1> lcdbpp; + Bitfield<4> lcdbw; + Bitfield<5> lcdtft; + Bitfield<6> lcdmono8; + Bitfield<7> lcddual; + Bitfield<8> bgr; + Bitfield<9> bebo; + Bitfield<10> bepo; + Bitfield<11> lcdpwr; + Bitfield<13,12> lcdvcomp; + Bitfield<16> watermark; EndBitUnion(ControlReg) /** Horizontal axis panel control register */ @@ -180,15 +194,6 @@ class Pl111: public AmbaDmaDevice /** Masked interrupt status register */ InterruptReg lcdMis; - /** Interrupt clear register */ - InterruptReg lcdIcr; - - /** Upper panel current address value register - ro */ - int lcdUpcurr; - - /** Lower panel current address value register - ro */ - int lcdLpcurr; - /** 256x16-bit color palette registers * 256 palette entries organized as 128 locations of two entries per word */ int lcdPalette[LcdPaletteSize]; @@ -228,17 +233,26 @@ class Pl111: public AmbaDmaDevice /** Clock speed */ Tick clock; - /** Frame buffer height - lines per panel */ - uint16_t height; + /** VNC server */ + VncServer *vncserver; + + /** Helper to write out bitmaps */ + Bitmap *bmp; + + /** Picture of what the current frame buffer looks like */ + std::ostream *pic; /** Frame buffer width - pixels per line */ uint16_t width; - /** CLCDC supports up to 1024x768 */ - uint8_t dmaBuffer[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)]; + /** Frame buffer height - lines per panel */ + uint16_t height; - /** Double buffering */ - uint32_t frameBuffer[LcdMaxWidth * LcdMaxHeight]; + /** Bytes per pixel */ + uint8_t bytesPerPixel; + + /** CLCDC supports up to 1024x768 */ + uint8_t *dmaBuffer; /** Start time for frame buffer dma read */ Tick startTime; @@ -258,12 +272,12 @@ class Pl111: public AmbaDmaDevice /** Number of pending dma reads */ int dmaPendingNum; + /** Send updated parameters to the vnc server */ + void updateVideoParams(); + /** DMA framebuffer read */ void readFramebuffer(); - /** Write framebuffer to a bmp file */ - void writeBMP(uint32_t*); - /** Generate dma framebuffer read event */ void generateReadEvent(); @@ -273,6 +287,9 @@ class Pl111: public AmbaDmaDevice /** fillFIFO event */ void fillFifo(); + /** start the dmas off after power is enabled */ + void startDma(); + /** DMA done event */ void dmaDone(); @@ -289,7 +306,7 @@ class Pl111: public AmbaDmaDevice /** DMA done event */ vector > dmaDoneEvent; - /** Wrapper to create an event out of the thing */ + /** Wrapper to create an event out of the interrupt */ EventWrapper intEvent; public: @@ -312,57 +329,6 @@ class Pl111: public AmbaDmaDevice * @param range_list range list to populate with ranges */ void addressRanges(AddrRangeList &range_list); - - /** - * Return if we have an interrupt pending - * @return interrupt status - * @todo fix me when implementation improves - */ - virtual bool intStatus() { return false; } -}; - -// write frame buffer into a bitmap picture -class Bitmap -{ - public: - Bitmap(std::fstream& bmp, uint16_t h, uint16_t w); - - private: - struct Magic - { - unsigned char magic_number[2]; - } magic; - - struct Header - { - uint32_t size; - uint16_t reserved1; - uint16_t reserved2; - uint32_t offset; - } header; - - struct Info - { - uint32_t Size; - uint32_t Width; - uint32_t Height; - uint16_t Planes; - uint16_t BitCount; - uint32_t Compression; - uint32_t SizeImage; - uint32_t XPelsPerMeter; - uint32_t YPelsPerMeter; - uint32_t ClrUsed; - uint32_t ClrImportant; - } info; - - struct Color - { - unsigned char b; - unsigned char g; - unsigned char r; - unsigned char a; - } color; }; #endif diff --git a/src/dev/arm/rv_ctrl.cc b/src/dev/arm/rv_ctrl.cc index c0ba4c7aa..b1bbc065b 100644 --- a/src/dev/arm/rv_ctrl.cc +++ b/src/dev/arm/rv_ctrl.cc @@ -68,6 +68,27 @@ RealViewCtrl::read(PacketPtr pkt) case Flash: pkt->set(0); break; + case Clcd: + pkt->set(0x00001F00); + break; + case Osc0: + pkt->set(0x00012C5C); + break; + case Osc1: + pkt->set(0x00002CC0); + break; + case Osc2: + pkt->set(0x00002C75); + break; + case Osc3: + pkt->set(0x00020211); + break; + case Osc4: + pkt->set(0x00002C75); + break; + case Lock: + pkt->set(sysLock); + break; default: panic("Tried to read RealView I/O at offset %#x that doesn't exist\n", daddr); break; @@ -85,6 +106,15 @@ RealViewCtrl::write(PacketPtr pkt) Addr daddr = pkt->getAddr() - pioAddr; switch (daddr) { case Flash: + case Clcd: + case Osc0: + case Osc1: + case Osc2: + case Osc3: + case Osc4: + break; + case Lock: + sysLock.lockVal = pkt->get(); break; default: panic("Tried to write RVIO at offset %#x that doesn't exist\n", daddr); diff --git a/src/dev/arm/rv_ctrl.hh b/src/dev/arm/rv_ctrl.hh index 00a19d715..ceed5ef2f 100644 --- a/src/dev/arm/rv_ctrl.hh +++ b/src/dev/arm/rv_ctrl.hh @@ -40,6 +40,7 @@ #ifndef __DEV_ARM_RV_HH__ #define __DEV_ARM_RV_HH__ +#include "base/bitunion.hh" #include "base/range.hh" #include "dev/io_device.hh" #include "params/RealViewCtrl.hh" @@ -86,6 +87,14 @@ class RealViewCtrl : public BasicPioDevice TestOsc4 = 0xD0 }; + // system lock value + BitUnion32(SysLockReg) + Bitfield<15,0> lockVal; + Bitfield<16> locked; + EndBitUnion(SysLockReg) + + SysLockReg sysLock; + public: typedef RealViewCtrlParams Params; const Params * @@ -120,4 +129,3 @@ class RealViewCtrl : public BasicPioDevice #endif // __DEV_ARM_RV_HH__ - diff --git a/src/dev/ps2.cc b/src/dev/ps2.cc new file mode 100644 index 000000000..fe90ce6bc --- /dev/null +++ b/src/dev/ps2.cc @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + */ + +#include +#include "x11keysym/keysym.h" + +#include "base/misc.hh" +#include "dev/ps2.hh" + + +namespace Ps2 { + +/** Table to convert simple key symbols (0x00XX) into ps2 bytes. Lower byte + * is the scan code to send and upper byte is if a modifier is required to + * generate it. The table generates us keyboard codes, (e.g. the guest is + * supposed to recognize the keyboard as en_US). A new table would be required + * for another locale. + */ + +static const uint16_t keySymToPs2Byte[128] = { +// 0 / 8 1 / 9 2 / A 3 / B 4 / C 5 / D 6 / E 7 / F + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x00-0x07 + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x08-0x0f + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x10-0x17 + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x18-0x1f + 0x0029, 0x0116, 0x0152, 0x0126, 0x0125, 0x012e, 0x013d, 0x0052, // 0x20-0x27 + 0x0146, 0x0145, 0x013e, 0x0155, 0x0041, 0x004e, 0x0049, 0x004a, // 0x28-0x2f + 0x0045, 0x0016, 0x001e, 0x0026, 0x0025, 0x002e, 0x0036, 0x003d, // 0x30-0x37 + 0x003e, 0x0046, 0x014c, 0x004c, 0x0141, 0x0055, 0x0149, 0x014a, // 0x38-0x3f + 0x011e, 0x011c, 0x0132, 0x0121, 0x0123, 0x0124, 0x012b, 0x0134, // 0x40-0x47 + 0x0133, 0x0143, 0x013b, 0x0142, 0x014b, 0x013a, 0x0131, 0x0144, // 0x48-0x4f + 0x014d, 0x0115, 0x012d, 0x011b, 0x012c, 0x013c, 0x012a, 0x011d, // 0x50-0x57 + 0x0122, 0x0135, 0x011a, 0x0054, 0x005d, 0x005b, 0x0136, 0x014e, // 0x58-0x5f + 0x000e, 0x001c, 0x0032, 0x0021, 0x0023, 0x0024, 0x002b, 0x0034, // 0x60-0x67 + 0x0033, 0x0043, 0x003b, 0x0042, 0x004b, 0x003a, 0x0031, 0x0044, // 0x68-0x6f + 0x004d, 0x0015, 0x002d, 0x001b, 0x002c, 0x003c, 0x002a, 0x001d, // 0x70-0x77 + 0x0022, 0x0035, 0x001a, 0x0154, 0x015d, 0x015b, 0x010e, 0x0000 // 0x78-0x7f +}; + +const uint8_t ShiftKey = 0x12; +const uint8_t BreakKey = 0xf0; +const uint8_t ExtendedKey = 0xe0; +const uint32_t UpperKeys = 0xff00; + +void +keySymToPs2(uint32_t key, bool down, bool &cur_shift, + std::list &keys) +{ + if (key <= XK_asciitilde) { + uint16_t tmp = keySymToPs2Byte[key]; + uint8_t code = tmp & 0xff; + bool shift = tmp >> 8; + + if (down) { + if (!cur_shift && shift) { + keys.push_back(ShiftKey); + cur_shift = true; + } + keys.push_back(code); + } else { + if (cur_shift && !shift) { + keys.push_back(BreakKey); + keys.push_back(ShiftKey); + cur_shift = false; + } + keys.push_back(BreakKey); + keys.push_back(code); + } + } else { + if ((key & UpperKeys) == UpperKeys) { + bool extended = false; + switch (key) { + case XK_BackSpace: + keys.push_back(0x66); + break; + case XK_Tab: + keys.push_back(0x0d); + break; + case XK_Return: + keys.push_back(0x5a); + break; + case XK_Escape: + keys.push_back(0x76); + break; + case XK_Delete: + extended = true; + keys.push_back(0x71); + break; + case XK_Home: + extended = true; + keys.push_back(0x6c); + break; + case XK_Left: + extended = true; + keys.push_back(0x6b); + break; + case XK_Right: + extended = true; + keys.push_back(0x74); + break; + case XK_Down: + extended = true; + keys.push_back(0x72); + break; + case XK_Up: + extended = true; + keys.push_back(0x75); + break; + case XK_Page_Up: + extended = true; + keys.push_back(0x7d); + break; + case XK_Page_Down: + extended = true; + keys.push_back(0x7a); + break; + case XK_End: + extended = true; + keys.push_back(0x69); + break; + case XK_Shift_L: + keys.push_back(0x12); + if (down) + cur_shift = true; + else + cur_shift = false; + break; + case XK_Shift_R: + keys.push_back(0x59); + if (down) + cur_shift = true; + else + cur_shift = false; + break; + case XK_Control_L: + keys.push_back(0x14); + break; + case XK_Control_R: + extended = true; + keys.push_back(0x14); + break; + default: + warn("Unknown extended key %#x\n", key); + return; + } + + if (extended) { + if (down) { + keys.push_front(ExtendedKey); + } else { + keys.push_front(BreakKey); + keys.push_front(ExtendedKey); + } + } else { + if (!down) + keys.push_front(BreakKey); + } + } // upper keys + } // extended keys + return; +} + +} /* namespace Ps2 */ + diff --git a/src/dev/ps2.hh b/src/dev/ps2.hh new file mode 100644 index 000000000..73f3f9cd8 --- /dev/null +++ b/src/dev/ps2.hh @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + */ + +#ifndef __DEV_PS2_HH__ +#define __DEV_PS2_HH__ + +#include + +#include "base/bitunion.hh" + +/** @file misc functions and constants required to interface with or emulate ps2 + * devices + */ + +namespace Ps2 { +enum { + Ps2Reset = 0xff, + SelfTestPass = 0xAA, + SetStatusLed = 0xed, + SetResolution = 0xe8, + StatusRequest = 0xe9, + SetScaling1_2 = 0xe7, + SetScaling1_1 = 0xe6, + ReadId = 0xf2, + TpReadId = 0xe1, + Ack = 0xfa, + SetRate = 0xf3, + Enable = 0xf4, + Disable = 0xf6, + KeyboardId = 0xab, + TouchKitId = 0x0a, + MouseId = 0x00, +}; + +/** A bitfield that represents the first byte of a mouse movement packet + */ +BitUnion8(Ps2MouseMovement) + Bitfield<0> leftButton; + Bitfield<1> rightButton; + Bitfield<2> middleButton; + Bitfield<3> one; + Bitfield<4> xSign; + Bitfield<5> ySign; + Bitfield<6> xOverflow; + Bitfield<7> yOverflow; +EndBitUnion(Ps2MouseMovement) + +/** Convert an x11 key symbol into a set of ps2 charecters. + * @param key x11 key symbol + * @param down if the key is being pressed or released + * @param cur_shift if device has already sent a shift + * @param keys list of keys command to send to emulate the x11 key symbol + */ +void keySymToPs2(uint32_t key, bool down, bool &cur_shift, + std::list &keys); + +} /* namespace Ps2 */ +#endif // __DEV_PS2_HH__ -- cgit v1.2.3 From e65c15e9316f47dfd3cb10786733eac7ac81822c Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 12 Feb 2011 10:14:26 -0500 Subject: inorder: remove unused isa ops pass/fail ops were used for testing but arent part of isa --- src/arch/mips/isa/decoder.isa | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'src') diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index 173fa89df..d97a141de 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -367,21 +367,7 @@ decode OPCODE_HI default Unknown::unknown() { }}); 0x1: addiu({{ Rt.sw = Rs.sw + imm; }}); 0x2: slti({{ Rt.sw = (Rs.sw < imm) ? 1 : 0 }}); - - //Edited to include MIPS AVP Pass/Fail instructions and - //default to the sltiu instruction - 0x3: decode RS_RT_INTIMM { - 0xabc1: BasicOp::fail({{ - exitSimLoop("AVP/SRVP Test Failed"); - }}); - 0xabc2: BasicOp::pass({{ - exitSimLoop("AVP/SRVP Test Passed"); - }}); - default: sltiu({{ - Rt.uw = (Rs.uw < (uint32_t)sextImm) ? 1 : 0; - }}); - } - + 0x3: sltiu({{ Rt.uw = (Rs.uw < (uint32_t)sextImm) ? 1 : 0;}}); 0x4: andi({{ Rt.sw = Rs.sw & zextImm; }}); 0x5: ori({{ Rt.sw = Rs.sw | zextImm; }}); 0x6: xori({{ Rt.sw = Rs.sw ^ zextImm; }}); -- cgit v1.2.3 From 800e93f3587f769b1caa782c6d5542c372085cd2 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 12 Feb 2011 10:14:32 -0500 Subject: inorder: remove unused file inst_buffer file isn't used , so remove it --- src/cpu/inorder/resources/inst_buffer_new.cc | 158 --------------------------- src/cpu/inorder/resources/inst_buffer_new.hh | 109 ------------------ 2 files changed, 267 deletions(-) delete mode 100644 src/cpu/inorder/resources/inst_buffer_new.cc delete mode 100644 src/cpu/inorder/resources/inst_buffer_new.hh (limited to 'src') diff --git a/src/cpu/inorder/resources/inst_buffer_new.cc b/src/cpu/inorder/resources/inst_buffer_new.cc deleted file mode 100644 index 2e5a9666a..000000000 --- a/src/cpu/inorder/resources/inst_buffer_new.cc +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2007 MIPS Technologies, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Korey Sewell - * - */ - -#include -#include - -#include "arch/isa_traits.hh" -#include "config/the_isa.hh" -#include "cpu/inorder/pipeline_traits.hh" -#include "cpu/inorder/resources/inst_buffer.hh" -#include "cpu/inorder/cpu.hh" - -using namespace std; -using namespace TheISA; -using namespace ThePipeline; - -InstBuffer::InstBuffer(string res_name, int res_id, int res_width, - int res_latency, InOrderCPU *_cpu) - : Resource(res_name, res_id, res_width, res_latency, _cpu) -{ } - -ResReqPtr -InstBuffer::getRequest(DynInstPtr inst, int stage_num, int res_idx, - int slot_num) -{ - // After this is working, change this to a reinterpret cast - // for performance considerations - InstBufferEntry* ib_entry = dynamic_cast(inst->resSched.top()); - assert(ib_entry); - - return new InstBufferRequest(this, inst, stage_num, id, slot_num, - ib_entry->cmd); -} - -void -InstBuffer::execute(int slot_idx) -{ - // After this is working, change this to a reinterpret cast - // for performance considerations - InstBufferRequest* ib_req = dynamic_cast(reqMap[slot_idx]); - assert(ib_req); - - DynInstPtr inst = ib_req->inst; - ThreadID tid = inst->readTid(); - int seq_num = inst->seqNum; - ib_req->fault = NoFault; - - switch (ib_req->cmd) - { - case InsertInst: - { - DPRINTF(Resource, "[tid:%i]: Inserting [sn:%i] into buffer.\n", - tid, seq_num); - insert(inst); - ib_req->done(); - } - break; - - case RemoveInst: - { - DPRINTF(Resource, "[tid:%i]: Removing [sn:%i] from buffer.\n", - tid, seq_num); - remove(inst); - ib_req->done(); - } - break; - - default: - fatal("Unrecognized command to %s", resName); - } - - DPRINTF(Resource, "Buffer now contains %i insts.\n", instList.size()); -} - -void -InstBuffer::insert(DynInstPtr inst) -{ - instList.push_back(inst); -} - -void -InstBuffer::remove(DynInstPtr inst) -{ - std::list::iterator list_it = instList.begin(); - std::list::iterator list_end = instList.end(); - - while (list_it != list_end) { - if((*list_it) == inst) { - instList.erase(list_it); - break; - } - list_it++; - } -} - -void -InstBuffer::pop() -{ instList.pop_front(); } - -ThePipeline::DynInstPtr -InstBuffer::top() -{ return instList.front(); } - -void -InstBuffer::squash(InstSeqNum squash_seq_num, ThreadID tid) -{ - list::iterator list_it = instList.begin(); - list::iterator list_end = instList.end(); - queue::iterator> remove_list; - - // Collect All Instructions to be Removed in Remove List - while (list_it != list_end) { - if((*list_it)->seqNum > squash_seq_num) { - DPRINTF(Resource, "[tid:%i]: Squashing [sn:%i] in resource.\n", - tid, (*list_it)->seqNum); - (*list_it)->setSquashed(); - remove_list.push(list_it); - } - - list_it++; - } - - // Removed Instructions from InstList & Clear Remove List - while (!remove_list.empty()) { - instList.erase(remove_list.front()); - remove_list.pop(); - } - - Resource::squash(squash_seq_num, tid); -} diff --git a/src/cpu/inorder/resources/inst_buffer_new.hh b/src/cpu/inorder/resources/inst_buffer_new.hh deleted file mode 100644 index b1d5a7b09..000000000 --- a/src/cpu/inorder/resources/inst_buffer_new.hh +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2007 MIPS Technologies, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Korey Sewell - * - */ - -#ifndef __CPU_INORDER_INST_BUFF_UNIT_HH__ -#define __CPU_INORDER_INST_BUFF_UNIT_HH__ - -#include -#include -#include - -#include "cpu/inorder/resource.hh" -#include "cpu/inorder/inorder_dyn_inst.hh" -#include "cpu/inorder/pipeline_traits.hh" -#include "cpu/inorder/cpu.hh" - -class InstBuffer : public Resource { - public: - typedef InOrderDynInst::DynInstPtr DynInstPtr; - - public: - enum Command { - InsertInst, - InsertAddr, - RemoveInst, - RemoveAddr - }; - - public: - InstBuffer(std::string res_name, int res_id, int res_width, - int res_latency, InOrderCPU *_cpu); - virtual ~InstBuffer() {} - - virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, - int res_idx, int slot_num); - - virtual void execute(int slot_num); - - virtual void insert(DynInstPtr inst); - - virtual void remove(DynInstPtr inst); - - virtual void pop(); - - virtual DynInstPtr top(); - - virtual void squash(InstSeqNum squash_seq_num, ThreadID tid); - - protected: - /** List of instructions this resource is currently - * processing. - */ - std::list instList; - - /** @todo: Add Resource Stats Here */ - -}; - -struct InstBufferEntry : public ThePipeline::ScheduleEntry { - InstBufferEntry(int stage_num, int res_num, InstBuffer::Command _cmd) : - ScheduleEntry(stage_num, res_num), cmd(_cmd) - { } - - InstBuffer::Command cmd; -}; - -class InstBufferRequest : public ResourceRequest { - public: - typedef InOrderDynInst::DynInstPtr DynInstPtr; - - public: - InstBufferRequest(InstBuffer *res, DynInstPtr inst, int stage_num, int res_idx, int slot_num, - InstBuffer::Command _cmd) - : ResourceRequest(res, inst, stage_num, res_idx, slot_num), - cmd(_cmd) - { } - - InstBuffer::Command cmd; -}; - - -#endif //__CPU_INORDER_INST_BUFF_UNIT_HH__ -- cgit v1.2.3 From af67631790afbfeba01b05f7ae2ca54ae27428f1 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 12 Feb 2011 10:14:34 -0500 Subject: inorder: comments for resource sked class --- src/cpu/inorder/resource_sked.hh | 67 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource_sked.hh b/src/cpu/inorder/resource_sked.hh index 22e29d728..e3c0c6bc1 100644 --- a/src/cpu/inorder/resource_sked.hh +++ b/src/cpu/inorder/resource_sked.hh @@ -35,6 +35,17 @@ #include #include +/** ScheduleEntry class represents a single function that an instruction + wants to do at any pipeline stage. For example, if an instruction + needs to be decoded and do a branch prediction all in one stage + then each of those tasks would need it's own ScheduleEntry. + + Each schedule entry corresponds to some resource that the instruction + wants to interact with. + + The file pipeline_traits.cc shows how a typical instruction schedule is + made up of these schedule entries. +*/ class ScheduleEntry { public: ScheduleEntry(int stage_num, int _priority, int res_num, int _cmd = 0, @@ -43,43 +54,89 @@ class ScheduleEntry { idx(_idx), priority(_priority) { } - // Stage number to perform this service. + /** Stage number to perform this service. */ int stageNum; - // Resource ID to access + /** Resource ID to access */ int resNum; - // See specific resource for meaning + /** See specific resource for meaning */ unsigned cmd; - // See specific resource for meaning + /** See specific resource for meaning */ unsigned idx; - // Some Resources May Need Priority + /** Some Resources May Need Priority */ int priority; }; +/** The ResourceSked maintains the complete schedule + for an instruction. That schedule includes what + resources an instruction wants to acquire at each + pipeline stage and is represented by a collection + of ScheduleEntry objects (described above) that + must be executed in-order. + + In every pipeline stage, the InOrder model will + process all entries on the resource schedule for + that stage and then send the instruction to the next + stage if and only if the instruction successfully + completed each ScheduleEntry. +*/ class ResourceSked { public: typedef std::list::iterator SkedIt; ResourceSked(); + /** Initializee the current entry pointer to + pipeline stage 0 and the 1st schedule entry + */ void init(); + /** Goes through the remaining stages on the schedule + and sums all the remaining entries left to be + processed + */ int size(); + + /** Is the schedule empty? */ bool empty(); + + /** What is the next task for this instruction schedule? */ ScheduleEntry* top(); + + /** Top() Task is completed, remove it from schedule */ void pop(); + + /** Add To Schedule based on stage num and priority of + Schedule Entry + */ void push(ScheduleEntry* sked_entry); + + /** Add Schedule Entry to be in front of another Entry */ void pushBefore(ScheduleEntry* sked_entry, int sked_cmd, int sked_cmd_idx); + + /** Print what's left on the instruction schedule */ void print(); private: + /** Current Schedule Entry Pointer */ SkedIt curSkedEntry; + + /** The Resource Schedule: Resized to Number of Stages in + the constructor + */ std::vector > sked; + /** Find a place to insert the instruction using the + schedule entries priority + */ SkedIt findIterByPriority(ScheduleEntry *sked_entry, int stage_num); + + /** Find a place to insert the instruction using a particular command + to look for. + */ SkedIt findIterByCommand(ScheduleEntry *sked_entry, int stage_num, int sked_cmd, int sked_cmd_idx = -1); }; -- cgit v1.2.3 From 6713dbfe080df4dd04b0f29b5f2fbd6e221ffebf Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 12 Feb 2011 10:14:36 -0500 Subject: inorder: cache instruction schedules first step in a optimization to not dynamically allocate an instruction schedule for every instruction but rather used cached schedules --- src/cpu/inorder/cpu.cc | 6 +++- src/cpu/inorder/cpu.hh | 56 ++++++++++++++++++++++++++++++++++++++ src/cpu/inorder/pipeline_traits.hh | 1 + 3 files changed, 62 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index ffdcae7df..39357cd30 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -334,9 +334,13 @@ InOrderCPU::InOrderCPU(Params *params) dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0); dummyReqInst->setSquashed(); + dummyReqInst->resetInstCount(); dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0, 0); dummyBufferInst->setSquashed(); + dummyBufferInst->resetInstCount(); + + endOfSkedIt = skedCache.end(); lastRunningCycle = curTick(); @@ -348,7 +352,6 @@ InOrderCPU::InOrderCPU(Params *params) reset(); #endif - dummyBufferInst->resetInstCount(); // Schedule First Tick Event, CPU will reschedule itself from here on out. scheduleTickEvent(0); @@ -359,6 +362,7 @@ InOrderCPU::~InOrderCPU() delete resPool; } +std::map InOrderCPU::skedCache; void InOrderCPU::regStats() diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 9ff0f12ce..154ab690c 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -296,6 +296,62 @@ class InOrderCPU : public BaseCPU TheISA::TLB *getITBPtr(); TheISA::TLB *getDTBPtr(); + /** Accessor Type for the SkedCache */ + typedef uint32_t SkedID; + + /** Cache of Instruction Schedule using the instruction's name as a key */ + static std::map skedCache; + + typedef std::map::iterator SkedCacheIt; + + /** Initialized to last iterator in map, signifying a invalid entry + on map searches + */ + SkedCacheIt endOfSkedIt; + + /** Add a new instruction schedule to the schedule cache */ + void addToSkedCache(DynInstPtr inst, ThePipeline::RSkedPtr inst_sked) + { + SkedID sked_id = genSkedID(inst); + skedCache[sked_id] = inst_sked; + } + + + /** Find a instruction schedule */ + ThePipeline::RSkedPtr lookupSked(DynInstPtr inst) + { + SkedID sked_id = genSkedID(inst); + SkedCacheIt lookup_it = skedCache.find(sked_id); + + if (lookup_it != endOfSkedIt) { + return (*lookup_it).second; + } else { + return NULL; + } + } + + static const uint8_t INST_OPCLASS = 26; + static const uint8_t INST_LOAD = 25; + static const uint8_t INST_STORE = 24; + static const uint8_t INST_CONTROL = 23; + static const uint8_t INST_NONSPEC = 22; + static const uint8_t INST_DEST_REGS = 18; + static const uint8_t INST_SRC_REGS = 14; + + inline SkedID genSkedID(DynInstPtr inst) + { + SkedID id = 0; + id = (inst->opClass() << INST_OPCLASS) | + (inst->isLoad() << INST_LOAD) | + (inst->isStore() << INST_STORE) | + (inst->isControl() << INST_CONTROL) | + (inst->isNonSpeculative() << INST_NONSPEC) | + (inst->numDestRegs() << INST_DEST_REGS) | + (inst->numSrcRegs() << INST_SRC_REGS); + return id; + } + + public: /** Registers statistics. */ diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index df964e254..2c4e44339 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -77,6 +77,7 @@ namespace ThePipeline { // RESOURCE SCHEDULING ////////////////////////// typedef ResourceSked ResSchedule; + typedef ResourceSked* RSkedPtr; void createFrontEndSchedule(DynInstPtr &inst); bool createBackEndSchedule(DynInstPtr &inst); -- cgit v1.2.3 From ec9b2ec25151bd857ad0557befc37f00bc61d1c7 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 12 Feb 2011 10:14:40 -0500 Subject: inorder: stage scheduler for front/back end schedule creation add a stage scheduler class to replace InstStage in pipeline_traits.cc use that class to define a default front-end, resource schedule that all instructions will follow. This will also replace the back end schedule in pipeline_traits.cc. The reason for adding this is so that we can cache instruction schedules in the future instead of calling the same function over/over again as well as constantly dynamically alllocating memory on every instruction to try to figure out it's schedule --- src/cpu/inorder/cpu.cc | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/cpu/inorder/cpu.hh | 29 +++++++++++++++ 2 files changed, 126 insertions(+) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 39357cd30..0591b0510 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -341,6 +341,7 @@ InOrderCPU::InOrderCPU(Params *params) dummyBufferInst->resetInstCount(); endOfSkedIt = skedCache.end(); + frontEndSked = createFrontEndSked(); lastRunningCycle = curTick(); @@ -364,6 +365,102 @@ InOrderCPU::~InOrderCPU() std::map InOrderCPU::skedCache; +RSkedPtr +InOrderCPU::createFrontEndSked() +{ + RSkedPtr res_sked = NULL; + int stage_num = 0; + StageScheduler F(res_sked, stage_num++); + StageScheduler D(res_sked, stage_num++); + + // FETCH + F.needs(FetchSeq, FetchSeqUnit::AssignNextPC); + F.needs(ICache, FetchUnit::InitiateFetch); + + // DECODE + D.needs(ICache, FetchUnit::CompleteFetch); + D.needs(Decode, DecodeUnit::DecodeInst); + D.needs(BPred, BranchPredictor::PredictBranch); + D.needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); + + return res_sked; +} + +RSkedPtr +InOrderCPU::createBackEndSked(DynInstPtr inst) +{ + RSkedPtr res_sked = lookupSked(inst); + if (res_sked != NULL) { + return res_sked; + } + + int stage_num = ThePipeline::BackEndStartStage; + StageScheduler X(res_sked, stage_num++); + StageScheduler M(res_sked, stage_num++); + StageScheduler W(res_sked, stage_num++); + + if (!inst->staticInst) { + warn_once("Static Instruction Object Not Set. Can't Create" + " Back End Schedule"); + return false; + } + + // EXECUTE + for (int idx=0; idx < inst->numSrcRegs(); idx++) { + if (!idx || !inst->isStore()) { + X.needs(RegManager, UseDefUnit::ReadSrcReg, idx); + } + } + + if ( inst->isNonSpeculative() ) { + // skip execution of non speculative insts until later + } else if ( inst->isMemRef() ) { + if ( inst->isLoad() ) { + X.needs(AGEN, AGENUnit::GenerateAddr); + } + } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { + X.needs(MDU, MultDivUnit::StartMultDiv); + } else { + X.needs(ExecUnit, ExecutionUnit::ExecuteInst); + } + + if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { + X.needs(MDU, MultDivUnit::EndMultDiv); + } + + // MEMORY + if ( inst->isLoad() ) { + M.needs(DCache, CacheUnit::InitiateReadData); + } else if ( inst->isStore() ) { + if ( inst->numSrcRegs() >= 2 ) { + M.needs(RegManager, UseDefUnit::ReadSrcReg, 1); + } + M.needs(AGEN, AGENUnit::GenerateAddr); + M.needs(DCache, CacheUnit::InitiateWriteData); + } + + + // WRITEBACK + if ( inst->isLoad() ) { + W.needs(DCache, CacheUnit::CompleteReadData); + } else if ( inst->isStore() ) { + W.needs(DCache, CacheUnit::CompleteWriteData); + } + + if ( inst->isNonSpeculative() ) { + if ( inst->isMemRef() ) fatal("Non-Speculative Memory Instruction"); + W.needs(ExecUnit, ExecutionUnit::ExecuteInst); + } + + for (int idx=0; idx < inst->numDestRegs(); idx++) { + W.needs(RegManager, UseDefUnit::WriteDestReg, idx); + } + + W.needs(Grad, GraduationUnit::GraduateInst); + + return res_sked; +} + void InOrderCPU::regStats() { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 154ab690c..2a5c815e1 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -309,6 +309,8 @@ class InOrderCPU : public BaseCPU */ SkedCacheIt endOfSkedIt; + ThePipeline::RSkedPtr frontEndSked; + /** Add a new instruction schedule to the schedule cache */ void addToSkedCache(DynInstPtr inst, ThePipeline::RSkedPtr inst_sked) { @@ -351,6 +353,33 @@ class InOrderCPU : public BaseCPU return id; } + ThePipeline::RSkedPtr createFrontEndSked(); + ThePipeline::RSkedPtr createBackEndSked(DynInstPtr inst); + + class StageScheduler { + private: + ThePipeline::RSkedPtr rsked; + int stageNum; + int nextTaskPriority; + + public: + StageScheduler(ThePipeline::RSkedPtr _rsked, int stage_num) + : rsked(_rsked), stageNum(stage_num), + nextTaskPriority(0) + { } + + void needs(int unit, int request) { + rsked->push(new ScheduleEntry( + stageNum, nextTaskPriority++, unit, request + )); + } + + void needs(int unit, int request, int param) { + rsked->push(new ScheduleEntry( + stageNum, nextTaskPriority++, unit, request, param + )); + } + }; public: -- cgit v1.2.3 From 516b61146271b13d2350563b0349747724ffbc99 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 12 Feb 2011 10:14:43 -0500 Subject: inorder: define iterator for resource schedules resource skeds are divided into two parts: front end (all insts) and back end (inst. specific) each of those are implemented as separate lists, so this iterator wraps around the traditional list iterator so that an instruction can walk it's schedule but seamlessly transfer from front end to back end when necessary --- src/cpu/inorder/resource_sked.hh | 143 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource_sked.hh b/src/cpu/inorder/resource_sked.hh index e3c0c6bc1..bd002e161 100644 --- a/src/cpu/inorder/resource_sked.hh +++ b/src/cpu/inorder/resource_sked.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The Regents of The University of Michigan + * Copyright (c) 2010-2011 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,6 +34,7 @@ #include #include +#include /** ScheduleEntry class represents a single function that an instruction wants to do at any pipeline stage. For example, if an instruction @@ -86,6 +87,7 @@ class ScheduleEntry { class ResourceSked { public: typedef std::list::iterator SkedIt; + typedef std::vector > StageList; ResourceSked(); @@ -103,6 +105,12 @@ class ResourceSked { /** Is the schedule empty? */ bool empty(); + /** Beginning Entry of this schedule */ + SkedIt begin(); + + /** Ending Entry of this schedule */ + SkedIt end(); + /** What is the next task for this instruction schedule? */ ScheduleEntry* top(); @@ -120,14 +128,19 @@ class ResourceSked { /** Print what's left on the instruction schedule */ void print(); + StageList *getStages() + { + return &stages; + } + private: /** Current Schedule Entry Pointer */ SkedIt curSkedEntry; - /** The Resource Schedule: Resized to Number of Stages in - the constructor + /** The Stage-by-Stage Resource Schedule: + Resized to Number of Stages in the constructor */ - std::vector > sked; + StageList stages; /** Find a place to insert the instruction using the schedule entries priority @@ -141,4 +154,126 @@ class ResourceSked { int sked_cmd, int sked_cmd_idx = -1); }; +/** Wrapper class around the SkedIt iterator in the Resource Sked so that + we can use ++ operator to automatically go to the next available + resource schedule entry but otherwise maintain same functionality + as a normal iterator. +*/ +class RSkedIt +{ + public: + RSkedIt() + : curStage(0), numStages(0) + { } + + + /** init() must be called before the use of any other member + in the RSkedIt class. + */ + void init(ResourceSked* rsked) + { + stages = rsked->getStages(); + numStages = stages->size(); + } + + /* Update the encapsulated "myIt" iterator, but only + update curStage/curStage_end if the iterator is valid. + The iterator could be invalid in the case where + someone is saving the end of a list (i.e. std::list->end()) + */ + RSkedIt operator=(ResourceSked::SkedIt const &rhs) + { + myIt = rhs; + if (myIt != (*stages)[numStages-1].end()) { + curStage = (*myIt)->stageNum; + curStage_end = (*stages)[curStage].end(); + } + return *this; + } + + /** Increment to the next entry in current stage. + If no more entries then find the next stage that has + resource schedule to complete. + If no more stages, then return the end() iterator from + the last stage to indicate we are done. + */ + RSkedIt &operator++(int unused) + { + if (++myIt == curStage_end) { + curStage++; + while (curStage < numStages) { + if ((*stages)[curStage].empty()) { + curStage++; + } else { + myIt = (*stages)[curStage].begin(); + curStage_end = (*stages)[curStage].end(); + return *this; + } + } + + myIt = (*stages)[numStages - 1].end(); + } + + return *this; + } + + /** The "pointer" operator can be used on a RSkedIt and it + will use the encapsulated iterator + */ + ScheduleEntry* operator->() + { + return *myIt; + } + + /** Dereferencing a RSKedIt will access the encapsulated + iterator + */ + ScheduleEntry* operator*() + { + return *myIt; + } + + /** Equality for RSkedIt only compares the "myIt" iterators, + as the other members are just ancillary + */ + bool operator==(RSkedIt const &rhs) + { + return this->myIt == rhs.myIt; + } + + /** Inequality for RSkedIt only compares the "myIt" iterators, + as the other members are just ancillary + */ + bool operator!=(RSkedIt const &rhs) + { + return this->myIt != rhs.myIt; + } + + /* The == and != operator overloads should be sufficient + here if need otherwise direct access to the schedule + iterator, then this can be used */ + ResourceSked::SkedIt getIt() + { + return myIt; + } + + private: + /** Schedule Iterator that this class is encapsulating */ + ResourceSked::SkedIt myIt; + + /** Ptr to resource schedule that the 'myIt' iterator + belongs to + */ + ResourceSked::StageList *stages; + + /** The last iterator in the current stage. */ + ResourceSked::SkedIt curStage_end; + + /** Current Stage that "myIt" refers to. */ + int curStage; + + /** Number of stages in the "*stages" object. */ + int numStages; +}; + #endif //__CPU_INORDER_RESOURCE_SKED_HH__ -- cgit v1.2.3 From e26aee514d328bd8c9930c742df6ce1485dce5ae Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 12 Feb 2011 10:14:45 -0500 Subject: inorder: utilize cached skeds in pipeline allow the pipeline and resources to use the cached instruction schedule and resource sked iterator --- src/cpu/inorder/SConscript | 2 +- src/cpu/inorder/cpu.cc | 19 +++++- src/cpu/inorder/first_stage.cc | 2 +- src/cpu/inorder/inorder_dyn_inst.cc | 4 +- src/cpu/inorder/inorder_dyn_inst.hh | 92 +++++++++++++++++++--------- src/cpu/inorder/pipeline_stage.cc | 11 +++- src/cpu/inorder/resource.cc | 4 +- src/cpu/inorder/resource_pool.cc | 11 ++++ src/cpu/inorder/resource_pool.hh | 2 + src/cpu/inorder/resource_sked.cc | 78 +++++++++++++++-------- src/cpu/inorder/resources/cache_unit.cc | 20 +++--- src/cpu/inorder/resources/decode_unit.cc | 7 ++- src/cpu/inorder/resources/fetch_unit.cc | 6 +- src/cpu/inorder/resources/graduation_unit.cc | 5 +- src/cpu/inorder/resources/mult_div_unit.cc | 6 +- src/cpu/inorder/resources/tlb_unit.cc | 2 +- src/cpu/inorder/resources/use_def.cc | 6 +- 17 files changed, 188 insertions(+), 89 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript index ae5ec0257..7427df226 100644 --- a/src/cpu/inorder/SConscript +++ b/src/cpu/inorder/SConscript @@ -55,7 +55,7 @@ if 'InOrderCPU' in env['CPU_MODELS']: TraceFlag('ThreadModel') TraceFlag('RefCount') TraceFlag('AddrDep') - + TraceFlag('SkedCache') CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU', 'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred', diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 0591b0510..1d678b9c5 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -368,7 +368,7 @@ std::map InOrderCPU::skedCache; RSkedPtr InOrderCPU::createFrontEndSked() { - RSkedPtr res_sked = NULL; + RSkedPtr res_sked = new ResourceSked(); int stage_num = 0; StageScheduler F(res_sked, stage_num++); StageScheduler D(res_sked, stage_num++); @@ -383,6 +383,9 @@ InOrderCPU::createFrontEndSked() D.needs(BPred, BranchPredictor::PredictBranch); D.needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); + + DPRINTF(SkedCache, "Resource Sked created for instruction \"front_end\"\n"); + return res_sked; } @@ -391,7 +394,11 @@ InOrderCPU::createBackEndSked(DynInstPtr inst) { RSkedPtr res_sked = lookupSked(inst); if (res_sked != NULL) { + DPRINTF(SkedCache, "Found %s in sked cache.\n", + inst->instName()); return res_sked; + } else { + res_sked = new ResourceSked(); } int stage_num = ThePipeline::BackEndStartStage; @@ -402,7 +409,7 @@ InOrderCPU::createBackEndSked(DynInstPtr inst) if (!inst->staticInst) { warn_once("Static Instruction Object Not Set. Can't Create" " Back End Schedule"); - return false; + return NULL; } // EXECUTE @@ -458,6 +465,14 @@ InOrderCPU::createBackEndSked(DynInstPtr inst) W.needs(Grad, GraduationUnit::GraduateInst); + // Insert Front Schedule into our cache of + // resource schedules + addToSkedCache(inst, res_sked); + + DPRINTF(SkedCache, "Back End Sked Created for instruction: %s (%08p)\n", + inst->instName(), inst->getMachInst()); + res_sked->print(); + return res_sked; } diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 71c6ec3e0..b656ca1c7 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -181,7 +181,7 @@ FirstStage::processInsts(ThreadID tid) inst->setInstListIt(cpu->addInst(inst)); // Create Front-End Resource Schedule For Instruction - ThePipeline::createFrontEndSchedule(inst); + inst->setFrontSked(cpu->frontEndSked); } int reqs_processed = 0; diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 6afe35862..6f3e822fe 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -51,7 +51,7 @@ InOrderDynInst::InOrderDynInst(TheISA::ExtMachInst machInst, const TheISA::PCState &instPC, const TheISA::PCState &_predPC, InstSeqNum seq_num, InOrderCPU *cpu) - : staticInst(machInst, instPC.instAddr()), traceData(NULL), cpu(cpu) + : staticInst(machInst, instPC.instAddr()), traceData(NULL), cpu(cpu) { seqNum = seq_num; @@ -108,6 +108,8 @@ InOrderDynInst::setMachInst(ExtMachInst machInst) void InOrderDynInst::initVars() { + inFrontEnd = true; + fetchMemReq = NULL; dataMemReq = NULL; splitMemData = NULL; diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 1c0ee4384..6acd0f7f4 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -337,9 +337,10 @@ class InOrderDynInst : public FastAlloc, public RefCounted //////////////////////////////////////////////////////////// std::string instName() { return staticInst->getName(); } - void setMachInst(ExtMachInst inst); + ExtMachInst getMachInst() { return staticInst->machInst; } + /** Sets the StaticInst. */ void setStaticInst(StaticInstPtr &static_inst); @@ -411,6 +412,39 @@ class InOrderDynInst : public FastAlloc, public RefCounted // RESOURCE SCHEDULING // ///////////////////////////////////////////// + typedef ThePipeline::RSkedPtr RSkedPtr; + bool inFrontEnd; + + RSkedPtr frontSked; + RSkedIt frontSked_end; + + RSkedPtr backSked; + RSkedIt backSked_end; + + RSkedIt curSkedEntry; + + void setFrontSked(RSkedPtr front_sked) + { + frontSked = front_sked; + frontSked_end.init(frontSked); + frontSked_end = frontSked->end(); + //DPRINTF(InOrderDynInst, "Set FrontSked End to : %x \n" , + // frontSked_end.getIt()/*, frontSked->end()*/); + //assert(frontSked_end == frontSked->end()); + + // This initializes instruction to be able + // to walk the resource schedule + curSkedEntry.init(frontSked); + curSkedEntry = frontSked->begin(); + } + + void setBackSked(RSkedPtr back_sked) + { + backSked = back_sked; + backSked_end.init(backSked); + backSked_end = backSked->end(); + } + void setNextStage(int stage_num) { nextStage = stage_num; } int getNextStage() { return nextStage; } @@ -426,53 +460,51 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Print Resource Schedule */ - /** @NOTE: DEBUG ONLY */ - void printSched() + void printSked() { - ThePipeline::ResSchedule tempSched; - std::cerr << "\tInst. Res. Schedule: "; - while (!resSched.empty()) { - std::cerr << '\t' << resSched.top()->stageNum << "-" - << resSched.top()->resNum << ", "; - - tempSched.push(resSched.top()); - resSched.pop(); + if (frontSked != NULL) { + frontSked->print(); } - std::cerr << std::endl; - resSched = tempSched; + if (backSked != NULL) { + backSked->print(); + } } /** Return Next Resource Stage To Be Used */ int nextResStage() { - if (resSched.empty()) - return -1; - else - return resSched.top()->stageNum; + assert((inFrontEnd && curSkedEntry != frontSked_end) || + (!inFrontEnd && curSkedEntry != backSked_end)); + + return curSkedEntry->stageNum; } /** Return Next Resource To Be Used */ int nextResource() { - if (resSched.empty()) - return -1; - else - return resSched.top()->resNum; + assert((inFrontEnd && curSkedEntry != frontSked_end) || + (!inFrontEnd && curSkedEntry != backSked_end)); + + return curSkedEntry->resNum; } - /** Remove & Deallocate a schedule entry */ - void popSchedEntry() + /** Finish using a schedule entry, increment to next entry */ + bool finishSkedEntry() { - if (!resSched.empty()) { - ScheduleEntry* sked = resSched.top(); - resSched.pop(); - if (sked != 0) { - delete sked; - - } + curSkedEntry++; + + if (inFrontEnd && curSkedEntry == frontSked_end) { + assert(backSked != NULL); + curSkedEntry.init(backSked); + curSkedEntry = backSked->begin(); + inFrontEnd = false; + } else if (!inFrontEnd && curSkedEntry == backSked_end) { + return true; } + + return false; } /** Release a Resource Request (Currently Unused) */ diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 744ffd4d2..bc31a8537 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -944,11 +944,16 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) "completed.\n", tid, inst->seqNum, cpu->resPool->name(res_num)); - inst->popSchedEntry(); - reqs_processed++; req->stagePasses++; + + bool done_in_pipeline = inst->finishSkedEntry(); + if (done_in_pipeline) { + DPRINTF(InOrderDynInst, "[tid:%i]: [sn:%i] finished " + "in pipeline.\n", tid, inst->seqNum); + break; + } } else { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); @@ -982,7 +987,7 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) // Activate Next Ready Thread at end of cycle DPRINTF(ThreadModel, "Attempting to activate next ready " "thread due to cache miss.\n"); - cpu->activateNextReadyContext(); + cpu->activateNextReadyContext(); } // Mark request for deletion diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 51beb5aa0..72b45dda8 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -184,8 +184,8 @@ Resource::request(DynInstPtr inst) if (slot_num != -1) { // Get Stage # from Schedule Entry - stage_num = inst->resSched.top()->stageNum; - unsigned cmd = inst->resSched.top()->cmd; + stage_num = inst->curSkedEntry->stageNum; + unsigned cmd = inst->curSkedEntry->cmd; // Generate Resource Request inst_req = getRequest(inst, stage_num, id, slot_num, cmd); diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index a037cbe9e..e1914623a 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -91,6 +91,7 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 0, _cpu, params)); + } ResourcePool::~ResourcePool() @@ -122,6 +123,16 @@ ResourcePool::name() return cpu->name() + ".ResourcePool"; } +void +ResourcePool::print() +{ + for (int i=0; i < resources.size(); i++) { + DPRINTF(InOrderDynInst, "Res:%i %s\n", + i, resources[i]->name()); + } + +} + void ResourcePool::regStats() diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index e8061d3ff..fde38b4e9 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -130,6 +130,8 @@ class ResourcePool { void init(); + void print(); + /** Register Statistics in All Resources */ void regStats(); diff --git a/src/cpu/inorder/resource_sked.cc b/src/cpu/inorder/resource_sked.cc index 4104e6989..4cf791228 100644 --- a/src/cpu/inorder/resource_sked.cc +++ b/src/cpu/inorder/resource_sked.cc @@ -34,30 +34,30 @@ #include #include -#include +#include using namespace std; using namespace ThePipeline; ResourceSked::ResourceSked() { - sked.resize(NumStages); + stages.resize(NumStages); } void ResourceSked::init() { - assert(!sked[0].empty()); + assert(!stages[0].empty()); - curSkedEntry = sked[0].begin(); + curSkedEntry = stages[0].begin(); } int ResourceSked::size() { int total = 0; - for (int i = 0; i < sked.size(); i++) { - total += sked[i].size(); + for (int i = 0; i < stages.size(); i++) { + total += stages[i].size(); } return total; @@ -69,6 +69,26 @@ ResourceSked::empty() return size() == 0; } + +ResourceSked::SkedIt +ResourceSked::begin() +{ + int num_stages = stages.size(); + for (int i = 0; i < num_stages; i++) { + if (stages[i].size() > 0) + return stages[i].begin(); + } + + return stages[num_stages - 1].end(); +} + +ResourceSked::SkedIt +ResourceSked::end() +{ + int num_stages = stages.size(); + return stages[num_stages - 1].end(); +} + ScheduleEntry* ResourceSked::top() { @@ -82,18 +102,18 @@ ResourceSked::pop() { int stage_num = (*curSkedEntry)->stageNum; - sked[stage_num].erase(curSkedEntry); + stages[stage_num].erase(curSkedEntry); - if (!sked[stage_num].empty()) { - curSkedEntry = sked[stage_num].begin(); + if (!stages[stage_num].empty()) { + curSkedEntry = stages[stage_num].begin(); } else { int next_stage = stage_num + 1; while (next_stage < NumStages) { - if (sked[next_stage].empty()) { + if (stages[next_stage].empty()) { next_stage++; } else { - curSkedEntry = sked[next_stage].begin(); + curSkedEntry = stages[next_stage].begin(); break; } } @@ -108,7 +128,7 @@ ResourceSked::push(ScheduleEntry* sked_entry) SkedIt pri_iter = findIterByPriority(sked_entry, stage_num); - sked[stage_num].insert(pri_iter, sked_entry); + stages[stage_num].insert(pri_iter, sked_entry); } void @@ -122,23 +142,23 @@ ResourceSked::pushBefore(ScheduleEntry* sked_entry, int sked_cmd, SkedIt pri_iter = findIterByCommand(sked_entry, stage_num, sked_cmd, sked_cmd_idx); - assert(pri_iter != sked[stage_num].end() && + assert(pri_iter != stages[stage_num].end() && "Could not find command to insert in front of."); - sked[stage_num].insert(pri_iter, sked_entry); + stages[stage_num].insert(pri_iter, sked_entry); } ResourceSked::SkedIt ResourceSked::findIterByPriority(ScheduleEntry* sked_entry, int stage_num) { - if (sked[stage_num].empty()) { - return sked[stage_num].end(); + if (stages[stage_num].empty()) { + return stages[stage_num].end(); } int priority = sked_entry->priority; - SkedIt sked_it = sked[stage_num].begin(); - SkedIt sked_end = sked[stage_num].end(); + SkedIt sked_it = stages[stage_num].begin(); + SkedIt sked_end = stages[stage_num].end(); while (sked_it != sked_end) { if ((*sked_it)->priority > priority) @@ -154,12 +174,12 @@ ResourceSked::SkedIt ResourceSked::findIterByCommand(ScheduleEntry* sked_entry, int stage_num, int sked_cmd, int sked_cmd_idx) { - if (sked[stage_num].empty()) { - return sked[stage_num].end(); + if (stages[stage_num].empty()) { + return stages[stage_num].end(); } - SkedIt sked_it = sked[stage_num].begin(); - SkedIt sked_end = sked[stage_num].end(); + SkedIt sked_it = stages[stage_num].begin(); + SkedIt sked_end = stages[stage_num].end(); while (sked_it != sked_end) { if ((*sked_it)->cmd == sked_cmd && @@ -175,12 +195,16 @@ ResourceSked::findIterByCommand(ScheduleEntry* sked_entry, int stage_num, void ResourceSked::print() { - for (int i = 0; i < sked.size(); i++) { - cprintf("Stage %i\n====\n", i); - SkedIt sked_it = sked[i].begin(); - SkedIt sked_end = sked[i].end(); + for (int i = 0; i < stages.size(); i++) { + //ccprintf(cerr, "Stage %i\n====\n", i); + SkedIt sked_it = stages[i].begin(); + SkedIt sked_end = stages[i].end(); while (sked_it != sked_end) { - cprintf("\t res:%i cmd:%i idx:%i\n", (*sked_it)->resNum, (*sked_it)->cmd, (*sked_it)->idx); + DPRINTF(SkedCache, "\t stage:%i res:%i cmd:%i idx:%i\n", + (*sked_it)->stageNum, + (*sked_it)->resNum, + (*sked_it)->cmd, + (*sked_it)->idx); sked_it++; } } diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 8b4dd4402..e2e1c177a 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -260,7 +260,7 @@ CacheUnit::findRequest(DynInstPtr inst) if (cache_req && cache_req->getInst() == inst && - cache_req->instIdx == inst->resSched.top()->idx) { + cache_req->instIdx == inst->curSkedEntry->idx) { return cache_req; } map_it++; @@ -296,7 +296,7 @@ ResReqPtr CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { - ScheduleEntry* sched_entry = inst->resSched.top(); + ScheduleEntry* sched_entry = *inst->curSkedEntry; if (!inst->validMemAddr()) { panic("Mem. Addr. must be set before requesting cache access\n"); @@ -346,7 +346,7 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, return new CacheRequest(this, inst, stage_num, id, slot_num, sched_entry->cmd, 0, pkt_cmd, 0/*flags*/, this->cpu->readCpuId(), - inst->resSched.top()->idx); + inst->curSkedEntry->idx); } void @@ -357,17 +357,17 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) // Check to see if this instruction is requesting the same command // or a different one - if (cache_req->cmd != inst->resSched.top()->cmd && - cache_req->instIdx == inst->resSched.top()->idx) { + if (cache_req->cmd != inst->curSkedEntry->cmd && + cache_req->instIdx == inst->curSkedEntry->idx) { // If different, then update command in the request - cache_req->cmd = inst->resSched.top()->cmd; + cache_req->cmd = inst->curSkedEntry->cmd; DPRINTF(InOrderCachePort, "[tid:%i]: [sn:%i]: Updating the command for this " "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; - } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead && - inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) { + } else if (inst->curSkedEntry->idx != CacheUnit::InitSecondSplitRead && + inst->curSkedEntry->idx != CacheUnit::InitSecondSplitWrite) { // If same command, just check to see if memory access was completed // but dont try to re-execute DPRINTF(InOrderCachePort, @@ -487,6 +487,8 @@ CacheUnit::read(DynInstPtr inst, Addr addr, inst->splitMemData = new uint8_t[size]; if (!inst->splitInstSked) { + assert(0 && "Split Requests Not Supported for Now..."); + // Schedule Split Read/Complete for Instruction // ============================== int stage_num = cache_req->getStageNum(); @@ -590,6 +592,8 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size, inst->splitInst = true; if (!inst->splitInstSked) { + assert(0 && "Split Requests Not Supported for Now..."); + // Schedule Split Read/Complete for Instruction // ============================== int stage_num = cache_req->getStageNum(); diff --git a/src/cpu/inorder/resources/decode_unit.cc b/src/cpu/inorder/resources/decode_unit.cc index c2f7ae22d..42857c783 100644 --- a/src/cpu/inorder/resources/decode_unit.cc +++ b/src/cpu/inorder/resources/decode_unit.cc @@ -57,13 +57,16 @@ DecodeUnit::execute(int slot_num) { case DecodeInst: { - bool done_sked = ThePipeline::createBackEndSchedule(inst); + inst->setBackSked(cpu->createBackEndSked(inst)); - if (done_sked) { + if (inst->backSked != NULL) { DPRINTF(InOrderDecode, "[tid:%i]: Setting Destination Register(s) for [sn:%i].\n", tid, inst->seqNum); regDepMap[tid]->insert(inst); + + //inst->printSked(); + decode_req->done(); } else { DPRINTF(Resource, diff --git a/src/cpu/inorder/resources/fetch_unit.cc b/src/cpu/inorder/resources/fetch_unit.cc index 0e9866708..0a5483aff 100644 --- a/src/cpu/inorder/resources/fetch_unit.cc +++ b/src/cpu/inorder/resources/fetch_unit.cc @@ -118,7 +118,7 @@ ResReqPtr FetchUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { - ScheduleEntry* sched_entry = inst->resSched.top(); + ScheduleEntry* sched_entry = *inst->curSkedEntry; if (!inst->validMemAddr()) { panic("Mem. Addr. must be set before requesting cache access\n"); @@ -144,7 +144,7 @@ FetchUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, return new CacheRequest(this, inst, stage_num, id, slot_num, sched_entry->cmd, 0, pkt_cmd, 0/*flags*/, this->cpu->readCpuId(), - inst->resSched.top()->idx); + inst->curSkedEntry->idx); } void @@ -447,7 +447,7 @@ FetchUnit::processCacheCompletion(PacketPtr pkt) short asid = cpu->asid[tid]; assert(!cache_req->isSquashed()); - assert(inst->resSched.top()->cmd == CompleteFetch); + assert(inst->curSkedEntry->cmd == CompleteFetch); DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i]: Processing fetch access for block %#x\n", diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc index 8ccdaa36a..362641b54 100644 --- a/src/cpu/inorder/resources/graduation_unit.cc +++ b/src/cpu/inorder/resources/graduation_unit.cc @@ -52,14 +52,15 @@ GraduationUnit::execute(int slot_num) ResourceRequest* grad_req = reqMap[slot_num]; DynInstPtr inst = reqMap[slot_num]->inst; ThreadID tid = inst->readTid(); - int stage_num = inst->resSched.top()->stageNum; + int stage_num = inst->curSkedEntry->stageNum; switch (grad_req->cmd) { case GraduateInst: { // Make sure this is the last thing on the resource schedule - assert(inst->resSched.size() == 1); + // @todo: replace this check + // assert(inst->resSched.size() == 1); // Handle Any Faults Before Graduating Instruction if (inst->fault != NoFault) { diff --git a/src/cpu/inorder/resources/mult_div_unit.cc b/src/cpu/inorder/resources/mult_div_unit.cc index 5aa0b0aa1..042fb590b 100644 --- a/src/cpu/inorder/resources/mult_div_unit.cc +++ b/src/cpu/inorder/resources/mult_div_unit.cc @@ -110,9 +110,9 @@ MultDivUnit::requestAgain(DynInstPtr inst, bool &service_request) // Check to see if this instruction is requesting the same command // or a different one - if (mult_div_req->cmd != inst->resSched.top()->cmd) { + if (mult_div_req->cmd != inst->curSkedEntry->cmd) { // If different, then update command in the request - mult_div_req->cmd = inst->resSched.top()->cmd; + mult_div_req->cmd = inst->curSkedEntry->cmd; DPRINTF(InOrderMDU, "[tid:%i]: [sn:%i]: Updating the command for this " "instruction\n", inst->readTid(), inst->seqNum); @@ -132,7 +132,7 @@ MultDivUnit::getSlot(DynInstPtr inst) // If we have this instruction's request already then return if (slot_num != -1 && - inst->resSched.top()->cmd == reqMap[slot_num]->cmd) + inst->curSkedEntry->cmd == reqMap[slot_num]->cmd) return slot_num; unsigned repeat_rate = 0; diff --git a/src/cpu/inorder/resources/tlb_unit.cc b/src/cpu/inorder/resources/tlb_unit.cc index 59840d15b..2e19ea928 100644 --- a/src/cpu/inorder/resources/tlb_unit.cc +++ b/src/cpu/inorder/resources/tlb_unit.cc @@ -217,7 +217,7 @@ TLBUnitEvent::process() // Effectively NOP the instruction but still allow it // to commit //while (!inst->resSched.empty() && - // inst->resSched.top()->stageNum != ThePipeline::NumStages - 1) { + // inst->curSkedEntry->stageNum != ThePipeline::NumStages - 1) { //inst->resSched.pop(); //} } diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index 743011573..538b20246 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -93,7 +93,7 @@ UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { return new UseDefRequest(this, inst, stage_num, id, slot_num, cmd, - inst->resSched.top()->idx); + inst->curSkedEntry->idx); } @@ -110,8 +110,8 @@ UseDefUnit::findRequest(DynInstPtr inst) if (ud_req && ud_req->getInst() == inst && - ud_req->cmd == inst->resSched.top()->cmd && - ud_req->useDefIdx == inst->resSched.top()->idx) { + ud_req->cmd == inst->curSkedEntry->cmd && + ud_req->useDefIdx == inst->curSkedEntry->idx) { return ud_req; } map_it++; -- cgit v1.2.3 From 470aa289da4ca2d6564db76b30355a527c65347d Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 12 Feb 2011 10:14:48 -0500 Subject: inorder: clean up the old way of inst. scheduling remove remnants of old way of instruction scheduling which dynamically allocated a new resource schedule for every instruction --- src/cpu/inorder/SConscript | 1 - src/cpu/inorder/cpu.cc | 12 +-- src/cpu/inorder/inorder_dyn_inst.cc | 26 ----- src/cpu/inorder/inorder_dyn_inst.hh | 19 ---- src/cpu/inorder/pipeline_traits.cc | 171 ------------------------------- src/cpu/inorder/pipeline_traits.hh | 17 --- src/cpu/inorder/resources/cache_unit.cc | 42 +++++--- src/cpu/inorder/resources/inst_buffer.cc | 13 ++- 8 files changed, 35 insertions(+), 266 deletions(-) delete mode 100644 src/cpu/inorder/pipeline_traits.cc (limited to 'src') diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript index 7427df226..b9c526763 100644 --- a/src/cpu/inorder/SConscript +++ b/src/cpu/inorder/SConscript @@ -63,7 +63,6 @@ if 'InOrderCPU' in env['CPU_MODELS']: 'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource', 'ThreadModel', 'AddrDep']) - Source('pipeline_traits.cc') Source('inorder_dyn_inst.cc') Source('inorder_cpu_builder.cc') Source('inorder_trace.cc') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 1d678b9c5..3a705258d 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -1415,14 +1415,6 @@ InOrderCPU::cleanUpRemovedInsts() DynInstPtr inst = *removeList.front(); ThreadID tid = inst->threadNumber; - // Make Sure Resource Schedule Is Emptied Out - ThePipeline::ResSchedule *inst_sched = &inst->resSched; - while (!inst_sched->empty()) { - ScheduleEntry* sch_entry = inst_sched->top(); - inst_sched->pop(); - delete sch_entry; - } - // Remove From Register Dependency Map, If Necessary archRegDepMap[(*removeList.front())->threadNumber]. remove((*removeList.front())); @@ -1430,8 +1422,8 @@ InOrderCPU::cleanUpRemovedInsts() // Clear if Non-Speculative if (inst->staticInst && - inst->seqNum == nonSpecSeqNum[tid] && - nonSpecInstActive[tid] == true) { + inst->seqNum == nonSpecSeqNum[tid] && + nonSpecInstActive[tid] == true) { nonSpecInstActive[tid] = false; } diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 6f3e822fe..e9deb7625 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -125,7 +125,6 @@ InOrderDynInst::initVars() readyRegs = 0; nextStage = 0; - nextInstStageNum = 0; for(int i = 0; i < MaxInstDestRegs; i++) instResult[i].val.integer = 0; @@ -208,8 +207,6 @@ InOrderDynInst::~InOrderDynInst() --instcount; - deleteStages(); - DPRINTF(InOrderDynInst, "DynInst: [tid:%i] [sn:%lli] Instruction destroyed" " (active insts: %i)\n", threadNumber, seqNum, instcount); } @@ -284,29 +281,6 @@ InOrderDynInst::completeAcc(Packet *pkt) return this->fault; } -InstStage *InOrderDynInst::addStage() -{ - this->currentInstStage = new InstStage(this, nextInstStageNum++); - instStageList.push_back( this->currentInstStage ); - return this->currentInstStage; -} - -InstStage *InOrderDynInst::addStage(int stage_num) -{ - nextInstStageNum = stage_num; - return InOrderDynInst::addStage(); -} - -void InOrderDynInst::deleteStages() { - std::list::iterator list_it = instStageList.begin(); - std::list::iterator list_end = instStageList.end(); - - while(list_it != list_end) { - delete *list_it; - list_it++; - } -} - Fault InOrderDynInst::memAccess() { diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 6acd0f7f4..0e6be3da2 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -210,9 +210,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Data used for a store for operation. */ uint64_t storeData; - /** The resource schedule for this inst */ - ThePipeline::ResSchedule resSched; - /** List of active resource requests for this instruction */ std::list reqList; @@ -304,11 +301,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted int nextStage; - /* vars to keep track of InstStage's - used for resource sched defn */ - int nextInstStageNum; - ThePipeline::InstStage *currentInstStage; - std::list instStageList; - private: /** Function to initialize variables in the constructors. */ void initVars(); @@ -445,20 +437,9 @@ class InOrderDynInst : public FastAlloc, public RefCounted backSked_end = backSked->end(); } - void setNextStage(int stage_num) { nextStage = stage_num; } int getNextStage() { return nextStage; } - ThePipeline::InstStage *addStage(); - ThePipeline::InstStage *addStage(int stage); - ThePipeline::InstStage *currentStage() { return currentInstStage; } - void deleteStages(); - - /** Add A Entry To Reource Schedule */ - void addToSched(ScheduleEntry* sched_entry) - { resSched.push(sched_entry); } - - /** Print Resource Schedule */ void printSked() { diff --git a/src/cpu/inorder/pipeline_traits.cc b/src/cpu/inorder/pipeline_traits.cc deleted file mode 100644 index a6fad68f7..000000000 --- a/src/cpu/inorder/pipeline_traits.cc +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2007 MIPS Technologies, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Korey Sewell - * - */ - -#include "cpu/inorder/pipeline_traits.hh" -#include "cpu/inorder/inorder_dyn_inst.hh" -#include "cpu/inorder/resources/resource_list.hh" - -using namespace std; - -namespace ThePipeline { - -//@TODO: create my own Instruction Schedule Class -//that operates as a Priority QUEUE -int getNextPriority(DynInstPtr &inst, int stage_num) -{ - int cur_pri = 20; - - /* - std::priority_queue, - entryCompare>::iterator sked_it = inst->resSched.begin(); - - std::priority_queue, - entryCompare>::iterator sked_end = inst->resSched.end(); - - while (sked_it != sked_end) { - - if (sked_it.top()->stageNum == stage_num) { - cur_pri = sked_it.top()->priority; - } - - sked_it++; - } - */ - - return cur_pri; -} - -void createFrontEndSchedule(DynInstPtr &inst) -{ - InstStage *F = inst->addStage(); - InstStage *D = inst->addStage(); - - // FETCH - F->needs(FetchSeq, FetchSeqUnit::AssignNextPC); - F->needs(ICache, FetchUnit::InitiateFetch); - - // DECODE - D->needs(ICache, FetchUnit::CompleteFetch); - D->needs(Decode, DecodeUnit::DecodeInst); - D->needs(BPred, BranchPredictor::PredictBranch); - D->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); - - inst->resSched.init(); -} - -bool createBackEndSchedule(DynInstPtr &inst) -{ - if (!inst->staticInst) { - return false; - } - - InstStage *X = inst->addStage(); - InstStage *M = inst->addStage(); - InstStage *W = inst->addStage(); - - // EXECUTE - for (int idx=0; idx < inst->numSrcRegs(); idx++) { - if (!idx || !inst->isStore()) { - X->needs(RegManager, UseDefUnit::ReadSrcReg, idx); - } - } - - if ( inst->isNonSpeculative() ) { - // skip execution of non speculative insts until later - } else if ( inst->isMemRef() ) { - if ( inst->isLoad() ) { - X->needs(AGEN, AGENUnit::GenerateAddr); - } - } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - X->needs(MDU, MultDivUnit::StartMultDiv); - } else { - X->needs(ExecUnit, ExecutionUnit::ExecuteInst); - } - - if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - X->needs(MDU, MultDivUnit::EndMultDiv); - } - - // MEMORY - if ( inst->isLoad() ) { - M->needs(DCache, CacheUnit::InitiateReadData); - } else if ( inst->isStore() ) { - if ( inst->numSrcRegs() >= 2 ) { - M->needs(RegManager, UseDefUnit::ReadSrcReg, 1); - } - M->needs(AGEN, AGENUnit::GenerateAddr); - M->needs(DCache, CacheUnit::InitiateWriteData); - } - - - // WRITEBACK - if ( inst->isLoad() ) { - W->needs(DCache, CacheUnit::CompleteReadData); - } else if ( inst->isStore() ) { - W->needs(DCache, CacheUnit::CompleteWriteData); - } - - if ( inst->isNonSpeculative() ) { - if ( inst->isMemRef() ) fatal("Non-Speculative Memory Instruction"); - W->needs(ExecUnit, ExecutionUnit::ExecuteInst); - } - - for (int idx=0; idx < inst->numDestRegs(); idx++) { - W->needs(RegManager, UseDefUnit::WriteDestReg, idx); - } - - W->needs(Grad, GraduationUnit::GraduateInst); - - return true; -} - -InstStage::InstStage(DynInstPtr inst, int stage_num) -{ - stageNum = stage_num; - nextTaskPriority = 0; - instSched = &inst->resSched; -} - -void -InstStage::needs(int unit, int request) { - instSched->push( new ScheduleEntry( - stageNum, nextTaskPriority++, unit, request - )); -} - -void -InstStage::needs(int unit, int request, int param) { - instSched->push( new ScheduleEntry( - stageNum, nextTaskPriority++, unit, request, param - )); -} - -}; diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index 2c4e44339..75f01adb1 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -78,23 +78,6 @@ namespace ThePipeline { ////////////////////////// typedef ResourceSked ResSchedule; typedef ResourceSked* RSkedPtr; - - void createFrontEndSchedule(DynInstPtr &inst); - bool createBackEndSchedule(DynInstPtr &inst); - int getNextPriority(DynInstPtr &inst, int stage_num); - - class InstStage { - private: - int nextTaskPriority; - int stageNum; - ResSchedule *instSched; - - public: - InstStage(DynInstPtr inst, int stage_num); - - void needs(int unit, int request); - void needs(int unit, int request, int param); - }; }; diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index e2e1c177a..8cd105493 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -492,11 +492,15 @@ CacheUnit::read(DynInstPtr inst, Addr addr, // Schedule Split Read/Complete for Instruction // ============================== int stage_num = cache_req->getStageNum(); - - int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + RSkedPtr inst_sked = (stage_num >= ThePipeline::BackEndStartStage) ? + inst->backSked : inst->frontSked; + + // this is just an arbitrarily high priority to ensure that this + // gets pushed to the back of the list + int stage_pri = 20; int isplit_cmd = CacheUnit::InitSecondSplitRead; - inst->resSched.push(new + inst_sked->push(new ScheduleEntry(stage_num, stage_pri, cpu->resPool->getResIdx(DCache), @@ -504,7 +508,7 @@ CacheUnit::read(DynInstPtr inst, Addr addr, 1)); int csplit_cmd = CacheUnit::CompleteSecondSplitRead; - inst->resSched.push(new + inst_sked->push(new ScheduleEntry(stage_num + 1, 1/*stage_pri*/, cpu->resPool->getResIdx(DCache), @@ -597,24 +601,28 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size, // Schedule Split Read/Complete for Instruction // ============================== int stage_num = cache_req->getStageNum(); + RSkedPtr inst_sked = (stage_num >= ThePipeline::BackEndStartStage) ? + inst->backSked : inst->frontSked; - int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + // this is just an arbitrarily high priority to ensure that this + // gets pushed to the back of the list + int stage_pri = 20; int isplit_cmd = CacheUnit::InitSecondSplitWrite; - inst->resSched.push(new - ScheduleEntry(stage_num, - stage_pri, - cpu->resPool->getResIdx(DCache), - isplit_cmd, - 1)); + inst_sked->push(new + ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + isplit_cmd, + 1)); int csplit_cmd = CacheUnit::CompleteSecondSplitWrite; - inst->resSched.push(new - ScheduleEntry(stage_num + 1, - 1/*stage_pri*/, - cpu->resPool->getResIdx(DCache), - csplit_cmd, - 1)); + inst_sked->push(new + ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + csplit_cmd, + 1)); inst->splitInstSked = true; } else { DPRINTF(InOrderCachePort, "[tid:%i] sn:%i] Retrying Split Read " diff --git a/src/cpu/inorder/resources/inst_buffer.cc b/src/cpu/inorder/resources/inst_buffer.cc index 18dd26a78..988fcd4da 100644 --- a/src/cpu/inorder/resources/inst_buffer.cc +++ b/src/cpu/inorder/resources/inst_buffer.cc @@ -99,19 +99,22 @@ InstBuffer::execute(int slot_idx) inst->seqNum, next_stage); // Add to schedule: Insert into buffer in next stage - int stage_pri = ThePipeline::getNextPriority(inst, - next_stage); + int stage_pri = 20; + RSkedPtr insert_sked = (stage_num >= ThePipeline::BackEndStartStage) ? + inst->backSked : inst->frontSked; - inst->resSched.push(new ScheduleEntry(next_stage, + insert_sked->push(new ScheduleEntry(next_stage, stage_pri, id, InstBuffer::InsertInst)); // Add to schedule: Remove from buffer in next next (bypass) // stage - stage_pri = ThePipeline::getNextPriority(inst, bypass_stage); + stage_pri = 20; + RSkedPtr bypass_sked = (stage_num >= ThePipeline::BackEndStartStage) ? + inst->backSked : inst->frontSked; - inst->resSched.push(new ScheduleEntry(bypass_stage, + bypass_sked->push(new ScheduleEntry(bypass_stage, stage_pri, id, InstBuffer::RemoveInst)); -- cgit v1.2.3 From 0cede15d6c5213d83e4cd143014321b2ee7ec5eb Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Sat, 12 Feb 2011 11:41:20 -0600 Subject: Ruby: Reorder Cache Lookup in Protocol Files The patch changes the order in which L1 dcache and icache are looked up when a request comes in. Earlier, if a request came in for instruction fetch, the dcache was looked up before the icache, to correctly handle self-modifying code. But, in the common case, dcache is going to report a miss and the subsequent icache lookup is going to report a hit. Given the invariant - caches under the same controller keep track of disjoint sets of cache blocks, we can move the icache lookup before the dcache lookup. In case of a hit in the icache, using our invariant, we know that the dcache would have reported a miss. In case of a miss in the icache, we know that icache would have missed even if the dcache was looked up before looking up the icache. Effectively, we are doing the same thing as before, though in the common case, we expect reduction in the number of lookups. This was empirically confirmed for MOESI hammer. The ratio lookups to access requests is now about 1.1 to 1. --- src/mem/protocol/MESI_CMP_directory-L1cache.sm | 35 ++++++++------- src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 32 +++++++------- src/mem/protocol/MOESI_CMP_token-L1cache.sm | 35 +++++++-------- src/mem/protocol/MOESI_hammer-cache.sm | 57 +++++++++++++------------ 4 files changed, 82 insertions(+), 77 deletions(-) (limited to 'src') diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm index 8744a7122..4442cee41 100644 --- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm @@ -287,20 +287,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 asks the L2 for it. trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, L1_TBEs[in_msg.LineAddress]); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, @@ -313,21 +314,23 @@ machine(L1Cache, "MSI Directory L1 Cache CMP") } } } else { - // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, L1_TBEs[in_msg.LineAddress]); - } + // *** DATA ACCESS *** Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 ask the L2 for it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, L1_TBEs[in_msg.LineAddress]); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, L1_TBEs[in_msg.LineAddress]); + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index 4082f23c9..7f0ab62a8 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -338,14 +338,6 @@ machine(L1Cache, "Directory protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - // Check to see if it is in the OTHER L1 - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry, - TBEs[in_msg.LineAddress]); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 asks the L2 for it. @@ -353,6 +345,14 @@ machine(L1Cache, "Directory protocol") in_msg.LineAddress, L1Icache_entry, TBEs[in_msg.LineAddress]); } else { + + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + // Check to see if it is in the OTHER L1 + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry, + TBEs[in_msg.LineAddress]); + } if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), @@ -369,14 +369,6 @@ machine(L1Cache, "Directory protocol") } else { // *** DATA ACCESS *** - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - // Check to see if it is in the OTHER L1 - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, TBEs[in_msg.LineAddress]); - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 ask the L2 for it @@ -384,6 +376,14 @@ machine(L1Cache, "Directory protocol") in_msg.LineAddress, L1Dcache_entry, TBEs[in_msg.LineAddress]); } else { + + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + // Check to see if it is in the OTHER L1 + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, TBEs[in_msg.LineAddress]); + } if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index 00e9404c9..226f21374 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -647,20 +647,21 @@ machine(L1Cache, "Token protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Dcache_entry, tbe); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Dcache_entry, tbe); + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 trigger(mandatory_request_type_to_event(in_msg.Type), @@ -676,21 +677,21 @@ machine(L1Cache, "Token protocol") } else { // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, tbe); - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, tbe); + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 trigger(mandatory_request_type_to_event(in_msg.Type), diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 78bc9e3e7..ab2a6acf4 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -377,26 +377,26 @@ machine(L1Cache, "AMD Hammer-like protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { - trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe); - } else { - trigger(Event:L2_Replacement, - L2cacheMemory.cacheProbe(in_msg.LineAddress), - getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), - TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); - } - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, tbe); } else { + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe); + } else { + trigger(Event:L2_Replacement, + L2cacheMemory.cacheProbe(in_msg.LineAddress), + getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), + TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); + } + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 @@ -430,26 +430,27 @@ machine(L1Cache, "AMD Hammer-like protocol") } else { // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { - trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe); - } else { - trigger(Event:L2_Replacement, - L2cacheMemory.cacheProbe(in_msg.LineAddress), - getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), - TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); - } - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe); + } else { + trigger(Event:L2_Replacement, + L2cacheMemory.cacheProbe(in_msg.LineAddress), + getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), + TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); + } + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress); -- cgit v1.2.3 From 7c763b34c9fc69a4e13a8f74df0f981fdf71f221 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Sun, 13 Feb 2011 16:51:15 -0500 Subject: O3: Fix GCC 4.2.4 complaint --- src/cpu/o3/inst_queue_impl.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index d6da4b818..aa21a0edc 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -749,7 +749,7 @@ InstructionQueue::scheduleReadyInsts() DynInstPtr deferred_mem_inst; int total_deferred_mem_issued = 0; while (total_deferred_mem_issued < totalWidth && - (deferred_mem_inst = getDeferredMemInstToExecute()) != NULL) { + (deferred_mem_inst = getDeferredMemInstToExecute()) != 0) { issueToExecuteQueue->access(0)->size++; instsToExecute.push_back(deferred_mem_inst); total_deferred_mem_issued++; -- cgit v1.2.3 From f036fd97481081afce7f757231ab69ba212f7f2a Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:40:07 -0800 Subject: O3: Fetch from the microcode ROM when needed. --- src/cpu/o3/fetch_impl.hh | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 2e4e4819e..d2cde496e 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -1070,6 +1070,8 @@ DefaultFetch::fetch(bool &status_change) Addr pcOffset = fetchOffset[tid]; Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + bool inRom = isRomMicroPC(thisPC.microPC()); + // If returning from the delay of a cache miss, then update the status // to running, otherwise do the cache access. Possibly move this up // to tick() function. @@ -1083,7 +1085,7 @@ DefaultFetch::fetch(bool &status_change) Addr block_PC = icacheBlockAlignPC(fetchAddr); // Unless buffer already got the block, fetch it from icache. - if (!cacheDataValid[tid] || block_PC != cacheDataPC[tid]) { + if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid]) && !inRom) { DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " "instruction, starting at PC %s.\n", tid, thisPC); @@ -1155,7 +1157,7 @@ DefaultFetch::fetch(bool &status_change) !predictedBranch) { // If we need to process more memory, do it now. - if (!curMacroop && !predecoder.extMachInstReady()) { + if (!(curMacroop || inRom) && !predecoder.extMachInstReady()) { if (ISA_HAS_DELAY_SLOT && pcOffset == 0) { // Walk past any annulled delay slot instructions. Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask; @@ -1181,7 +1183,7 @@ DefaultFetch::fetch(bool &status_change) // Extract as many instructions and/or microops as we can from // the memory we've processed so far. do { - if (!curMacroop) { + if (!(curMacroop || inRom)) { if (predecoder.extMachInstReady()) { ExtMachInst extMachInst; @@ -1202,8 +1204,13 @@ DefaultFetch::fetch(bool &status_change) break; } } - if (curMacroop) { - staticInst = curMacroop->fetchMicroop(thisPC.microPC()); + if (curMacroop || inRom) { + if (inRom) { + staticInst = cpu->microcodeRom.fetchMicroop( + thisPC.microPC(), curMacroop); + } else { + staticInst = curMacroop->fetchMicroop(thisPC.microPC()); + } if (staticInst->isLastMicroop()) { curMacroop = NULL; pcOffset = 0; -- cgit v1.2.3 From 5ee94f4a3dadda357c6d28b60c19b3638146f9a7 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:41:10 -0800 Subject: X86: Only reset npc to reflect instruction length once. When redirecting fetch to handle branches, the npc of the current pc state needs to be left alone. This change makes the pc state record whether or not the npc already reflects a real value by making it keep track of the current instruction size, or if no size has been set. --- src/arch/x86/predecoder.hh | 6 +++++- src/arch/x86/types.hh | 50 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh index c06ec18bc..5c67e28e1 100644 --- a/src/arch/x86/predecoder.hh +++ b/src/arch/x86/predecoder.hh @@ -225,7 +225,11 @@ namespace X86ISA { assert(emiIsReady); emiIsReady = false; - nextPC.npc(nextPC.pc() + getInstSize()); + if (!nextPC.size()) { + Addr size = getInstSize(); + nextPC.size(size); + nextPC.npc(nextPC.pc() + size); + } return emi; } }; diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index 5a208446a..d78af1b81 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -222,7 +222,55 @@ namespace X86ISA return true; } - typedef GenericISA::UPCState PCState; + class PCState : public GenericISA::UPCState + { + protected: + typedef GenericISA::UPCState Base; + + uint8_t _size; + + public: + void + set(Addr val) + { + Base::set(val); + _size = 0; + } + + PCState() {} + PCState(Addr val) { set(val); } + + uint8_t size() const { return _size; } + void size(uint8_t newSize) { _size = newSize; } + + void + advance() + { + Base::advance(); + _size = 0; + } + + void + uEnd() + { + Base::uEnd(); + _size = 0; + } + + void + serialize(std::ostream &os) + { + Base::serialize(os); + SERIALIZE_SCALAR(_size); + } + + void + unserialize(Checkpoint *cp, const std::string §ion) + { + Base::unserialize(cp, section); + UNSERIALIZE_SCALAR(_size); + } + }; struct CoreSpecific { int core_type; -- cgit v1.2.3 From 1aa9698fa00e8ffce9b8d3c90b3bd76c3c9e950e Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:42:05 -0800 Subject: X86: Define fault objects to carry debug messages. These faults can panic/warn/warn_once, etc., instead of instructions doing that themselves directly. That way, instructions can be speculatively executed, and only if they're actually going to commit will their fault be invoked and the panic, etc., happen. --- src/arch/generic/debugfaults.hh | 111 ++++++++++++++++++++++++++++++++++++ src/arch/x86/isa/includes.isa | 1 + src/arch/x86/isa/microops/debug.isa | 67 ++++++++++------------ 3 files changed, 142 insertions(+), 37 deletions(-) create mode 100644 src/arch/generic/debugfaults.hh (limited to 'src') diff --git a/src/arch/generic/debugfaults.hh b/src/arch/generic/debugfaults.hh new file mode 100644 index 000000000..acffadc34 --- /dev/null +++ b/src/arch/generic/debugfaults.hh @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2010 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_GENERIC_DEBUGFAULTS_HH__ +#define __ARCH_GENERIC_DEBUGFAULTS_HH__ + +#include "base/misc.hh" +#include "sim/faults.hh" + +#include + +namespace GenericISA +{ +class M5DebugFault : public FaultBase +{ + public: + enum DebugFunc + { + PanicFunc, + FatalFunc, + WarnFunc, + WarnOnceFunc + }; + + protected: + std::string message; + DebugFunc func; + + public: + M5DebugFault(DebugFunc _func, std::string _message) : + message(_message), func(_func) + {} + + FaultName + name() const + { + switch (func) { + case PanicFunc: + return "panic fault"; + case FatalFunc: + return "fatal fault"; + case WarnFunc: + return "warn fault"; + case WarnOnceFunc: + return "warn_once fault"; + default: + panic("unrecognized debug function number\n"); + } + } + + void + invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr) + { + switch (func) { + case PanicFunc: + panic(message); + break; + case FatalFunc: + fatal(message); + break; + case WarnFunc: + warn(message); + break; + case WarnOnceFunc: + warn_once(message); + break; + default: + panic("unrecognized debug function number\n"); + } + } +}; +} // namespace GenericISA + +#endif // __ARCH_GENERIC_DEBUGFAULTS_HH__ diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa index 58b1fbc62..32708043e 100644 --- a/src/arch/x86/isa/includes.isa +++ b/src/arch/x86/isa/includes.isa @@ -53,6 +53,7 @@ output header {{ #include #include +#include "arch/generic/debugfaults.hh" #include "arch/x86/emulenv.hh" #include "arch/x86/insts/macroop.hh" #include "arch/x86/insts/microfpop.hh" diff --git a/src/arch/x86/isa/microops/debug.isa b/src/arch/x86/isa/microops/debug.isa index 4b2ecdd5a..c2735565d 100644 --- a/src/arch/x86/isa/microops/debug.isa +++ b/src/arch/x86/isa/microops/debug.isa @@ -45,16 +45,29 @@ output header {{ class MicroDebugBase : public X86ISA::X86MicroopBase { protected: + typedef GenericISA::M5DebugFault::DebugFunc DebugFunc; + DebugFunc func; std::string message; uint8_t cc; public: - MicroDebugBase(ExtMachInst _machInst, const char * mnem, + MicroDebugBase(ExtMachInst machInst, const char * mnem, const char * instMnem, uint64_t setFlags, - std::string _message, uint8_t _cc); + DebugFunc _func, std::string _message, uint8_t _cc) : + X86MicroopBase(machInst, mnem, instMnem, setFlags, No_OpClass), + func(_func), message(_message), cc(_cc) + {} - std::string generateDisassembly(Addr pc, - const SymbolTable *symtab) const; + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, instMnem, mnemonic); + response << "\"" << message << "\""; + + return response.str(); + } }; }}; @@ -70,53 +83,31 @@ def template MicroDebugDeclare {{ }}; def template MicroDebugExecute {{ - Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Fault + %(class_name)s::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { %(op_decl)s %(op_rd)s if (%(cond_test)s) { - %(func)s("%s\n", message); + return new GenericISA::M5DebugFault(func, message); + } else { + return NoFault; } - return NoFault; } }}; -output decoder {{ - inline MicroDebugBase::MicroDebugBase( - ExtMachInst machInst, const char * mnem, const char * instMnem, - uint64_t setFlags, std::string _message, uint8_t _cc) : - X86MicroopBase(machInst, mnem, instMnem, - setFlags, No_OpClass), - message(_message), cc(_cc) - { - } -}}; - def template MicroDebugConstructor {{ inline %(class_name)s::%(class_name)s( ExtMachInst machInst, const char * instMnem, uint64_t setFlags, std::string _message, uint8_t _cc) : %(base_class)s(machInst, "%(func)s", instMnem, - setFlags, _message, _cc) + setFlags, %(func_num)s, _message, _cc) { %(constructor)s; } }}; -output decoder {{ - std::string MicroDebugBase::generateDisassembly(Addr pc, - const SymbolTable *symtab) const - { - std::stringstream response; - - printMnemonic(response, instMnem, mnemonic); - response << "\"" << message << "\""; - - return response.str(); - } -}}; - let {{ class MicroDebug(X86Microop): def __init__(self, message, flags=None): @@ -142,13 +133,14 @@ let {{ header_output = "" decoder_output = "" - def buildDebugMicro(func): + def buildDebugMicro(func, func_num): global exec_output, header_output, decoder_output iop = InstObjParams(func, "Micro%sFlags" % func.capitalize(), "MicroDebugBase", {"code": "", "func": func, + "func_num": "GenericISA::M5DebugFault::%s" % func_num, "cond_test": "checkCondition(ccFlagBits, cc)"}) exec_output += MicroDebugExecute.subst(iop) header_output += MicroDebugDeclare.subst(iop) @@ -158,6 +150,7 @@ let {{ "MicroDebugBase", {"code": "", "func": func, + "func_num": "GenericISA::M5DebugFault::%s" % func_num, "cond_test": "true"}) exec_output += MicroDebugExecute.subst(iop) header_output += MicroDebugDeclare.subst(iop) @@ -169,8 +162,8 @@ let {{ global microopClasses microopClasses[func] = MicroDebugChild - buildDebugMicro("panic") - buildDebugMicro("fatal") - buildDebugMicro("warn") - buildDebugMicro("warn_once") + buildDebugMicro("panic", "PanicFunc") + buildDebugMicro("fatal", "FatalFunc") + buildDebugMicro("warn", "WarnFunc") + buildDebugMicro("warn_once", "WarnOnceFunc") }}; -- cgit v1.2.3 From 399e095510ff6bc469c45b1e5afa96567d757004 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:42:56 -0800 Subject: X86: On a bad microopc, return a microop that returns a fault that panics. This way a bad micropc will have to get all the way to commit before killing the simulation. This accounts for misspeculated branches. --- src/arch/x86/SConscript | 1 + src/arch/x86/insts/badmicroop.cc | 55 ++++++++++++++++++++++++++++++++++++++++ src/arch/x86/insts/badmicroop.hh | 52 +++++++++++++++++++++++++++++++++++++ src/arch/x86/insts/macroop.hh | 7 +++-- src/arch/x86/microcode_rom.hh | 7 +++-- 5 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 src/arch/x86/insts/badmicroop.cc create mode 100644 src/arch/x86/insts/badmicroop.hh (limited to 'src') diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript index 27de9da11..9cb774647 100644 --- a/src/arch/x86/SConscript +++ b/src/arch/x86/SConscript @@ -46,6 +46,7 @@ if env['TARGET_ISA'] == 'x86': Source('cpuid.cc') Source('emulenv.cc') Source('faults.cc') + Source('insts/badmicroop.cc') Source('insts/microfpop.cc') Source('insts/microldstop.cc') Source('insts/micromediaop.cc') diff --git a/src/arch/x86/insts/badmicroop.cc b/src/arch/x86/insts/badmicroop.cc new file mode 100644 index 000000000..ef493f250 --- /dev/null +++ b/src/arch/x86/insts/badmicroop.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/x86/insts/badmicroop.hh" +#include "arch/x86/isa_traits.hh" +#include "arch/x86/decoder.hh" + +namespace X86ISA +{ + +// This microop needs to be allocated on the heap even though it could +// theoretically be statically allocated. The reference counted pointer would +// try to delete the static memory when it was destructed. +const StaticInstPtr badMicroop = + new X86ISAInst::MicroPanic(NoopMachInst, "BAD", + StaticInst::IsMicroop | StaticInst::IsLastMicroop, + "Invalid microop!", 0); + +} // namespace X86ISA diff --git a/src/arch/x86/insts/badmicroop.hh b/src/arch/x86/insts/badmicroop.hh new file mode 100644 index 000000000..57fe242c4 --- /dev/null +++ b/src/arch/x86/insts/badmicroop.hh @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2011 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_X86_INSTS_BADMICROOP_HH__ +#define __ARCH_X86_INSTS_BADMICROOP_HH__ + +class StaticInstPtr; + +namespace X86ISA +{ + +extern const StaticInstPtr badMicroop; + +} // namespace X86ISA + +#endif //__ARCH_X86_INSTS_BADMICROOP_HH__ diff --git a/src/arch/x86/insts/macroop.hh b/src/arch/x86/insts/macroop.hh index fcf051a37..4f4176b77 100644 --- a/src/arch/x86/insts/macroop.hh +++ b/src/arch/x86/insts/macroop.hh @@ -41,6 +41,7 @@ #define __ARCH_X86_INSTS_MACROOP_HH__ #include "arch/x86/emulenv.hh" +#include "arch/x86/insts/badmicroop.hh" #include "arch/x86/types.hh" #include "arch/x86/insts/static_inst.hh" @@ -76,8 +77,10 @@ class MacroopBase : public X86StaticInst StaticInstPtr fetchMicroop(MicroPC microPC) const { - assert(microPC < numMicroops); - return microops[microPC]; + if (microPC >= numMicroops) + return badMicroop; + else + return microops[microPC]; } std::string diff --git a/src/arch/x86/microcode_rom.hh b/src/arch/x86/microcode_rom.hh index f8ad410ce..84c503bb9 100644 --- a/src/arch/x86/microcode_rom.hh +++ b/src/arch/x86/microcode_rom.hh @@ -32,6 +32,7 @@ #define __ARCH_X86_MICROCODE_ROM_HH__ #include "arch/x86/emulenv.hh" +#include "arch/x86/insts/badmicroop.hh" #include "cpu/static_inst.hh" namespace X86ISAInst @@ -60,8 +61,10 @@ namespace X86ISAInst fetchMicroop(MicroPC microPC, StaticInstPtr curMacroop) { microPC = normalMicroPC(microPC); - assert(microPC < numMicroops); - return genFuncs[microPC](curMacroop); + if (microPC >= numMicroops) + return X86ISA::badMicroop; + else + return genFuncs[microPC](curMacroop); } }; } -- cgit v1.2.3 From 4e1adf85f77edf761466af3568576d3f9134a14c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:44:24 -0800 Subject: X86: Don't read in dest regs if all bits are replaced. In x86, 32 and 64 bit writes to registers in which registers appear to be 32 or 64 bits wide overwrite all bits of the destination register. This change removes false dependencies in these cases where the previous value of a register doesn't need to be read to write a new value. New versions of most microops are created that have a "Big" suffix which simply overwrite their destination, and the right version to use is selected during microop allocation based on the selected data size. This does not change the performance of the O3 CPU model significantly, I assume because there are other false dependencies from the condition code bits in the flags register. --- src/arch/x86/isa/microops/ldstop.isa | 80 +++++++++-- src/arch/x86/isa/microops/limmop.isa | 27 ++-- src/arch/x86/isa/microops/regop.isa | 269 ++++++++++++++++++++++++----------- 3 files changed, 267 insertions(+), 109 deletions(-) (limited to 'src') diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 216a74c6c..cd649d644 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -301,6 +301,46 @@ let {{ "dataSize" : self.dataSize, "addressSize" : self.addressSize, "memFlags" : self.memFlags} return allocator + + class BigLdStOp(X86Microop): + def __init__(self, data, segment, addr, disp, + dataSize, addressSize, baseFlags, atCPL0, prefetch): + self.data = data + [self.scale, self.index, self.base] = addr + self.disp = disp + self.segment = segment + self.dataSize = dataSize + self.addressSize = addressSize + self.memFlags = baseFlags + if atCPL0: + self.memFlags += " | (CPL0FlagBit << FlagShift)" + if prefetch: + self.memFlags += " | Request::PREFETCH" + self.memFlags += " | (machInst.legacy.addr ? " + \ + "(AddrSizeFlagBit << FlagShift) : 0)" + + def getAllocator(self, microFlags): + allocString = ''' + (%(dataSize)s >= 4) ? + (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(scale)s, %(index)s, + %(base)s, %(disp)s, %(segment)s, %(data)s, + %(dataSize)s, %(addressSize)s, %(memFlags)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(scale)s, %(index)s, + %(base)s, %(disp)s, %(segment)s, %(data)s, + %(dataSize)s, %(addressSize)s, %(memFlags)s)) + ''' + allocator = allocString % { + "class_name" : self.className, + "flags" : self.microFlagsText(microFlags), + "scale" : self.scale, "index" : self.index, + "base" : self.base, + "disp" : self.disp, + "segment" : self.segment, "data" : self.data, + "dataSize" : self.dataSize, "addressSize" : self.addressSize, + "memFlags" : self.memFlags} + return allocator }}; let {{ @@ -315,7 +355,8 @@ let {{ EA = bits(SegBase + scale * Index + Base + disp, addressSize * 8 - 1, 0); ''' - def defineMicroLoadOp(mnemonic, code, mem_flags="0"): + def defineMicroLoadOp(mnemonic, code, bigCode='', + mem_flags="0", big=True): global header_output global decoder_output global exec_output @@ -324,16 +365,22 @@ let {{ name = mnemonic.lower() # Build up the all register version of this micro op - iop = InstObjParams(name, Name, 'X86ISA::LdStOp', - {"code": code, - "ea_code": calculateEA}) - header_output += MicroLdStOpDeclare.subst(iop) - decoder_output += MicroLdStOpConstructor.subst(iop) - exec_output += MicroLoadExecute.subst(iop) - exec_output += MicroLoadInitiateAcc.subst(iop) - exec_output += MicroLoadCompleteAcc.subst(iop) - - class LoadOp(LdStOp): + iops = [InstObjParams(name, Name, 'X86ISA::LdStOp', + {"code": code, "ea_code": calculateEA})] + if big: + iops += [InstObjParams(name, Name + "Big", 'X86ISA::LdStOp', + {"code": bigCode, "ea_code": calculateEA})] + for iop in iops: + header_output += MicroLdStOpDeclare.subst(iop) + decoder_output += MicroLdStOpConstructor.subst(iop) + exec_output += MicroLoadExecute.subst(iop) + exec_output += MicroLoadInitiateAcc.subst(iop) + exec_output += MicroLoadCompleteAcc.subst(iop) + + base = LdStOp + if big: + base = BigLdStOp + class LoadOp(base): def __init__(self, data, segment, addr, disp = 0, dataSize="env.dataSize", addressSize="env.addressSize", @@ -346,12 +393,15 @@ let {{ microopClasses[name] = LoadOp - defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);') + defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);', + 'Data = Mem & mask(dataSize * 8);') defineMicroLoadOp('Ldst', 'Data = merge(Data, Mem, dataSize);', - '(StoreCheck << FlagShift)') + 'Data = Mem & mask(dataSize * 8);', + '(StoreCheck << FlagShift)') defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);', - '(StoreCheck << FlagShift) | Request::LOCKED') - defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;') + 'Data = Mem & mask(dataSize * 8);', + '(StoreCheck << FlagShift) | Request::LOCKED') + defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;', big = False) def defineMicroStoreOp(mnemonic, code, \ postCode="", completeCode="", mem_flags="0"): diff --git a/src/arch/x86/isa/microops/limmop.isa b/src/arch/x86/isa/microops/limmop.isa index 2871d5a89..ac78b090d 100644 --- a/src/arch/x86/isa/microops/limmop.isa +++ b/src/arch/x86/isa/microops/limmop.isa @@ -114,8 +114,16 @@ let {{ self.dataSize = dataSize def getAllocator(self, microFlags): - allocator = '''new %(class_name)s(machInst, macrocodeBlock, - %(flags)s, %(dest)s, %(imm)s, %(dataSize)s)''' % { + allocString = ''' + (%(dataSize)s >= 4) ? + (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(dest)s, %(imm)s, + %(dataSize)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(dest)s, %(imm)s, + %(dataSize)s)) + ''' + allocator = allocString % { "class_name" : self.className, "mnemonic" : self.mnemonic, "flags" : self.microFlagsText(microFlags), @@ -152,12 +160,15 @@ let {{ let {{ # Build up the all register version of this micro op - iop = InstObjParams("limm", "Limm", 'X86MicroopBase', - {"code" : "DestReg = merge(DestReg, imm, dataSize);"}) - header_output += MicroLimmOpDeclare.subst(iop) - decoder_output += MicroLimmOpConstructor.subst(iop) - decoder_output += MicroLimmOpDisassembly.subst(iop) - exec_output += MicroLimmOpExecute.subst(iop) + iops = [InstObjParams("limm", "Limm", 'X86MicroopBase', + {"code" : "DestReg = merge(DestReg, imm, dataSize);"}), + InstObjParams("limm", "LimmBig", 'X86MicroopBase', + {"code" : "DestReg = imm & mask(dataSize * 8);"})] + for iop in iops: + header_output += MicroLimmOpDeclare.subst(iop) + decoder_output += MicroLimmOpConstructor.subst(iop) + decoder_output += MicroLimmOpDisassembly.subst(iop) + exec_output += MicroLimmOpExecute.subst(iop) iop = InstObjParams("lfpimm", "Lfpimm", 'X86MicroopBase', {"code" : "FpDestReg.uqw = imm"}) diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index ccfcb3a69..158bfdd59 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -224,8 +224,8 @@ let {{ MicroRegOpExecute) class RegOpMeta(type): - def buildCppClasses(self, name, Name, suffix, \ - code, flag_code, cond_check, else_code, cond_control_flag_init): + def buildCppClasses(self, name, Name, suffix, code, big_code, \ + flag_code, cond_check, else_code, cond_control_flag_init): # Globals to stick the output in global header_output @@ -235,11 +235,13 @@ let {{ # Stick all the code together so it can be searched at once allCode = "|".join((code, flag_code, cond_check, else_code, cond_control_flag_init)) + allBigCode = "|".join((big_code, flag_code, cond_check, else_code, + cond_control_flag_init)) # If op2 is used anywhere, make register and immediate versions # of this code. matcher = re.compile("(?s?)op2(?P\\.\\w+)?") - match = matcher.search(allCode) + match = matcher.search(allCode + allBigCode) if match: typeQual = "" if match.group("typeQual"): @@ -247,6 +249,7 @@ let {{ src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual) self.buildCppClasses(name, Name, suffix, matcher.sub(src2_name, code), + matcher.sub(src2_name, big_code), matcher.sub(src2_name, flag_code), matcher.sub(src2_name, cond_check), matcher.sub(src2_name, else_code), @@ -254,6 +257,7 @@ let {{ imm_name = "%simm8" % match.group("prefix") self.buildCppClasses(name + "i", Name, suffix + "Imm", matcher.sub(imm_name, code), + matcher.sub(imm_name, big_code), matcher.sub(imm_name, flag_code), matcher.sub(imm_name, cond_check), matcher.sub(imm_name, else_code), @@ -264,27 +268,32 @@ let {{ # a version without it and fix up this version to use it. if flag_code != "" or cond_check != "true": self.buildCppClasses(name, Name, suffix, - code, "", "true", else_code, "") + code, big_code, "", "true", else_code, "") suffix = "Flags" + suffix # If psrc1 or psrc2 is used, we need to actually insert code to # compute it. - matcher = re.compile("(?= 4) ? + (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(src1)s, %(op2)s, + %(dest)s, %(dataSize)s, %(ext)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(src1)s, %(op2)s, + %(dest)s, %(dataSize)s, %(ext)s)) + ''' + allocator = allocString % { + "class_name" : className, + "flags" : self.microFlagsText(microFlags), + "src1" : self.src1, "op2" : self.op2, + "dest" : self.dest, + "dataSize" : self.dataSize, + "ext" : self.ext} + return allocator + else: + className = self.className + if self.mnemonic == self.base_mnemonic + 'i': + className += "Imm" + allocator = '''new %(class_name)s(machInst, macrocodeBlock, + %(flags)s, %(src1)s, %(op2)s, %(dest)s, + %(dataSize)s, %(ext)s)''' % { + "class_name" : className, + "flags" : self.microFlagsText(microFlags), + "src1" : self.src1, "op2" : self.op2, + "dest" : self.dest, + "dataSize" : self.dataSize, + "ext" : self.ext} + return allocator class LogicRegOp(RegOp): abstract = True @@ -429,30 +472,43 @@ let {{ class Add(FlagRegOp): code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);' + big_code = 'DestReg = (psrc1 + op2) & mask(dataSize * 8);' class Or(LogicRegOp): code = 'DestReg = merge(DestReg, psrc1 | op2, dataSize);' + big_code = 'DestReg = (psrc1 | op2) & mask(dataSize * 8);' class Adc(FlagRegOp): code = ''' CCFlagBits flags = ccFlagBits; DestReg = merge(DestReg, psrc1 + op2 + flags.cf, dataSize); ''' + big_code = ''' + CCFlagBits flags = ccFlagBits; + DestReg = (psrc1 + op2 + flags.cf) & mask(dataSize * 8); + ''' class Sbb(SubRegOp): code = ''' CCFlagBits flags = ccFlagBits; DestReg = merge(DestReg, psrc1 - op2 - flags.cf, dataSize); ''' + big_code = ''' + CCFlagBits flags = ccFlagBits; + DestReg = (psrc1 - op2 - flags.cf) & mask(dataSize * 8); + ''' class And(LogicRegOp): code = 'DestReg = merge(DestReg, psrc1 & op2, dataSize)' + big_code = 'DestReg = (psrc1 & op2) & mask(dataSize * 8)' class Sub(SubRegOp): code = 'DestReg = merge(DestReg, psrc1 - op2, dataSize)' + big_code = 'DestReg = (psrc1 - op2) & mask(dataSize * 8)' class Xor(LogicRegOp): code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)' + big_code = 'DestReg = (psrc1 ^ op2) & mask(dataSize * 8)' class Mul1s(WrRegOp): code = ''' @@ -505,6 +561,7 @@ let {{ class Mulel(RdRegOp): code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);' + big_code = 'DestReg = ProdLow & mask(dataSize * 8);' class Muleh(RdRegOp): def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"): @@ -513,6 +570,7 @@ let {{ super(RdRegOp, self).__init__(dest, src1, \ "InstRegIndex(NUM_INTREGS)", flags, dataSize) code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);' + big_code = 'DestReg = ProdHi & mask(dataSize * 8);' # One or two bit divide class Div1(WrRegOp): @@ -540,7 +598,7 @@ let {{ # Step divide class Div2(RegOp): - code = ''' + divCode = ''' uint64_t dividend = Remainder; uint64_t divisor = Divisor; uint64_t quotient = Quotient; @@ -587,11 +645,13 @@ let {{ } } //Keep track of how many bits there are still to pull in. - DestReg = merge(DestReg, remaining, dataSize); + %s //Record the final results Remainder = remainder; Quotient = quotient; ''' + code = divCode % "DestReg = merge(DestReg, remaining, dataSize);" + big_code = divCode % "DestReg = remaining & mask(dataSize * 8);" flag_code = ''' if (remaining == 0) ccFlagBits = ccFlagBits | (ext & EZFBit); @@ -601,9 +661,11 @@ let {{ class Divq(RdRegOp): code = 'DestReg = merge(SrcReg1, Quotient, dataSize);' + big_code = 'DestReg = Quotient & mask(dataSize * 8);' class Divr(RdRegOp): code = 'DestReg = merge(SrcReg1, Remainder, dataSize);' + big_code = 'DestReg = Remainder & mask(dataSize * 8);' class Mov(CondRegOp): code = 'DestReg = merge(SrcReg1, op2, dataSize)' @@ -616,6 +678,10 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); DestReg = merge(DestReg, psrc1 << shiftAmt, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + DestReg = (psrc1 << shiftAmt) & mask(dataSize * 8); + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -641,14 +707,19 @@ let {{ ''' class Srl(RegOp): + # Because what happens to the bits shift -in- on a right shift + # is not defined in the C/C++ standard, we have to mask them out + # to be sure they're zero. code = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); - // Because what happens to the bits shift -in- on a right shift - // is not defined in the C/C++ standard, we have to mask them out - // to be sure they're zero. uint64_t logicalMask = mask(dataSize * 8 - shiftAmt); DestReg = merge(DestReg, (psrc1 >> shiftAmt) & logicalMask, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + uint64_t logicalMask = mask(dataSize * 8 - shiftAmt); + DestReg = (psrc1 >> shiftAmt) & logicalMask; + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -671,15 +742,21 @@ let {{ ''' class Sra(RegOp): + # Because what happens to the bits shift -in- on a right shift + # is not defined in the C/C++ standard, we have to sign extend + # them manually to be sure. code = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); - // Because what happens to the bits shift -in- on a right shift - // is not defined in the C/C++ standard, we have to sign extend - // them manually to be sure. uint64_t arithMask = (shiftAmt == 0) ? 0 : -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt); DestReg = merge(DestReg, (psrc1 >> shiftAmt) | arithMask, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + uint64_t arithMask = (shiftAmt == 0) ? 0 : + -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt); + DestReg = ((psrc1 >> shiftAmt) | arithMask) & mask(dataSize * 8); + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -704,13 +781,11 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8); - if(realShiftAmt) - { + if (realShiftAmt) { uint64_t top = psrc1 << (dataSize * 8 - realShiftAmt); uint64_t bottom = bits(psrc1, dataSize * 8, realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -739,16 +814,14 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1); - if(realShiftAmt) - { + if (realShiftAmt) { CCFlagBits flags = ccFlagBits; uint64_t top = flags.cf << (dataSize * 8 - realShiftAmt); if (realShiftAmt > 1) top |= psrc1 << (dataSize * 8 - realShiftAmt + 1); uint64_t bottom = bits(psrc1, dataSize * 8 - 1, realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -780,14 +853,12 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8); - if(realShiftAmt) - { + if (realShiftAmt) { uint64_t top = psrc1 << realShiftAmt; uint64_t bottom = bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -816,8 +887,7 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1); - if(realShiftAmt) - { + if (realShiftAmt) { CCFlagBits flags = ccFlagBits; uint64_t top = psrc1 << realShiftAmt; uint64_t bottom = flags.cf << (realShiftAmt - 1); @@ -826,8 +896,7 @@ let {{ bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt + 1); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -853,10 +922,10 @@ let {{ ''' class Sld(RegOp): - code = ''' + sldCode = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t dataBits = dataSize * 8; - uint8_t realShiftAmt = shiftAmt % (2 * dataBits); + uint8_t realShiftAmt = shiftAmt %% (2 * dataBits); uint64_t result; if (realShiftAmt == 0) { result = psrc1; @@ -867,8 +936,10 @@ let {{ result = (DoubleBits << (realShiftAmt - dataBits)) | (psrc1 >> (2 * dataBits - realShiftAmt)); } - DestReg = merge(DestReg, result, dataSize); + %s ''' + code = sldCode % "DestReg = merge(DestReg, result, dataSize);" + big_code = sldCode % "DestReg = result & mask(dataSize * 8);" flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -899,10 +970,10 @@ let {{ ''' class Srd(RegOp): - code = ''' + srdCode = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t dataBits = dataSize * 8; - uint8_t realShiftAmt = shiftAmt % (2 * dataBits); + uint8_t realShiftAmt = shiftAmt %% (2 * dataBits); uint64_t result; if (realShiftAmt == 0) { result = psrc1; @@ -919,8 +990,10 @@ let {{ logicalMask) | (psrc1 << (2 * dataBits - realShiftAmt)); } - DestReg = merge(DestReg, result, dataSize); + %s ''' + code = srdCode % "DestReg = merge(DestReg, result, dataSize);" + big_code = srdCode % "DestReg = result & mask(dataSize * 8);" flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -986,6 +1059,12 @@ let {{ ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : (ccFlagBits & ~EZFBit); ''' + big_code = ''' + int flag = bits(ccFlagBits, imm8); + DestReg = flag & mask(dataSize * 8); + ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : + (ccFlagBits & ~EZFBit); + ''' def __init__(self, dest, imm, flags=None, \ dataSize="env.dataSize"): super(Ruflag, self).__init__(dest, \ @@ -1000,6 +1079,14 @@ let {{ ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : (ccFlagBits & ~EZFBit); ''' + big_code = ''' + MiscReg flagMask = 0x3F7FDD5; + MiscReg flags = (nccFlagBits | ccFlagBits) & flagMask; + int flag = bits(flags, imm8); + DestReg = flag & mask(dataSize * 8); + ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : + (ccFlagBits & ~EZFBit); + ''' def __init__(self, dest, imm, flags=None, \ dataSize="env.dataSize"): super(Rflag, self).__init__(dest, \ @@ -1015,6 +1102,15 @@ let {{ val = sign_bit ? (val | ~maskVal) : (val & maskVal); DestReg = merge(DestReg, val, dataSize); ''' + big_code = ''' + IntReg val = psrc1; + // Mask the bit position so that it wraps. + int bitPos = op2 & (dataSize * 8 - 1); + int sign_bit = bits(val, bitPos, bitPos); + uint64_t maskVal = mask(bitPos+1); + val = sign_bit ? (val | ~maskVal) : (val & maskVal); + DestReg = val & mask(dataSize * 8); + ''' flag_code = ''' if (!sign_bit) ccFlagBits = ccFlagBits & @@ -1026,12 +1122,13 @@ let {{ class Zext(RegOp): code = 'DestReg = merge(DestReg, bits(psrc1, op2, 0), dataSize);' + big_code = 'DestReg = bits(psrc1, op2, 0) & mask(dataSize * 8);' class Rddr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): super(Rddr, self).__init__(dest, \ src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize) - code = ''' + rdrCode = ''' CR4 cr4 = CR4Op; DR7 dr7 = DR7Op; if ((cr4.de == 1 && (src1 == 4 || src1 == 5)) || src1 >= 8) { @@ -1039,9 +1136,11 @@ let {{ } else if (dr7.gd) { fault = new DebugException(); } else { - DestReg = merge(DestReg, DebugSrc1, dataSize); + %s } ''' + code = rdrCode % "DestReg = merge(DestReg, DebugSrc1, dataSize);" + big_code = rdrCode % "DestReg = DebugSrc1 & mask(dataSize * 8);" class Wrdr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): @@ -1066,13 +1165,15 @@ let {{ def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): super(Rdcr, self).__init__(dest, \ src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize) - code = ''' + rdcrCode = ''' if (src1 == 1 || (src1 > 4 && src1 < 8) || (src1 > 8)) { fault = new InvalidOpcode(); } else { - DestReg = merge(DestReg, ControlSrc1, dataSize); + %s } ''' + code = rdcrCode % "DestReg = merge(DestReg, ControlSrc1, dataSize);" + big_code = rdcrCode % "DestReg = ControlSrc1 & mask(dataSize * 8);" class Wrcr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): @@ -1154,24 +1255,20 @@ let {{ ''' class Rdbase(SegOp): - code = ''' - DestReg = merge(DestReg, SegBaseSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegBaseSrc1, dataSize);' + big_code = 'DestReg = SegBaseSrc1 & mask(dataSize * 8);' class Rdlimit(SegOp): - code = ''' - DestReg = merge(DestReg, SegLimitSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegLimitSrc1, dataSize);' + big_code = 'DestReg = SegLimitSrc1 & mask(dataSize * 8);' class RdAttr(SegOp): - code = ''' - DestReg = merge(DestReg, SegAttrSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegAttrSrc1, dataSize);' + big_code = 'DestReg = SegAttrSrc1 & mask(dataSize * 8);' class Rdsel(SegOp): - code = ''' - DestReg = merge(DestReg, SegSelSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegSelSrc1, dataSize);' + big_code = 'DestReg = SegSelSrc1 & mask(dataSize * 8);' class Rdval(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): -- cgit v1.2.3 From bce2be525d0ff05af914515bc2d9e9ac9d66a123 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:45:12 -0800 Subject: X86: Put the result used for flags in an intermediate variable. Using the destination register directly causes the ISA parser to treat it as a source even if none of the original bits are used. --- src/arch/x86/insts/microregop.cc | 3 --- src/arch/x86/isa/includes.isa | 1 + src/arch/x86/isa/microops/regop.isa | 38 ++++++++++++++++++++----------------- 3 files changed, 22 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/arch/x86/insts/microregop.cc b/src/arch/x86/insts/microregop.cc index 6aee87449..dedea0f3d 100644 --- a/src/arch/x86/insts/microregop.cc +++ b/src/arch/x86/insts/microregop.cc @@ -50,9 +50,6 @@ namespace X86ISA bool subtract) const { DPRINTF(X86, "flagMask = %#x\n", flagMask); - if (_destRegIdx[0] & IntFoldBit) { - _dest >>= 8; - } uint64_t flags = oldFlags & ~flagMask; if(flagMask & (ECFBit | CFBit)) { diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa index 32708043e..674e69e98 100644 --- a/src/arch/x86/isa/includes.isa +++ b/src/arch/x86/isa/includes.isa @@ -114,6 +114,7 @@ output exec {{ #include "arch/x86/regs/misc.hh" #include "arch/x86/tlb.hh" #include "base/bigint.hh" +#include "base/compiler.hh" #include "base/condcodes.hh" #include "cpu/base.hh" #include "cpu/exetrace.hh" diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index 158bfdd59..e2a51c127 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -51,6 +51,8 @@ def template MicroRegOpExecute {{ %(op_decl)s; %(op_rd)s; + IntReg result M5_VAR_USED; + if(%(cond_check)s) { %(code)s; @@ -79,6 +81,8 @@ def template MicroRegOpImmExecute {{ %(op_decl)s; %(op_rd)s; + IntReg result M5_VAR_USED; + if(%(cond_check)s) { %(code)s; @@ -434,7 +438,7 @@ let {{ flag_code = ''' //Don't have genFlags handle the OF or CF bits uint64_t mask = CFBit | ECFBit | OFBit; - ccFlagBits = genFlags(ccFlagBits, ext & ~mask, DestReg, psrc1, op2); + ccFlagBits = genFlags(ccFlagBits, ext & ~mask, result, psrc1, op2); //If a logic microop wants to set these, it wants to set them to 0. ccFlagBits &= ~(CFBit & ext); ccFlagBits &= ~(ECFBit & ext); @@ -444,12 +448,12 @@ let {{ class FlagRegOp(RegOp): abstract = True flag_code = \ - "ccFlagBits = genFlags(ccFlagBits, ext, DestReg, psrc1, op2);" + "ccFlagBits = genFlags(ccFlagBits, ext, result, psrc1, op2);" class SubRegOp(RegOp): abstract = True flag_code = \ - "ccFlagBits = genFlags(ccFlagBits, ext, DestReg, psrc1, ~op2, true);" + "ccFlagBits = genFlags(ccFlagBits, ext, result, psrc1, ~op2, true);" class CondRegOp(RegOp): abstract = True @@ -471,44 +475,44 @@ let {{ src1, src2, flags, dataSize) class Add(FlagRegOp): - code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);' - big_code = 'DestReg = (psrc1 + op2) & mask(dataSize * 8);' + code = 'DestReg = merge(DestReg, result = (psrc1 + op2), dataSize);' + big_code = 'DestReg = result = (psrc1 + op2) & mask(dataSize * 8);' class Or(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 | op2, dataSize);' - big_code = 'DestReg = (psrc1 | op2) & mask(dataSize * 8);' + code = 'DestReg = merge(DestReg, result = (psrc1 | op2), dataSize);' + big_code = 'DestReg = result = (psrc1 | op2) & mask(dataSize * 8);' class Adc(FlagRegOp): code = ''' CCFlagBits flags = ccFlagBits; - DestReg = merge(DestReg, psrc1 + op2 + flags.cf, dataSize); + DestReg = merge(DestReg, result = (psrc1 + op2 + flags.cf), dataSize); ''' big_code = ''' CCFlagBits flags = ccFlagBits; - DestReg = (psrc1 + op2 + flags.cf) & mask(dataSize * 8); + DestReg = result = (psrc1 + op2 + flags.cf) & mask(dataSize * 8); ''' class Sbb(SubRegOp): code = ''' CCFlagBits flags = ccFlagBits; - DestReg = merge(DestReg, psrc1 - op2 - flags.cf, dataSize); + DestReg = merge(DestReg, result = (psrc1 - op2 - flags.cf), dataSize); ''' big_code = ''' CCFlagBits flags = ccFlagBits; - DestReg = (psrc1 - op2 - flags.cf) & mask(dataSize * 8); + DestReg = result = (psrc1 - op2 - flags.cf) & mask(dataSize * 8); ''' class And(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 & op2, dataSize)' - big_code = 'DestReg = (psrc1 & op2) & mask(dataSize * 8)' + code = 'DestReg = merge(DestReg, result = (psrc1 & op2), dataSize)' + big_code = 'DestReg = result = (psrc1 & op2) & mask(dataSize * 8)' class Sub(SubRegOp): - code = 'DestReg = merge(DestReg, psrc1 - op2, dataSize)' - big_code = 'DestReg = (psrc1 - op2) & mask(dataSize * 8)' + code = 'DestReg = merge(DestReg, result = (psrc1 - op2), dataSize)' + big_code = 'DestReg = result = (psrc1 - op2) & mask(dataSize * 8)' class Xor(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)' - big_code = 'DestReg = (psrc1 ^ op2) & mask(dataSize * 8)' + code = 'DestReg = merge(DestReg, result = (psrc1 ^ op2), dataSize)' + big_code = 'DestReg = result = (psrc1 ^ op2) & mask(dataSize * 8)' class Mul1s(WrRegOp): code = ''' -- cgit v1.2.3 From 77b4a370670bed84d1c000a58d3e668334fdc86b Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:45:47 -0800 Subject: X86: Detect branches taking into account instruction size. The size of the current instruction determines what the npc should be if there's no branching. --- src/arch/x86/types.hh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index d78af1b81..4641141d3 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -243,6 +243,12 @@ namespace X86ISA uint8_t size() const { return _size; } void size(uint8_t newSize) { _size = newSize; } + bool + branching() const + { + return this->npc() != this->pc() + size(); + } + void advance() { -- cgit v1.2.3 From 343e94a257baa94575adf0d0def18ffe8da0c4f8 Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Mon, 14 Feb 2011 16:14:54 -0600 Subject: Ruby: Improve Change PerfectSwitch's wakeup function Currently the wakeup function for the PerfectSwitch contains three loops - loop on number of virtual networks loop on number of incoming links loop till all messages for this (link, network) have been routed With an 8 processor mesh network and Hammer protocol, about 11-12% of the was observed to have been spent in this function, which is the highest amongst all the functions. It was found that the innermost loop is executed about 45 times per invocation of the wakeup function, when each invocation of the wakeup function processes just about one message. The patch tries to do away with the redundant executions of the innermost loop. Counters have been added for each virtual network that record the number of messages that need to be routed for that virtual network. The inner loops are only executed when the number of messages for that particular virtual network > 0. This does away with almost 80% of the executions of the innermost loop. The function now consumes about 5-6% of the total execution time. --- src/mem/ruby/buffers/MessageBuffer.cc | 3 + src/mem/ruby/buffers/MessageBuffer.hh | 6 + src/mem/ruby/common/Consumer.hh | 1 + src/mem/ruby/network/simple/PerfectSwitch.cc | 281 +++++++++++++------------ src/mem/ruby/network/simple/PerfectSwitch.hh | 2 + src/mem/ruby/slicc_interface/Message.hh | 2 + src/mem/ruby/slicc_interface/NetworkMessage.hh | 7 + 7 files changed, 170 insertions(+), 132 deletions(-) (limited to 'src') diff --git a/src/mem/ruby/buffers/MessageBuffer.cc b/src/mem/ruby/buffers/MessageBuffer.cc index f6b79c580..225595005 100644 --- a/src/mem/ruby/buffers/MessageBuffer.cc +++ b/src/mem/ruby/buffers/MessageBuffer.cc @@ -58,6 +58,8 @@ MessageBuffer::MessageBuffer(const string &name) m_name = name; m_stall_msg_map.clear(); + m_input_link_id = 0; + m_vnet_id = 0; } int @@ -228,6 +230,7 @@ MessageBuffer::enqueue(MsgPtr message, Time delta) // Schedule the wakeup if (m_consumer_ptr != NULL) { g_eventQueue_ptr->scheduleEventAbsolute(m_consumer_ptr, arrival_time); + m_consumer_ptr->storeEventInfo(m_vnet_id); } else { panic("No consumer: %s name: %s\n", *this, m_name); } diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh index 62cc65670..88df5b788 100644 --- a/src/mem/ruby/buffers/MessageBuffer.hh +++ b/src/mem/ruby/buffers/MessageBuffer.hh @@ -142,6 +142,9 @@ class MessageBuffer void printStats(std::ostream& out); void clearStats() { m_not_avail_count = 0; m_msg_counter = 0; } + void setIncomingLink(int link_id) { m_input_link_id = link_id; } + void setVnet(int net) { m_vnet_id = net; } + private: //added by SS int m_recycle_latency; @@ -184,6 +187,9 @@ class MessageBuffer bool m_ordering_set; bool m_randomization; Time m_last_arrival_time; + + int m_input_link_id; + int m_vnet_id; }; inline std::ostream& diff --git a/src/mem/ruby/common/Consumer.hh b/src/mem/ruby/common/Consumer.hh index c1f8bc42e..a119abb39 100644 --- a/src/mem/ruby/common/Consumer.hh +++ b/src/mem/ruby/common/Consumer.hh @@ -67,6 +67,7 @@ class Consumer virtual void wakeup() = 0; virtual void print(std::ostream& out) const = 0; + virtual void storeEventInfo(int info) {} const Time& getLastScheduledWakeup() const diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc index 7229c724f..5c461c63f 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.cc +++ b/src/mem/ruby/network/simple/PerfectSwitch.cc @@ -54,6 +54,11 @@ PerfectSwitch::PerfectSwitch(SwitchID sid, SimpleNetwork* network_ptr) m_round_robin_start = 0; m_network_ptr = network_ptr; m_wakeups_wo_switch = 0; + + for(int i = 0;i < m_virtual_networks;++i) + { + m_pending_message_count.push_back(0); + } } void @@ -62,12 +67,15 @@ PerfectSwitch::addInPort(const vector& in) assert(in.size() == m_virtual_networks); NodeID port = m_in.size(); m_in.push_back(in); + for (int j = 0; j < m_virtual_networks; j++) { m_in[port][j]->setConsumer(this); string desc = csprintf("[Queue from port %s %s %s to PerfectSwitch]", NodeIDToString(m_switch_id), NodeIDToString(port), NodeIDToString(j)); m_in[port][j]->setDescription(desc); + m_in[port][j]->setIncomingLink(port); + m_in[port][j]->setVnet(j); } } @@ -154,160 +162,169 @@ PerfectSwitch::wakeup() m_round_robin_start = 0; } - // for all input ports, use round robin scheduling - for (int counter = 0; counter < m_in.size(); counter++) { - // Round robin scheduling - incoming++; - if (incoming >= m_in.size()) { - incoming = 0; - } + if(m_pending_message_count[vnet] > 0) { + // for all input ports, use round robin scheduling + for (int counter = 0; counter < m_in.size(); counter++) { + // Round robin scheduling + incoming++; + if (incoming >= m_in.size()) { + incoming = 0; + } - // temporary vectors to store the routing results - vector output_links; - vector output_link_destinations; - - // Is there a message waiting? - while (m_in[incoming][vnet]->isReady()) { - DPRINTF(RubyNetwork, "incoming: %d\n", incoming); - - // Peek at message - msg_ptr = m_in[incoming][vnet]->peekMsgPtr(); - net_msg_ptr = safe_cast(msg_ptr.get()); - DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr)); - - output_links.clear(); - output_link_destinations.clear(); - NetDest msg_dsts = - net_msg_ptr->getInternalDestination(); - - // Unfortunately, the token-protocol sends some - // zero-destination messages, so this assert isn't valid - // assert(msg_dsts.count() > 0); - - assert(m_link_order.size() == m_routing_table.size()); - assert(m_link_order.size() == m_out.size()); - - if (m_network_ptr->getAdaptiveRouting()) { - if (m_network_ptr->isVNetOrdered(vnet)) { - // Don't adaptively route - for (int out = 0; out < m_out.size(); out++) { - m_link_order[out].m_link = out; - m_link_order[out].m_value = 0; - } - } else { - // Find how clogged each link is - for (int out = 0; out < m_out.size(); out++) { - int out_queue_length = 0; - for (int v = 0; v < m_virtual_networks; v++) { - out_queue_length += m_out[out][v]->getSize(); + // temporary vectors to store the routing results + vector output_links; + vector output_link_destinations; + + // Is there a message waiting? + while (m_in[incoming][vnet]->isReady()) { + DPRINTF(RubyNetwork, "incoming: %d\n", incoming); + + // Peek at message + msg_ptr = m_in[incoming][vnet]->peekMsgPtr(); + net_msg_ptr = safe_cast(msg_ptr.get()); + DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr)); + + output_links.clear(); + output_link_destinations.clear(); + NetDest msg_dsts = + net_msg_ptr->getInternalDestination(); + + // Unfortunately, the token-protocol sends some + // zero-destination messages, so this assert isn't valid + // assert(msg_dsts.count() > 0); + + assert(m_link_order.size() == m_routing_table.size()); + assert(m_link_order.size() == m_out.size()); + + if (m_network_ptr->getAdaptiveRouting()) { + if (m_network_ptr->isVNetOrdered(vnet)) { + // Don't adaptively route + for (int out = 0; out < m_out.size(); out++) { + m_link_order[out].m_link = out; + m_link_order[out].m_value = 0; + } + } else { + // Find how clogged each link is + for (int out = 0; out < m_out.size(); out++) { + int out_queue_length = 0; + for (int v = 0; v < m_virtual_networks; v++) { + out_queue_length += m_out[out][v]->getSize(); + } + int value = + (out_queue_length << 8) | (random() & 0xff); + m_link_order[out].m_link = out; + m_link_order[out].m_value = value; } - int value = - (out_queue_length << 8) | (random() & 0xff); - m_link_order[out].m_link = out; - m_link_order[out].m_value = value; + + // Look at the most empty link first + sort(m_link_order.begin(), m_link_order.end()); } + } - // Look at the most empty link first - sort(m_link_order.begin(), m_link_order.end()); + for (int i = 0; i < m_routing_table.size(); i++) { + // pick the next link to look at + int link = m_link_order[i].m_link; + NetDest dst = m_routing_table[link]; + DPRINTF(RubyNetwork, "dst: %s\n", dst); + + if (!msg_dsts.intersectionIsNotEmpty(dst)) + continue; + + // Remember what link we're using + output_links.push_back(link); + + // Need to remember which destinations need this + // message in another vector. This Set is the + // intersection of the routing_table entry and the + // current destination set. The intersection must + // not be empty, since we are inside "if" + output_link_destinations.push_back(msg_dsts.AND(dst)); + + // Next, we update the msg_destination not to + // include those nodes that were already handled + // by this link + msg_dsts.removeNetDest(dst); } - } - for (int i = 0; i < m_routing_table.size(); i++) { - // pick the next link to look at - int link = m_link_order[i].m_link; - NetDest dst = m_routing_table[link]; - DPRINTF(RubyNetwork, "dst: %s\n", dst); - - if (!msg_dsts.intersectionIsNotEmpty(dst)) - continue; - - // Remember what link we're using - output_links.push_back(link); - - // Need to remember which destinations need this - // message in another vector. This Set is the - // intersection of the routing_table entry and the - // current destination set. The intersection must - // not be empty, since we are inside "if" - output_link_destinations.push_back(msg_dsts.AND(dst)); - - // Next, we update the msg_destination not to - // include those nodes that were already handled - // by this link - msg_dsts.removeNetDest(dst); - } + assert(msg_dsts.count() == 0); + //assert(output_links.size() > 0); + + // Check for resources - for all outgoing queues + bool enough = true; + for (int i = 0; i < output_links.size(); i++) { + int outgoing = output_links[i]; + if (!m_out[outgoing][vnet]->areNSlotsAvailable(1)) + enough = false; + DPRINTF(RubyNetwork, "Checking if node is blocked\n" + "outgoing: %d, vnet: %d, enough: %d\n", + outgoing, vnet, enough); + } - assert(msg_dsts.count() == 0); - //assert(output_links.size() > 0); - - // Check for resources - for all outgoing queues - bool enough = true; - for (int i = 0; i < output_links.size(); i++) { - int outgoing = output_links[i]; - if (!m_out[outgoing][vnet]->areNSlotsAvailable(1)) - enough = false; - DPRINTF(RubyNetwork, "Checking if node is blocked\n" - "outgoing: %d, vnet: %d, enough: %d\n", - outgoing, vnet, enough); - } + // There were not enough resources + if (!enough) { + g_eventQueue_ptr->scheduleEvent(this, 1); + DPRINTF(RubyNetwork, "Can't deliver message since a node " + "is blocked\n" + "Message: %s\n", (*net_msg_ptr)); + break; // go to next incoming port + } - // There were not enough resources - if (!enough) { - g_eventQueue_ptr->scheduleEvent(this, 1); - DPRINTF(RubyNetwork, "Can't deliver message since a node " - "is blocked\n" - "Message: %s\n", (*net_msg_ptr)); - break; // go to next incoming port - } + MsgPtr unmodified_msg_ptr; - MsgPtr unmodified_msg_ptr; + if (output_links.size() > 1) { + // If we are sending this message down more than + // one link (size>1), we need to make a copy of + // the message so each branch can have a different + // internal destination we need to create an + // unmodified MsgPtr because the MessageBuffer + // enqueue func will modify the message - if (output_links.size() > 1) { - // If we are sending this message down more than - // one link (size>1), we need to make a copy of - // the message so each branch can have a different - // internal destination we need to create an - // unmodified MsgPtr because the MessageBuffer - // enqueue func will modify the message + // This magic line creates a private copy of the + // message + unmodified_msg_ptr = msg_ptr->clone(); + } - // This magic line creates a private copy of the - // message - unmodified_msg_ptr = msg_ptr->clone(); - } + // Enqueue it - for all outgoing queues + for (int i=0; i 0) { + // create a private copy of the unmodified + // message + msg_ptr = unmodified_msg_ptr->clone(); + } - if (i > 0) { - // create a private copy of the unmodified - // message - msg_ptr = unmodified_msg_ptr->clone(); - } + // Change the internal destination set of the + // message so it knows which destinations this + // link is responsible for. + net_msg_ptr = safe_cast(msg_ptr.get()); + net_msg_ptr->getInternalDestination() = + output_link_destinations[i]; - // Change the internal destination set of the - // message so it knows which destinations this - // link is responsible for. - net_msg_ptr = safe_cast(msg_ptr.get()); - net_msg_ptr->getInternalDestination() = - output_link_destinations[i]; + // Enqeue msg + DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from " + "inport[%d][%d] to outport [%d][%d] time: %lld.\n", + m_switch_id, incoming, vnet, outgoing, vnet, + g_eventQueue_ptr->getTime()); - // Enqeue msg - DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from " - "inport[%d][%d] to outport [%d][%d] time: %lld.\n", - m_switch_id, incoming, vnet, outgoing, vnet, - g_eventQueue_ptr->getTime()); + m_out[outgoing][vnet]->enqueue(msg_ptr); + } - m_out[outgoing][vnet]->enqueue(msg_ptr); + // Dequeue msg + m_in[incoming][vnet]->pop(); + m_pending_message_count[vnet]--; } - - // Dequeue msg - m_in[incoming][vnet]->pop(); } } } } +void +PerfectSwitch::storeEventInfo(int info) +{ + m_pending_message_count[info]++; +} + void PerfectSwitch::printStats(std::ostream& out) const { diff --git a/src/mem/ruby/network/simple/PerfectSwitch.hh b/src/mem/ruby/network/simple/PerfectSwitch.hh index a7e577df0..cd0219fd9 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.hh +++ b/src/mem/ruby/network/simple/PerfectSwitch.hh @@ -69,6 +69,7 @@ class PerfectSwitch : public Consumer int getOutLinks() const { return m_out.size(); } void wakeup(); + void storeEventInfo(int info); void printStats(std::ostream& out) const; void clearStats(); @@ -92,6 +93,7 @@ class PerfectSwitch : public Consumer int m_round_robin_start; int m_wakeups_wo_switch; SimpleNetwork* m_network_ptr; + std::vector m_pending_message_count; }; inline std::ostream& diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh index ff94fdd40..7fcfabe9c 100644 --- a/src/mem/ruby/slicc_interface/Message.hh +++ b/src/mem/ruby/slicc_interface/Message.hh @@ -57,6 +57,8 @@ class Message : public RefCounted virtual Message* clone() const = 0; virtual void print(std::ostream& out) const = 0; + virtual void setIncomingLink(int) {} + virtual void setVnet(int) {} void setDelayedCycles(const int& cycles) { m_DelayedCycles = cycles; } const int& getDelayedCycles() const {return m_DelayedCycles;} diff --git a/src/mem/ruby/slicc_interface/NetworkMessage.hh b/src/mem/ruby/slicc_interface/NetworkMessage.hh index 082481e05..a8f9c625b 100644 --- a/src/mem/ruby/slicc_interface/NetworkMessage.hh +++ b/src/mem/ruby/slicc_interface/NetworkMessage.hh @@ -82,9 +82,16 @@ class NetworkMessage : public Message virtual void print(std::ostream& out) const = 0; + int getIncomingLink() const { return incoming_link; } + void setIncomingLink(int link) { incoming_link = link; } + int getVnet() const { return vnet; } + void setVnet(int net) { vnet = net; } + private: NetDest m_internal_dest; bool m_internal_dest_valid; + int incoming_link; + int vnet; }; inline std::ostream& -- cgit v1.2.3 From 989138970e3512aa9c1b37810ba64a742c00543e Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 14 Feb 2011 21:36:37 -0800 Subject: Info: Clean up some info files. Get rid of RELEASE_NOTES since we no longer do releases, update some of the information in README, and update the date in LICENSE. --- src/SConscript | 2 +- src/python/m5/main.py | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'src') diff --git a/src/SConscript b/src/SConscript index cad0736c5..0ee144747 100755 --- a/src/SConscript +++ b/src/SConscript @@ -446,7 +446,7 @@ def makeInfoPyFile(target, source, env): # Generate a file that wraps the basic top level files env.Command('python/m5/info.py', - [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ], + [ '#/AUTHORS', '#/LICENSE', '#/README', ], MakeAction(makeInfoPyFile, Transform("INFO"))) PySource('m5', 'python/m5/info.py') diff --git a/src/python/m5/main.py b/src/python/m5/main.py index cd139ccb3..23a012166 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -61,8 +61,6 @@ add_option('-C', "--copyright", action="store_true", default=False, help="Show full copyright information") add_option('-R', "--readme", action="store_true", default=False, help="Show the readme") -add_option('-N', "--release-notes", action="store_true", default=False, - help="Show the release notes") # Options for configuring the base simulator add_option('-d', "--outdir", metavar="DIR", default="m5out", @@ -207,13 +205,6 @@ def main(): print info.README print - if options.release_notes: - done = True - print 'Release Notes:' - print - print info.RELEASE_NOTES - print - if options.trace_help: done = True check_tracing() -- cgit v1.2.3 From fde8b5c3876ad431b193989ab64c802d1cec1ed0 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 15 Feb 2011 15:58:16 -0800 Subject: X86: Get rid of "inline" on the MicroPanic constructor in decoder.cc. This was making certain versions of gcc omit the function from the object file which would break the build. --- src/arch/x86/isa/microops/debug.isa | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/arch/x86/isa/microops/debug.isa b/src/arch/x86/isa/microops/debug.isa index c2735565d..220c1af97 100644 --- a/src/arch/x86/isa/microops/debug.isa +++ b/src/arch/x86/isa/microops/debug.isa @@ -98,7 +98,7 @@ def template MicroDebugExecute {{ }}; def template MicroDebugConstructor {{ - inline %(class_name)s::%(class_name)s( + %(class_name)s::%(class_name)s( ExtMachInst machInst, const char * instMnem, uint64_t setFlags, std::string _message, uint8_t _cc) : %(base_class)s(machInst, "%(func)s", instMnem, -- cgit v1.2.3 From 991d0185c68b53a04ae5d1f1a05749bbfddced89 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:27:52 -0500 Subject: inorder: initialize res. req. vectors based on resource bandwidth first change in an optimization that will stop InOrder from allocating new memory for every instruction's request to a resource. This gets expensive since every instruction needs to access ~10 requests before graduation. Instead, the plan is to allocate just enough resource request objects to satisfy each resource's bandwidth (e.g. the execution unit would need to allocate 3 resource request objects for a 1-issue pipeline since on any given cycle it could have 2 read requests and 1 write request) and then let the instructions contend and reuse those allocated requests. The end result is a smaller memory footprint for the InOrder model and increased simulation performance --- src/cpu/inorder/resource.cc | 6 ++++++ src/cpu/inorder/resource.hh | 2 ++ src/cpu/inorder/resources/cache_unit.cc | 5 +++++ src/cpu/inorder/resources/use_def.cc | 13 +++++++++++++ src/cpu/inorder/resources/use_def.hh | 2 ++ 5 files changed, 28 insertions(+) (limited to 'src') diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 72b45dda8..5a31125c6 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -40,6 +40,8 @@ Resource::Resource(string res_name, int res_id, int res_width, : resName(res_name), id(res_id), width(res_width), latency(res_latency), cpu(_cpu) { + reqs.resize(width); + // Use to deny a instruction a resource. deniedReq = new ResourceRequest(this, NULL, 0, 0, 0, 0); } @@ -57,6 +59,10 @@ Resource::init() // Set Up Resource Events to Appropriate Resource BandWidth resourceEvent = new ResourceEvent[width]; + for (int i = 0; i < width; i++) { + reqs[i] = new ResourceRequest(this, NULL, 0, 0, 0, 0); + } + initSlots(); } diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index bd9ec48ca..7f6cb6642 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -224,6 +224,8 @@ class Resource { /** Mapping of slot-numbers to the resource-request pointers */ std::map reqMap; + std::vector reqs; + /** A list of all the available execution slots for this resource. * This correlates with the actual resource event idx. */ diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 8cd105493..47fafe45a 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -133,6 +133,11 @@ CacheUnit::getPort(const string &if_name, int idx) void CacheUnit::init() { + for (int i = 0; i < width; i++) { + reqs[i] = new CacheRequest(this, NULL, 0, 0, 0, 0, 0, + MemCmd::Command(0), 0, 0, 0); + } + // Currently Used to Model TLB Latency. Eventually // Switch to Timing TLB translations. resourceEvent = new CacheUnitEvent[width]; diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index 538b20246..dd178403b 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -88,6 +88,19 @@ UseDefUnit::regStats() Resource::regStats(); } +void +UseDefUnit::init() +{ + // Set Up Resource Events to Appropriate Resource BandWidth + resourceEvent = new ResourceEvent[width]; + + for (int i = 0; i < width; i++) { + reqs[i] = new UseDefRequest(this, NULL, 0, 0, 0, 0, 0); + } + + initSlots(); +} + ResReqPtr UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh index d2cc55315..6db8ed987 100644 --- a/src/cpu/inorder/resources/use_def.hh +++ b/src/cpu/inorder/resources/use_def.hh @@ -56,6 +56,8 @@ class UseDefUnit : public Resource { UseDefUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); + void init(); + ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned cmd); -- cgit v1.2.3 From ff48afcf4f3d8cccc899c5840734228a5bebc045 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:28:10 -0500 Subject: inorder: remove reqRemoveList we are going to be getting away from creating new resource requests for every instruction so no more need to keep track of a reqRemoveList and clean it up every tick --- src/cpu/inorder/cpu.cc | 25 +------------------------ src/cpu/inorder/cpu.hh | 10 +--------- src/cpu/inorder/pipeline_stage.cc | 14 -------------- src/cpu/inorder/resource.cc | 11 ----------- src/cpu/inorder/resources/cache_unit.cc | 3 --- src/cpu/inorder/resources/tlb_unit.cc | 3 --- src/cpu/inorder/resources/use_def.cc | 3 --- 7 files changed, 2 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 3a705258d..56b27e0d0 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -636,8 +636,7 @@ InOrderCPU::tick() } activityRec.advance(); - // Any squashed requests, events, or insts then remove them now - cleanUpRemovedReqs(); + // Any squashed events, or insts then remove them now cleanUpRemovedEvents(); cleanUpRemovedInsts(); @@ -1435,28 +1434,6 @@ InOrderCPU::cleanUpRemovedInsts() removeInstsThisCycle = false; } -void -InOrderCPU::cleanUpRemovedReqs() -{ - while (!reqRemoveList.empty()) { - ResourceRequest *res_req = reqRemoveList.front(); - - DPRINTF(RefCount, "[tid:%i] [sn:%lli]: Removing Request " - "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n", - res_req->inst->threadNumber, - res_req->inst->seqNum, - res_req->getStageNum(), - res_req->res->name(), - (res_req->isCompleted()) ? - res_req->getComplSlot() : res_req->getSlot(), - res_req->isCompleted()); - - reqRemoveList.pop(); - - delete res_req; - } -} - void InOrderCPU::cleanUpRemovedEvents() { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 2a5c815e1..073a0a982 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -593,10 +593,7 @@ class InOrderCPU : public BaseCPU /** Cleans up all instructions on the instruction remove list. */ void cleanUpRemovedInsts(); - /** Cleans up all instructions on the request remove list. */ - void cleanUpRemovedReqs(); - - /** Cleans up all instructions on the CPU event remove list. */ + /** Cleans up all events on the CPU event remove list. */ void cleanUpRemovedEvents(); /** Debug function to print all instructions on the list. */ @@ -626,11 +623,6 @@ class InOrderCPU : public BaseCPU */ std::queue removeList; - /** List of all the resource requests that will be removed at the end - * of this cycle. - */ - std::queue reqRemoveList; - /** List of all the cpu event requests that will be removed at the end of * the current cycle. */ diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index bc31a8537..5de963b0e 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -990,20 +990,6 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) cpu->activateNextReadyContext(); } - // Mark request for deletion - // if it isnt currently being used by a resource - if (!req->hasSlot()) { - DPRINTF(InOrderStage, "[sn:%i] Deleting Request, has no " - "slot in resource.\n", inst->seqNum); - - cpu->reqRemoveList.push(req); - } else { - DPRINTF(InOrderStage, "[sn:%i] Ignoring Request Deletion, " - "in resource [slot:%i].\n", inst->seqNum, - req->getSlot()); - } - - break; } diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 5a31125c6..568e2759d 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -322,9 +322,6 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, if (resourceEvent[req_slot_num].scheduled()) unscheduleEvent(req_slot_num); - // Mark request for later removal - cpu->reqRemoveList.push(req_ptr); - // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); } @@ -456,14 +453,6 @@ ResourceRequest::done(bool completed) // Used for debugging purposes if (completed) { complSlotNum = slotNum; - - // Would like to start a convention such as all requests deleted in - // resources/pipeline - // but a little more complex then it seems... - // For now, all COMPLETED requests deleted in resource.. - // all FAILED requests deleted in pipeline stage - // *all SQUASHED requests deleted in resource - res->cpu->reqRemoveList.push(res->reqMap[slotNum]); } // Free Slot So Another Instruction Can Use This Resource diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 47fafe45a..11902f51e 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -1171,9 +1171,6 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, } if (!cache_req->tlbStall && !cache_req->isMemAccPending()) { - // Mark request for later removal - cpu->reqRemoveList.push(req_ptr); - // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); } else { diff --git a/src/cpu/inorder/resources/tlb_unit.cc b/src/cpu/inorder/resources/tlb_unit.cc index 2e19ea928..0b273eabc 100644 --- a/src/cpu/inorder/resources/tlb_unit.cc +++ b/src/cpu/inorder/resources/tlb_unit.cc @@ -257,9 +257,6 @@ TLBUnit::squash(DynInstPtr inst, int stage_num, if (resourceEvent[req_slot_num].scheduled()) unscheduleEvent(req_slot_num); - // Mark request for later removal - cpu->reqRemoveList.push(req_ptr); - // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); } diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index dd178403b..a37e459a8 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -445,9 +445,6 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, unscheduleEvent(req_slot_num); } - // Mark request for later removal - cpu->reqRemoveList.push(req_ptr); - // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); } -- cgit v1.2.3 From c8837290251a300114975861575f59a58990b51a Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:28:22 -0500 Subject: inorder: add valid bit for resource requests this will allow us to reuse resource requests within a resource instead of always dynamically allocating --- src/cpu/inorder/resource.cc | 2 +- src/cpu/inorder/resource.hh | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 568e2759d..e6a0db021 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -407,7 +407,7 @@ int ResourceRequest::maxReqCount = 0; ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned _cmd) - : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), + : res(_res), inst(_inst), cmd(_cmd), valid(false), stageNum(stage_num), resIdx(res_idx), slotNum(slot_num), completed(false), squashed(false), processing(false), memStall(false) { diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 7f6cb6642..c02fe4014 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -299,6 +299,8 @@ class ResourceRequest static int maxReqCount; + friend class Resource; + public: ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned _cmd); @@ -321,7 +323,6 @@ class ResourceRequest ///////////////////////////////////////////// /** Get Resource Index */ int getResIdx() { return resIdx; } - /** Get Slot Number */ int getSlot() { return slotNum; } @@ -378,6 +379,7 @@ class ResourceRequest protected: /** Resource Identification */ + bool valid; ThreadID tid; int stageNum; int resIdx; -- cgit v1.2.3 From d64226750ef9b2ac85c116f90cdfdb2a755b32d4 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:28:30 -0500 Subject: inorder: remove request map, use request vector take away all instances of reqMap in the code and make all references use the built-in request vectors inside of each resource. The request map was dynamically allocating a request per instruction. The request vector just allocates N number of requests during instantiation and then the surrounding code is fixed up to reuse those N requests *** setRequest() and clearRequest() are the new accessors needed to define a new request in a resource --- src/cpu/inorder/cpu.cc | 7 +- src/cpu/inorder/pipeline_stage.cc | 1 + src/cpu/inorder/resource.cc | 153 +++++++++----------------- src/cpu/inorder/resource.hh | 21 ++-- src/cpu/inorder/resources/agen_unit.cc | 4 +- src/cpu/inorder/resources/branch_predictor.cc | 2 +- src/cpu/inorder/resources/cache_unit.cc | 69 ++++-------- src/cpu/inorder/resources/cache_unit.hh | 43 ++++++-- src/cpu/inorder/resources/decode_unit.cc | 4 +- src/cpu/inorder/resources/execution_unit.cc | 4 +- src/cpu/inorder/resources/fetch_seq_unit.cc | 6 +- src/cpu/inorder/resources/fetch_unit.cc | 29 ++--- src/cpu/inorder/resources/graduation_unit.cc | 4 +- src/cpu/inorder/resources/inst_buffer.cc | 2 +- src/cpu/inorder/resources/mult_div_unit.cc | 18 +-- src/cpu/inorder/resources/tlb_unit.cc | 53 ++++----- src/cpu/inorder/resources/tlb_unit.hh | 15 ++- src/cpu/inorder/resources/use_def.cc | 45 +++----- src/cpu/inorder/resources/use_def.hh | 16 ++- 19 files changed, 214 insertions(+), 282 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 56b27e0d0..196682621 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -324,12 +324,7 @@ InOrderCPU::InOrderCPU(Params *params) tid, asid[tid]); - dummyReq[tid] = new ResourceRequest(resPool->getResource(0), - dummyInst[tid], - 0, - 0, - 0, - 0); + dummyReq[tid] = new ResourceRequest(resPool->getResource(0)); } dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0); diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 5de963b0e..610b6f264 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -938,6 +938,7 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); ResReqPtr req = cpu->resPool->request(res_num, inst); + assert(req->valid); if (req->isCompleted()) { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s " diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index e6a0db021..757c17e6a 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -43,7 +43,7 @@ Resource::Resource(string res_name, int res_id, int res_width, reqs.resize(width); // Use to deny a instruction a resource. - deniedReq = new ResourceRequest(this, NULL, 0, 0, 0, 0); + deniedReq = new ResourceRequest(this); } Resource::~Resource() @@ -60,7 +60,7 @@ Resource::init() resourceEvent = new ResourceEvent[width]; for (int i = 0; i < width; i++) { - reqs[i] = new ResourceRequest(this, NULL, 0, 0, 0, 0); + reqs[i] = new ResourceRequest(this); } initSlots(); @@ -100,39 +100,28 @@ Resource::freeSlot(int slot_idx) // Put slot number on this resource's free list availSlots.push_back(slot_idx); - // Erase Request Pointer From Request Map - std::map::iterator req_it = reqMap.find(slot_idx); - - assert(req_it != reqMap.end()); - reqMap.erase(req_it); - + // Invalidate Request & Reset it's flags + reqs[slot_idx]->clearRequest(); } -// TODO: More efficiently search for instruction's slot within -// resource. int Resource::findSlot(DynInstPtr inst) { - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - int slot_num = -1; - while (map_it != map_end) { - if ((*map_it).second->getInst()->seqNum == - inst->seqNum) { - slot_num = (*map_it).second->getSlot(); + for (int i = 0; i < width; i++) { + if (reqs[i]->valid && + reqs[i]->getInst()->seqNum == inst->seqNum) { + slot_num = reqs[i]->getSlot(); } - map_it++; } - return slot_num; } int Resource::getSlot(DynInstPtr inst) { - int slot_num; + int slot_num = -1; if (slotsAvail() != 0) { slot_num = availSlots[0]; @@ -142,24 +131,6 @@ Resource::getSlot(DynInstPtr inst) assert(slot_num == *vect_it); availSlots.erase(vect_it); - } else { - DPRINTF(Resource, "[tid:%i]: No slots in resource " - "available to service [sn:%i].\n", inst->readTid(), - inst->seqNum); - slot_num = -1; - - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - if ((*map_it).second) { - DPRINTF(Resource, "Currently Serving request from: " - "[tid:%i] [sn:%i].\n", - (*map_it).second->getInst()->readTid(), - (*map_it).second->getInst()->seqNum); - } - map_it++; - } } return slot_num; @@ -206,7 +177,7 @@ Resource::request(DynInstPtr inst) inst->readTid()); } - reqMap[slot_num] = inst_req; + reqs[slot_num] = inst_req; try_request = true; } @@ -242,32 +213,21 @@ ResReqPtr Resource::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { - return new ResourceRequest(this, inst, stage_num, id, slot_num, - cmd); + reqs[slot_num]->setRequest(inst, stage_num, id, slot_num, cmd); + return reqs[slot_num]; } ResReqPtr Resource::findRequest(DynInstPtr inst) { - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - - bool found = false; - ResReqPtr req = NULL; - - while (map_it != map_end) { - if ((*map_it).second && - (*map_it).second->getInst() == inst) { - req = (*map_it).second; - //return (*map_it).second; - assert(found == false); - found = true; + for (int i = 0; i < width; i++) { + if (reqs[i]->valid && + reqs[i]->getInst() == inst) { + return reqs[i]; } - map_it++; } - return req; - //return NULL; + return NULL; } void @@ -281,9 +241,9 @@ void Resource::execute(int slot_idx) { DPRINTF(Resource, "[tid:%i]: Executing %s resource.\n", - reqMap[slot_idx]->getTid(), name()); - reqMap[slot_idx]->setCompleted(true); - reqMap[slot_idx]->done(); + reqs[slot_idx]->getTid(), name()); + reqs[slot_idx]->setCompleted(true); + reqs[slot_idx]->done(); } void @@ -299,15 +259,10 @@ void Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) { - std::vector slot_remove_list; - - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - ResReqPtr req_ptr = (*map_it).second; + for (int i = 0; i < width; i++) { + ResReqPtr req_ptr = reqs[i]; - if (req_ptr && + if (req_ptr->valid && req_ptr->getInst()->readTid() == tid && req_ptr->getInst()->seqNum > squash_seq_num) { @@ -322,16 +277,8 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, if (resourceEvent[req_slot_num].scheduled()) unscheduleEvent(req_slot_num); - // Mark slot for removal from resource - slot_remove_list.push_back(req_ptr->getSlot()); + freeSlot(req_slot_num); } - - map_it++; - } - - // Now Delete Slot Entry from Req. Map - for (int i = 0; i < slot_remove_list.size(); i++) { - freeSlot(slot_remove_list[i]); } } @@ -366,8 +313,8 @@ void Resource::scheduleEvent(int slot_idx, int delay) { DPRINTF(Resource, "[tid:%i]: Scheduling event for [sn:%i] on tick %i.\n", - reqMap[slot_idx]->inst->readTid(), - reqMap[slot_idx]->inst->seqNum, + reqs[slot_idx]->inst->readTid(), + reqs[slot_idx]->inst->seqNum, cpu->ticks(delay) + curTick()); resourceEvent[slot_idx].scheduleEvent(delay); } @@ -404,32 +351,10 @@ int ResourceRequest::resReqID = 0; int ResourceRequest::maxReqCount = 0; -ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, - int stage_num, int res_idx, int slot_num, - unsigned _cmd) - : res(_res), inst(_inst), cmd(_cmd), valid(false), stageNum(stage_num), - resIdx(res_idx), slotNum(slot_num), completed(false), - squashed(false), processing(false), memStall(false) +ResourceRequest::ResourceRequest(Resource *_res) + : res(_res), inst(NULL), stagePasses(0), valid(false), complSlotNum(-1), + completed(false), squashed(false), processing(false), memStall(false) { -#ifdef DEBUG - reqID = resReqID++; - res->cpu->resReqCount++; - DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, - res->cpu->resReqCount); - - if (res->cpu->resReqCount > 100) { - fatal("Too many undeleted resource requests. Memory leak?\n"); - } - - if (res->cpu->resReqCount > maxReqCount) { - maxReqCount = res->cpu->resReqCount; - } - -#endif - - stagePasses = 0; - complSlotNum = -1; - } ResourceRequest::~ResourceRequest() @@ -441,6 +366,26 @@ ResourceRequest::~ResourceRequest() #endif } +void +ResourceRequest::setRequest(DynInstPtr _inst, int stage_num, + int res_idx, int slot_num, unsigned _cmd) +{ + valid = true; + inst = _inst; + stageNum = stage_num; + resIdx = res_idx; + slotNum = slot_num; + cmd = _cmd; +} + +void +ResourceRequest::clearRequest() +{ + valid = false; + inst = NULL; + stagePasses = 0; +} + void ResourceRequest::done(bool completed) { diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index c02fe4014..c40314c94 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -221,9 +221,9 @@ class Resource { const int latency; public: - /** Mapping of slot-numbers to the resource-request pointers */ - std::map reqMap; - + /** List of all Requests the Resource is Servicing. Each request + represents part of the resource's bandwidth + */ std::vector reqs; /** A list of all the available execution slots for this resource. @@ -302,19 +302,21 @@ class ResourceRequest friend class Resource; public: - ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num, - int res_idx, int slot_num, unsigned _cmd); + ResourceRequest(Resource *_res); virtual ~ResourceRequest(); int reqID; + virtual void setRequest(DynInstPtr _inst, int stage_num, + int res_idx, int slot_num, unsigned _cmd); + + virtual void clearRequest(); + /** Acknowledge that this is a request is done and remove * from resource. */ void done(bool completed = true); - - short stagePasses; ///////////////////////////////////////////// // @@ -356,6 +358,10 @@ class ResourceRequest /** Command For This Resource */ unsigned cmd; + short stagePasses; + + bool valid; + //////////////////////////////////////// // // GET RESOURCE REQUEST STATUS FROM VARIABLES @@ -379,7 +385,6 @@ class ResourceRequest protected: /** Resource Identification */ - bool valid; ThreadID tid; int stageNum; int resIdx; diff --git a/src/cpu/inorder/resources/agen_unit.cc b/src/cpu/inorder/resources/agen_unit.cc index f1862b94a..764cd9446 100644 --- a/src/cpu/inorder/resources/agen_unit.cc +++ b/src/cpu/inorder/resources/agen_unit.cc @@ -50,8 +50,8 @@ AGENUnit::regStats() void AGENUnit::execute(int slot_num) { - ResourceRequest* agen_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* agen_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; #if TRACING_ON ThreadID tid = inst->readTid(); #endif diff --git a/src/cpu/inorder/resources/branch_predictor.cc b/src/cpu/inorder/resources/branch_predictor.cc index 8ca5a9718..5a22e40eb 100644 --- a/src/cpu/inorder/resources/branch_predictor.cc +++ b/src/cpu/inorder/resources/branch_predictor.cc @@ -66,7 +66,7 @@ BranchPredictor::execute(int slot_num) { // After this is working, change this to a reinterpret cast // for performance considerations - ResourceRequest* bpred_req = reqMap[slot_num]; + ResourceRequest* bpred_req = reqs[slot_num]; DynInstPtr inst = bpred_req->inst; ThreadID tid = inst->readTid(); int seq_num = inst->seqNum; diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 11902f51e..4deb32992 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -134,8 +134,7 @@ void CacheUnit::init() { for (int i = 0; i < width; i++) { - reqs[i] = new CacheRequest(this, NULL, 0, 0, 0, 0, 0, - MemCmd::Command(0), 0, 0, 0); + reqs[i] = new CacheRequest(this); } // Currently Used to Model TLB Latency. Eventually @@ -255,20 +254,16 @@ CacheUnit::removeAddrDependency(DynInstPtr inst) ResReqPtr CacheUnit::findRequest(DynInstPtr inst) { - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - - while (map_it != map_end) { + for (int i = 0; i < width; i++) { CacheRequest* cache_req = - dynamic_cast((*map_it).second); + dynamic_cast(reqs[i]); assert(cache_req); - if (cache_req && + if (cache_req->valid && cache_req->getInst() == inst && cache_req->instIdx == inst->curSkedEntry->idx) { return cache_req; } - map_it++; } return NULL; @@ -277,20 +272,16 @@ CacheUnit::findRequest(DynInstPtr inst) ResReqPtr CacheUnit::findRequest(DynInstPtr inst, int idx) { - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - - while (map_it != map_end) { + for (int i = 0; i < width; i++) { CacheRequest* cache_req = - dynamic_cast((*map_it).second); + dynamic_cast(reqs[i]); assert(cache_req); - if (cache_req && + if (cache_req->valid && cache_req->getInst() == inst && cache_req->instIdx == idx) { return cache_req; } - map_it++; } return NULL; @@ -302,6 +293,7 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { ScheduleEntry* sched_entry = *inst->curSkedEntry; + CacheRequest* cache_req = dynamic_cast(reqs[slot_num]); if (!inst->validMemAddr()) { panic("Mem. Addr. must be set before requesting cache access\n"); @@ -348,10 +340,10 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, sched_entry->cmd, name()); } - return new CacheRequest(this, inst, stage_num, id, slot_num, - sched_entry->cmd, 0, pkt_cmd, - 0/*flags*/, this->cpu->readCpuId(), - inst->curSkedEntry->idx); + cache_req->setRequest(inst, stage_num, id, slot_num, + sched_entry->cmd, pkt_cmd, + inst->curSkedEntry->idx); + return cache_req; } void @@ -672,7 +664,7 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size, void CacheUnit::execute(int slot_num) { - CacheReqPtr cache_req = dynamic_cast(reqMap[slot_num]); + CacheReqPtr cache_req = dynamic_cast(reqs[slot_num]); assert(cache_req); if (cachePortBlocked) { @@ -813,7 +805,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr cache_req; if (split_req == NULL) { - cache_req = dynamic_cast(reqMap[inst->getCurResSlot()]); + cache_req = dynamic_cast(reqs[inst->getCurResSlot()]); } else{ cache_req = split_req; } @@ -1064,10 +1056,10 @@ CacheUnitEvent::CacheUnitEvent() void CacheUnitEvent::process() { - DynInstPtr inst = resource->reqMap[slotIdx]->inst; - int stage_num = resource->reqMap[slotIdx]->getStageNum(); + DynInstPtr inst = resource->reqs[slotIdx]->inst; + int stage_num = resource->reqs[slotIdx]->getStageNum(); ThreadID tid = inst->threadNumber; - CacheReqPtr req_ptr = dynamic_cast(resource->reqMap[slotIdx]); + CacheReqPtr req_ptr = dynamic_cast(resource->reqs[slotIdx]); DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n", inst->seqNum); @@ -1078,7 +1070,7 @@ CacheUnitEvent::process() tlb_res->tlbBlocked[tid] = false; tlb_res->cpu->pipelineStage[stage_num]-> - unsetResStall(tlb_res->reqMap[slotIdx], tid); + unsetResStall(tlb_res->reqs[slotIdx], tid); req_ptr->tlbStall = false; @@ -1129,15 +1121,10 @@ void CacheUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) { - vector slot_remove_list; - - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - ResReqPtr req_ptr = (*map_it).second; + for (int i = 0; i < width; i++) { + ResReqPtr req_ptr = reqs[i]; - if (req_ptr && + if (req_ptr->valid && req_ptr->getInst()->readTid() == tid && req_ptr->getInst()->seqNum > squash_seq_num) { @@ -1150,7 +1137,6 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, "squashed, ignoring squash process.\n", req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); - map_it++; continue; } @@ -1164,15 +1150,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, if (cache_req->tlbStall) { tlbBlocked[tid] = false; - int stall_stage = reqMap[req_slot_num]->getStageNum(); + int stall_stage = reqs[req_slot_num]->getStageNum(); cpu->pipelineStage[stall_stage]-> - unsetResStall(reqMap[req_slot_num], tid); + unsetResStall(reqs[req_slot_num], tid); } if (!cache_req->tlbStall && !cache_req->isMemAccPending()) { - // Mark slot for removal from resource - slot_remove_list.push_back(req_ptr->getSlot()); + freeSlot(req_slot_num); } else { DPRINTF(InOrderCachePort, "[tid:%i] Request from [sn:%i] squashed, but still " @@ -1184,14 +1169,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum, req_ptr->getInst()->splitInst); } - } - - map_it++; } - // Now Delete Slot Entry from Req. Map - for (int i = 0; i < slot_remove_list.size(); i++) - freeSlot(slot_remove_list[i]); } diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index afcb36a24..b5effb2c3 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -219,20 +219,18 @@ class CacheUnitEvent : public ResourceEvent { void process(); }; +//@todo: Move into CacheUnit Class for private access to "valid" field class CacheRequest : public ResourceRequest { public: - CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx, - int slot_num, unsigned cmd, int req_size, - MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx) - : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd), - pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL), - retryPkt(NULL), memAccComplete(false), memAccPending(false), - tlbStall(false), splitAccess(false), splitAccessNum(-1), - split2ndAccess(false), instIdx(idx), fetchBufferFill(false) + CacheRequest(CacheUnit *cres) + : ResourceRequest(cres), memReq(NULL), reqData(NULL), + dataPkt(NULL), retryPkt(NULL), memAccComplete(false), + memAccPending(false), tlbStall(false), splitAccess(false), + splitAccessNum(-1), split2ndAccess(false), + fetchBufferFill(false) { } - virtual ~CacheRequest() { if (reqData && !splitAccess) { @@ -240,6 +238,33 @@ class CacheRequest : public ResourceRequest } } + void setRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, + unsigned _cmd, MemCmd::Command pkt_cmd, int idx) + { + pktCmd = pkt_cmd; + instIdx = idx; + + ResourceRequest::setRequest(_inst, stage_num, res_idx, slot_num, _cmd); + } + + void clearRequest() + { + memReq = NULL; + reqData = NULL; + dataPkt = NULL; + retryPkt = NULL; + memAccComplete = false; + memAccPending = false; + tlbStall = false; + splitAccess = false; + splitAccessNum = -1; + split2ndAccess = false; + instIdx = 0; + fetchBufferFill = false; + + ResourceRequest::clearRequest(); + } + virtual PacketDataPtr getData() { return reqData; } diff --git a/src/cpu/inorder/resources/decode_unit.cc b/src/cpu/inorder/resources/decode_unit.cc index 42857c783..71d33ab90 100644 --- a/src/cpu/inorder/resources/decode_unit.cc +++ b/src/cpu/inorder/resources/decode_unit.cc @@ -49,8 +49,8 @@ DecodeUnit::DecodeUnit(std::string res_name, int res_id, int res_width, void DecodeUnit::execute(int slot_num) { - ResourceRequest* decode_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* decode_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; ThreadID tid = inst->readTid(); switch (decode_req->cmd) diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index 36bf2a4dc..f715d6071 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -82,8 +82,8 @@ ExecutionUnit::regStats() void ExecutionUnit::execute(int slot_num) { - ResourceRequest* exec_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* exec_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; Fault fault = NoFault; int seq_num = inst->seqNum; diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index 6f84a333d..08d162b07 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -62,13 +62,17 @@ FetchSeqUnit::init() { resourceEvent = new FetchSeqEvent[width]; + for (int i = 0; i < width; i++) { + reqs[i] = new ResourceRequest(this); + } + initSlots(); } void FetchSeqUnit::execute(int slot_num) { - ResourceRequest* fs_req = reqMap[slot_num]; + ResourceRequest* fs_req = reqs[slot_num]; DynInstPtr inst = fs_req->inst; ThreadID tid = inst->readTid(); int stage_num = fs_req->getStageNum(); diff --git a/src/cpu/inorder/resources/fetch_unit.cc b/src/cpu/inorder/resources/fetch_unit.cc index 0a5483aff..dd875efdb 100644 --- a/src/cpu/inorder/resources/fetch_unit.cc +++ b/src/cpu/inorder/resources/fetch_unit.cc @@ -119,32 +119,23 @@ FetchUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { ScheduleEntry* sched_entry = *inst->curSkedEntry; + CacheRequest* cache_req = dynamic_cast(reqs[slot_num]); if (!inst->validMemAddr()) { panic("Mem. Addr. must be set before requesting cache access\n"); } - MemCmd::Command pkt_cmd; + assert(sched_entry->cmd == InitiateFetch); - switch (sched_entry->cmd) - { - case InitiateFetch: - pkt_cmd = MemCmd::ReadReq; - - DPRINTF(InOrderCachePort, - "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n", - inst->readTid(), inst->seqNum, inst->getMemAddr()); - break; + DPRINTF(InOrderCachePort, + "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->getMemAddr()); - default: - panic("%i: Unexpected request type (%i) to %s", curTick(), - sched_entry->cmd, name()); - } + cache_req->setRequest(inst, stage_num, id, slot_num, + sched_entry->cmd, MemCmd::ReadReq, + inst->curSkedEntry->idx); - return new CacheRequest(this, inst, stage_num, id, slot_num, - sched_entry->cmd, 0, pkt_cmd, - 0/*flags*/, this->cpu->readCpuId(), - inst->curSkedEntry->idx); + return cache_req; } void @@ -214,7 +205,7 @@ FetchUnit::markBlockUsed(std::list::iterator block_it) void FetchUnit::execute(int slot_num) { - CacheReqPtr cache_req = dynamic_cast(reqMap[slot_num]); + CacheReqPtr cache_req = dynamic_cast(reqs[slot_num]); assert(cache_req); if (cachePortBlocked) { diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc index 362641b54..68adcbd1d 100644 --- a/src/cpu/inorder/resources/graduation_unit.cc +++ b/src/cpu/inorder/resources/graduation_unit.cc @@ -49,8 +49,8 @@ GraduationUnit::GraduationUnit(std::string res_name, int res_id, int res_width, void GraduationUnit::execute(int slot_num) { - ResourceRequest* grad_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* grad_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; ThreadID tid = inst->readTid(); int stage_num = inst->curSkedEntry->stageNum; diff --git a/src/cpu/inorder/resources/inst_buffer.cc b/src/cpu/inorder/resources/inst_buffer.cc index 988fcd4da..46f5cce72 100644 --- a/src/cpu/inorder/resources/inst_buffer.cc +++ b/src/cpu/inorder/resources/inst_buffer.cc @@ -62,7 +62,7 @@ InstBuffer::regStats() void InstBuffer::execute(int slot_idx) { - ResReqPtr ib_req = reqMap[slot_idx]; + ResReqPtr ib_req = reqs[slot_idx]; DynInstPtr inst = ib_req->inst; ThreadID tid = inst->readTid(); int stage_num = ib_req->getStageNum(); diff --git a/src/cpu/inorder/resources/mult_div_unit.cc b/src/cpu/inorder/resources/mult_div_unit.cc index 042fb590b..ad8b2b47b 100644 --- a/src/cpu/inorder/resources/mult_div_unit.cc +++ b/src/cpu/inorder/resources/mult_div_unit.cc @@ -76,6 +76,10 @@ MultDivUnit::init() // Set Up Resource Events to Appropriate Resource BandWidth resourceEvent = new MDUEvent[width]; + for (int i = 0; i < width; i++) { + reqs[i] = new ResourceRequest(this); + } + initSlots(); } @@ -92,7 +96,7 @@ void MultDivUnit::freeSlot(int slot_idx) { DPRINTF(InOrderMDU, "Freeing slot for inst:%i\n | slots-free:%i | " - "slots-used:%i\n", reqMap[slot_idx]->getInst()->seqNum, + "slots-used:%i\n", reqs[slot_idx]->getInst()->seqNum, slotsAvail(), slotsInUse()); Resource::freeSlot(slot_idx); @@ -132,7 +136,7 @@ MultDivUnit::getSlot(DynInstPtr inst) // If we have this instruction's request already then return if (slot_num != -1 && - inst->curSkedEntry->cmd == reqMap[slot_num]->cmd) + inst->curSkedEntry->cmd == reqs[slot_num]->cmd) return slot_num; unsigned repeat_rate = 0; @@ -202,8 +206,8 @@ MultDivUnit::getDivOpSize(DynInstPtr inst) void MultDivUnit::execute(int slot_num) { - ResourceRequest* mult_div_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* mult_div_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; switch (mult_div_req->cmd) { @@ -275,8 +279,8 @@ MultDivUnit::execute(int slot_num) void MultDivUnit::exeMulDiv(int slot_num) { - ResourceRequest* mult_div_req = reqMap[slot_num]; - DynInstPtr inst = reqMap[slot_num]->inst; + ResourceRequest* mult_div_req = reqs[slot_num]; + DynInstPtr inst = reqs[slot_num]->inst; inst->fault = inst->execute(); @@ -310,7 +314,7 @@ MDUEvent::process() mdu_res->exeMulDiv(slotIdx); - ResourceRequest* mult_div_req = resource->reqMap[slotIdx]; + ResourceRequest* mult_div_req = resource->reqs[slotIdx]; mult_div_req->done(); } diff --git a/src/cpu/inorder/resources/tlb_unit.cc b/src/cpu/inorder/resources/tlb_unit.cc index 0b273eabc..37aec2209 100644 --- a/src/cpu/inorder/resources/tlb_unit.cc +++ b/src/cpu/inorder/resources/tlb_unit.cc @@ -72,6 +72,10 @@ TLBUnit::init() { resourceEvent = new TLBUnitEvent[width]; + for (int i = 0; i < width; i++) { + reqs[i] = new TLBUnitRequest(this); + } + initSlots(); } @@ -90,8 +94,9 @@ TLBUnit::getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { - return new TLBUnitRequest(this, _inst, stage_num, res_idx, slot_num, - cmd); + TLBUnitRequest *tlb_req = dynamic_cast(reqs[slot_num]); + tlb_req->setRequest(inst, stage_num, id, slot_num, cmd); + return ud_req; } void @@ -99,7 +104,7 @@ TLBUnit::execute(int slot_idx) { // After this is working, change this to a reinterpret cast // for performance considerations - TLBUnitRequest* tlb_req = dynamic_cast(reqMap[slot_idx]); + TLBUnitRequest* tlb_req = dynamic_cast(reqs[slot_idx]); assert(tlb_req != 0x0); DynInstPtr inst = tlb_req->inst; @@ -200,8 +205,8 @@ TLBUnitEvent::TLBUnitEvent() void TLBUnitEvent::process() { - DynInstPtr inst = resource->reqMap[slotIdx]->inst; - int stage_num = resource->reqMap[slotIdx]->getStageNum(); + DynInstPtr inst = resource->reqs[slotIdx]->inst; + int stage_num = resource->reqs[slotIdx]->getStageNum(); ThreadID tid = inst->threadNumber; DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n", @@ -212,31 +217,18 @@ TLBUnitEvent::process() tlb_res->tlbBlocked[tid] = false; - tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid); - - // Effectively NOP the instruction but still allow it - // to commit - //while (!inst->resSched.empty() && - // inst->curSkedEntry->stageNum != ThePipeline::NumStages - 1) { - //inst->resSched.pop(); - //} + tlb_res->cpu->pipelineStage[stage_num]-> + unsetResStall(tlb_res->reqs[slotIdx], tid); } void TLBUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) { - //@TODO: Figure out a way to consolidate common parts - // of this squash code - std::vector slot_remove_list; - - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); + for (int i = 0; i < width; i++) { + ResReqPtr req_ptr = reqs[i]; - while (map_it != map_end) { - ResReqPtr req_ptr = (*map_it).second; - - if (req_ptr && + if (req_ptr->valid && req_ptr->getInst()->readTid() == tid && req_ptr->getInst()->seqNum > squash_seq_num) { @@ -250,23 +242,16 @@ TLBUnit::squash(DynInstPtr inst, int stage_num, tlbBlocked[tid] = false; - int stall_stage = reqMap[req_slot_num]->getStageNum(); + int stall_stage = reqs[req_slot_num]->getStageNum(); - cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid); + cpu->pipelineStage[stall_stage]-> + unsetResStall(reqs[req_slot_num], tid); if (resourceEvent[req_slot_num].scheduled()) unscheduleEvent(req_slot_num); - // Mark slot for removal from resource - slot_remove_list.push_back(req_ptr->getSlot()); + freeSlot(req_slot_num); } - - map_it++; - } - - // Now Delete Slot Entry from Req. Map - for (int i = 0; i < slot_remove_list.size(); i++) { - freeSlot(slot_remove_list[i]); } } diff --git a/src/cpu/inorder/resources/tlb_unit.hh b/src/cpu/inorder/resources/tlb_unit.hh index eb1bf55f0..904ac3eba 100644 --- a/src/cpu/inorder/resources/tlb_unit.hh +++ b/src/cpu/inorder/resources/tlb_unit.hh @@ -99,9 +99,15 @@ class TLBUnitRequest : public ResourceRequest { typedef ThePipeline::DynInstPtr DynInstPtr; public: - TLBUnitRequest(TLBUnit *res, DynInstPtr inst, int stage_num, int res_idx, int slot_num, - unsigned _cmd) - : ResourceRequest(res, inst, stage_num, res_idx, slot_num, _cmd) + TLBUnitRequest(TLBUnit *res) + : ResourceRequest(res), memReq(NULL) + { + } + + RequestPtr memReq; + + void setRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, + unsigned _cmd) { Addr aligned_addr; int req_size; @@ -131,9 +137,10 @@ class TLBUnitRequest : public ResourceRequest { inst->readTid()); memReq = inst->dataMemReq; } + + ResourceRequest::setRequest(inst, stage_num, res_idx, slot_num, _cmd); } - RequestPtr memReq; }; diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index a37e459a8..85bf14500 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -95,7 +95,7 @@ UseDefUnit::init() resourceEvent = new ResourceEvent[width]; for (int i = 0; i < width; i++) { - reqs[i] = new UseDefRequest(this, NULL, 0, 0, 0, 0, 0); + reqs[i] = new UseDefRequest(this); } initSlots(); @@ -105,29 +105,27 @@ ResReqPtr UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) { - return new UseDefRequest(this, inst, stage_num, id, slot_num, cmd, - inst->curSkedEntry->idx); + UseDefRequest *ud_req = dynamic_cast(reqs[slot_num]); + ud_req->setRequest(inst, stage_num, id, slot_num, cmd, + inst->curSkedEntry->idx); + return ud_req; } ResReqPtr UseDefUnit::findRequest(DynInstPtr inst) { - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - UseDefRequest* ud_req = - dynamic_cast((*map_it).second); + for (int i = 0; i < width; i++) { + UseDefRequest* ud_req = + dynamic_cast(reqs[i]); assert(ud_req); - if (ud_req && + if (ud_req->valid && ud_req->getInst() == inst && ud_req->cmd == inst->curSkedEntry->cmd && ud_req->useDefIdx == inst->curSkedEntry->idx) { return ud_req; } - map_it++; } return NULL; @@ -138,7 +136,7 @@ UseDefUnit::execute(int slot_idx) { // After this is working, change this to a reinterpret cast // for performance considerations - UseDefRequest* ud_req = dynamic_cast(reqMap[slot_idx]); + UseDefRequest* ud_req = dynamic_cast(reqs[slot_idx]); assert(ud_req); DynInstPtr inst = ud_req->inst; @@ -421,15 +419,10 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, DPRINTF(InOrderUseDef, "[tid:%i]: Updating Due To Squash After [sn:%i].\n", tid, squash_seq_num); - std::vector slot_remove_list; - - map::iterator map_it = reqMap.begin(); - map::iterator map_end = reqMap.end(); - - while (map_it != map_end) { - ResReqPtr req_ptr = (*map_it).second; + for (int i = 0; i < width; i++) { + ResReqPtr req_ptr = reqs[i]; - if (req_ptr && + if (req_ptr->valid && req_ptr->getInst()->readTid() == tid && req_ptr->getInst()->seqNum > squash_seq_num) { @@ -444,17 +437,9 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, unscheduleEvent(req_slot_num); } - - // Mark slot for removal from resource - slot_remove_list.push_back(req_ptr->getSlot()); - } - map_it++; - } - - // Now Delete Slot Entry from Req. Map - for (int i = 0; i < slot_remove_list.size(); i++) { - freeSlot(slot_remove_list[i]); + freeSlot(req_slot_num); + } } if (outReadSeqNum[tid] >= squash_seq_num) { diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh index 6db8ed987..21770cec6 100644 --- a/src/cpu/inorder/resources/use_def.hh +++ b/src/cpu/inorder/resources/use_def.hh @@ -98,14 +98,20 @@ class UseDefUnit : public Resource { typedef ThePipeline::DynInstPtr DynInstPtr; public: - UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, - int res_idx, int slot_num, unsigned cmd, - int use_def_idx) - : ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd), - useDefIdx(use_def_idx) + UseDefRequest(UseDefUnit *res) + : ResourceRequest(res) { } int useDefIdx; + + void setRequest(DynInstPtr _inst, int stage_num, int res_idx, + int slot_num, unsigned _cmd, int idx) + { + useDefIdx = idx; + + ResourceRequest::setRequest(_inst, stage_num, res_idx, slot_num, + _cmd); + } }; protected: -- cgit v1.2.3 From d5961b2b20b498db28c0598f4344f5cb31be850f Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:28:37 -0500 Subject: inorder: update pipeline interface for handling finished resource reqs formerly, to free up bandwidth in a resource, we could just change the pointer in that resource but at the same time the pipeline stages had visibility to see what happened to a resource request. Now that we are recycling these requests (to avoid too much dynamic allocation), we can't throw away the request too early or the pipeline stage gets bad information. Instead, mark when a request is done with the resource all together and then let the pipeline stage call back to the resource that it's time to free up the bandwidth for more instructions *** inteface notes *** - When an instruction completes and is done in a resource for that cycle, call done() - When an instruction fails and is done with a resource for that cycle, call done(false) - When an instruction completes, but isnt finished with a resource, call completed() - When an instruction fails, but isnt finished with a resource, call completed(false) * * * inorder: tlbmiss wakeup bug fix --- src/cpu/inorder/pipeline_stage.cc | 20 ++++++++++++--- src/cpu/inorder/resource.cc | 38 +++++++++++++++++++++-------- src/cpu/inorder/resource.hh | 4 +++ src/cpu/inorder/resources/cache_unit.cc | 26 ++++++++++++++------ src/cpu/inorder/resources/fetch_seq_unit.cc | 2 +- src/cpu/inorder/resources/fetch_unit.cc | 7 +++--- 6 files changed, 71 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 610b6f264..15f143251 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -940,7 +940,9 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) ResReqPtr req = cpu->resPool->request(res_num, inst); assert(req->valid); - if (req->isCompleted()) { + bool req_completed = req->isCompleted(); + bool done_in_pipeline = false; + if (req_completed) { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s " "completed.\n", tid, inst->seqNum, cpu->resPool->name(res_num)); @@ -949,11 +951,10 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) req->stagePasses++; - bool done_in_pipeline = inst->finishSkedEntry(); + done_in_pipeline = inst->finishSkedEntry(); if (done_in_pipeline) { DPRINTF(InOrderDynInst, "[tid:%i]: [sn:%i] finished " "in pipeline.\n", tid, inst->seqNum); - break; } } else { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." @@ -990,7 +991,18 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) "thread due to cache miss.\n"); cpu->activateNextReadyContext(); } - + } + + // If this request is no longer needs to take up bandwidth in the + // resource, go ahead and free that bandwidth up + if (req->doneInResource) { + req->freeSlot(); + } + + // No longer need to process this instruction if the last + // request it had wasn't completed or if there is nothing + // else for it to do in the pipeline + if (done_in_pipeline || !req_completed) { break; } diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 757c17e6a..076084d16 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -44,6 +44,7 @@ Resource::Resource(string res_name, int res_id, int res_width, // Use to deny a instruction a resource. deniedReq = new ResourceRequest(this); + deniedReq->valid = true; } Resource::~Resource() @@ -97,6 +98,9 @@ Resource::slotsInUse() void Resource::freeSlot(int slot_idx) { + DPRINTF(Resource, "Deallocating [slot:%i].\n", + slot_idx); + // Put slot number on this resource's free list availSlots.push_back(slot_idx); @@ -160,6 +164,9 @@ Resource::request(DynInstPtr inst) slot_num = getSlot(inst); if (slot_num != -1) { + DPRINTF(Resource, "Allocating [slot:%i] for [tid:%i]: [sn:%i]\n", + slot_num, inst->readTid(), inst->seqNum); + // Get Stage # from Schedule Entry stage_num = inst->curSkedEntry->stageNum; unsigned cmd = inst->curSkedEntry->cmd; @@ -177,10 +184,12 @@ Resource::request(DynInstPtr inst) inst->readTid()); } - reqs[slot_num] = inst_req; - try_request = true; + } else { + DPRINTF(Resource, "No slot available for [tid:%i]: [sn:%i]\n", + inst->readTid(), inst->seqNum); } + } if (try_request) { @@ -352,8 +361,9 @@ int ResourceRequest::resReqID = 0; int ResourceRequest::maxReqCount = 0; ResourceRequest::ResourceRequest(Resource *_res) - : res(_res), inst(NULL), stagePasses(0), valid(false), complSlotNum(-1), - completed(false), squashed(false), processing(false), memStall(false) + : res(_res), inst(NULL), stagePasses(0), valid(false), doneInResource(false), + complSlotNum(-1), completed(false), squashed(false), processing(false), + memStall(false) { } @@ -384,6 +394,19 @@ ResourceRequest::clearRequest() valid = false; inst = NULL; stagePasses = 0; + completed = false; + doneInResource = false; + squashed = false; + memStall = false; +} + +void +ResourceRequest::freeSlot() +{ + assert(res); + + // Free Slot So Another Instruction Can Use This Resource + res->freeSlot(slotNum); } void @@ -399,13 +422,8 @@ ResourceRequest::done(bool completed) if (completed) { complSlotNum = slotNum; } - - // Free Slot So Another Instruction Can Use This Resource - res->freeSlot(slotNum); - // change slot # to -1, since we check slotNum to see if request is - // still valid - slotNum = -1; + doneInResource = true; } ResourceEvent::ResourceEvent() diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index c40314c94..fc16df099 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -318,6 +318,8 @@ class ResourceRequest */ void done(bool completed = true); + void freeSlot(); + ///////////////////////////////////////////// // // GET RESOURCE REQUEST IDENTIFICATION / INFO @@ -362,6 +364,8 @@ class ResourceRequest bool valid; + bool doneInResource; + //////////////////////////////////////// // // GET RESOURCE REQUEST STATUS FROM VARIABLES diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 4deb32992..a734f1ebd 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -667,13 +667,16 @@ CacheUnit::execute(int slot_num) CacheReqPtr cache_req = dynamic_cast(reqs[slot_num]); assert(cache_req); - if (cachePortBlocked) { + if (cachePortBlocked && + (cache_req->cmd == InitiateReadData || + cache_req->cmd == InitiateWriteData || + cache_req->cmd == InitSecondSplitRead || + cache_req->cmd == InitSecondSplitWrite)) { DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); - cache_req->setCompleted(false); + cache_req->done(false); return; } - DynInstPtr inst = cache_req->inst; #if TRACING_ON ThreadID tid = inst->readTid(); @@ -690,7 +693,12 @@ CacheUnit::execute(int slot_num) acc_type = "read"; #endif case InitiateWriteData: - + if (cachePortBlocked) { + DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); + cache_req->done(false); + return; + } + DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i] Initiating data %s access to %s for " "addr. %08p\n", tid, inst->seqNum, acc_type, name(), @@ -864,7 +872,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, "[tid:%i] [sn:%i] cannot access cache, because port " "is blocked. now waiting to retry request\n", tid, inst->seqNum); - cache_req->setCompleted(false); + cache_req->done(false); cachePortBlocked = true; } else { DPRINTF(InOrderCachePort, @@ -888,7 +896,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, // Make cache request again since access due to // inability to access DPRINTF(InOrderStall, "STALL: \n"); - cache_req->setCompleted(false); + cache_req->done(false); } } @@ -911,7 +919,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) cache_pkt->cacheReq->getTid(), cache_pkt->cacheReq->seqNum); - cache_pkt->cacheReq->done(); + cache_pkt->cacheReq->freeSlot(); delete cache_pkt; cpu->wakeCPU(); @@ -1075,8 +1083,10 @@ CacheUnitEvent::process() req_ptr->tlbStall = false; if (req_ptr->isSquashed()) { - req_ptr->done(); + req_ptr->freeSlot(); } + + tlb_res->cpu->wakeCPU(); } void diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index 08d162b07..d23ea0a82 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -100,7 +100,7 @@ FetchSeqUnit::execute(int slot_num) fs_req->done(); } else { DPRINTF(InOrderStall, "STALL: [tid:%i]: NPC not valid\n", tid); - fs_req->setCompleted(false); + fs_req->done(false); } } break; diff --git a/src/cpu/inorder/resources/fetch_unit.cc b/src/cpu/inorder/resources/fetch_unit.cc index dd875efdb..40f2f4ee4 100644 --- a/src/cpu/inorder/resources/fetch_unit.cc +++ b/src/cpu/inorder/resources/fetch_unit.cc @@ -208,9 +208,9 @@ FetchUnit::execute(int slot_num) CacheReqPtr cache_req = dynamic_cast(reqs[slot_num]); assert(cache_req); - if (cachePortBlocked) { + if (cachePortBlocked && cache_req->cmd == InitiateFetch) { DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); - cache_req->setCompleted(false); + cache_req->done(false); return; } @@ -261,7 +261,7 @@ FetchUnit::execute(int slot_num) // If not, block this request. if (pendingFetch.size() >= fetchBuffSize) { DPRINTF(InOrderCachePort, "No room available in fetch buffer.\n"); - cache_req->setCompleted(false); + cache_req->done(); return; } @@ -405,6 +405,7 @@ FetchUnit::processCacheCompletion(PacketPtr pkt) cache_pkt->cacheReq->seqNum); cache_pkt->cacheReq->done(); + cache_pkt->cacheReq->freeSlot(); delete cache_pkt; cpu->wakeCPU(); -- cgit v1.2.3 From 72b5233112a41cb879ca63866f9f0ecf8638dbfb Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:29:02 -0500 Subject: inorder: remove events for zero-cycle resources if a resource has a zero cycle latency (e.g. RegFile write), then dont allocate an event for it to use --- src/cpu/inorder/resource.cc | 20 ++++++++++++++++---- src/cpu/inorder/resources/use_def.cc | 6 +++++- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 076084d16..c371f8244 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -49,7 +49,10 @@ Resource::Resource(string res_name, int res_id, int res_width, Resource::~Resource() { - delete [] resourceEvent; + if (resourceEvent) { + delete [] resourceEvent; + } + delete deniedReq; } @@ -57,8 +60,14 @@ Resource::~Resource() void Resource::init() { - // Set Up Resource Events to Appropriate Resource BandWidth - resourceEvent = new ResourceEvent[width]; + // If the resource has a zero-cycle (no latency) + // function, then no reason to have events + // that will process them for the right tick + if (latency > 0) { + resourceEvent = new ResourceEvent[width]; + } else { + resourceEvent = NULL; + } for (int i = 0; i < width; i++) { reqs[i] = new ResourceRequest(this); @@ -73,7 +82,10 @@ Resource::initSlots() // Add available slot numbers for resource for (int slot_idx = 0; slot_idx < width; slot_idx++) { availSlots.push_back(slot_idx); - resourceEvent[slot_idx].init(this, slot_idx); + + if (resourceEvent) { + resourceEvent[slot_idx].init(this, slot_idx); + } } } diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index 85bf14500..19246a30b 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -92,7 +92,11 @@ void UseDefUnit::init() { // Set Up Resource Events to Appropriate Resource BandWidth - resourceEvent = new ResourceEvent[width]; + if (latency > 0) { + resourceEvent = new ResourceEvent[width]; + } else { + resourceEvent = NULL; + } for (int i = 0; i < width; i++) { reqs[i] = new UseDefRequest(this); -- cgit v1.2.3 From 8b4b4a1ba50a6f422ab75ccf0fb09568f1805ce6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:29:17 -0500 Subject: inorder: fix cache/fetch unit memory leaks --- need to delete the cache request's data on clearRequest() now that we are recycling requests --- fetch unit needs to deallocate the fetch buffer blocks when they are replaced or squashed. --- src/cpu/inorder/resources/cache_unit.cc | 2 -- src/cpu/inorder/resources/cache_unit.hh | 4 ++++ src/cpu/inorder/resources/fetch_unit.cc | 31 +++++++++++++++++++++++++++++++ src/cpu/inorder/resources/fetch_unit.hh | 2 ++ 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index a734f1ebd..b17e5b3da 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -648,8 +648,6 @@ CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size, if (inst->fault == NoFault) { if (!cache_req->splitAccess) { - // Remove this line since storeData is saved in INST? - cache_req->reqData = new uint8_t[size]; doCacheAccess(inst, write_res); } else { doCacheAccess(inst, write_res, cache_req); diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index b5effb2c3..097b6fa7a 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -249,6 +249,10 @@ class CacheRequest : public ResourceRequest void clearRequest() { + if (reqData && !splitAccess) { + delete [] reqData; + } + memReq = NULL; reqData = NULL; dataPkt = NULL; diff --git a/src/cpu/inorder/resources/fetch_unit.cc b/src/cpu/inorder/resources/fetch_unit.cc index 40f2f4ee4..a0d830ecf 100644 --- a/src/cpu/inorder/resources/fetch_unit.cc +++ b/src/cpu/inorder/resources/fetch_unit.cc @@ -56,6 +56,31 @@ FetchUnit::FetchUnit(string res_name, int res_id, int res_width, predecoder(NULL) { } +FetchUnit::~FetchUnit() +{ + std::list::iterator fetch_it = fetchBuffer.begin(); + std::list::iterator end_it = fetchBuffer.end(); + while (fetch_it != end_it) { + delete (*fetch_it)->block; + delete *fetch_it; + fetch_it++; + } + fetchBuffer.clear(); + + + std::list::iterator pend_it = pendingFetch.begin(); + std::list::iterator pend_end = pendingFetch.end(); + while (pend_it != pend_end) { + if ((*pend_it)->block) { + delete (*pend_it)->block; + } + + delete *pend_it; + pend_it++; + } + pendingFetch.clear(); +} + void FetchUnit::createMachInst(std::list::iterator fetch_it, DynInstPtr inst) @@ -328,6 +353,8 @@ FetchUnit::execute(int slot_num) return; } + delete [] (*repl_it)->block; + delete *repl_it; fetchBuffer.erase(repl_it); } @@ -506,6 +533,10 @@ FetchUnit::squashCacheRequest(CacheReqPtr req_ptr) DPRINTF(InOrderCachePort, "[sn:%i] Removing Pending Fetch " "for block %08p (cnt=%i)\n", inst->seqNum, block_addr, (*block_it)->cnt); + if ((*block_it)->block) { + delete [] (*block_it)->block; + } + delete *block_it; pendingFetch.erase(block_it); } } diff --git a/src/cpu/inorder/resources/fetch_unit.hh b/src/cpu/inorder/resources/fetch_unit.hh index 035f3f4a1..fa133b9eb 100644 --- a/src/cpu/inorder/resources/fetch_unit.hh +++ b/src/cpu/inorder/resources/fetch_unit.hh @@ -55,6 +55,8 @@ class FetchUnit : public CacheUnit FetchUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); + virtual ~FetchUnit(); + typedef ThePipeline::DynInstPtr DynInstPtr; typedef TheISA::ExtMachInst ExtMachInst; -- cgit v1.2.3 From 91c48b1c3ba6e46324b96fb76762a4d973ce6007 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:29:26 -0500 Subject: inorder: cleanup in destructors cleanup hanging pointers and other cruft in the destructors --- src/cpu/inorder/cpu.cc | 13 ++++++++++++- src/cpu/inorder/cpu.hh | 1 + src/cpu/inorder/pipeline_stage.cc | 20 ++++++++++++++------ src/cpu/inorder/pipeline_stage.hh | 15 +-------------- src/cpu/inorder/reg_dep_map.cc | 8 ++++++++ src/cpu/inorder/reg_dep_map.hh | 2 +- src/cpu/inorder/resource.cc | 7 ++++++- 7 files changed, 43 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 196682621..4130b9e14 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -356,6 +356,17 @@ InOrderCPU::InOrderCPU(Params *params) InOrderCPU::~InOrderCPU() { delete resPool; + + std::map::iterator sked_it = + skedCache.begin(); + std::map::iterator sked_end = + skedCache.end(); + + while (sked_it != sked_end) { + delete (*sked_it).second; + sked_it++; + } + skedCache.clear(); } std::map InOrderCPU::skedCache; @@ -460,7 +471,7 @@ InOrderCPU::createBackEndSked(DynInstPtr inst) W.needs(Grad, GraduationUnit::GraduateInst); - // Insert Front Schedule into our cache of + // Insert Back Schedule into our cache of // resource schedules addToSkedCache(inst, res_sked); diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 073a0a982..2fa6bdc59 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -315,6 +315,7 @@ class InOrderCPU : public BaseCPU void addToSkedCache(DynInstPtr inst, ThePipeline::RSkedPtr inst_sked) { SkedID sked_id = genSkedID(inst); + assert(skedCache.find(sked_id) == skedCache.end()); skedCache[sked_id] = inst_sked; } diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 15f143251..b267ac00e 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -44,12 +44,17 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num) stageBufferMax(params->stageWidth), prevStageValid(false), nextStageValid(false), idle(false) { - switchedOutBuffer.resize(ThePipeline::MaxThreads); - switchedOutValid.resize(ThePipeline::MaxThreads); - init(params); } +PipelineStage::~PipelineStage() +{ + for(ThreadID tid = 0; tid < numThreads; tid++) { + skidBuffer[tid].clear(); + stalls[tid].resources.clear(); + } +} + void PipelineStage::init(Params *params) { @@ -66,6 +71,12 @@ PipelineStage::init(Params *params) else lastStallingStage[tid] = NumStages - 1; } + + if ((InOrderCPU::ThreadModel) params->threadModel == + InOrderCPU::SwitchOnCacheMiss) { + switchedOutBuffer.resize(ThePipeline::MaxThreads); + switchedOutValid.resize(ThePipeline::MaxThreads); + } } @@ -190,9 +201,6 @@ PipelineStage::takeOverFrom() stalls[tid].resources.clear(); - while (!insts[tid].empty()) - insts[tid].pop(); - skidBuffer[tid].clear(); } wroteToTimeBuffer = false; diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index dfa88de87..ec70fefc5 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -91,10 +91,7 @@ class PipelineStage public: PipelineStage(Params *params, unsigned stage_num); - /** MUST use init() function if this constructor is used. */ - PipelineStage() { } - - virtual ~PipelineStage() { } + virtual ~PipelineStage(); /** PipelineStage initialization. */ void init(Params *params); @@ -268,16 +265,6 @@ class PipelineStage */ unsigned instsProcessed; - /** Queue of all instructions coming from previous stage on this cycle. */ - std::queue insts[ThePipeline::MaxThreads]; - - /** Queue of instructions that are finished processing and ready to go - * next stage. This is used to prevent from processing an instrution more - * than once on any stage. NOTE: It is up to the PROGRAMMER must manage - * this as a queue - */ - std::list instsToNextStage; - /** Skid buffer between previous stage and this one. */ std::list skidBuffer[ThePipeline::MaxThreads]; diff --git a/src/cpu/inorder/reg_dep_map.cc b/src/cpu/inorder/reg_dep_map.cc index 98a0727a9..48820b50e 100644 --- a/src/cpu/inorder/reg_dep_map.cc +++ b/src/cpu/inorder/reg_dep_map.cc @@ -45,6 +45,14 @@ RegDepMap::RegDepMap(int size) regMap.resize(size); } +RegDepMap::~RegDepMap() +{ + for (int i = 0; i < regMap.size(); i++) { + regMap[i].clear(); + } + regMap.clear(); +} + string RegDepMap::name() { diff --git a/src/cpu/inorder/reg_dep_map.hh b/src/cpu/inorder/reg_dep_map.hh index fa4fe45f3..047e4d129 100644 --- a/src/cpu/inorder/reg_dep_map.hh +++ b/src/cpu/inorder/reg_dep_map.hh @@ -48,7 +48,7 @@ class RegDepMap public: RegDepMap(int size = TheISA::TotalNumRegs); - ~RegDepMap() { } + ~RegDepMap(); std::string name(); diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index c371f8244..6a2f5e62a 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -53,7 +53,11 @@ Resource::~Resource() delete [] resourceEvent; } - delete deniedReq; + delete deniedReq; + + for (int i = 0; i < width; i++) { + delete reqs[i]; + } } @@ -386,6 +390,7 @@ ResourceRequest::~ResourceRequest() DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID, res->cpu->resReqCount); #endif + inst = NULL; } void -- cgit v1.2.3 From 37df92595328d7d1443c1670cd72d4eae3b78bf7 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:29:31 -0500 Subject: inorder: update max. resource bandwidths each resource has a certain # of requests it can take per cycle. update the #s here to be more realistic based off of the pipeline width and if the resource needs to be accessed on multiple cycles --- src/cpu/inorder/resource_pool.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index e1914623a..4e2f930ab 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -55,7 +55,7 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) memObjects.push_back(ICache); resources.push_back(new FetchUnit("icache_port", ICache, - stage_width * MaxThreads, 0, _cpu, + stage_width * 2 + MaxThreads, 0, _cpu, params)); resources.push_back(new DecodeUnit("Decode-Unit", Decode, @@ -68,7 +68,7 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) 0, _cpu, params)); resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, - stage_width * MaxThreads, 0, _cpu, + stage_width * 3, 0, _cpu, params)); resources.push_back(new AGENUnit("AGEN-Unit", AGEN, @@ -77,16 +77,16 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, stage_width, 0, _cpu, params)); - resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, - params)); + resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, + stage_width * 2, 0, _cpu, params)); memObjects.push_back(DCache); resources.push_back(new CacheUnit("dcache_port", DCache, - stage_width * MaxThreads, 0, _cpu, + stage_width * 2 + MaxThreads, 0, _cpu, params)); resources.push_back(new GraduationUnit("Graduation-Unit", Grad, - stage_width * MaxThreads, 0, _cpu, + stage_width, 0, _cpu, params)); resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, -- cgit v1.2.3 From a278df0b95daacb788b8442c7331f7a7f7c8aab6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:29:40 -0500 Subject: inorder: don't overuse getLatency() resources don't need to call getLatency because the latency is already a member in the class. If there is some type of special case where different instructions impose a different latency inside a resource then we can revisit this and add getLatency() back in --- src/cpu/inorder/resource.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 6a2f5e62a..701785658 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -325,10 +325,8 @@ Resource::ticks(int num_cycles) void Resource::scheduleExecution(int slot_num) { - int res_latency = getLatency(slot_num); - - if (res_latency >= 1) { - scheduleEvent(slot_num, res_latency); + if (latency >= 1) { + scheduleEvent(slot_num, latency); } else { execute(slot_num); } -- cgit v1.2.3 From bbffd9419dca7e52423aa9def277b619c3523b72 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:29:44 -0500 Subject: inorder: update default thread size(=1) a lot of structures get allocated based off that MaxThreads parameter so this is an effort to not abuse it --- src/cpu/inorder/pipeline_traits.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index 75f01adb1..573c0200a 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -51,7 +51,7 @@ class ResourceSked; namespace ThePipeline { // Pipeline Constants const unsigned NumStages = 5; - const ThreadID MaxThreads = 8; + const ThreadID MaxThreads = 1; const unsigned BackEndStartStage = 2; // List of Resources The Pipeline Uses -- cgit v1.2.3 From 89335118a5b50d44e653d9763908a427e8687f46 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:29:48 -0500 Subject: inorder: recognize isSerializeAfter flag keep track of when an instruction needs the execution behind it to be serialized. Without this, in SE Mode instructions can execute behind a system call exit(). --- src/cpu/inorder/resources/execution_unit.cc | 34 +++++++++++++++++++++-------- src/cpu/inorder/resources/execution_unit.hh | 1 + 2 files changed, 26 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index f715d6071..e4d0c20b9 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -42,7 +42,7 @@ ExecutionUnit::ExecutionUnit(string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - lastExecuteTick(0), lastControlTick(0) + lastExecuteTick(0), lastControlTick(0), serializeTick(0) { } void @@ -86,23 +86,39 @@ ExecutionUnit::execute(int slot_num) DynInstPtr inst = reqs[slot_num]->inst; Fault fault = NoFault; int seq_num = inst->seqNum; + Tick cur_tick = curTick(); + + if (cur_tick == serializeTick) { + DPRINTF(InOrderExecute, "Can not execute [tid:%i][sn:%i][PC:%s] %s. " + "All instructions are being serialized this cycle\n", + inst->readTid(), seq_num, inst->pcState(), inst->instName()); + exec_req->done(false); + return; + } - DPRINTF(InOrderExecute, "[tid:%i] Executing [sn:%i] [PC:%s] %s.\n", - inst->readTid(), seq_num, inst->pcState(), inst->instName()); switch (exec_req->cmd) { case ExecuteInst: { - if (curTick() != lastExecuteTick) { - lastExecuteTick = curTick(); + DPRINTF(InOrderExecute, "[tid:%i] Executing [sn:%i] [PC:%s] %s.\n", + inst->readTid(), seq_num, inst->pcState(), inst->instName()); + + if (cur_tick != lastExecuteTick) { + lastExecuteTick = cur_tick; } + assert(!inst->isMemRef()); + + if (inst->isSerializeAfter()) { + serializeTick = cur_tick; + DPRINTF(InOrderExecute, "Serializing execution after [tid:%i] " + "[sn:%i] [PC:%s] %s.\n", inst->readTid(), seq_num, + inst->pcState(), inst->instName()); + } - if (inst->isMemRef()) { - panic("%s not configured to handle memory ops.\n", resName); - } else if (inst->isControl()) { - if (lastControlTick == curTick()) { + if (inst->isControl()) { + if (lastControlTick == cur_tick) { DPRINTF(InOrderExecute, "Can not Execute More than One Control " "Inst Per Cycle. Blocking Request.\n"); exec_req->done(false); diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh index a6694ddb5..b03a6655e 100644 --- a/src/cpu/inorder/resources/execution_unit.hh +++ b/src/cpu/inorder/resources/execution_unit.hh @@ -76,6 +76,7 @@ class ExecutionUnit : public Resource { Stats::Scalar executions; Tick lastExecuteTick; Tick lastControlTick; + Tick serializeTick; }; -- cgit v1.2.3 From 0fe19836c7101fea012148a4852b9619087e42d6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:30:05 -0500 Subject: inorder: update graduation unit make sure instructions are able to commit before writing back to the RF do not commit more than 1 non-speculative instruction per cycle --- src/cpu/inorder/cpu.cc | 4 ++-- src/cpu/inorder/resources/graduation_unit.cc | 17 ++++++++++------- src/cpu/inorder/resources/graduation_unit.hh | 4 +--- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 4130b9e14..0ec4c9861 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -465,12 +465,12 @@ InOrderCPU::createBackEndSked(DynInstPtr inst) W.needs(ExecUnit, ExecutionUnit::ExecuteInst); } + W.needs(Grad, GraduationUnit::GraduateInst); + for (int idx=0; idx < inst->numDestRegs(); idx++) { W.needs(RegManager, UseDefUnit::WriteDestReg, idx); } - W.needs(Grad, GraduationUnit::GraduateInst); - // Insert Back Schedule into our cache of // resource schedules addToSkedCache(inst, res_sked); diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc index 68adcbd1d..edc2fb3ff 100644 --- a/src/cpu/inorder/resources/graduation_unit.cc +++ b/src/cpu/inorder/resources/graduation_unit.cc @@ -37,8 +37,7 @@ GraduationUnit::GraduationUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - lastCycleGrad(0), numCycleGrad(0) - + lastNonSpecTick(0) { for (ThreadID tid = 0; tid < ThePipeline::MaxThreads; tid++) { nonSpecInstActive[tid] = &cpu->nonSpecInstActive[tid]; @@ -58,15 +57,18 @@ GraduationUnit::execute(int slot_num) { case GraduateInst: { - // Make sure this is the last thing on the resource schedule - // @todo: replace this check - // assert(inst->resSched.size() == 1); + if (lastNonSpecTick == curTick()) { + DPRINTF(InOrderGraduation, "Unable to graduate [sn:%i]. " + "Only 1 nonspec inst. per cycle can graduate.\n"); + grad_req->done(false); + return; + } - // Handle Any Faults Before Graduating Instruction + // Handle Any Faults Before Graduating Instruction if (inst->fault != NoFault) { cpu->trap(inst->fault, tid, inst); grad_req->setCompleted(false); - return; + return; } DPRINTF(InOrderGraduation, @@ -81,6 +83,7 @@ GraduationUnit::execute(int slot_num) DPRINTF(InOrderGraduation, "[tid:%i] Non-speculative inst [sn:%i] graduated\n", tid, inst->seqNum); + lastNonSpecTick = curTick(); } if (inst->traceData) { diff --git a/src/cpu/inorder/resources/graduation_unit.hh b/src/cpu/inorder/resources/graduation_unit.hh index aae41993f..59631bfcb 100644 --- a/src/cpu/inorder/resources/graduation_unit.hh +++ b/src/cpu/inorder/resources/graduation_unit.hh @@ -57,9 +57,7 @@ class GraduationUnit : public Resource { void execute(int slot_num); protected: - Tick lastCycleGrad; - int numCycleGrad; - + Tick lastNonSpecTick; bool *nonSpecInstActive[ThePipeline::MaxThreads]; InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads]; -- cgit v1.2.3 From 64d31e75b960f08de57cf78f8bdc793d1135c5cd Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:30:38 -0500 Subject: inorder: ignore nops in execution unit --- src/cpu/inorder/resources/execution_unit.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index e4d0c20b9..b2540cff8 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -101,8 +101,17 @@ ExecutionUnit::execute(int slot_num) { case ExecuteInst: { - DPRINTF(InOrderExecute, "[tid:%i] Executing [sn:%i] [PC:%s] %s.\n", - inst->readTid(), seq_num, inst->pcState(), inst->instName()); + if (inst->isNop()) { + DPRINTF(InOrderExecute, "[tid:%i] [sn:%i] [PC:%s] Ignoring execution" + "of %s.\n", inst->readTid(), seq_num, inst->pcState(), + inst->instName()); + inst->setExecuted(); + exec_req->done(); + return; + } else { + DPRINTF(InOrderExecute, "[tid:%i] Executing [sn:%i] [PC:%s] %s.\n", + inst->readTid(), seq_num, inst->pcState(), inst->instName()); + } if (cur_tick != lastExecuteTick) { lastExecuteTick = cur_tick; -- cgit v1.2.3 From bc16bbc158613b7eaebe7d2021a6a0503c4a0635 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 18 Feb 2011 14:31:31 -0500 Subject: inorder: add names and slot #s to res. dprints --- src/cpu/inorder/resource.cc | 18 +++++++++++------- src/cpu/inorder/resource.hh | 6 +++--- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 701785658..24211532e 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -31,6 +31,8 @@ #include #include + +#include "base/str.hh" #include "cpu/inorder/resource.hh" #include "cpu/inorder/cpu.hh" using namespace std; @@ -376,7 +378,7 @@ int ResourceRequest::maxReqCount = 0; ResourceRequest::ResourceRequest(Resource *_res) : res(_res), inst(NULL), stagePasses(0), valid(false), doneInResource(false), - complSlotNum(-1), completed(false), squashed(false), processing(false), + completed(false), squashed(false), processing(false), memStall(false) { } @@ -391,6 +393,12 @@ ResourceRequest::~ResourceRequest() inst = NULL; } +std::string +ResourceRequest::name() +{ + return res->name() + "." + to_string(slotNum); +} + void ResourceRequest::setRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned _cmd) @@ -433,11 +441,6 @@ ResourceRequest::done(bool completed) setCompleted(completed); - // Used for debugging purposes - if (completed) { - complSlotNum = slotNum; - } - doneInResource = true; } @@ -466,7 +469,8 @@ ResourceEvent::process() const char * ResourceEvent::description() { - string desc = resource->name() + " event"; + string desc = resource->name() + "-event:slot[" + to_string(slotIdx) + + "]"; return desc.c_str(); } diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index fc16df099..7899a215f 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -247,7 +247,7 @@ class Resource { class ResourceEvent : public Event { public: - /** Pointer to the CPU. */ + /** Pointer to the Resource this is an event for */ Resource *resource; @@ -305,6 +305,8 @@ class ResourceRequest ResourceRequest(Resource *_res); virtual ~ResourceRequest(); + + std::string name(); int reqID; @@ -330,7 +332,6 @@ class ResourceRequest /** Get Slot Number */ int getSlot() { return slotNum; } - int getComplSlot() { return complSlotNum; } bool hasSlot() { return slotNum >= 0; } /** Get Stage Number */ @@ -393,7 +394,6 @@ class ResourceRequest int stageNum; int resIdx; int slotNum; - int complSlotNum; /** Resource Request Status */ bool completed; -- cgit v1.2.3 From 293ccb703738093db84ae7ec78434433560fe00b Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Sat, 19 Feb 2011 17:32:00 -0600 Subject: Ruby: clean MOESI CMP directory protocol The L1 cache controller file contains references to foo and goo queues, which are not in use at all. These have been removed. --- src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'src') diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index 7f0ab62a8..bb5b35dc1 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -44,7 +44,6 @@ machine(L1Cache, "Directory protocol") // From this node's L1 cache TO the network // a local L1 -> this L2 bank, currently ordered with directory forwarded requests MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false"; - MessageBuffer foo, network="To", virtual_network="1", ordered="false"; // a local L1 -> this L2 bank MessageBuffer responseFromL1Cache, network="To", virtual_network="2", ordered="false"; // MessageBuffer writebackFromL1Cache, network="To", virtual_network="3", ordered="false"; @@ -53,7 +52,6 @@ machine(L1Cache, "Directory protocol") // To this node's L1 cache FROM the network // a L2 bank -> this L1 MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false"; - MessageBuffer goo, network="From", virtual_network="1", ordered="false"; // a L2 bank -> this L1 MessageBuffer responseToL1Cache, network="From", virtual_network="2", ordered="false"; @@ -229,7 +227,6 @@ machine(L1Cache, "Directory protocol") out_port(requestNetwork_out, RequestMsg, requestFromL1Cache); out_port(responseNetwork_out, ResponseMsg, responseFromL1Cache); out_port(triggerQueue_out, TriggerMsg, triggerQueue); - out_port(foo_out, ResponseMsg, foo); // ** IN_PORTS ** @@ -242,15 +239,6 @@ machine(L1Cache, "Directory protocol") } } - - in_port(goo_in, RequestMsg, goo) { - if (goo_in.isReady()) { - peek(goo_in, RequestMsg) { - assert(false); - } - } - } - // Trigger Queue in_port(triggerQueue_in, TriggerMsg, triggerQueue) { if (triggerQueue_in.isReady()) { -- cgit v1.2.3 From 77eed184f529c4ccbef59ad2018d18ff3fbb54af Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Sat, 19 Feb 2011 17:32:43 -0600 Subject: Ruby: Machine Type missing in MOESI CMP directory protocol In certain actions of the L1 cache controller, while creating an outgoing message, the machine type was not being set. This results in a segmentation fault when trace is collected. Joseph Pusudesris provided his patch for fixing this issue. --- src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm index bb5b35dc1..e590c952a 100644 --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -399,6 +399,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; @@ -443,6 +444,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceRequestType:PUTO; out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; @@ -455,6 +457,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceRequestType:PUTS; out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; @@ -469,6 +472,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; @@ -484,6 +488,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := cache_entry.DataBlk; // out_msg.Dirty := cache_entry.Dirty; @@ -502,6 +507,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; @@ -580,6 +586,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:UNBLOCK; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; @@ -678,6 +685,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := tbe.DataBlk; // out_msg.Dirty := tbe.Dirty; @@ -691,6 +699,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.DataBlk := tbe.DataBlk; @@ -711,6 +720,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(in_msg.Requestor); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; @@ -723,6 +733,7 @@ machine(L1Cache, "Directory protocol") out_msg.Address := address; out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; out_msg.Sender := machineID; + out_msg.SenderMachine := MachineType:L1Cache; out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits)); out_msg.DataBlk := tbe.DataBlk; -- cgit v1.2.3