From e2507407b17188dca802082434cfe0230d9bfa61 Mon Sep 17 00:00:00 2001 From: Giacomo Gabrielli Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: O3: Enhance data address translation by supporting hardware page table walkers. Some ISAs (like ARM) relies on hardware page table walkers. For those ISAs, when a TLB miss occurs, initiateTranslation() can return with NoFault but with the translation unfinished. Instructions experiencing a delayed translation due to a hardware page table walk are deferred until the translation completes and kept into the IQ. In order to keep track of them, the IQ has been augmented with a queue of the outstanding delayed memory instructions. When their translation completes, instructions are re-executed (only their initiateAccess() was already executed; their DTB translation is now skipped). The IEW stage has been modified to support such a 2-pass execution. --- src/cpu/o3/fetch.hh | 4 ++++ src/cpu/o3/iew_impl.hh | 21 +++++++++++++++++ src/cpu/o3/inst_queue.hh | 28 +++++++++++++++++++++++ src/cpu/o3/inst_queue_impl.hh | 53 +++++++++++++++++++++++++++++++++++++++++-- src/cpu/o3/lsq_unit_impl.hh | 10 +++++++- 5 files changed, 113 insertions(+), 3 deletions(-) (limited to 'src/cpu/o3') diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 92691720b..647c48a76 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -136,6 +136,10 @@ class DefaultFetch : fetch(_fetch) {} + void + markDelayed() + {} + void finish(Fault fault, RequestPtr req, ThreadContext *tc, BaseTLB::Mode mode) diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 3f3761ff3..03f73c798 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1241,12 +1241,33 @@ DefaultIEW::executeInsts() // Loads will mark themselves as executed, and their writeback // event adds the instruction to the queue to commit fault = ldstQueue.executeLoad(inst); + + if (inst->isTranslationDelayed() && + fault == NoFault) { + // A hw page table walk is currently going on; the + // instruction must be deferred. + DPRINTF(IEW, "Execute: Delayed translation, deferring " + "load.\n"); + instQueue.deferMemInst(inst); + continue; + } + if (inst->isDataPrefetch() || inst->isInstPrefetch()) { fault = NoFault; } } else if (inst->isStore()) { fault = ldstQueue.executeStore(inst); + if (inst->isTranslationDelayed() && + fault == NoFault) { + // A hw page table walk is currently going on; the + // instruction must be deferred. + DPRINTF(IEW, "Execute: Delayed translation, deferring " + "store.\n"); + instQueue.deferMemInst(inst); + continue; + } + // If the store had a fault then it may not have a mem req if (fault != NoFault || inst->readPredicate() == false || !inst->isStoreConditional()) { diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index be936e204..64df35743 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -180,6 +192,11 @@ class InstructionQueue */ DynInstPtr getInstToExecute(); + /** Returns a memory instruction that was referred due to a delayed DTB + * translation if it is now ready to execute. + */ + DynInstPtr getDeferredMemInstToExecute(); + /** * Records the instruction as the producer of a register without * adding it to the rest of the IQ. @@ -223,6 +240,12 @@ class InstructionQueue /** Completes a memory operation. */ void completeMemInst(DynInstPtr &completed_inst); + /** + * Defers a memory instruction when its DTB translation incurs a hw + * page table walk. + */ + void deferMemInst(DynInstPtr &deferred_inst); + /** Indicates an ordering violation between a store and a load. */ void violation(DynInstPtr &store, DynInstPtr &faulting_load); @@ -284,6 +307,11 @@ class InstructionQueue /** List of instructions that are ready to be executed. */ std::list instsToExecute; + /** List of instructions waiting for their DTB translation to + * complete (hw page table walk in progress). + */ + std::list deferredMemInsts; + /** * Struct for comparing entries to be added to the priority queue. * This gives reverse ordering to the instructions in terms of diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 91cb2f0c8..d6da4b818 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -397,6 +409,7 @@ InstructionQueue::resetState() } nonSpecInsts.clear(); listOrder.clear(); + deferredMemInsts.clear(); } template @@ -733,6 +746,15 @@ InstructionQueue::scheduleReadyInsts() IssueStruct *i2e_info = issueToExecuteQueue->access(0); + DynInstPtr deferred_mem_inst; + int total_deferred_mem_issued = 0; + while (total_deferred_mem_issued < totalWidth && + (deferred_mem_inst = getDeferredMemInstToExecute()) != NULL) { + issueToExecuteQueue->access(0)->size++; + instsToExecute.push_back(deferred_mem_inst); + total_deferred_mem_issued++; + } + // Have iterator to head of the list // While I haven't exceeded bandwidth or reached the end of the list, // Try to get a FU that can do what this op needs. @@ -745,7 +767,7 @@ InstructionQueue::scheduleReadyInsts() ListOrderIt order_end_it = listOrder.end(); int total_issued = 0; - while (total_issued < totalWidth && + while (total_issued < (totalWidth - total_deferred_mem_issued) && iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -858,7 +880,7 @@ InstructionQueue::scheduleReadyInsts() iqInstsIssued+= total_issued; // If we issued any instructions, tell the CPU we had activity. - if (total_issued) { + if (total_issued || total_deferred_mem_issued) { cpu->activityThisCycle(); } else { DPRINTF(IQ, "Not able to schedule any instructions.\n"); @@ -1021,6 +1043,11 @@ void InstructionQueue::rescheduleMemInst(DynInstPtr &resched_inst) { DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum); + + // Reset DTB translation state + resched_inst->translationStarted = false; + resched_inst->translationCompleted = false; + resched_inst->clearCanIssue(); memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); } @@ -1049,6 +1076,28 @@ InstructionQueue::completeMemInst(DynInstPtr &completed_inst) count[tid]--; } +template +void +InstructionQueue::deferMemInst(DynInstPtr &deferred_inst) +{ + deferredMemInsts.push_back(deferred_inst); +} + +template +typename Impl::DynInstPtr +InstructionQueue::getDeferredMemInstToExecute() +{ + for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end(); + ++it) { + if ((*it)->translationCompleted) { + DynInstPtr ret = *it; + deferredMemInsts.erase(it); + return ret; + } + } + return NULL; +} + template void InstructionQueue::violation(DynInstPtr &store, diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index dd3604ffe..b5d337935 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -445,12 +445,16 @@ LSQUnit::executeLoad(DynInstPtr &inst) Fault load_fault = NoFault; DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", - inst->pcState(),inst->seqNum); + inst->pcState(), inst->seqNum); assert(!inst->isSquashed()); load_fault = inst->initiateAcc(); + if (inst->isTranslationDelayed() && + load_fault == NoFault) + return load_fault; + // If the instruction faulted or predicated false, then we need to send it // along to commit without the instruction completing. if (load_fault != NoFault || inst->readPredicate() == false) { @@ -532,6 +536,10 @@ LSQUnit::executeStore(DynInstPtr &store_inst) Fault store_fault = store_inst->initiateAcc(); + if (store_inst->isTranslationDelayed() && + store_fault == NoFault) + return store_fault; + if (store_inst->readPredicate() == false) store_inst->forwardOldRegs(); -- cgit v1.2.3