From e2507407b17188dca802082434cfe0230d9bfa61 Mon Sep 17 00:00:00 2001 From: Giacomo Gabrielli Date: Fri, 11 Feb 2011 18:29:35 -0600 Subject: O3: Enhance data address translation by supporting hardware page table walkers. Some ISAs (like ARM) relies on hardware page table walkers. For those ISAs, when a TLB miss occurs, initiateTranslation() can return with NoFault but with the translation unfinished. Instructions experiencing a delayed translation due to a hardware page table walk are deferred until the translation completes and kept into the IQ. In order to keep track of them, the IQ has been augmented with a queue of the outstanding delayed memory instructions. When their translation completes, instructions are re-executed (only their initiateAccess() was already executed; their DTB translation is now skipped). The IEW stage has been modified to support such a 2-pass execution. --- src/cpu/base_dyn_inst.hh | 125 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 96 insertions(+), 29 deletions(-) (limited to 'src/cpu/base_dyn_inst.hh') diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 0c566ec65..8b6662d70 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * Copyright (c) 2009 The University of Edinburgh * All rights reserved. @@ -150,6 +162,29 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Finish a DTB address translation. */ void finishTranslation(WholeTranslationState *state); + /** True if the DTB address translation has started. */ + bool translationStarted; + + /** True if the DTB address translation has completed. */ + bool translationCompleted; + + /** + * Returns true if the DTB address translation is being delayed due to a hw + * page table walk. + */ + bool isTranslationDelayed() const + { + return (translationStarted && !translationCompleted); + } + + /** + * Saved memory requests (needed when the DTB address translation is + * delayed due to a hw page table walk). + */ + RequestPtr savedReq; + RequestPtr savedSreqLow; + RequestPtr savedSreqHigh; + /** @todo: Consider making this private. */ public: /** The sequence number of the instruction. */ @@ -835,33 +870,42 @@ BaseDynInst::readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags) { reqMade = true; - Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(), - thread->contextId(), threadNumber); - + Request *req = NULL; Request *sreqLow = NULL; Request *sreqHigh = NULL; - // Only split the request if the ISA supports unaligned accesses. - if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); - } - initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); - - if (fault == NoFault) { - effAddr = req->getVaddr(); - effAddrValid = true; - fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); + if (reqMade && translationStarted) { + req = savedReq; + sreqLow = savedSreqLow; + sreqHigh = savedSreqHigh; } else { - // Commit will have to clean up whatever happened. Set this - // instruction as executed. - this->setExecuted(); + req = new Request(asid, addr, size, flags, this->pc.instAddr(), + thread->contextId(), threadNumber); + + // Only split the request if the ISA supports unaligned accesses. + if (TheISA::HasUnalignedMemAcc) { + splitRequest(req, sreqLow, sreqHigh); + } + initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); } - if (fault != NoFault) { - // Return a fixed value to keep simulation deterministic even - // along misspeculated paths. - if (data) - bzero(data, size); + if (translationCompleted) { + if (fault == NoFault) { + effAddr = req->getVaddr(); + effAddrValid = true; + fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); + } else { + // Commit will have to clean up whatever happened. Set this + // instruction as executed. + this->setExecuted(); + } + + if (fault != NoFault) { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. + if (data) + bzero(data, size); + } } if (traceData) { @@ -897,19 +941,26 @@ BaseDynInst::writeBytes(uint8_t *data, unsigned size, } reqMade = true; - Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(), - thread->contextId(), threadNumber); - + Request *req = NULL; Request *sreqLow = NULL; Request *sreqHigh = NULL; - // Only split the request if the ISA supports unaligned accesses. - if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); + if (reqMade && translationStarted) { + req = savedReq; + sreqLow = savedSreqLow; + sreqHigh = savedSreqHigh; + } else { + req = new Request(asid, addr, size, flags, this->pc.instAddr(), + thread->contextId(), threadNumber); + + // Only split the request if the ISA supports unaligned accesses. + if (TheISA::HasUnalignedMemAcc) { + splitRequest(req, sreqLow, sreqHigh); + } + initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); } - initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); - if (fault == NoFault) { + if (fault == NoFault && translationCompleted) { effAddr = req->getVaddr(); effAddrValid = true; fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx); @@ -953,6 +1004,8 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh, uint64_t *res, BaseTLB::Mode mode) { + translationStarted = true; + if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) { WholeTranslationState *state = new WholeTranslationState(req, NULL, res, mode); @@ -961,6 +1014,12 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, DataTranslation > *trans = new DataTranslation >(this, state); cpu->dtb->translateTiming(req, thread->getTC(), trans, mode); + if (!translationCompleted) { + // Save memory requests. + savedReq = state->mainReq; + savedSreqLow = state->sreqLow; + savedSreqHigh = state->sreqHigh; + } } else { WholeTranslationState *state = new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode); @@ -973,6 +1032,12 @@ BaseDynInst::initiateTranslation(RequestPtr req, RequestPtr sreqLow, cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode); cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode); + if (!translationCompleted) { + // Save memory requests. + savedReq = state->mainReq; + savedSreqLow = state->sreqLow; + savedSreqHigh = state->sreqHigh; + } } } @@ -998,6 +1063,8 @@ BaseDynInst::finishTranslation(WholeTranslationState *state) state->deleteReqs(); } delete state; + + translationCompleted = true; } #endif // __CPU_BASE_DYN_INST_HH__ -- cgit v1.2.3