From 2f40b3b8ae4fddcdd167fc86469254f40736c888 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 17 Mar 2011 19:20:19 -0500 Subject: O3: Fix unaligned stores when cache blocked Without this change the a store can be issued to the cache multiple times. If this case occurs when the l1 cache is out of mshrs (and thus blocked) the processor will never make forward progress because each cycle it will send a single request using the recently freed mshr and not completing the multipart store. This will continue forever. --- src/cpu/o3/lsq_unit_impl.hh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index b5d337935..1a4e686a3 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -1103,7 +1103,9 @@ LSQUnit::recvRetry() dynamic_cast(retryPkt->senderState); // Don't finish the store unless this is the last packet. - if (!TheISA::HasUnalignedMemAcc || !state->pktToSend) { + if (!TheISA::HasUnalignedMemAcc || !state->pktToSend || + state->pendingPacket == retryPkt) { + state->pktToSend = false; storePostSend(retryPkt); } retryPkt = NULL; -- cgit v1.2.3 From a432d8e0851de8d090676697e29ca6ed4be64fb7 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 17 Mar 2011 19:20:19 -0500 Subject: Mem: Fix issue with dirty block being lost when entire block transferred to non-cache. This change fixes the problem for all the cases we actively use. If you want to try more creative I/O device attachments (E.g. sharing an L2), this won't work. You would need another level of caching between the I/O device and the cache (which you actually need anyway with our current code to make sure writes propagate). This is required so that you can mark the cache in between as top level and it won't try to send ownership of a block to the I/O device. Asserts have been added that should catch any issues. --- src/cpu/o3/fetch_impl.hh | 3 +++ src/dev/io_device.cc | 3 +++ src/mem/cache/BaseCache.py | 1 + src/mem/cache/base.cc | 1 + src/mem/cache/base.hh | 5 +++++ src/mem/cache/cache_impl.hh | 2 +- 6 files changed, 14 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index a2f2b4f8a..3092bd937 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -112,6 +112,9 @@ DefaultFetch::IcachePort::recvTiming(PacketPtr pkt) { DPRINTF(Fetch, "Received timing\n"); if (pkt->isResponse()) { + // We shouldn't ever get a block in ownership state + assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted())); + fetch->processCacheCompletion(pkt); } //else Snooped a coherence request, just return diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index be97bc4ad..ffe8fdf06 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -139,6 +139,9 @@ DmaPort::recvTiming(PacketPtr pkt) assert(pendingCount >= 0); assert(state); + // We shouldn't ever get a block in ownership state + assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted())); + state->numBytes += pkt->req->getSize(); assert(state->totBytes >= state->numBytes); if (state->totBytes == state->numBytes) { diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index dffac2234..5c7ae5274 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -48,6 +48,7 @@ class BaseCache(MemObject): size = Param.MemorySize("capacity in bytes") forward_snoops = Param.Bool(True, "forward snoops from mem side to cpu side") + is_top_level = Param.Bool(False, "Is this cache at the top level (e.g. L1)") subblock_size = Param.Int(0, "Size of subblock in IIC used for compression") tgts_per_mshr = Param.Int("max number of accesses per MSHR") diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index 9166e1a09..b7e331d54 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -58,6 +58,7 @@ BaseCache::BaseCache(const Params *p) hitLatency(p->latency), numTarget(p->tgts_per_mshr), forwardSnoops(p->forward_snoops), + isTopLevel(p->is_top_level), blocked(0), noTargetMSHR(NULL), missCount(p->max_miss_count), diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index e8a644296..28ddf5054 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -194,6 +194,11 @@ class BaseCache : public MemObject /** Do we forward snoops from mem side port through to cpu side port? */ bool forwardSnoops; + /** Is this cache a toplevel cache (e.g. L1, I/O cache). If so we should + * never try to forward ownership and similar optimizations to the cpu + * side */ + bool isTopLevel; + /** * Bit vector of the blocking reasons for the access path. * @sa #BlockedCause diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index e4e4a3c92..0b2b273f9 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -216,7 +216,7 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk, if (blk->isDirty()) { // special considerations if we're owner: - if (!deferred_response) { + if (!deferred_response && !isTopLevel) { // if we are responding immediately and can // signal that we're transferring ownership // along with exclusivity, do so -- cgit v1.2.3 From db350536555d7509b703b0707141e3f677645df0 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 17 Mar 2011 19:20:19 -0500 Subject: ARM: Previous change didn't end up setting instFlags, this does. --- src/arch/arm/isa/insts/str.isa | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa index f661961f7..e5d47c28f 100644 --- a/src/arch/arm/isa/insts/str.isa +++ b/src/arch/arm/isa/insts/str.isa @@ -222,7 +222,6 @@ let {{ decConstBase = 'StoreExImm' basePrefix = 'MemoryExImm' nameFunc = staticmethod(storeImmClassName) - instFlags = ['IsStoreConditional'] def __init__(self, *args, **kargs): super(StoreImmEx, self).__init__(*args, **kargs) @@ -302,7 +301,6 @@ let {{ decConstBase = 'StoreExDImm' basePrefix = 'MemoryExDImm' nameFunc = staticmethod(storeDoubleImmClassName) - instFlags = ['IsStoreConditional'] def __init__(self, *args, **kargs): super(StoreDoubleImmEx, self).__init__(*args, **kargs) @@ -370,10 +368,14 @@ let {{ buildDoubleStores("strd") - StoreImmEx("strex", False, True, False, size=4, flavor="exclusive").emit() - StoreImmEx("strexh", False, True, False, size=2, flavor="exclusive").emit() - StoreImmEx("strexb", False, True, False, size=1, flavor="exclusive").emit() - StoreDoubleImmEx("strexd", False, True, False, flavor="exclusive").emit() + StoreImmEx("strex", False, True, False, size=4, flavor="exclusive", + instFlags = ['IsStoreConditional']).emit() + StoreImmEx("strexh", False, True, False, size=2, flavor="exclusive", + instFlags = ['IsStoreConditional']).emit() + StoreImmEx("strexb", False, True, False, size=1, flavor="exclusive", + instFlags = ['IsStoreConditional']).emit() + StoreDoubleImmEx("strexd", False, True, False, flavor="exclusive", + instFlags = ['IsStoreConditional']).emit() StoreImm("vstr", False, True, False, size=4, flavor="fp").emit() StoreImm("vstr", False, False, False, size=4, flavor="fp").emit() -- cgit v1.2.3 From 30143baf7e35a73acaff1d02cf71278248a86515 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 17 Mar 2011 19:20:19 -0500 Subject: O3: Cleanup the commitInfo comm struct. Get rid of unused members and use base types rather than derrived values where possible to limit amount of state. --- src/cpu/o3/comm.hh | 57 ++++++++++++++++++++++++++++++++++------------- src/cpu/o3/commit.hh | 3 ++- src/cpu/o3/commit_impl.hh | 28 ++++++++++++++--------- src/cpu/o3/fetch_impl.hh | 7 ++---- src/cpu/o3/iew_impl.hh | 6 ++--- 5 files changed, 64 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 897807fdb..840dde9ea 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -123,7 +135,6 @@ struct TimeBufStruct { bool branchTaken; Addr mispredPC; TheISA::PCState nextPC; - unsigned branchCount; }; @@ -151,29 +162,45 @@ struct TimeBufStruct { iewComm iewInfo[Impl::MaxThreads]; struct commitComm { - bool usedROB; - unsigned freeROBEntries; - bool emptyROB; + /////////////// For Decode, IEW, Rename, Fetch /////////// bool squash; bool robSquashing; - bool branchMispredict; - DynInstPtr mispredictInst; - bool branchTaken; - Addr mispredPC; - TheISA::PCState pc; - + ////////// For Fetch & IEW ///////////// // Represents the instruction that has either been retired or // squashed. Similar to having a single bus that broadcasts the // retired or squashed sequence number. InstSeqNum doneSeqNum; - //Just in case we want to do a commit/squash on a cycle - //(necessary for multiple ROBs?) - bool commitInsts; - InstSeqNum squashSeqNum; + ////////////// For Rename ///////////////// + // Rename should re-read number of free rob entries + bool usedROB; + // Notify Rename that the ROB is empty + bool emptyROB; + // Tell Rename how many free entries it has in the ROB + unsigned freeROBEntries; + + + ///////////// For Fetch ////////////////// + // Provide fetch the instruction that mispredicted, if this + // pointer is not-null a misprediction occured + DynInstPtr mispredictInst; + // Was the branch taken or not + bool branchTaken; + // The pc of the next instruction to execute. This is the next + // instruction for a branch mispredict, but the same instruction for + // order violation and the like + TheISA::PCState pc; + + // Instruction that caused the a non-mispredict squash + DynInstPtr squashInst; + // If an interrupt is pending and fetch should stall + bool interruptPending; + // If the interrupt ended up being cleared before being handled + bool clearInterrupt; + //////////// For IEW ////////////////// // Communication specifically to the IQ to tell the IQ that it can // schedule a non-speculative instruction. InstSeqNum nonSpecSeqNum; @@ -182,8 +209,6 @@ struct TimeBufStruct { bool uncached; DynInstPtr uncachedLoad; - bool interruptPending; - bool clearInterrupt; }; commitComm commitInfo[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 047e29f5d..ff7b53440 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -262,7 +262,8 @@ class DefaultCommit * instructions instead of the current instruction and doesn't * clean up various status bits about traps/tc writes pending. */ - void squashAfter(ThreadID tid, uint64_t squash_after_seq_num); + void squashAfter(ThreadID tid, DynInstPtr &head_inst, + uint64_t squash_after_seq_num); #if FULL_SYSTEM /** Handles processing an interrupt. */ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 104e7fb58..8c651e203 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -541,8 +541,8 @@ DefaultCommit::squashAll(ThreadID tid) // the ROB is in the process of squashing. toIEW->commitInfo[tid].robSquashing = true; - toIEW->commitInfo[tid].branchMispredict = false; toIEW->commitInfo[tid].mispredictInst = NULL; + toIEW->commitInfo[tid].squashInst = NULL; toIEW->commitInfo[tid].pc = pc[tid]; } @@ -584,7 +584,8 @@ DefaultCommit::squashFromTC(ThreadID tid) template void -DefaultCommit::squashAfter(ThreadID tid, uint64_t squash_after_seq_num) +DefaultCommit::squashAfter(ThreadID tid, DynInstPtr &head_inst, + uint64_t squash_after_seq_num) { youngestSeqNum[tid] = squash_after_seq_num; @@ -594,6 +595,7 @@ DefaultCommit::squashAfter(ThreadID tid, uint64_t squash_after_seq_num) // Send back the sequence number of the squashed instruction. toIEW->commitInfo[tid].doneSeqNum = squash_after_seq_num; + toIEW->commitInfo[tid].squashInst = head_inst; // Send back the squash signal to tell stages that they should squash. toIEW->commitInfo[tid].squash = true; @@ -601,7 +603,7 @@ DefaultCommit::squashAfter(ThreadID tid, uint64_t squash_after_seq_num) // the ROB is in the process of squashing. toIEW->commitInfo[tid].robSquashing = true; - toIEW->commitInfo[tid].branchMispredict = false; + toIEW->commitInfo[tid].mispredictInst = NULL; toIEW->commitInfo[tid].pc = pc[tid]; DPRINTF(Commit, "Executing squash after for [tid:%i] inst [sn:%lli]\n", @@ -801,10 +803,17 @@ DefaultCommit::commit() commitStatus[tid] != TrapPending && fromIEW->squashedSeqNum[tid] <= youngestSeqNum[tid]) { - DPRINTF(Commit, "[tid:%i]: Squashing due to PC %#x [sn:%i]\n", + if (fromIEW->mispredictInst[tid]) { + DPRINTF(Commit, + "[tid:%i]: Squashing due to branch mispred PC:%#x [sn:%i]\n", tid, - fromIEW->mispredPC[tid], + fromIEW->mispredictInst[tid]->instAddr(), fromIEW->squashedSeqNum[tid]); + } else { + DPRINTF(Commit, + "[tid:%i]: Squashing due to order violation [sn:%i]\n", + tid, fromIEW->squashedSeqNum[tid]); + } DPRINTF(Commit, "[tid:%i]: Redirecting to PC %#x\n", tid, @@ -835,18 +844,15 @@ DefaultCommit::commit() // the ROB is in the process of squashing. toIEW->commitInfo[tid].robSquashing = true; - toIEW->commitInfo[tid].branchMispredict = - fromIEW->branchMispredict[tid]; toIEW->commitInfo[tid].mispredictInst = fromIEW->mispredictInst[tid]; toIEW->commitInfo[tid].branchTaken = fromIEW->branchTaken[tid]; + toIEW->commitInfo[tid].squashInst = NULL; toIEW->commitInfo[tid].pc = fromIEW->pc[tid]; - toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; - - if (toIEW->commitInfo[tid].branchMispredict) { + if (toIEW->commitInfo[tid].mispredictInst) { ++branchMispredicts; } } @@ -988,7 +994,7 @@ DefaultCommit::commitInsts() // If this is an instruction that doesn't play nicely with // others squash everything and restart fetch if (head_inst->isSquashAfter()) - squashAfter(tid, head_inst->seqNum); + squashAfter(tid, head_inst, head_inst->seqNum); int count = 0; Addr oldpc; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 3092bd937..5f9be039f 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -934,15 +934,12 @@ DefaultFetch::checkSignalsAndUpdate(ThreadID tid) // In any case, squash. squash(fromCommit->commitInfo[tid].pc, fromCommit->commitInfo[tid].doneSeqNum, - tid); + fromCommit->commitInfo[tid].squashInst, tid); // If it was a branch mispredict on a control instruction, update the // branch predictor with that instruction, otherwise just kill the // invalid state we generated in after sequence number - assert(!fromCommit->commitInfo[tid].branchMispredict || - fromCommit->commitInfo[tid].mispredictInst); - - if (fromCommit->commitInfo[tid].branchMispredict && + if (fromCommit->commitInfo[tid].mispredictInst && fromCommit->commitInfo[tid].mispredictInst->isControl()) { branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, fromCommit->commitInfo[tid].pc, diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index dff287ff5..8bf3c56f4 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -456,8 +456,6 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, ThreadID tid) inst->seqNum < toCommit->squashedSeqNum[tid]) { toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; - toCommit->mispredPC[tid] = inst->instAddr(); - toCommit->branchMispredict[tid] = true; toCommit->branchTaken[tid] = inst->pcState().branching(); TheISA::PCState pc = inst->pcState(); @@ -486,7 +484,7 @@ DefaultIEW::squashDueToMemOrder(DynInstPtr &inst, ThreadID tid) TheISA::PCState pc = inst->pcState(); TheISA::advancePC(pc, inst->staticInst); toCommit->pc[tid] = pc; - toCommit->branchMispredict[tid] = false; + toCommit->mispredictInst[tid] = NULL; toCommit->includeSquashInst[tid] = false; @@ -506,7 +504,7 @@ DefaultIEW::squashDueToMemBlocked(DynInstPtr &inst, ThreadID tid) toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->pc[tid] = inst->pcState(); - toCommit->branchMispredict[tid] = false; + toCommit->mispredictInst[tid] = NULL; // Must include the broadcasted SN in the squash. toCommit->includeSquashInst[tid] = true; -- cgit v1.2.3 From 799c3da8d0086bfdfbae532e05018828387e4497 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 17 Mar 2011 19:20:19 -0500 Subject: O3: Send instruction back to fetch on squash to seed predecoder correctly. --- src/arch/alpha/predecoder.hh | 6 ++++++ src/arch/arm/predecoder.hh | 6 ++++++ src/arch/mips/predecoder.hh | 6 ++++++ src/arch/power/predecoder.hh | 6 ++++++ src/arch/sparc/predecoder.hh | 7 +++++++ src/arch/x86/predecoder.hh | 6 ++++++ src/cpu/o3/cpu.cc | 5 +++-- src/cpu/o3/fetch.hh | 4 ++-- src/cpu/o3/fetch_impl.hh | 5 ++++- src/kern/linux/events.cc | 12 ++++++++++++ 10 files changed, 58 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/arch/alpha/predecoder.hh b/src/arch/alpha/predecoder.hh index a8788051f..2f8c4c2ef 100644 --- a/src/arch/alpha/predecoder.hh +++ b/src/arch/alpha/predecoder.hh @@ -76,6 +76,12 @@ class Predecoder emiIsReady = false; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + // Use this to give data to the predecoder. This should be used // when there is control flow. void diff --git a/src/arch/arm/predecoder.hh b/src/arch/arm/predecoder.hh index a99e38ce7..511bc29bc 100644 --- a/src/arch/arm/predecoder.hh +++ b/src/arch/arm/predecoder.hh @@ -83,6 +83,12 @@ namespace ArmISA predAddrValid = false; } + void reset(const ExtMachInst &old_emi) + { + reset(); + itstate = old_emi.newItstate; + } + Predecoder(ThreadContext * _tc) : tc(_tc), data(0) { diff --git a/src/arch/mips/predecoder.hh b/src/arch/mips/predecoder.hh index 4220b768c..110493cc5 100644 --- a/src/arch/mips/predecoder.hh +++ b/src/arch/mips/predecoder.hh @@ -75,6 +75,12 @@ class Predecoder emiIsReady = false; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + //Use this to give data to the predecoder. This should be used //when there is control flow. void diff --git a/src/arch/power/predecoder.hh b/src/arch/power/predecoder.hh index 8b1089095..c10bc51bf 100644 --- a/src/arch/power/predecoder.hh +++ b/src/arch/power/predecoder.hh @@ -82,6 +82,12 @@ class Predecoder emiIsReady = false; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + // Use this to give data to the predecoder. This should be used // when there is control flow. void diff --git a/src/arch/sparc/predecoder.hh b/src/arch/sparc/predecoder.hh index 670c547d0..082adeb72 100644 --- a/src/arch/sparc/predecoder.hh +++ b/src/arch/sparc/predecoder.hh @@ -68,12 +68,19 @@ class Predecoder } void process() {} + void reset() { emiIsReady = false; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + // Use this to give data to the predecoder. This should be used // when there is control flow. void diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh index 5c67e28e1..790453b98 100644 --- a/src/arch/x86/predecoder.hh +++ b/src/arch/x86/predecoder.hh @@ -174,6 +174,12 @@ namespace X86ISA state = ResetState; } + void + reset(const ExtMachInst &old_emi) + { + reset(); + } + ThreadContext * getTC() { return tc; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 2d3bc3f72..4088f2399 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -808,8 +808,9 @@ FullO3CPU::removeThread(ThreadID tid) } // Squash Throughout Pipeline - InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; - fetch.squash(0, squash_seq_num, tid); + DynInstPtr inst = commit.rob->readHeadInst(tid); + InstSeqNum squash_seq_num = inst->seqNum; + fetch.squash(0, squash_seq_num, inst, tid); decode.squash(tid); rename.squash(squash_seq_num, tid); iew.squash(tid); diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index c51658104..4a4ac0902 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -312,8 +312,8 @@ class DefaultFetch * remove any instructions that are not in the ROB. The source of this * squash should be the commit stage. */ - void squash(const TheISA::PCState &newPC, - const InstSeqNum &seq_num, ThreadID tid); + void squash(const TheISA::PCState &newPC, const InstSeqNum &seq_num, + DynInstPtr &squashInst, ThreadID tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 5f9be039f..6c1ac456d 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -815,11 +815,14 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const TheISA::PCState &newPC, - const InstSeqNum &seq_num, ThreadID tid) + const InstSeqNum &seq_num, DynInstPtr &squashInst, + ThreadID tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid); doSquash(newPC, tid); + if (squashInst) + predecoder.reset(squashInst->staticInst->machInst); // Tell the CPU to remove any instructions that are not in the ROB. cpu->removeInstsNotInROB(tid); diff --git a/src/kern/linux/events.cc b/src/kern/linux/events.cc index f619dd11b..60aa857ac 100644 --- a/src/kern/linux/events.cc +++ b/src/kern/linux/events.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * -- cgit v1.2.3 From e65f480d62e0112e89af6130e2f2024d89417df0 Mon Sep 17 00:00:00 2001 From: Matt Horsnell Date: Thu, 17 Mar 2011 19:20:19 -0500 Subject: ARM: Rename registers used as temporary state by microops. --- src/arch/arm/isa/insts/macromem.isa | 32 ++++++++++++++++---------------- src/arch/arm/isa/operands.isa | 6 +++--- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index 33d57a60b..0e3bcc648 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -51,7 +51,7 @@ let {{ microLdrUopIop = InstObjParams('ldr_uop', 'MicroLdrUop', 'MicroMemOp', {'memacc_code': microLdrUopCode, - 'ea_code': 'EA = Rb + (up ? imm : -imm);', + 'ea_code': 'EA = URb + (up ? imm : -imm);', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -60,7 +60,7 @@ let {{ 'MicroMemOp', {'memacc_code': microLdrFpUopCode, 'ea_code': vfpEnabledCheckCode + - 'EA = Rb + (up ? imm : -imm);', + 'EA = URb + (up ? imm : -imm);', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -69,7 +69,7 @@ let {{ 'MicroMemOp', {'memacc_code': microLdrFpUopCode, 'ea_code': vfpEnabledCheckCode + ''' - EA = Rb + (up ? imm : -imm) + + EA = URb + (up ? imm : -imm) + (((CPSR)Cpsr).e ? 4 : 0); ''', 'predicate_test': predicateTest}, @@ -80,7 +80,7 @@ let {{ 'MicroMemOp', {'memacc_code': microLdrFpUopCode, 'ea_code': vfpEnabledCheckCode + ''' - EA = Rb + (up ? imm : -imm) - + EA = URb + (up ? imm : -imm) - (((CPSR)Cpsr).e ? 4 : 0); ''', 'predicate_test': predicateTest}, @@ -101,16 +101,16 @@ let {{ 'MicroMemOp', {'memacc_code': microLdrRetUopCode, 'ea_code': - 'EA = Rb + (up ? imm : -imm);', + 'EA = URb + (up ? imm : -imm);', 'predicate_test': condPredicateTest}, ['IsMicroop','IsNonSpeculative','IsSerializeAfter']) - microStrUopCode = "Mem = cSwap(Ra.uw, ((CPSR)Cpsr).e);" + microStrUopCode = "Mem = cSwap(URa.uw, ((CPSR)Cpsr).e);" microStrUopIop = InstObjParams('str_uop', 'MicroStrUop', 'MicroMemOp', {'memacc_code': microStrUopCode, 'postacc_code': "", - 'ea_code': 'EA = Rb + (up ? imm : -imm);', + 'ea_code': 'EA = URb + (up ? imm : -imm);', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -120,7 +120,7 @@ let {{ {'memacc_code': microStrFpUopCode, 'postacc_code': "", 'ea_code': vfpEnabledCheckCode + - 'EA = Rb + (up ? imm : -imm);', + 'EA = URb + (up ? imm : -imm);', 'predicate_test': predicateTest}, ['IsMicroop']) @@ -130,7 +130,7 @@ let {{ {'memacc_code': microStrFpUopCode, 'postacc_code': "", 'ea_code': vfpEnabledCheckCode + ''' - EA = Rb + (up ? imm : -imm) + + EA = URb + (up ? imm : -imm) + (((CPSR)Cpsr).e ? 4 : 0); ''', 'predicate_test': predicateTest}, @@ -142,7 +142,7 @@ let {{ {'memacc_code': microStrFpUopCode, 'postacc_code': "", 'ea_code': vfpEnabledCheckCode + ''' - EA = Rb + (up ? imm : -imm) - + EA = URb + (up ? imm : -imm) - (((CPSR)Cpsr).e ? 4 : 0); ''', 'predicate_test': predicateTest}, @@ -170,7 +170,7 @@ let {{ let {{ exec_output = header_output = '' - eaCode = 'EA = Ra + imm;' + eaCode = 'EA = URa + imm;' for size in (1, 2, 3, 4, 6, 8, 12, 16): # Set up the memory access. @@ -572,14 +572,14 @@ let {{ let {{ microAddiUopIop = InstObjParams('addi_uop', 'MicroAddiUop', 'MicroIntImmOp', - {'code': 'Ra = Rb + imm;', + {'code': 'URa = URb + imm;', 'predicate_test': predicateTest}, ['IsMicroop']) microAddUopIop = InstObjParams('add_uop', 'MicroAddUop', 'MicroIntRegOp', {'code': - '''Ra = Rb + shift_rm_imm(Rc, shiftAmt, + '''URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, CondCodes<29:>); ''', @@ -588,14 +588,14 @@ let {{ microSubiUopIop = InstObjParams('subi_uop', 'MicroSubiUop', 'MicroIntImmOp', - {'code': 'Ra = Rb - imm;', + {'code': 'URa = URb - imm;', 'predicate_test': predicateTest}, ['IsMicroop']) microSubUopIop = InstObjParams('sub_uop', 'MicroSubUop', 'MicroIntRegOp', {'code': - '''Ra = Rb - shift_rm_imm(Rc, shiftAmt, + '''URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, CondCodes<29:>); ''', @@ -604,7 +604,7 @@ let {{ microUopRegMovIop = InstObjParams('uopReg_uop', 'MicroUopRegMov', 'MicroIntMov', - {'code': 'IWRa = Rb;', + {'code': 'IWRa = URb;', 'predicate_test': predicateTest}, ['IsMicroop']) diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index f403f9372..7b014acd0 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -228,11 +228,11 @@ def operands {{ 'SevMailbox': cntrlRegNC('MISCREG_SEV_MAILBOX'), #Register fields for microops - 'Ra' : intReg('ura'), + 'URa' : intReg('ura'), 'IWRa' : intRegIWPC('ura'), 'Fa' : floatReg('ura'), - 'Rb' : intReg('urb'), - 'Rc' : intReg('urc'), + 'URb' : intReg('urb'), + 'URc' : intReg('urc'), #Memory Operand 'Mem': ('Mem', 'uw', None, ('IsMemRef', 'IsLoad', 'IsStore'), srtNormal), -- cgit v1.2.3 From 031f396c71e750fede19651ba3a14e262a87e117 Mon Sep 17 00:00:00 2001 From: Matt Horsnell Date: Thu, 17 Mar 2011 19:20:19 -0500 Subject: ARM: Fix RFE macrop. This changes the RFE macroop into 3 microops: URa = [sp]; URb = [sp+4]; // load CPSR,PC values from stack sp = sp + offset; // optionally auto-increment PC = URa; CPSR = URb; // write to the PC and CPSR. Importantly: - writing to PC is handled in the last micro-op. - loading occurs prior to state changes. --- src/arch/arm/insts/macromem.cc | 9 +++++++++ src/arch/arm/insts/macromem.hh | 21 ++++++++++++++++++++ src/arch/arm/insts/mem.hh | 8 ++++++-- src/arch/arm/intregs.hh | 2 ++ src/arch/arm/isa/insts/ldr.isa | 25 +++++++++-------------- src/arch/arm/isa/insts/macromem.isa | 31 ++++++++++++++++++++++++++--- src/arch/arm/isa/insts/mem.isa | 25 +++++++++++++++++++---- src/arch/arm/isa/templates/macromem.isa | 35 +++++++++++++++++++++++++++++++++ src/arch/arm/isa/templates/mem.isa | 24 +++++++++++++--------- 9 files changed, 146 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc index 2a45cf2e6..c03b7ccc1 100644 --- a/src/arch/arm/insts/macromem.cc +++ b/src/arch/arm/insts/macromem.cc @@ -895,6 +895,15 @@ MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + ss << "[PC,CPSR]"; + return ss.str(); +} + std::string MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const { diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh index 1a2db8b9a..4933a1e7c 100644 --- a/src/arch/arm/insts/macromem.hh +++ b/src/arch/arm/insts/macromem.hh @@ -134,6 +134,27 @@ class MicroNeonMixLaneOp : public MicroNeonMixOp { } }; + +/** + * Microops of the form + * PC = IntRegA + * CPSR = IntRegB + */ +class MicroSetPCCPSR : public MicroOp +{ + protected: + IntRegIndex ura, urb, urc; + + MicroSetPCCPSR(const char *mnem, ExtMachInst machInst, OpClass __opClass, + IntRegIndex _ura, IntRegIndex _urb, IntRegIndex _urc) + : MicroOp(mnem, machInst, __opClass), + ura(_ura), urb(_urb), urc(_urc) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB */ diff --git a/src/arch/arm/insts/mem.hh b/src/arch/arm/insts/mem.hh index a4fc62603..324d86fed 100644 --- a/src/arch/arm/insts/mem.hh +++ b/src/arch/arm/insts/mem.hh @@ -97,14 +97,18 @@ class RfeOp : public MightBeMicro IntRegIndex base; AddrMode mode; bool wb; - static const unsigned numMicroops = 2; + IntRegIndex ura, urb, urc; + static const unsigned numMicroops = 3; StaticInstPtr *uops; RfeOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, IntRegIndex _base, AddrMode _mode, bool _wb) : MightBeMicro(mnem, _machInst, __opClass), - base(_base), mode(_mode), wb(_wb), uops(NULL) + base(_base), mode(_mode), wb(_wb), + ura(INTREG_UREG0), urb(INTREG_UREG1), + urc(INTREG_UREG2), + uops(NULL) {} virtual diff --git a/src/arch/arm/intregs.hh b/src/arch/arm/intregs.hh index 4b2cc560d..2cbed6c59 100644 --- a/src/arch/arm/intregs.hh +++ b/src/arch/arm/intregs.hh @@ -110,6 +110,8 @@ enum IntRegIndex INTREG_ZERO, // Dummy zero reg since there has to be one. INTREG_UREG0, + INTREG_UREG1, + INTREG_UREG2, INTREG_CONDCODES, INTREG_FPCONDCODES, diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index c60a91a50..2e45f2875 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -67,7 +67,7 @@ let {{ self.memFlags = ["ArmISA::TLB::MustBeOne"] self.codeBlobs = {"postacc_code" : ""} - def emitHelper(self, base = 'Memory', wbDecl = None, instFlags = []): + def emitHelper(self, base = 'Memory', wbDecl = None, instFlags = [], pcDecl = None): global header_output, decoder_output, exec_output @@ -76,7 +76,8 @@ let {{ (newHeader, newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, - self.memFlags, instFlags, base, wbDecl) + self.memFlags, instFlags, base, + wbDecl, pcDecl) header_output += newHeader decoder_output += newDecoder @@ -104,26 +105,18 @@ let {{ wbDiff = 8 accCode = ''' CPSR cpsr = Cpsr; - SCTLR sctlr = Sctlr; - // Use the version of NPC that gets set before NextThumb - pNPC = cSwap(Mem.ud, cpsr.e); - uint32_t tempSpsr = cSwap(Mem.ud >> 32, cpsr.e); - uint32_t newCpsr = - cpsrWriteByInstr(cpsr | CondCodes, tempSpsr, - 0xF, true, sctlr.nmfi); - Cpsr = ~CondCodesMask & newCpsr; - NextThumb = ((CPSR)newCpsr).t; - NextJazelle = ((CPSR)newCpsr).j; - ForcedItState = ((((CPSR)tempSpsr).it2 << 2) & 0xFC) - | (((CPSR)tempSpsr).it1 & 0x3); - CondCodes = CondCodesMask & newCpsr; + URc = cpsr | CondCodes; + URa = cSwap(Mem.ud, cpsr.e); + URb = cSwap(Mem.ud >> 32, cpsr.e); ''' self.codeBlobs["memacc_code"] = accCode wbDecl = None + pcDecl = "MicroUopSetPCCPSR(machInst, INTREG_UREG0, INTREG_UREG1, INTREG_UREG2);" + if self.writeback: wbDecl = "MicroAddiUop(machInst, base, base, %d);" % wbDiff - self.emitHelper('RfeOp', wbDecl, ["IsSerializeAfter", "IsNonSpeculative"]) + self.emitHelper('RfeOp', wbDecl, ["IsSerializeAfter", "IsNonSpeculative"], pcDecl) class LoadImmInst(LoadInst): def __init__(self, *args, **kargs): diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index 0e3bcc648..d6c929353 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -608,23 +608,48 @@ let {{ 'predicate_test': predicateTest}, ['IsMicroop']) + setPCCPSRDecl = ''' + CPSR cpsrOrCondCodes = URc; + SCTLR sctlr = Sctlr; + pNPC = URa; + uint32_t newCpsr = + cpsrWriteByInstr(cpsrOrCondCodes, URb, + 0xF, true, sctlr.nmfi); + Cpsr = ~CondCodesMask & newCpsr; + NextThumb = ((CPSR)newCpsr).t; + NextJazelle = ((CPSR)newCpsr).j; + ForcedItState = ((((CPSR)URb).it2 << 2) & 0xFC) + | (((CPSR)URb).it1 & 0x3); + CondCodes = CondCodesMask & newCpsr; + ''' + + microUopSetPCCPSRIop = InstObjParams('uopSet_uop', 'MicroUopSetPCCPSR', + 'MicroSetPCCPSR', + {'code': setPCCPSRDecl, + 'predicate_test': predicateTest}, + ['IsMicroop']) + header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \ MicroIntImmDeclare.subst(microSubiUopIop) + \ MicroIntRegDeclare.subst(microAddUopIop) + \ MicroIntRegDeclare.subst(microSubUopIop) + \ - MicroIntMovDeclare.subst(microUopRegMovIop) + MicroIntMovDeclare.subst(microUopRegMovIop) + \ + MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop) decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \ MicroIntImmConstructor.subst(microSubiUopIop) + \ MicroIntRegConstructor.subst(microAddUopIop) + \ MicroIntRegConstructor.subst(microSubUopIop) + \ - MicroIntMovConstructor.subst(microUopRegMovIop) + MicroIntMovConstructor.subst(microUopRegMovIop) + \ + MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop) exec_output = PredOpExecute.subst(microAddiUopIop) + \ PredOpExecute.subst(microSubiUopIop) + \ PredOpExecute.subst(microAddUopIop) + \ PredOpExecute.subst(microSubUopIop) + \ - PredOpExecute.subst(microUopRegMovIop) + PredOpExecute.subst(microUopRegMovIop) + \ + PredOpExecute.subst(microUopSetPCCPSRIop) + }}; let {{ diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index 507f8cd4b..d0c0f0710 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -48,7 +48,7 @@ let {{ self.constructTemplate = eval(self.decConstBase + 'Constructor') def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags, - base = 'Memory', wbDecl = None): + base = 'Memory', wbDecl = None, pcDecl = None): # Make sure flags are in lists (convert to lists if not). memFlags = makeList(memFlags) instFlags = makeList(instFlags) @@ -65,12 +65,26 @@ let {{ macroName = Name instFlagsCopy = list(instFlags) codeBlobsCopy = dict(codeBlobs) - if wbDecl is not None: + + use_uops = 0 + if wbDecl is not None or pcDecl is not None: instFlagsCopy.append('IsMicroop') Name = Name + 'Acc' + use_uops = 1 + + use_wb = 0 + use_pc = 0 + if wbDecl is not None: + use_wb = 1 + if pcDecl is not None: + use_pc = 1 + codeBlobsCopy['acc_name'] = Name codeBlobsCopy['wb_decl'] = wbDecl + codeBlobsCopy['pc_decl'] = pcDecl codeBlobsCopy['use_uops'] = 0 + codeBlobsCopy['use_wb'] = 0 + codeBlobsCopy['use_pc'] = 0 iop = InstObjParams(name, Name, base, codeBlobsCopy, instFlagsCopy) @@ -81,11 +95,14 @@ let {{ self.initiateAccTemplate.subst(iop) + \ self.completeAccTemplate.subst(iop) - if wbDecl is not None: + if wbDecl is not None or pcDecl is not None: iop = InstObjParams(name, macroName, base, { "wb_decl" : wbDecl, + "pc_decl" : pcDecl, "acc_name" : Name, - "use_uops" : 1 }, + "use_uops" : use_uops, + "use_pc" : use_pc, + "use_wb" : use_wb }, ['IsMacroop']) header_output += self.declareTemplate.subst(iop) decoder_output += self.constructTemplate.subst(iop) diff --git a/src/arch/arm/isa/templates/macromem.isa b/src/arch/arm/isa/templates/macromem.isa index b7ca7fa48..a7f7f0da8 100644 --- a/src/arch/arm/isa/templates/macromem.isa +++ b/src/arch/arm/isa/templates/macromem.isa @@ -107,6 +107,41 @@ def template MicroNeonMemDeclare {{ }; }}; +//////////////////////////////////////////////////////////////////// +// +// PC = Integer(ura) +// CPSR = Integer(urb) +// + +def template MicroSetPCCPSRDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, + IntRegIndex _ura, + IntRegIndex _urb, + IntRegIndex _urc); + %(BasicExecDeclare)s + }; +}}; + +def template MicroSetPCCPSRConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _ura, + IntRegIndex _urb, + IntRegIndex _urc) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _ura, _urb, _urc) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + //////////////////////////////////////////////////////////////////// // // Integer = Integer op Integer microops diff --git a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa index 3d073b322..dcfd47ace 100644 --- a/src/arch/arm/isa/templates/mem.isa +++ b/src/arch/arm/isa/templates/mem.isa @@ -917,9 +917,9 @@ def template CompleteAccDeclare {{ def template RfeConstructor {{ inline %(class_name)s::%(class_name)s(ExtMachInst machInst, - uint32_t _base, int _mode, bool _wb) - : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, - (IntRegIndex)_base, (AddrMode)_mode, _wb) + uint32_t _base, int _mode, bool _wb) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_base, (AddrMode)_mode, _wb) { %(constructor)s; if (!(condCode == COND_AL || condCode == COND_UC)) { @@ -928,12 +928,18 @@ def template RfeConstructor {{ } } #if %(use_uops)d - assert(numMicroops >= 2); - uops = new StaticInstPtr[numMicroops]; - uops[0] = new %(acc_name)s(machInst, _base, _mode, _wb); - uops[0]->setDelayedCommit(); - uops[1] = new %(wb_decl)s; - uops[1]->setLastMicroop(); + uops = new StaticInstPtr[1 + %(use_wb)d + %(use_pc)d]; + int uopIdx = 0; + uops[uopIdx] = new %(acc_name)s(machInst, _base, _mode, _wb); + uops[uopIdx]->setDelayedCommit(); +#if %(use_wb)d + uops[++uopIdx] = new %(wb_decl)s; + uops[uopIdx]->setDelayedCommit(); +#endif +#if %(use_pc)d + uops[++uopIdx] = new %(pc_decl)s; +#endif + uops[uopIdx]->setLastMicroop(); #endif } }}; -- cgit v1.2.3 From fe3d790ac8da41e8a0b9af93510cd874585c37e7 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 17 Mar 2011 19:20:20 -0500 Subject: ARM: Allow conditional quiesce instructions. This patch prevents not executed conditional instructions marked as IsQuiesce from stalling the pipeline indefinitely. If the instruction is not executed the quiesceSkip psuedoinst is called which schedules a wakes up call to the fetch stage. --- src/arch/arm/isa/insts/m5ops.isa | 6 +++--- src/arch/arm/isa/insts/misc.isa | 20 +++++++++++++------- src/arch/arm/isa/templates/pred.isa | 32 ++++++++++++++++++++++++++++++++ src/sim/pseudo_inst.cc | 34 ++++++++++++++++++++++++++++++++++ src/sim/pseudo_inst.hh | 1 + 5 files changed, 83 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa index da3609bbc..8521cbc97 100644 --- a/src/arch/arm/isa/insts/m5ops.isa +++ b/src/arch/arm/isa/insts/m5ops.isa @@ -66,7 +66,7 @@ let {{ ["IsNonSpeculative", "IsQuiesce"]) header_output += BasicDeclare.subst(quiesceIop) decoder_output += BasicConstructor.subst(quiesceIop) - exec_output += PredOpExecute.subst(quiesceIop) + exec_output += QuiescePredOpExecute.subst(quiesceIop) quiesceNsCode = ''' #if FULL_SYSTEM @@ -80,7 +80,7 @@ let {{ ["IsNonSpeculative", "IsQuiesce"]) header_output += BasicDeclare.subst(quiesceNsIop) decoder_output += BasicConstructor.subst(quiesceNsIop) - exec_output += PredOpExecute.subst(quiesceNsIop) + exec_output += QuiescePredOpExecute.subst(quiesceNsIop) quiesceCyclesCode = ''' #if FULL_SYSTEM @@ -94,7 +94,7 @@ let {{ ["IsNonSpeculative", "IsQuiesce", "IsUnverifiable"]) header_output += BasicDeclare.subst(quiesceCyclesIop) decoder_output += BasicConstructor.subst(quiesceCyclesIop) - exec_output += PredOpExecute.subst(quiesceCyclesIop) + exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) quiesceTimeCode = ''' #if FULL_SYSTEM diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index be51d927d..cf5c7b47a 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -491,10 +491,13 @@ let {{ wfeCode = ''' #if FULL_SYSTEM - if (SevMailbox) + if (SevMailbox) { SevMailbox = 0; - else + PseudoInst::quiesceSkip(xc->tcBase()); + } + else { PseudoInst::quiesce(xc->tcBase()); + } #endif ''' wfeIop = InstObjParams("wfe", "WfeInst", "PredOp", \ @@ -502,7 +505,7 @@ let {{ ["IsNonSpeculative", "IsQuiesce", "IsSerializeAfter"]) header_output += BasicDeclare.subst(wfeIop) decoder_output += BasicConstructor.subst(wfeIop) - exec_output += PredOpExecute.subst(wfeIop) + exec_output += QuiescePredOpExecute.subst(wfeIop) wfiCode = ''' #if FULL_SYSTEM @@ -511,22 +514,25 @@ let {{ ''' wfiIop = InstObjParams("wfi", "WfiInst", "PredOp", \ { "code" : wfiCode, "predicate_test" : predicateTest }, - ["IsNonSpeculative", "IsQuiesce"]) + ["IsNonSpeculative", "IsQuiesce", "IsSerializeAfter"]) header_output += BasicDeclare.subst(wfiIop) decoder_output += BasicConstructor.subst(wfiIop) - exec_output += PredOpExecute.subst(wfiIop) + exec_output += QuiescePredOpExecute.subst(wfiIop) sevCode = ''' // Need a way for O3 to not scoreboard these accesses as pipe flushes. + SevMailbox = 1; System *sys = xc->tcBase()->getSystemPtr(); for (int x = 0; x < sys->numContexts(); x++) { ThreadContext *oc = sys->getThreadContext(x); - oc->setMiscReg(MISCREG_SEV_MAILBOX, 1); + if (oc != xc->tcBase()) { + oc->setMiscReg(MISCREG_SEV_MAILBOX, 1); + } } ''' sevIop = InstObjParams("sev", "SevInst", "PredOp", \ { "code" : sevCode, "predicate_test" : predicateTest }, - ["IsNonSpeculative", "IsQuiesce", "IsSerializeAfter"]) + ["IsNonSpeculative", "IsSquashAfter"]) header_output += BasicDeclare.subst(sevIop) decoder_output += BasicConstructor.subst(sevIop) exec_output += PredOpExecute.subst(sevIop) diff --git a/src/arch/arm/isa/templates/pred.isa b/src/arch/arm/isa/templates/pred.isa index c9e7b1803..2a4bd9dab 100644 --- a/src/arch/arm/isa/templates/pred.isa +++ b/src/arch/arm/isa/templates/pred.isa @@ -170,6 +170,38 @@ def template PredOpExecute {{ } }}; +def template QuiescePredOpExecute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + uint64_t resTemp = 0; + resTemp = resTemp; + %(op_decl)s; + %(op_rd)s; + + if (%(predicate_test)s) + { + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + } else { + xc->setPredicate(false); +#if FULL_SYSTEM + PseudoInst::quiesceSkip(xc->tcBase()); +#endif + } + + if (fault == NoFault && machInst.itstateMask != 0&& + (!isMicroop() || isLastMicroop())) { + xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate); + } + + return fault; + } +}}; + def template DataDecode {{ if (machInst.opcode4 == 0) { if (machInst.sField == 0) diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index f3b10f6d2..bcff2f5c1 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2006 The Regents of The University of Michigan * All rights reserved. * @@ -85,6 +97,28 @@ quiesce(ThreadContext *tc) tc->getKernelStats()->quiesce(); } +void +quiesceSkip(ThreadContext *tc) +{ + BaseCPU *cpu = tc->getCpuPtr(); + + if (!cpu->params()->do_quiesce) + return; + + EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent(); + + Tick resume = curTick() + 1; + + cpu->reschedule(quiesceEvent, resume, true); + + DPRINTF(Quiesce, "%s: quiesceSkip() until %d\n", + cpu->name(), resume); + + tc->suspend(); + if (tc->getKernelStats()) + tc->getKernelStats()->quiesce(); +} + void quiesceNs(ThreadContext *tc, uint64_t ns) { diff --git a/src/sim/pseudo_inst.hh b/src/sim/pseudo_inst.hh index 296b1556b..aec3b5d8a 100644 --- a/src/sim/pseudo_inst.hh +++ b/src/sim/pseudo_inst.hh @@ -45,6 +45,7 @@ extern bool doQuiesce; #if FULL_SYSTEM void arm(ThreadContext *tc); void quiesce(ThreadContext *tc); +void quiesceSkip(ThreadContext *tc); void quiesceNs(ThreadContext *tc, uint64_t ns); void quiesceCycles(ThreadContext *tc, uint64_t cycles); uint64_t quiesceTime(ThreadContext *tc); -- cgit v1.2.3 From b78be240cf1c1269ed83548bf71095193487ca33 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 17 Mar 2011 19:20:20 -0500 Subject: ARM: Detect and skip udelay() functions in linux kernel. This change speeds up booting, especially in MP cases, by not executing udelay() on the core but instead skipping ahead tha amount of time that is being delayed. --- src/arch/arm/linux/system.cc | 24 ++++++++++++++++++++++++ src/arch/arm/linux/system.hh | 13 +++++++++++++ src/cpu/simple/atomic.cc | 3 +++ src/cpu/simple/timing.cc | 4 ++++ src/kern/linux/events.cc | 25 +++++++++++++++++++++++++ src/kern/linux/events.hh | 27 +++++++++++++++++++++++++++ 6 files changed, 96 insertions(+) (limited to 'src') diff --git a/src/arch/arm/linux/system.cc b/src/arch/arm/linux/system.cc index 38024c058..7aff2b6ef 100644 --- a/src/arch/arm/linux/system.cc +++ b/src/arch/arm/linux/system.cc @@ -47,9 +47,11 @@ #include "base/loader/object_file.hh" #include "base/loader/symtab.hh" #include "cpu/thread_context.hh" +#include "kern/linux/events.hh" #include "mem/physical.hh" using namespace ArmISA; +using namespace Linux; LinuxArmSystem::LinuxArmSystem(Params *p) : ArmSystem(p) @@ -96,6 +98,24 @@ LinuxArmSystem::LinuxArmSystem(Params *p) if (!kernelPanicEvent) panic("could not find kernel symbol \'panic\'"); #endif + + // With ARM udelay() is #defined to __udelay + Addr addr = 0; + if (kernelSymtab->findAddress("__udelay", addr)) { + uDelaySkipEvent = new UDelayEvent(&pcEventQueue, "__udelay", + fixFuncEventAddr(addr), 1000, 0); + } else { + panic("couldn't find kernel symbol \'udelay\'"); + } + + // constant arguments to udelay() have some precomputation done ahead of + // time. Constant comes from code. + if (kernelSymtab->findAddress("__const_udelay", addr)) { + constUDelaySkipEvent = new UDelayEvent(&pcEventQueue, "__const_udelay", + fixFuncEventAddr(addr), 1000, 107374); + } else { + panic("couldn't find kernel symbol \'udelay\'"); + } } void @@ -115,6 +135,10 @@ LinuxArmSystem::initState() LinuxArmSystem::~LinuxArmSystem() { + if (uDelaySkipEvent) + delete uDelaySkipEvent; + if (constUDelaySkipEvent) + delete constUDelaySkipEvent; } LinuxArmSystem * diff --git a/src/arch/arm/linux/system.hh b/src/arch/arm/linux/system.hh index 4e5ebcd73..2ef65fea2 100644 --- a/src/arch/arm/linux/system.hh +++ b/src/arch/arm/linux/system.hh @@ -74,6 +74,19 @@ class LinuxArmSystem : public ArmSystem /** Event to halt the simulator if the kernel calls panic() */ BreakPCEvent *kernelPanicEvent; #endif + /** + * PC based event to skip udelay(