From d53d04473e0d6ca1765f1117072eec59187a7f7b Mon Sep 17 00:00:00 2001 From: Andreas Hansson Date: Tue, 28 Aug 2012 14:30:31 -0400 Subject: Clock: Rework clocks to avoid tick-to-cycle transformations This patch introduces the notion of a clock update function that aims to avoid costly divisions when turning the current tick into a cycle. Each clocked object advances a private (hidden) cycle member and a tick member and uses these to implement functions for getting the tick of the next cycle, or the tick of a cycle some time in the future. In the different modules using the clocks, changes are made to avoid counting in ticks only to later translate to cycles. There are a few oddities in how the O3 and inorder CPU count idle cycles, as seen by a few locations where a cycle is subtracted in the calculation. This is done such that the regression does not change any stats, but should be revisited in a future patch. Another, much needed, change that is not done as part of this patch is to introduce a new typedef uint64_t Cycle to be able to at least hint at the unit of the variables counting Ticks vs Cycles. This will be done as a follow-up patch. As an additional follow up, the thread context still uses ticks for the book keeping of last activate and last suspend and this should probably also be changed into cycles as well. --- src/cpu/simple/atomic.cc | 10 +++++----- src/cpu/simple/timing.cc | 42 +++++++++++++++++++----------------------- src/cpu/simple/timing.hh | 2 +- 3 files changed, 25 insertions(+), 29 deletions(-) (limited to 'src/cpu/simple') diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index fc6724939..d1b0391fc 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -208,10 +208,10 @@ AtomicSimpleCPU::activateContext(ThreadID thread_num, int delay) assert(!tickEvent.scheduled()); notIdleFraction++; - numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend); + numCycles += tickToCycle(thread->lastActivate - thread->lastSuspend); //Make sure ticks are still on multiples of cycles - schedule(tickEvent, nextCycle(curTick() + ticks(delay))); + schedule(tickEvent, clockEdge(delay)); _status = Running; } @@ -518,7 +518,7 @@ AtomicSimpleCPU::tick() stall_ticks += dcache_latency; if (stall_ticks) { - Tick stall_cycles = stall_ticks / ticks(1); + Tick stall_cycles = stall_ticks / clockPeriod(); Tick aligned_stall_ticks = ticks(stall_cycles); if (aligned_stall_ticks < stall_ticks) @@ -533,8 +533,8 @@ AtomicSimpleCPU::tick() } // instruction takes at least one cycle - if (latency < ticks(1)) - latency = ticks(1); + if (latency < clockPeriod()) + latency = clockPeriod(); if (_status != Idle) schedule(tickEvent, curTick() + latency); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 9022845ce..5437e77aa 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -87,13 +87,11 @@ TimingSimpleCPU::TimingCPUPort::TickEvent::schedule(PacketPtr _pkt, Tick t) TimingSimpleCPU::TimingSimpleCPU(TimingSimpleCPUParams *p) : BaseSimpleCPU(p), fetchTranslation(this), icachePort(this), - dcachePort(this), fetchEvent(this) + dcachePort(this), ifetch_pkt(NULL), dcache_pkt(NULL), previousCycle(0), + fetchEvent(this) { _status = Idle; - ifetch_pkt = dcache_pkt = NULL; - drainEvent = NULL; - previousTick = 0; changeState(SimObject::Running); system->totalNumInsts = 0; } @@ -156,7 +154,7 @@ TimingSimpleCPU::switchOut() { assert(_status == Running || _status == Idle); _status = SwitchedOut; - numCycles += tickToCycles(curTick() - previousTick); + numCycles += curCycle() - previousCycle; // If we've been scheduled to resume but are then told to switch out, // we'll need to cancel it. @@ -184,7 +182,7 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU) _status = Idle; } assert(threadContexts.size() == 1); - previousTick = curTick(); + previousCycle = curCycle(); } @@ -202,7 +200,7 @@ TimingSimpleCPU::activateContext(ThreadID thread_num, int delay) _status = Running; // kick things off by initiating the fetch of the next instruction - schedule(fetchEvent, nextCycle(curTick() + ticks(delay))); + schedule(fetchEvent, clockEdge(delay)); } @@ -231,9 +229,8 @@ TimingSimpleCPU::handleReadPacket(PacketPtr pkt) { RequestPtr req = pkt->req; if (req->isMmappedIpr()) { - Tick delay; - delay = TheISA::handleIprRead(thread->getTC(), pkt); - new IprEvent(pkt, this, nextCycle(curTick() + delay)); + Tick delay = TheISA::handleIprRead(thread->getTC(), pkt); + new IprEvent(pkt, this, clockEdge(delay)); _status = DcacheWaitResponse; dcache_pkt = NULL; } else if (!dcachePort.sendTimingReq(pkt)) { @@ -322,8 +319,8 @@ TimingSimpleCPU::translationFault(Fault fault) { // fault may be NoFault in cases where a fault is suppressed, // for instance prefetches. - numCycles += tickToCycles(curTick() - previousTick); - previousTick = curTick(); + numCycles += curCycle() - previousCycle; + previousCycle = curCycle(); if (traceData) { // Since there was a fault, we shouldn't trace this instruction. @@ -446,9 +443,8 @@ TimingSimpleCPU::handleWritePacket() { RequestPtr req = dcache_pkt->req; if (req->isMmappedIpr()) { - Tick delay; - delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt); - new IprEvent(dcache_pkt, this, nextCycle(curTick() + delay)); + Tick delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt); + new IprEvent(dcache_pkt, this, clockEdge(delay)); _status = DcacheWaitResponse; dcache_pkt = NULL; } else if (!dcachePort.sendTimingReq(dcache_pkt)) { @@ -567,8 +563,8 @@ TimingSimpleCPU::fetch() _status = IcacheWaitResponse; completeIfetch(NULL); - numCycles += tickToCycles(curTick() - previousTick); - previousTick = curTick(); + numCycles += curCycle() - previousCycle; + previousCycle = curCycle(); } } @@ -600,8 +596,8 @@ TimingSimpleCPU::sendFetch(Fault fault, RequestPtr req, ThreadContext *tc) advanceInst(fault); } - numCycles += tickToCycles(curTick() - previousTick); - previousTick = curTick(); + numCycles += curCycle() - previousCycle; + previousCycle = curCycle(); } @@ -647,8 +643,8 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) _status = Running; - numCycles += tickToCycles(curTick() - previousTick); - previousTick = curTick(); + numCycles += curCycle() - previousCycle; + previousCycle = curCycle(); if (getState() == SimObject::Draining) { if (pkt) { @@ -754,8 +750,8 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) assert(_status == DcacheWaitResponse || _status == DTBWaitResponse || pkt->req->getFlags().isSet(Request::NO_ACCESS)); - numCycles += tickToCycles(curTick() - previousTick); - previousTick = curTick(); + numCycles += curCycle() - previousCycle; + previousCycle = curCycle(); if (pkt->senderState) { SplitFragmentSenderState * send_state = diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index b6b78c5db..c4d1573af 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -234,7 +234,7 @@ class TimingSimpleCPU : public BaseSimpleCPU PacketPtr ifetch_pkt; PacketPtr dcache_pkt; - Tick previousTick; + Tick previousCycle; protected: -- cgit v1.2.3