diff options
Diffstat (limited to 'src')
34 files changed, 1013 insertions, 301 deletions
diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 274407be5..757c9e7b7 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -58,6 +58,7 @@ baseFlags = [ 'BusAddrRanges', 'BusBridge', 'Cache', + 'CachePort', 'Chains', 'Checker', 'Clock', @@ -93,6 +94,7 @@ baseFlags = [ 'Flow', 'FreeList', 'FullCPU', + 'FunctionalAccess', 'GDBAcc', 'GDBExtra', 'GDBMisc', @@ -121,6 +123,7 @@ baseFlags = [ 'MSHR', 'Mbox', 'MemDepUnit', + 'MemoryAccess', 'O3CPU', 'OzoneCPU', 'OzoneLSQ', diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index 8c0186dae..b2806d40b 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -133,7 +133,7 @@ class CheckerThreadContext : public ThreadContext void takeOverFrom(ThreadContext *oldContext) { actualTC->takeOverFrom(oldContext); - checkerTC->takeOverFrom(oldContext); + checkerTC->copyState(oldContext); } void regStats(const std::string &name) { actualTC->regStats(name); } diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 609a07a8e..024cd7e41 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -71,7 +71,11 @@ MemTest::CpuPort::recvAtomic(Packet *pkt) void MemTest::CpuPort::recvFunctional(Packet *pkt) { - memtest->completeRequest(pkt); + //Do nothing if we see one come through + if (curTick != 0)//Supress warning durring initialization + warn("Functional Writes not implemented in MemTester\n"); + //Need to find any response values that intersect and update + return; } void @@ -89,6 +93,20 @@ MemTest::CpuPort::recvRetry() memtest->doRetry(); } +void +MemTest::sendPkt(Packet *pkt) { + if (atomic) { + cachePort.sendAtomic(pkt); + pkt->makeAtomicResponse(); + completeRequest(pkt); + } + else if (!cachePort.sendTiming(pkt)) { + accessRetry = true; + retryPkt = pkt; + } + +} + MemTest::MemTest(const string &name, // MemInterface *_cache_interface, // PhysicalMemory *main_mem, @@ -101,7 +119,8 @@ MemTest::MemTest(const string &name, unsigned _percentSourceUnaligned, unsigned _percentDestUnaligned, Addr _traceAddr, - Counter _max_loads) + Counter _max_loads, + bool _atomic) : MemObject(name), tickEvent(this), cachePort("test", this), @@ -117,7 +136,8 @@ MemTest::MemTest(const string &name, nextProgressMessage(_progressInterval), percentSourceUnaligned(_percentSourceUnaligned), percentDestUnaligned(percentDestUnaligned), - maxLoads(_max_loads) + maxLoads(_max_loads), + atomic(_atomic) { vector<string> cmd; cmd.push_back("/bin/ls"); @@ -325,7 +345,7 @@ MemTest::tick() } else { paddr = ((base) ? baseAddr1 : baseAddr2) + offset; } - // bool probe = (random() % 2 == 1) && !req->isUncacheable(); + //bool probe = (random() % 2 == 1) && !req->isUncacheable(); bool probe = false; paddr &= ~((1 << access_size) - 1); @@ -340,7 +360,11 @@ MemTest::tick() //For now we only allow one outstanding request per addreess per tester //This means we assume CPU does write forwarding to reads that alias something //in the cpu store buffer. - if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return; + if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) { + delete result; + delete req; + return; + } else outstandingAddrs.insert(paddr); // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin @@ -364,13 +388,10 @@ MemTest::tick() if (probe) { cachePort.sendFunctional(pkt); -// completeRequest(pkt, result); + completeRequest(pkt); } else { // req->completionEvent = new MemCompleteEvent(req, result, this); - if (!cachePort.sendTiming(pkt)) { - accessRetry = true; - retryPkt = pkt; - } + sendPkt(pkt); } } else { // write @@ -378,7 +399,12 @@ MemTest::tick() //For now we only allow one outstanding request per addreess per tester //This means we assume CPU does write forwarding to reads that alias something //in the cpu store buffer. - if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return; + if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) { + delete [] result; + delete req; + return; + } + else outstandingAddrs.insert(paddr); /* @@ -405,13 +431,10 @@ MemTest::tick() if (probe) { cachePort.sendFunctional(pkt); -// completeRequest(req, NULL); + completeRequest(pkt); } else { // req->completionEvent = new MemCompleteEvent(req, NULL, this); - if (!cachePort.sendTiming(pkt)) { - accessRetry = true; - retryPkt = pkt; - } + sendPkt(pkt); } } /* else { @@ -483,6 +506,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest) Param<unsigned> percent_dest_unaligned; Param<Addr> trace_addr; Param<Counter> max_loads; + Param<bool> atomic; END_DECLARE_SIM_OBJECT_PARAMS(MemTest) @@ -502,7 +526,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) INIT_PARAM(percent_dest_unaligned, "percent of copy dest address that are unaligned"), INIT_PARAM(trace_addr, "address to trace"), - INIT_PARAM(max_loads, "terminate when we have reached this load count") + INIT_PARAM(max_loads, "terminate when we have reached this load count"), + INIT_PARAM(atomic, "Is the tester testing atomic mode (or timing)") END_INIT_SIM_OBJECT_PARAMS(MemTest) @@ -513,7 +538,7 @@ CREATE_SIM_OBJECT(MemTest) /*check_mem,*/ memory_size, percent_reads, /*percent_copies,*/ percent_uncacheable, progress_interval, percent_source_unaligned, percent_dest_unaligned, - trace_addr, max_loads); + trace_addr, max_loads, atomic); } REGISTER_SIM_OBJECT("MemTest", MemTest) diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 278012eba..5de41f0d8 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -61,7 +61,8 @@ class MemTest : public MemObject unsigned _percentSourceUnaligned, unsigned _percentDestUnaligned, Addr _traceAddr, - Counter _max_loads); + Counter _max_loads, + bool _atomic); virtual void init(); @@ -113,7 +114,7 @@ class MemTest : public MemObject virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } }; CpuPort cachePort; @@ -175,6 +176,9 @@ class MemTest : public MemObject uint64_t numReads; uint64_t maxLoads; + + bool atomic; + Stats::Scalar<> numReadsStat; Stats::Scalar<> numWritesStat; Stats::Scalar<> numCopiesStat; @@ -182,6 +186,8 @@ class MemTest : public MemObject // called by MemCompleteEvent::process() void completeRequest(Packet *pkt); + void sendPkt(Packet *pkt); + void doRetry(); friend class MemCompleteEvent; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index c80e4d8c1..ecf6ed632 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -342,12 +342,6 @@ DefaultCommit<Impl>::drain() { drainPending = true; - // If it's already drained, return true. - if (rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalDrained(); - return true; - } - return false; } @@ -1218,16 +1212,16 @@ DefaultCommit<Impl>::skidInsert() for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) { DynInstPtr inst = fromRename->insts[inst_num]; - int tid = inst->threadNumber; if (!inst->isSquashed()) { DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ", - "skidBuffer.\n", inst->readPC(), inst->seqNum, tid); + "skidBuffer.\n", inst->readPC(), inst->seqNum, + inst->threadNumber); skidBuffer.push(inst); } else { DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " "squashed, skipping.\n", - inst->readPC(), inst->seqNum, tid); + inst->readPC(), inst->seqNum, inst->threadNumber); } } } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 7386dfadd..4c9a8e91f 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -88,7 +88,7 @@ FullO3CPU<Impl>::TickEvent::description() template <class Impl> FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent() - : Event(&mainEventQueue, CPU_Tick_Pri) + : Event(&mainEventQueue, CPU_Switch_Pri) { } @@ -135,7 +135,8 @@ void FullO3CPU<Impl>::DeallocateContextEvent::process() { cpu->deactivateThread(tid); - cpu->removeThread(tid); + if (remove) + cpu->removeThread(tid); } template <class Impl> @@ -191,7 +192,11 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) deferRegistration(params->deferRegistration), numThreads(number_of_threads) { - _status = Idle; + if (!deferRegistration) { + _status = Running; + } else { + _status = Idle; + } checker = NULL; @@ -304,6 +309,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) tid, bindRegs); + + activateThreadEvent[tid].init(tid, this); + deallocateContextEvent[tid].init(tid, this); } rename.setRenameMap(renameMap); @@ -447,13 +455,16 @@ FullO3CPU<Impl>::tick() if (!tickEvent.scheduled()) { if (_status == SwitchedOut || getState() == SimObject::Drained) { + DPRINTF(O3CPU, "Switched out!\n"); // increment stat lastRunningCycle = curTick; - } else if (!activityRec.active()) { + } else if (!activityRec.active() || _status == Idle) { + DPRINTF(O3CPU, "Idle!\n"); lastRunningCycle = curTick; timesIdled++; } else { tickEvent.schedule(curTick + cycles(1)); + DPRINTF(O3CPU, "Scheduling next tick!\n"); } } @@ -512,6 +523,8 @@ FullO3CPU<Impl>::activateThread(unsigned tid) list<unsigned>::iterator isActive = find( activeThreads.begin(), activeThreads.end(), tid); + DPRINTF(O3CPU, "[tid:%i]: Calling activate thread.\n", tid); + if (isActive == activeThreads.end()) { DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", tid); @@ -528,6 +541,8 @@ FullO3CPU<Impl>::deactivateThread(unsigned tid) list<unsigned>::iterator thread_it = find(activeThreads.begin(), activeThreads.end(), tid); + DPRINTF(O3CPU, "[tid:%i]: Calling deactivate thread.\n", tid); + if (thread_it != activeThreads.end()) { DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", tid); @@ -548,7 +563,7 @@ FullO3CPU<Impl>::activateContext(int tid, int delay) activateThread(tid); } - if(lastActivatedCycle < curTick) { + if (lastActivatedCycle < curTick) { scheduleTickEvent(delay); // Be sure to signal that there's some activity so the CPU doesn't @@ -563,17 +578,20 @@ FullO3CPU<Impl>::activateContext(int tid, int delay) } template <class Impl> -void -FullO3CPU<Impl>::deallocateContext(int tid, int delay) +bool +FullO3CPU<Impl>::deallocateContext(int tid, bool remove, int delay) { // Schedule removal of thread data from CPU if (delay){ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " "on cycle %d\n", tid, curTick + cycles(delay)); - scheduleDeallocateContextEvent(tid, delay); + scheduleDeallocateContextEvent(tid, remove, delay); + return false; } else { deactivateThread(tid); - removeThread(tid); + if (remove) + removeThread(tid); + return true; } } @@ -582,8 +600,9 @@ void FullO3CPU<Impl>::suspendContext(int tid) { DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); - deactivateThread(tid); - if (activeThreads.size() == 0) + bool deallocated = deallocateContext(tid, false, 1); + // If this was the last thread then unschedule the tick event. + if ((activeThreads.size() == 1 && !deallocated) || activeThreads.size() == 0) unscheduleTickEvent(); _status = Idle; } @@ -594,7 +613,7 @@ FullO3CPU<Impl>::haltContext(int tid) { //For now, this is the same as deallocate DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); - deallocateContext(tid, 1); + deallocateContext(tid, true, 1); } template <class Impl> @@ -682,10 +701,17 @@ FullO3CPU<Impl>::removeThread(unsigned tid) assert(iew.ldstQueue.getCount(tid) == 0); // Reset ROB/IQ/LSQ Entries + + // Commented out for now. This should be possible to do by + // telling all the pipeline stages to drain first, and then + // checking until the drain completes. Once the pipeline is + // drained, call resetEntries(). - 10-09-06 ktlim +/* if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } +*/ } @@ -824,7 +850,9 @@ template <class Impl> void FullO3CPU<Impl>::resume() { +#if FULL_SYSTEM assert(system->getMemoryMode() == System::Timing); +#endif fetch.resume(); decode.resume(); rename.resume(); @@ -935,6 +963,25 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) } if (!tickEvent.scheduled()) tickEvent.schedule(curTick); + + Port *peer; + Port *icachePort = fetch.getIcachePort(); + if (icachePort->getPeer() == NULL) { + peer = oldCPU->getPort("icache_port")->getPeer(); + icachePort->setPeer(peer); + } else { + peer = icachePort->getPeer(); + } + peer->setPeer(icachePort); + + Port *dcachePort = iew.getDcachePort(); + if (dcachePort->getPeer() == NULL) { + peer = oldCPU->getPort("dcache_port")->getPeer(); + dcachePort->setPeer(peer); + } else { + peer = dcachePort->getPeer(); + } + peer->setPeer(dcachePort); } template <class Impl> diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index dcdcd1fe6..fe510519c 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -202,9 +202,12 @@ class FullO3CPU : public BaseO3CPU class DeallocateContextEvent : public Event { private: - /** Number of Thread to Activate */ + /** Number of Thread to deactivate */ int tid; + /** Should the thread be removed from the CPU? */ + bool remove; + /** Pointer to the CPU. */ FullO3CPU<Impl> *cpu; @@ -218,12 +221,15 @@ class FullO3CPU : public BaseO3CPU /** Processes the event, calling activateThread() on the CPU. */ void process(); + /** Sets whether the thread should also be removed from the CPU. */ + void setRemove(bool _remove) { remove = _remove; } + /** Returns the description of the event. */ const char *description(); }; /** Schedule cpu to deallocate thread context.*/ - void scheduleDeallocateContextEvent(int tid, int delay) + void scheduleDeallocateContextEvent(int tid, bool remove, int delay) { // Schedule thread to activate, regardless of its current state. if (deallocateContextEvent[tid].squashed()) @@ -296,9 +302,9 @@ class FullO3CPU : public BaseO3CPU void suspendContext(int tid); /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. + * Possibly Remove Thread Context from CPU. */ - void deallocateContext(int tid, int delay = 1); + bool deallocateContext(int tid, bool remove, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -626,11 +632,6 @@ class FullO3CPU : public BaseO3CPU /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; - /** Pointer to the icache interface. */ - MemInterface *icacheInterface; - /** Pointer to the dcache interface. */ - MemInterface *dcacheInterface; - /** Whether or not the CPU should defer its registration. */ bool deferRegistration; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index b3c3caaad..32210f1cd 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -623,6 +623,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Now do the timing access to see whether or not the instruction // exists within the cache. if (!icachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + fault = TheISA::genMachineCheckFault(); + delete mem_req; + memReq[tid] = NULL; + } assert(retryPkt == NULL); assert(retryTid == -1); DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index b2baae296..ba5260fe2 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -600,6 +600,11 @@ template<class Impl> void DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) { + // This function should not be called after writebackInsts in a + // single cycle. That will cause problems with an instruction + // being added to the queue to commit without being processed by + // writebackInsts prior to being sent to commit. + // First check the time slot that this instruction will write // to. If there are free write ports at the time, then go ahead // and write the instruction to that time. If there are not, @@ -1286,6 +1291,7 @@ DefaultIEW<Impl>::executeInsts() } else if (fault != NoFault) { // If the instruction faulted, then we need to send it along to commit // without the instruction completing. + DPRINTF(IEW, "Store has fault! [sn:%lli]\n", inst->seqNum); // Send this instruction to commit, also make sure iew stage // realizes there is activity. diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 58945f04e..11a02e7c7 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -626,18 +626,30 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(load_inst->memData); - - LSQSenderState *state = new LSQSenderState; - state->isLoad = true; - state->idx = load_idx; - state->inst = load_inst; - data_pkt->senderState = state; - // if we the cache is not blocked, do cache access if (!lsq->cacheBlocked()) { + PacketPtr data_pkt = + new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(load_inst->memData); + + LSQSenderState *state = new LSQSenderState; + state->isLoad = true; + state->idx = load_idx; + state->inst = load_inst; + data_pkt->senderState = state; + if (!dcachePort->sendTiming(data_pkt)) { + Packet::Result result = data_pkt->result; + + // Delete state and data packet because a load retry + // initiates a pipeline restart; it does not retry. + delete state; + delete data_pkt; + + if (result == Packet::BadAddress) { + return TheISA::genMachineCheckFault(); + } + // If the access didn't succeed, tell the LSQ by setting // the retry thread id. lsq->setRetryTid(lsqID); @@ -664,16 +676,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) return NoFault; } - if (data_pkt->result != Packet::Success) { - DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); - DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", - load_inst->seqNum); - } else { - DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); - DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", - load_inst->seqNum); - } - return NoFault; } diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 63ffcece1..3f9db912f 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -608,9 +608,9 @@ LSQUnit<Impl>::writebackStores() DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", - storeWBIdx, storeQueue[storeWBIdx].inst->readPC(), + storeWBIdx, inst->readPC(), req->getPaddr(), *(inst->memData), - storeQueue[storeWBIdx].inst->seqNum); + inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. if (req->isLocked()) { @@ -619,10 +619,19 @@ LSQUnit<Impl>::writebackStores() } else { if (cpu->lockFlag) { req->setScResult(1); + DPRINTF(LSQUnit, "Store conditional [sn:%lli] succeeded.", + inst->seqNum); } else { req->setScResult(0); // Hack: Instantly complete this store. - completeDataAccess(data_pkt); +// completeDataAccess(data_pkt); + DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. " + "Instantly completing it.\n", + inst->seqNum); + WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); + wb->schedule(curTick + 1); + delete state; + completeStore(storeWBIdx); incrStIdx(storeWBIdx); continue; } @@ -633,7 +642,13 @@ LSQUnit<Impl>::writebackStores() } if (!dcachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + panic("LSQ sent out a bad address for a completed store!"); + } // Need to handle becoming blocked on a store. + DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will" + "retry later\n", + inst->seqNum); isStoreBlocked = true; ++lsqCacheBlocked; assert(retryPkt == NULL); @@ -880,6 +895,9 @@ LSQUnit<Impl>::recvRetry() assert(retryPkt != NULL); if (dcachePort->sendTiming(retryPkt)) { + if (retryPkt->result == Packet::BadAddress) { + panic("LSQ sent out a bad address for a completed store!"); + } storePostSend(retryPkt); retryPkt = NULL; isStoreBlocked = false; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index 25e1db21c..2bc194d53 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -165,14 +165,14 @@ template <class Impl> void O3ThreadContext<Impl>::deallocate(int delay) { - DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", - getThreadNum()); + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d delay %d\n", + getThreadNum(), delay); if (thread->status() == ThreadContext::Unallocated) return; thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid(), delay); + cpu->deallocateContext(thread->readTid(), true, delay); } template <class Impl> diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 88aa882e3..ad5c0e5d6 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -103,6 +103,7 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) ifetch_pkt = dcache_pkt = NULL; drainEvent = NULL; fetchEvent = NULL; + previousTick = 0; changeState(SimObject::Running); } @@ -162,6 +163,7 @@ TimingSimpleCPU::resume() } changeState(SimObject::Running); + previousTick = curTick; } void @@ -169,6 +171,7 @@ TimingSimpleCPU::switchOut() { assert(status() == Running || status() == Idle); _status = SwitchedOut; + numCycles += curTick - previousTick; // If we've been scheduled to resume but are then told to switch out, // we'll need to cancel it. @@ -195,6 +198,23 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU) if (_status != Running) { _status = Idle; } + + Port *peer; + if (icachePort.getPeer() == NULL) { + peer = oldCPU->getPort("icache_port")->getPeer(); + icachePort.setPeer(peer); + } else { + peer = icachePort.getPeer(); + } + peer->setPeer(&icachePort); + + if (dcachePort.getPeer() == NULL) { + peer = oldCPU->getPort("dcache_port")->getPeer(); + dcachePort.setPeer(peer); + } else { + peer = dcachePort.getPeer(); + } + peer->setPeer(&dcachePort); } @@ -429,6 +449,9 @@ TimingSimpleCPU::fetch() // fetch fault: advance directly to next instruction (fault handler) advanceInst(fault); } + + numCycles += curTick - previousTick; + previousTick = curTick; } @@ -459,6 +482,9 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) delete pkt->req; delete pkt; + numCycles += curTick - previousTick; + previousTick = curTick; + if (getState() == SimObject::Draining) { completeDrain(); return; @@ -538,6 +564,9 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; + numCycles += curTick - previousTick; + previousTick = curTick; + Fault fault = curStaticInst->completeAcc(pkt, this, traceData); if (pkt->isRead() && pkt->req->isLocked()) { @@ -547,6 +576,8 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) delete pkt->req; delete pkt; + postExecute(); + if (getState() == SimObject::Draining) { advancePC(fault); completeDrain(); @@ -554,7 +585,6 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) return; } - postExecute(); advanceInst(fault); } diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 18e13aeb2..988ddeded 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -167,6 +167,7 @@ class TimingSimpleCPU : public BaseSimpleCPU Packet *dcache_pkt; int cpu_id; + Tick previousTick; public: diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 1646cbd57..b11b6de58 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -61,12 +61,79 @@ Bus::getPort(const std::string &if_name, int idx) void Bus::init() { - std::vector<Port*>::iterator intIter; + std::vector<BusPort*>::iterator intIter; for (intIter = interfaces.begin(); intIter != interfaces.end(); intIter++) (*intIter)->sendStatusChange(Port::RangeChange); } +Bus::BusFreeEvent::BusFreeEvent(Bus *_bus) : Event(&mainEventQueue), bus(_bus) +{} + +void Bus::BusFreeEvent::process() +{ + bus->recvRetry(-1); +} + +const char * Bus::BusFreeEvent::description() +{ + return "bus became available"; +} + +void Bus::occupyBus(PacketPtr pkt) +{ + //Bring tickNextIdle up to the present tick + //There is some potential ambiguity where a cycle starts, which might make + //a difference when devices are acting right around a cycle boundary. Using + //a < allows things which happen exactly on a cycle boundary to take up only + //the following cycle. Anthing that happens later will have to "wait" for + //the end of that cycle, and then start using the bus after that. + while (tickNextIdle < curTick) + tickNextIdle += clock; + + // The packet will be sent. Figure out how long it occupies the bus, and + // how much of that time is for the first "word", aka bus width. + int numCycles = 0; + // Requests need one cycle to send an address + if (pkt->isRequest()) + numCycles++; + else if (pkt->isResponse() || pkt->hasData()) { + // If a packet has data, it needs ceil(size/width) cycles to send it + // We're using the "adding instead of dividing" trick again here + if (pkt->hasData()) { + int dataSize = pkt->getSize(); + for (int transmitted = 0; transmitted < dataSize; + transmitted += width) { + numCycles++; + } + } else { + // If the packet didn't have data, it must have been a response. + // Those use the bus for one cycle to send their data. + numCycles++; + } + } + + // The first word will be delivered after the current tick, the delivery + // of the address if any, and one bus cycle to deliver the data + pkt->firstWordTime = + tickNextIdle + + pkt->isRequest() ? clock : 0 + + clock; + + //Advance it numCycles bus cycles. + //XXX Should this use the repeated addition trick as well? + tickNextIdle += (numCycles * clock); + if (!busIdle.scheduled()) { + busIdle.schedule(tickNextIdle); + } else { + busIdle.reschedule(tickNextIdle); + } + DPRINTF(Bus, "The bus is now occupied from tick %d to %d\n", + curTick, tickNextIdle); + + // The bus will become idle once the current packet is delivered. + pkt->finishTime = tickNextIdle; +} /** Function called by the port when the bus is receiving a Timing * transaction.*/ @@ -77,23 +144,40 @@ Bus::recvTiming(Packet *pkt) DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); + BusPort *pktPort; + if (pkt->getSrc() == defaultId) + pktPort = defaultPort; + else pktPort = interfaces[pkt->getSrc()]; + + // If the bus is busy, or other devices are in line ahead of the current + // one, put this device on the retry list. + if (tickNextIdle > curTick || + (retryList.size() && (!inRetry || pktPort != retryList.front()))) { + addToRetryList(pktPort); + return false; + } + short dest = pkt->getDest(); if (dest == Packet::Broadcast) { - if (timingSnoop(pkt)) - { + if (timingSnoop(pkt)) { pkt->flags |= SNOOP_COMMIT; bool success = timingSnoop(pkt); assert(success); if (pkt->flags & SATISFIED) { //Cache-Cache transfer occuring + if (inRetry) { + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; + } + occupyBus(pkt); return true; } port = findPort(pkt->getAddr(), pkt->getSrc()); - } - else - { + } else { //Snoop didn't succeed - retryList.push_back(interfaces[pkt->getSrc()]); + DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); + addToRetryList(pktPort); return false; } } else { @@ -101,35 +185,60 @@ Bus::recvTiming(Packet *pkt) assert(dest != pkt->getSrc()); // catch infinite loops port = interfaces[dest]; } + + occupyBus(pkt); + if (port->sendTiming(pkt)) { - // packet was successfully sent, just return true. + // Packet was successfully sent. Return true. + // Also take care of retries + if (inRetry) { + DPRINTF(Bus, "Remove retry from list %i\n", retryList.front()); + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; + } return true; } - // packet not successfully sent - retryList.push_back(interfaces[pkt->getSrc()]); + // Packet not successfully sent. Leave or put it on the retry list. + DPRINTF(Bus, "Adding a retry to RETRY list %i\n", pktPort); + addToRetryList(pktPort); return false; } void Bus::recvRetry(int id) { - // Go through all the elements on the list calling sendRetry on each - // This is not very efficient at all but it works. Ultimately we should end - // up with something that is more intelligent. - int initialSize = retryList.size(); - int i; - Port *p; - - for (i = 0; i < initialSize; i++) { - assert(retryList.size() > 0); - p = retryList.front(); - retryList.pop_front(); - p->sendRetry(); + DPRINTF(Bus, "Received a retry\n"); + // If there's anything waiting, and the bus isn't busy... + if (retryList.size() && curTick >= tickNextIdle) { + //retryingPort = retryList.front(); + inRetry = true; + DPRINTF(Bus, "Sending a retry\n"); + retryList.front()->sendRetry(); + // If inRetry is still true, sendTiming wasn't called + if (inRetry) + { + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; + + //Bring tickNextIdle up to the present + while (tickNextIdle < curTick) + tickNextIdle += clock; + + //Burn a cycle for the missed grant. + tickNextIdle += clock; + + if (!busIdle.scheduled()) { + busIdle.schedule(tickNextIdle); + } else { + busIdle.reschedule(tickNextIdle); + } + } } } - Port * Bus::findPort(Addr addr, int id) { @@ -180,24 +289,30 @@ Bus::findSnoopPorts(Addr addr, int id) //Careful to not overlap ranges //or snoop will be called more than once on the port ports.push_back(portSnoopList[i].portId); - DPRINTF(Bus, " found snoop addr %#llx on device%d\n", addr, - portSnoopList[i].portId); +// DPRINTF(Bus, " found snoop addr %#llx on device%d\n", addr, +// portSnoopList[i].portId); } i++; } return ports; } -void +Tick Bus::atomicSnoop(Packet *pkt) { std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); + Tick response_time = 0; while (!ports.empty()) { - interfaces[ports.back()]->sendAtomic(pkt); + Tick response = interfaces[ports.back()]->sendAtomic(pkt); + if (response) { + assert(!response_time); //Multiple responders + response_time = response; + } ports.pop_back(); } + return response_time; } void @@ -205,7 +320,7 @@ Bus::functionalSnoop(Packet *pkt) { std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); - while (!ports.empty()) + while (!ports.empty() && pkt->result != Packet::Success) { interfaces[ports.back()]->sendFunctional(pkt); ports.pop_back(); @@ -236,8 +351,11 @@ Bus::recvAtomic(Packet *pkt) DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); - atomicSnoop(pkt); - return findPort(pkt->getAddr(), pkt->getSrc())->sendAtomic(pkt); + Tick snoopTime = atomicSnoop(pkt); + if (snoopTime) + return snoopTime; //Snoop satisfies it + else + return findPort(pkt->getAddr(), pkt->getSrc())->sendAtomic(pkt); } /** Function called by the port when the bus is receiving a Functional @@ -249,7 +367,10 @@ Bus::recvFunctional(Packet *pkt) pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); functionalSnoop(pkt); - findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt); + + // If the snooping found what we were looking for, we're done. + if (pkt->result != Packet::Success) + findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt); } /** Function called by the port when the bus is receiving a status change.*/ @@ -277,7 +398,7 @@ Bus::recvStatusChange(Port::Status status, int id) } } else { - assert((id < interfaces.size() && id >= 0) || id == -1); + assert((id < interfaces.size() && id >= 0) || id == defaultId); Port *port = interfaces[id]; std::vector<DevMap>::iterator portIter; std::vector<DevMap>::iterator snoopIter; @@ -377,16 +498,20 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) BEGIN_DECLARE_SIM_OBJECT_PARAMS(Bus) Param<int> bus_id; + Param<int> clock; + Param<int> width; END_DECLARE_SIM_OBJECT_PARAMS(Bus) BEGIN_INIT_SIM_OBJECT_PARAMS(Bus) - INIT_PARAM(bus_id, "a globally unique bus id") + INIT_PARAM(bus_id, "a globally unique bus id"), + INIT_PARAM(clock, "bus clock speed"), + INIT_PARAM(width, "width of the bus (bits)") END_INIT_SIM_OBJECT_PARAMS(Bus) CREATE_SIM_OBJECT(Bus) { - return new Bus(getInstanceName(), bus_id); + return new Bus(getInstanceName(), bus_id, clock, width); } REGISTER_SIM_OBJECT("Bus", Bus) diff --git a/src/mem/bus.hh b/src/mem/bus.hh index ff4ec9c8c..509b8cf9b 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -46,13 +46,20 @@ #include "mem/packet.hh" #include "mem/port.hh" #include "mem/request.hh" +#include "sim/eventq.hh" class Bus : public MemObject { /** a globally unique id for this bus. */ int busId; + /** the clock speed for the bus */ + int clock; + /** the width of the bus in bytes */ + int width; + /** the next tick at which the bus will be idle */ + Tick tickNextIdle; - static const int defaultId = -1; + static const int defaultId = -3; //Make it unique from Broadcast struct DevMap { int portId; @@ -100,7 +107,7 @@ class Bus : public MemObject std::vector<int> findSnoopPorts(Addr addr, int id); /** Snoop all relevant ports atomicly. */ - void atomicSnoop(Packet *pkt); + Tick atomicSnoop(Packet *pkt); /** Snoop all relevant ports functionally. */ void functionalSnoop(Packet *pkt); @@ -118,11 +125,15 @@ class Bus : public MemObject */ void addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id); + /** Occupy the bus with transmitting the packet pkt */ + void occupyBus(PacketPtr pkt); /** Declaration of the buses port type, one will be instantiated for each of the interfaces connecting to the bus. */ class BusPort : public Port { + bool _onRetryList; + /** A pointer to the bus to which this port belongs. */ Bus *bus; @@ -133,9 +144,15 @@ class Bus : public MemObject /** Constructor for the BusPort.*/ BusPort(const std::string &_name, Bus *_bus, int _id) - : Port(_name), bus(_bus), id(_id) + : Port(_name), _onRetryList(false), bus(_bus), id(_id) { } + bool onRetryList() + { return _onRetryList; } + + void onRetryList(bool newVal) + { _onRetryList = newVal; } + protected: /** When reciving a timing request from the peer port (at id), @@ -176,16 +193,52 @@ class Bus : public MemObject }; + class BusFreeEvent : public Event + { + Bus * bus; + + public: + BusFreeEvent(Bus * _bus); + void process(); + const char *description(); + }; + + BusFreeEvent busIdle; + + bool inRetry; + /** An array of pointers to the peer port interfaces connected to this bus.*/ - std::vector<Port*> interfaces; + std::vector<BusPort*> interfaces; /** An array of pointers to ports that retry should be called on because the * original send failed for whatever reason.*/ - std::list<Port*> retryList; + std::list<BusPort*> retryList; + + void addToRetryList(BusPort * port) + { + if (!inRetry) { + // The device wasn't retrying a packet, or wasn't at an appropriate + // time. + assert(!port->onRetryList()); + port->onRetryList(true); + retryList.push_back(port); + } else { + if (port->onRetryList()) { + // The device was retrying a packet. It didn't work, so we'll leave + // it at the head of the retry list. + assert(port == retryList.front()); + inRetry = false; + } + else { + port->onRetryList(true); + retryList.push_back(port); + } + } + } /** Port that handles requests that don't match any of the interfaces.*/ - Port *defaultPort; + BusPort *defaultPort; public: @@ -194,8 +247,16 @@ class Bus : public MemObject virtual void init(); - Bus(const std::string &n, int bus_id) - : MemObject(n), busId(bus_id), defaultPort(NULL) {} + Bus(const std::string &n, int bus_id, int _clock, int _width) + : MemObject(n), busId(bus_id), clock(_clock), width(_width), + tickNextIdle(0), busIdle(this), inRetry(false), defaultPort(NULL) + { + //Both the width and clock period must be positive + if (width <= 0) + fatal("Bus width must be positive\n"); + if (clock <= 0) + fatal("Bus clock period must be positive\n"); + } }; diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 1a0f63d17..3f7a52fab 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -44,6 +44,8 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, : Port(_name), cache(_cache), isCpuSide(_isCpuSide) { blocked = false; + cshrRetry = NULL; + waitingOnRetry = false; //Start ports at null if more than one is created we should panic //cpuSidePort = NULL; //memSidePort = NULL; @@ -71,6 +73,22 @@ BaseCache::CachePort::deviceBlockSize() bool BaseCache::CachePort::recvTiming(Packet *pkt) { + if (isCpuSide + && !pkt->req->isUncacheable() + && pkt->isInvalidate() + && !pkt->isRead() && !pkt->isWrite()) { + //Upgrade or Invalidate + //Look into what happens if two slave caches on bus + DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(), + pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr() & ~((Addr)cache->blkSize - 1)); + + assert(!(pkt->flags & SATISFIED)); + pkt->flags |= SATISFIED; + //Invalidates/Upgrades need no response if they get the bus + return true; + } + if (pkt->isRequest() && blocked) { DPRINTF(Cache,"Scheduling a retry while blocked\n"); @@ -89,6 +107,42 @@ BaseCache::CachePort::recvAtomic(Packet *pkt) void BaseCache::CachePort::recvFunctional(Packet *pkt) { + //Check storage here first + list<Packet *>::iterator i = drainList.begin(); + list<Packet *>::iterator end = drainList.end(); + for (; i != end; ++i) { + Packet * target = *i; + // If the target contains data, and it overlaps the + // probed request, need to update data + if (target->intersect(pkt)) { + uint8_t* pkt_data; + uint8_t* write_data; + int data_size; + if (target->getAddr() < pkt->getAddr()) { + int offset = pkt->getAddr() - target->getAddr(); + pkt_data = pkt->getPtr<uint8_t>(); + write_data = target->getPtr<uint8_t>() + offset; + data_size = target->getSize() - offset; + assert(data_size > 0); + if (data_size > pkt->getSize()) + data_size = pkt->getSize(); + } else { + int offset = target->getAddr() - pkt->getAddr(); + pkt_data = pkt->getPtr<uint8_t>() + offset; + write_data = target->getPtr<uint8_t>(); + data_size = pkt->getSize() - offset; + assert(data_size > pkt->getSize()); + if (data_size > target->getSize()) + data_size = target->getSize(); + } + + if (pkt->isWrite()) { + memcpy(pkt_data, write_data, data_size); + } else { + memcpy(write_data, pkt_data, data_size); + } + } + } cache->doFunctionalAccess(pkt, isCpuSide); } @@ -96,47 +150,69 @@ void BaseCache::CachePort::recvRetry() { Packet *pkt; + assert(waitingOnRetry); if (!drainList.empty()) { + DPRINTF(CachePort, "%s attempting to send a retry for response\n", name()); //We have some responses to drain first - bool result = true; - while (result && !drainList.empty()) { - result = sendTiming(drainList.front()); - if (result) - drainList.pop_front(); + if (sendTiming(drainList.front())) { + DPRINTF(CachePort, "%s sucessful in sending a retry for response\n", name()); + drainList.pop_front(); + if (!drainList.empty() || + !isCpuSide && cache->doMasterRequest() || + isCpuSide && cache->doSlaveRequest()) { + + DPRINTF(CachePort, "%s has more responses/requests\n", name()); + BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this); + reqCpu->schedule(curTick + 1); + } + waitingOnRetry = false; } } - - if (!isCpuSide) + else if (!isCpuSide) { + DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name()); + if (!cache->doMasterRequest()) { + //This can happen if I am the owner of a block and see an upgrade + //while the block was in my WB Buffers. I just remove the + //wb and de-assert the masterRequest + waitingOnRetry = false; + return; + } pkt = cache->getPacket(); MSHR* mshr = (MSHR*)pkt->senderState; bool success = sendTiming(pkt); DPRINTF(Cache, "Address %x was %s in sending the timing request\n", pkt->getAddr(), success ? "succesful" : "unsuccesful"); cache->sendResult(pkt, mshr, success); + waitingOnRetry = !success; if (success && cache->doMasterRequest()) { + DPRINTF(CachePort, "%s has more requests\n", name()); //Still more to issue, rerequest in 1 cycle - pkt = NULL; BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this); reqCpu->schedule(curTick + 1); } } else { + assert(cshrRetry); //pkt = cache->getCoherencePacket(); //We save the packet, no reordering on CSHRS pkt = cshrRetry; bool success = sendTiming(pkt); - if (success && cache->doSlaveRequest()) + waitingOnRetry = !success; + if (success) { - //Still more to issue, rerequest in 1 cycle - pkt = NULL; - BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this); - reqCpu->schedule(curTick + 1); + if (cache->doSlaveRequest()) { + //Still more to issue, rerequest in 1 cycle + BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this); + reqCpu->schedule(curTick + 1); + } + cshrRetry = NULL; } - } + if (waitingOnRetry) DPRINTF(CachePort, "%s STILL Waiting on retry\n", name()); + else DPRINTF(CachePort, "%s no longer waiting on retry\n", name()); return; } void @@ -181,17 +257,47 @@ BaseCache::CacheEvent::process() { if (!pkt) { - if (!cachePort->isCpuSide) - { - //MSHR + if (cachePort->waitingOnRetry) return; + //We have some responses to drain first + if (!cachePort->drainList.empty()) { + DPRINTF(CachePort, "%s trying to drain a response\n", cachePort->name()); + if (cachePort->sendTiming(cachePort->drainList.front())) { + DPRINTF(CachePort, "%s drains a response succesfully\n", cachePort->name()); + cachePort->drainList.pop_front(); + if (!cachePort->drainList.empty() || + !cachePort->isCpuSide && cachePort->cache->doMasterRequest() || + cachePort->isCpuSide && cachePort->cache->doSlaveRequest()) { + + DPRINTF(CachePort, "%s still has outstanding bus reqs\n", cachePort->name()); + this->schedule(curTick + 1); + } + } + else { + cachePort->waitingOnRetry = true; + DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name()); + } + } + else if (!cachePort->isCpuSide) + { //MSHR + DPRINTF(CachePort, "%s trying to send a MSHR request\n", cachePort->name()); + if (!cachePort->cache->doMasterRequest()) { + //This can happen if I am the owner of a block and see an upgrade + //while the block was in my WB Buffers. I just remove the + //wb and de-assert the masterRequest + return; + } + pkt = cachePort->cache->getPacket(); MSHR* mshr = (MSHR*) pkt->senderState; bool success = cachePort->sendTiming(pkt); DPRINTF(Cache, "Address %x was %s in sending the timing request\n", pkt->getAddr(), success ? "succesful" : "unsuccesful"); cachePort->cache->sendResult(pkt, mshr, success); + cachePort->waitingOnRetry = !success; + if (cachePort->waitingOnRetry) DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name()); if (success && cachePort->cache->doMasterRequest()) { + DPRINTF(CachePort, "%s still more MSHR requests to send\n", cachePort->name()); //Still more to issue, rerequest in 1 cycle pkt = NULL; this->schedule(curTick+1); @@ -200,32 +306,49 @@ BaseCache::CacheEvent::process() else { //CSHR - pkt = cachePort->cache->getCoherencePacket(); + if (!cachePort->cshrRetry) { + assert(cachePort->cache->doSlaveRequest()); + pkt = cachePort->cache->getCoherencePacket(); + } + else { + pkt = cachePort->cshrRetry; + } bool success = cachePort->sendTiming(pkt); if (!success) { //Need to send on a retry cachePort->cshrRetry = pkt; + cachePort->waitingOnRetry = true; } - else if (cachePort->cache->doSlaveRequest()) + else { - //Still more to issue, rerequest in 1 cycle - pkt = NULL; - this->schedule(curTick+1); + cachePort->cshrRetry = NULL; + if (cachePort->cache->doSlaveRequest()) { + //Still more to issue, rerequest in 1 cycle + pkt = NULL; + this->schedule(curTick+1); + } } } return; } //Response //Know the packet to send - pkt->result = Packet::Success; + if (pkt->flags & NACKED_LINE) + pkt->result = Packet::Nacked; + else + pkt->result = Packet::Success; pkt->makeTimingResponse(); - if (!cachePort->drainList.empty()) { - //Already blocked waiting for bus, just append + DPRINTF(CachePort, "%s attempting to send a response\n", cachePort->name()); + if (!cachePort->drainList.empty() || cachePort->waitingOnRetry) { + //Already have a list, just append cachePort->drainList.push_back(pkt); + DPRINTF(CachePort, "%s appending response onto drain list\n", cachePort->name()); } else if (!cachePort->sendTiming(pkt)) { //It failed, save it to list of drain events + DPRINTF(CachePort, "%s now waiting for a retry\n", cachePort->name()); cachePort->drainList.push_back(pkt); + cachePort->waitingOnRetry = true; } } diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index c45f3b71b..455e13d9c 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -112,6 +112,8 @@ class BaseCache : public MemObject bool isCpuSide; + bool waitingOnRetry; + std::list<Packet *> drainList; Packet *cshrRetry; @@ -210,10 +212,6 @@ class BaseCache : public MemObject protected: - /** True if this cache is connected to the CPU. */ - bool topLevelCache; - - /** Stores time the cache blocked for statistics. */ Tick blockedCycle; @@ -335,7 +333,7 @@ class BaseCache : public MemObject */ BaseCache(const std::string &name, Params ¶ms) : MemObject(name), blocked(0), blockedSnoop(0), masterRequests(0), - slaveRequests(0), topLevelCache(false), blkSize(params.blkSize), + slaveRequests(0), blkSize(params.blkSize), missCount(params.maxMisses) { //Start ports at null if more than one is created we should panic @@ -356,15 +354,6 @@ class BaseCache : public MemObject } /** - * Returns true if this cache is connect to the CPU. - * @return True if this is a L1 cache. - */ - bool isTopLevel() - { - return topLevelCache; - } - - /** * Returns true if the cache is blocked for accesses. */ bool isBlocked() @@ -392,11 +381,13 @@ class BaseCache : public MemObject blocked_causes[cause]++; blockedCycle = curTick; } + int old_state = blocked; if (!(blocked & flag)) { //Wasn't already blocked for this cause blocked |= flag; DPRINTF(Cache,"Blocking for cause %s\n", cause); - cpuSidePort->setBlocked(); + if (!old_state) + cpuSidePort->setBlocked(); } } @@ -408,10 +399,12 @@ class BaseCache : public MemObject void setBlockedForSnoop(BlockedCause cause) { uint8_t flag = 1 << cause; - if (!(blocked & flag)) { + uint8_t old_state = blockedSnoop; + if (!(blockedSnoop & flag)) { //Wasn't already blocked for this cause blockedSnoop |= flag; - memSidePort->setBlocked(); + if (!old_state) + memSidePort->setBlocked(); } } @@ -461,7 +454,7 @@ class BaseCache : public MemObject */ void setMasterRequest(RequestCause cause, Tick time) { - if (!doMasterRequest()) + if (!doMasterRequest() && !memSidePort->waitingOnRetry) { BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(memSidePort); reqCpu->schedule(time); @@ -523,6 +516,10 @@ class BaseCache : public MemObject CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); reqCpu->schedule(time); } + else { + if (pkt->cmd == Packet::Writeback) delete pkt->req; + delete pkt; + } } /** @@ -539,6 +536,10 @@ class BaseCache : public MemObject CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); reqCpu->schedule(time); } + else { + if (pkt->cmd == Packet::Writeback) delete pkt->req; + delete pkt; + } } /** @@ -547,8 +548,6 @@ class BaseCache : public MemObject */ void respondToSnoop(Packet *pkt, Tick time) { -// assert("Implement\n" && 0); -// mi->respond(pkt,curTick + hitLatency); assert (pkt->needsResponse()); CacheEvent *reqMem = new CacheEvent(memSidePort, pkt); reqMem->schedule(time); @@ -571,15 +570,7 @@ class BaseCache : public MemObject { //This is where snoops get updated AddrRangeList dummy; -// if (!topLevelCache) -// { - cpuSidePort->getPeerAddressRanges(dummy, snoop); -// } -// else -// { -// snoop.push_back(RangeSize(0,-1)); -// } - + cpuSidePort->getPeerAddressRanges(dummy, snoop); return; } } diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 923bf8255..41b270030 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -103,6 +103,7 @@ class Cache : public BaseCache * Used to append to target list, to cause an invalidation. */ Packet * invalidatePkt; + Request *invalidateReq; /** * Temporarily move a block into a MSHR. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index bde7ac04b..9db79b843 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -63,9 +63,8 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) if (pkt->isWrite() && (pkt->req->isLocked())) { pkt->req->setScResult(1); } - if (!(pkt->flags & SATISFIED)) { - access(pkt); - } + access(pkt); + } else { @@ -101,7 +100,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide) if (pkt->isResponse()) handleResponse(pkt); else - snoopProbe(pkt); + return snoopProbe(pkt); } //Fix this timing info return hitLatency; @@ -149,14 +148,10 @@ Cache(const std::string &_name, prefetchAccess(params.prefetchAccess), tags(params.tags), missQueue(params.missQueue), coherence(params.coherence), prefetcher(params.prefetcher), - doCopy(params.doCopy), blockOnCopy(params.blockOnCopy) + doCopy(params.doCopy), blockOnCopy(params.blockOnCopy), + hitLatency(params.hitLatency) { -//FIX BUS POINTERS -// if (params.in == NULL) { - topLevelCache = true; -// } -//PLEASE FIX THIS, BUS SIZES NOT BEING USED - tags->setCache(this, blkSize, 1/*params.out->width, params.out->clockRate*/); + tags->setCache(this); tags->setPrefetcher(prefetcher); missQueue->setCache(this); missQueue->setPrefetcher(prefetcher); @@ -164,10 +159,8 @@ Cache(const std::string &_name, prefetcher->setCache(this); prefetcher->setTags(tags); prefetcher->setBuffer(missQueue); -#if 0 - invalidatePkt = new Packet; - invalidatePkt->cmd = Packet::InvalidateReq; -#endif + invalidateReq = new Request((Addr) NULL, blkSize, 0); + invalidatePkt = new Packet(invalidateReq, Packet::InvalidateReq, 0); } template<class TagStore, class Buffering, class Coherence> @@ -196,20 +189,6 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) prefetcher->handleMiss(pkt, curTick); } if (!pkt->req->isUncacheable()) { - if (pkt->isInvalidate() && !pkt->isRead() - && !pkt->isWrite()) { - //Upgrade or Invalidate - //Look into what happens if two slave caches on bus - DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), - pkt->getAddr() & ~((Addr)blkSize - 1)); - - //@todo Should this return latency have the hit latency in it? -// respond(pkt,curTick+lat); - pkt->flags |= SATISFIED; -// return MA_HIT; //@todo, return values - return true; - } blk = tags->handleAccess(pkt, lat, writebacks); } else { size = pkt->getSize(); @@ -245,7 +224,10 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) // clear dirty bit if write through if (pkt->needsResponse()) respond(pkt, curTick+lat); -// return MA_HIT; + if (pkt->cmd == Packet::Writeback) { + //Signal that you can kill the pkt/req + pkt->flags |= SATISFIED; + } return true; } @@ -269,6 +251,7 @@ template<class TagStore, class Buffering, class Coherence> Packet * Cache<TagStore,Buffering,Coherence>::getPacket() { + assert(missQueue->havePending()); Packet * pkt = missQueue->getPacket(); if (pkt) { if (!pkt->req->isUncacheable()) { @@ -289,13 +272,28 @@ template<class TagStore, class Buffering, class Coherence> void Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, bool success) { - if (success) { - missQueue->markInService(pkt, mshr); - //Temp Hack for UPGRADES - if (pkt->cmd == Packet::UpgradeReq) { - handleResponse(pkt); - } + if (success && !(pkt->flags & NACKED_LINE)) { + missQueue->markInService(pkt, mshr); + //Temp Hack for UPGRADES + if (pkt->cmd == Packet::UpgradeReq) { + pkt->flags &= ~CACHE_LINE_FILL; + BlkType *blk = tags->findBlock(pkt); + CacheBlk::State old_state = (blk) ? blk->status : 0; + CacheBlk::State new_state = coherence->getNewState(pkt,old_state); + if (old_state != new_state) + DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n", + pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state); + //Set the state on the upgrade + memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize); + PacketList writebacks; + tags->handleFill(blk, mshr, new_state, writebacks, pkt); + assert(writebacks.empty()); + missQueue->handleResponse(pkt, curTick + hitLatency); + } } else if (pkt && !pkt->req->isUncacheable()) { + pkt->flags &= ~NACKED_LINE; + pkt->flags &= ~SATISFIED; + pkt->flags &= ~SNOOP_COMMIT; missQueue->restoreOrigCmd(pkt); } } @@ -306,6 +304,14 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt) { BlkType *blk = NULL; if (pkt->senderState) { + if (pkt->result == Packet::Nacked) { + //pkt->reinitFromRequest(); + warn("NACKs from devices not connected to the same bus not implemented\n"); + return; + } + if (pkt->result == Packet::BadAddress) { + //Make the response a Bad address and send it + } // MemDebug::cacheResponse(pkt); DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(), pkt->getAddr() & (((ULL(1))<<48)-1)); @@ -315,8 +321,9 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt) CacheBlk::State old_state = (blk) ? blk->status : 0; PacketList writebacks; CacheBlk::State new_state = coherence->getNewState(pkt,old_state); - DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n", - pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state); + if (old_state != new_state) + DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n", + pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state); blk = tags->handleFill(blk, (MSHR*)pkt->senderState, new_state, writebacks, pkt); while (!writebacks.empty()) { @@ -377,10 +384,15 @@ template<class TagStore, class Buffering, class Coherence> void Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) { + if (pkt->req->isUncacheable()) { + //Can't get a hit on an uncacheable address + //Revisit this for multi level coherence + return; + } Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); MSHR *mshr = missQueue->findMSHR(blk_addr); - if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req + if (coherence->hasProtocol()) { //@todo Move this into handle bus req //If we find an mshr, and it is in service, we need to NACK or invalidate if (mshr) { if (mshr->inService) { @@ -392,8 +404,9 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) assert(!(pkt->flags & SATISFIED)); pkt->flags |= SATISFIED; pkt->flags |= NACKED_LINE; - assert("Don't detect these on the other side yet\n"); - respondToSnoop(pkt, curTick + hitLatency); + ///@todo NACK's from other levels + //warn("NACKs from devices not connected to the same bus not implemented\n"); + //respondToSnoop(pkt, curTick + hitLatency); return; } else { @@ -406,7 +419,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) //@todo Make it so that a read to a pending read can't be exclusive now. //Set the address so find match works - assert("Don't have invalidates yet\n"); + //panic("Don't have invalidates yet\n"); invalidatePkt->addrOverride(pkt->getAddr()); //Append the invalidate on @@ -437,7 +450,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) pkt->flags |= SHARED_LINE; assert(pkt->isRead()); - Addr offset = pkt->getAddr() & ~(blkSize - 1); + Addr offset = pkt->getAddr() & (blkSize - 1); assert(offset < blkSize); assert(pkt->getSize() <= blkSize); assert(offset + pkt->getSize() <=blkSize); @@ -458,16 +471,16 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) CacheBlk::State new_state; bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); if (satisfy) { - DPRINTF(Cache, "Cache snooped a %s request and now supplying data," + DPRINTF(Cache, "Cache snooped a %s request for addr %x and now supplying data," "new state is %i\n", - pkt->cmdString(), new_state); + pkt->cmdString(), blk_addr, new_state); tags->handleSnoop(blk, new_state, pkt); respondToSnoop(pkt, curTick + hitLatency); return; } - if (blk) DPRINTF(Cache, "Cache snooped a %s request, new state is %i\n", - pkt->cmdString(), new_state); + if (blk) DPRINTF(Cache, "Cache snooped a %s request for addr %x, new state is %i\n", + pkt->cmdString(), blk_addr, new_state); tags->handleSnoop(blk, new_state); } @@ -521,6 +534,10 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort int lat; BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); + DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(), + pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", + pkt->getAddr() & ~((Addr)blkSize - 1)); + if (!blk) { // Need to check for outstanding misses and writes Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); @@ -627,6 +644,11 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort busPkt->time = curTick; + DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n", + busPkt->cmdString(), + busPkt->getAddr() & (((ULL(1))<<48)-1), + busPkt->getAddr() & ~((Addr)blkSize - 1)); + lat = memSidePort->sendAtomic(busPkt); //Be sure to flip the response to a request for coherence @@ -642,13 +664,26 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort */ misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; CacheBlk::State old_state = (blk) ? blk->status : 0; + CacheBlk::State new_state = coherence->getNewState(busPkt, old_state); + DPRINTF(Cache, "Receive response:%s for blk addr %x in state %i\n", + busPkt->cmdString(), + busPkt->getAddr() & (((ULL(1))<<48)-1), old_state); + if (old_state != new_state) + DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n", + busPkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state); + tags->handleFill(blk, busPkt, - coherence->getNewState(busPkt, old_state), + new_state, writebacks, pkt); + //Free the packet + delete busPkt; + // Handle writebacks if needed while (!writebacks.empty()){ - memSidePort->sendAtomic(writebacks.front()); + Packet *wbPkt = writebacks.front(); + memSidePort->sendAtomic(wbPkt); writebacks.pop_front(); + delete wbPkt; } return lat + hitLatency; } else { @@ -669,7 +704,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort // Still need to change data in all locations. otherSidePort->sendFunctional(pkt); } - return curTick + lat; + return hitLatency; } fatal("Probe not handled.\n"); return 0; @@ -685,15 +720,15 @@ Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt) CacheBlk::State new_state = 0; bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); if (satisfy) { - DPRINTF(Cache, "Cache snooped a %s request and now supplying data," + DPRINTF(Cache, "Cache snooped a %s request for addr %x and now supplying data," "new state is %i\n", - pkt->cmdString(), new_state); + pkt->cmdString(), blk_addr, new_state); tags->handleSnoop(blk, new_state, pkt); return hitLatency; } - if (blk) DPRINTF(Cache, "Cache snooped a %s request, new state is %i\n", - pkt->cmdString(), new_state); + if (blk) DPRINTF(Cache, "Cache snooped a %s request for addr %x, new state is %i\n", + pkt->cmdString(), blk_addr, new_state); tags->handleSnoop(blk, new_state); return 0; } diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index bcf3ce9c5..e28dda3dc 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -271,7 +271,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name, } Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq; - Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp; + Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeReq : Packet::ReadExResp; //@todo add in hardware prefetch to this list if (protocol == "msi") { diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc index 5ab706269..0efe393f9 100644 --- a/src/mem/cache/coherence/uni_coherence.cc +++ b/src/mem/cache/coherence/uni_coherence.cc @@ -68,14 +68,12 @@ UniCoherence::handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, if (pkt->isInvalidate()) { DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n", pkt->getAddr(), blk); - if (!cache->isTopLevel()) { - // Forward to other caches - Packet * tmp = new Packet(pkt->req, Packet::InvalidateReq, -1); - cshrs.allocate(tmp); - cache->setSlaveRequest(Request_Coherence, curTick); - if (cshrs.isFull()) { - cache->setBlockedForSnoop(Blocked_Coherence); - } + // Forward to other caches + Packet * tmp = new Packet(pkt->req, Packet::InvalidateReq, -1); + cshrs.allocate(tmp); + cache->setSlaveRequest(Request_Coherence, curTick); + if (cshrs.isFull()) { + cache->setBlockedForSnoop(Blocked_Coherence); } } else { if (blk) { diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index bdb7a39c8..c23b542f5 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -352,7 +352,7 @@ MissQueue::setPrefetcher(BasePrefetcher *_prefetcher) MSHR* MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) { - MSHR* mshr = mq.allocate(pkt, blkSize); + MSHR* mshr = mq.allocate(pkt, size); mshr->order = order++; if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) { // Mark this as a cache line fill @@ -515,6 +515,14 @@ MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) assert(pkt->senderState != 0); MSHR * mshr = (MSHR*)pkt->senderState; mshr->originalCmd = pkt->cmd; + if (cmd == Packet::UpgradeReq || cmd == Packet::InvalidateReq) { + pkt->flags |= NO_ALLOCATE; + pkt->flags &= ~CACHE_LINE_FILL; + } + else if (!pkt->req->isUncacheable() && !pkt->isNoAllocate() && + (cmd & (1 << 6)/*NeedsResponse*/)) { + pkt->flags |= CACHE_LINE_FILL; + } if (pkt->isCacheFill() || pkt->isNoAllocate()) pkt->cmd = cmd; } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index f36032672..455798f15 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -100,6 +100,7 @@ MSHR::deallocate() { assert(targets.empty()); assert(ntargets == 0); + delete pkt; pkt = NULL; inService = false; //allocIter = NULL; diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index bd9667529..1876a8987 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -213,8 +213,13 @@ void MSHRQueue::markInService(MSHR* mshr) { //assert(mshr == pendingList.front()); - if (!mshr->pkt->needsResponse()) { + if (!(mshr->pkt->needsResponse() || mshr->pkt->cmd == Packet::UpgradeReq)) { assert(mshr->getNumTargets() == 0); + if ((mshr->pkt->flags & SATISFIED) && (mshr->pkt->cmd == Packet::Writeback)) { + //Writeback hit, so delete it + //otherwise the consumer will delete it + delete mshr->pkt->req; + } deallocate(mshr); return; } diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 91298df8c..64c65dcca 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -34,14 +34,26 @@ * Definition of the Packet Class, a packet is a transaction occuring * between a single level of the memory heirarchy (ie L1->L2). */ + +#include <iostream> #include "base/misc.hh" #include "mem/packet.hh" +#include "base/trace.hh" static const std::string ReadReqString("ReadReq"); static const std::string WriteReqString("WriteReq"); -static const std::string WriteReqNoAckString("WriteReqNoAck"); +static const std::string WriteReqNoAckString("WriteReqNoAck|Writeback"); static const std::string ReadRespString("ReadResp"); static const std::string WriteRespString("WriteResp"); +static const std::string SoftPFReqString("SoftPFReq"); +static const std::string SoftPFRespString("SoftPFResp"); +static const std::string HardPFReqString("HardPFReq"); +static const std::string HardPFRespString("HardPFResp"); +static const std::string InvalidateReqString("InvalidateReq"); +static const std::string WriteInvalidateReqString("WriteInvalidateReq"); +static const std::string UpgradeReqString("UpgradeReq"); +static const std::string ReadExReqString("ReadExReq"); +static const std::string ReadExRespString("ReadExResp"); static const std::string OtherCmdString("<other>"); const std::string & @@ -53,6 +65,15 @@ Packet::cmdString() const case WriteReqNoAck: return WriteReqNoAckString; case ReadResp: return ReadRespString; case WriteResp: return WriteRespString; + case SoftPFReq: return SoftPFReqString; + case SoftPFResp: return SoftPFRespString; + case HardPFReq: return HardPFReqString; + case HardPFResp: return HardPFRespString; + case InvalidateReq: return InvalidateReqString; + case WriteInvalidateReq:return WriteInvalidateReqString; + case UpgradeReq: return UpgradeReqString; + case ReadExReq: return ReadExReqString; + case ReadExResp: return ReadExRespString; default: return OtherCmdString; } } @@ -66,6 +87,15 @@ Packet::cmdIdxToString(Packet::Command idx) case WriteReqNoAck: return WriteReqNoAckString; case ReadResp: return ReadRespString; case WriteResp: return WriteRespString; + case SoftPFReq: return SoftPFReqString; + case SoftPFResp: return SoftPFRespString; + case HardPFReq: return HardPFReqString; + case HardPFResp: return HardPFRespString; + case InvalidateReq: return InvalidateReqString; + case WriteInvalidateReq:return WriteInvalidateReqString; + case UpgradeReq: return UpgradeReqString; + case ReadExReq: return ReadExReqString; + case ReadExResp: return ReadExRespString; default: return OtherCmdString; } } @@ -102,19 +132,103 @@ bool Packet::intersect(Packet *p) { Addr s1 = getAddr(); - Addr e1 = getAddr() + getSize(); + Addr e1 = getAddr() + getSize() - 1; Addr s2 = p->getAddr(); - Addr e2 = p->getAddr() + p->getSize(); + Addr e2 = p->getAddr() + p->getSize() - 1; - if (s1 >= s2 && s1 < e2) - return true; - if (e1 >= s2 && e1 < e2) - return true; - return false; + return !(s1 > e2 || e1 < s2); } bool fixPacket(Packet *func, Packet *timing) { - panic("Need to implement!"); + Addr funcStart = func->getAddr(); + Addr funcEnd = func->getAddr() + func->getSize() - 1; + Addr timingStart = timing->getAddr(); + Addr timingEnd = timing->getAddr() + timing->getSize() - 1; + + assert(!(funcStart > timingEnd || timingStart < funcEnd)); + + if (DTRACE(FunctionalAccess)) { + DebugOut() << func; + DebugOut() << timing; + } + + // this packet can't solve our problem, continue on + if (!timing->hasData()) + return true; + + if (func->isRead()) { + if (funcStart >= timingStart && funcEnd <= timingEnd) { + func->allocate(); + memcpy(func->getPtr<uint8_t>(), timing->getPtr<uint8_t>() + + funcStart - timingStart, func->getSize()); + func->result = Packet::Success; + return false; + } else { + // In this case the timing packet only partially satisfies the + // requset, so we would need more information to make this work. + // Like bytes valid in the packet or something, so the request could + // continue and get this bit of possibly newer data along with the + // older data not written to yet. + panic("Timing packet only partially satisfies the functional" + "request. Now what?"); + } + } else if (func->isWrite()) { + if (funcStart >= timingStart) { + memcpy(timing->getPtr<uint8_t>() + (funcStart - timingStart), + func->getPtr<uint8_t>(), + funcStart - std::min(funcEnd, timingEnd)); + } else { // timingStart > funcStart + memcpy(timing->getPtr<uint8_t>(), + func->getPtr<uint8_t>() + (timingStart - funcStart), + timingStart - std::min(funcEnd, timingEnd)); + } + // we always want to keep going with a write + return true; + } else + panic("Don't know how to handle command type %#x\n", + func->cmdToIndex()); + +} + + +std::ostream & +operator<<(std::ostream &o, const Packet &p) +{ + + o << "[0x"; + o.setf(std::ios_base::hex, std::ios_base::showbase); + o << p.getAddr(); + o.unsetf(std::ios_base::hex| std::ios_base::showbase); + o << ":"; + o.setf(std::ios_base::hex, std::ios_base::showbase); + o << p.getAddr() + p.getSize() - 1 << "] "; + o.unsetf(std::ios_base::hex| std::ios_base::showbase); + + if (p.result == Packet::Success) + o << "Successful "; + if (p.result == Packet::BadAddress) + o << "BadAddress "; + if (p.result == Packet::Nacked) + o << "Nacked "; + if (p.result == Packet::Unknown) + o << "Inflight "; + + if (p.isRead()) + o << "Read "; + if (p.isWrite()) + o << "Read "; + if (p.isInvalidate()) + o << "Read "; + if (p.isRequest()) + o << "Request "; + if (p.isResponse()) + o << "Response "; + if (p.hasData()) + o << "w/Data "; + + o << std::endl; + return o; } + diff --git a/src/mem/packet.hh b/src/mem/packet.hh index be9bf5f57..48b32ec47 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -58,10 +58,8 @@ typedef std::list<PacketPtr> PacketList; #define NO_ALLOCATE 1 << 5 #define SNOOP_COMMIT 1 << 6 -//For statistics we need max number of commands, hard code it at -//20 for now. @todo fix later -#define NUM_MEM_CMDS 1 << 9 - +//for now. @todo fix later +#define NUM_MEM_CMDS 1 << 11 /** * A Packet is used to encapsulate a transfer between two objects in * the memory system (e.g., the L1 and L2 cache). (In contrast, a @@ -94,7 +92,6 @@ class Packet * be called on it rather than simply delete.*/ bool arrayData; - /** The address of the request. This address could be virtual or * physical, depending on the system configuration. */ Addr addr; @@ -126,6 +123,12 @@ class Packet /** Used to calculate latencies for each packet.*/ Tick time; + /** The time at which the packet will be fully transmitted */ + Tick finishTime; + + /** The time at which the first chunk of the packet will be transmitted */ + Tick firstWordTime; + /** The special destination address indicating that the packet * should be routed based on its address. */ static const short Broadcast = -1; @@ -164,17 +167,21 @@ class Packet private: /** List of command attributes. */ + // If you add a new CommandAttribute, make sure to increase NUM_MEM_CMDS + // as well. enum CommandAttribute { - IsRead = 1 << 0, - IsWrite = 1 << 1, - IsPrefetch = 1 << 2, - IsInvalidate = 1 << 3, - IsRequest = 1 << 4, - IsResponse = 1 << 5, - NeedsResponse = 1 << 6, + IsRead = 1 << 0, + IsWrite = 1 << 1, + IsPrefetch = 1 << 2, + IsInvalidate = 1 << 3, + IsRequest = 1 << 4, + IsResponse = 1 << 5, + NeedsResponse = 1 << 6, IsSWPrefetch = 1 << 7, - IsHWPrefetch = 1 << 8 + IsHWPrefetch = 1 << 8, + IsUpgrade = 1 << 9, + HasData = 1 << 10 }; public: @@ -182,22 +189,24 @@ class Packet enum Command { InvalidCmd = 0, - ReadReq = IsRead | IsRequest | NeedsResponse, - WriteReq = IsWrite | IsRequest | NeedsResponse, - WriteReqNoAck = IsWrite | IsRequest, - ReadResp = IsRead | IsResponse | NeedsResponse, - WriteResp = IsWrite | IsResponse | NeedsResponse, - Writeback = IsWrite | IsRequest, + ReadReq = IsRead | IsRequest | NeedsResponse, + WriteReq = IsWrite | IsRequest | NeedsResponse | HasData, + WriteReqNoAck = IsWrite | IsRequest | HasData, + ReadResp = IsRead | IsResponse | NeedsResponse | HasData, + WriteResp = IsWrite | IsResponse | NeedsResponse, + Writeback = IsWrite | IsRequest | HasData, SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, - SoftPFResp = IsRead | IsResponse | IsSWPrefetch | NeedsResponse, - HardPFResp = IsRead | IsResponse | IsHWPrefetch | NeedsResponse, + SoftPFResp = IsRead | IsResponse | IsSWPrefetch + | NeedsResponse | HasData, + HardPFResp = IsRead | IsResponse | IsHWPrefetch + | NeedsResponse | HasData, InvalidateReq = IsInvalidate | IsRequest, - WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, - UpgradeReq = IsInvalidate | IsRequest | NeedsResponse, - UpgradeResp = IsInvalidate | IsResponse | NeedsResponse, + WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest | HasData, + UpgradeReq = IsInvalidate | IsRequest | IsUpgrade, ReadExReq = IsRead | IsInvalidate | IsRequest | NeedsResponse, - ReadExResp = IsRead | IsInvalidate | IsResponse | NeedsResponse + ReadExResp = IsRead | IsInvalidate | IsResponse + | NeedsResponse | HasData }; /** Return the string name of the cmd field (for debugging and @@ -213,16 +222,17 @@ class Packet /** The command field of the packet. */ Command cmd; - bool isRead() { return (cmd & IsRead) != 0; } - bool isWrite() { return (cmd & IsWrite) != 0; } - bool isRequest() { return (cmd & IsRequest) != 0; } - bool isResponse() { return (cmd & IsResponse) != 0; } - bool needsResponse() { return (cmd & NeedsResponse) != 0; } - bool isInvalidate() { return (cmd & IsInvalidate) != 0; } + bool isRead() const { return (cmd & IsRead) != 0; } + bool isWrite() const { return (cmd & IsWrite) != 0; } + bool isRequest() const { return (cmd & IsRequest) != 0; } + bool isResponse() const { return (cmd & IsResponse) != 0; } + bool needsResponse() const { return (cmd & NeedsResponse) != 0; } + bool isInvalidate() const { return (cmd & IsInvalidate) != 0; } + bool hasData() const { return (cmd & HasData) != 0; } - bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; } - bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; } - bool isCompressed() { return (flags & COMPRESSED) != 0; } + bool isCacheFill() const { return (flags & CACHE_LINE_FILL) != 0; } + bool isNoAllocate() const { return (flags & NO_ALLOCATE) != 0; } + bool isCompressed() const { return (flags & COMPRESSED) != 0; } bool nic_pkt() { assert("Unimplemented\n" && 0); return false; } @@ -320,6 +330,10 @@ class Packet int icmd = (int)cmd; icmd &= ~(IsRequest); icmd |= IsResponse; + if (isRead()) + icmd |= HasData; + if (isWrite()) + icmd &= ~HasData; cmd = (Command)icmd; dest = src; srcValid = false; @@ -334,6 +348,10 @@ class Packet int icmd = (int)cmd; icmd &= ~(IsRequest); icmd |= IsResponse; + if (isRead()) + icmd |= HasData; + if (isWrite()) + icmd &= ~HasData; cmd = (Command)icmd; } @@ -383,5 +401,14 @@ class Packet bool intersect(Packet *p); }; + +/** This function given a functional packet and a timing packet either satisfies + * the timing packet, or updates the timing packet to reflect the updated state + * in the timing packet. It returns if the functional packet should continue to + * traverse the memory hierarchy or not. + */ bool fixPacket(Packet *func, Packet *timing); + +std::ostream & operator<<(std::ostream &o, const Packet &p); + #endif //__MEM_PACKET_HH diff --git a/src/mem/physical.cc b/src/mem/physical.cc index 96d78bd99..f5a0ade15 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -195,18 +195,22 @@ PhysicalMemory::checkLockedAddrList(Request *req) void PhysicalMemory::doFunctionalAccess(Packet *pkt) { - assert(pkt->getAddr() + pkt->getSize() < params()->addrRange.size()); + assert(pkt->getAddr() + pkt->getSize() <= params()->addrRange.size()); if (pkt->isRead()) { if (pkt->req->isLocked()) { trackLoadLocked(pkt->req); } + DPRINTF(MemoryAccess, "Performing Read of size %i on address 0x%x\n", + pkt->getSize(), pkt->getAddr()); memcpy(pkt->getPtr<uint8_t>(), pmemAddr + pkt->getAddr() - params()->addrRange.start, pkt->getSize()); } else if (pkt->isWrite()) { if (writeOK(pkt->req)) { + DPRINTF(MemoryAccess, "Performing Write of size %i on address 0x%x\n", + pkt->getSize(), pkt->getAddr()); memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start, pkt->getPtr<uint8_t>(), pkt->getSize()); } diff --git a/src/mem/tport.cc b/src/mem/tport.cc index cef7a2a5b..21907c0ca 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -33,8 +33,22 @@ void SimpleTimingPort::recvFunctional(Packet *pkt) { - // just do an atomic access and throw away the returned latency - recvAtomic(pkt); + //First check queued events + std::list<Packet *>::iterator i = transmitList.begin(); + std::list<Packet *>::iterator end = transmitList.end(); + bool cont = true; + + while (i != end && cont) { + Packet * target = *i; + // If the target contains data, and it overlaps the + // probed request, need to update data + if (target->intersect(pkt)) + fixPacket(pkt, target); + + } + //Then just do an atomic access and throw away the returned latency + if (cont) + recvAtomic(pkt); } bool @@ -58,13 +72,17 @@ SimpleTimingPort::recvTiming(Packet *pkt) void SimpleTimingPort::recvRetry() { - bool result = true; - while (result && transmitList.size()) { - result = sendTiming(transmitList.front()); - if (result) - transmitList.pop_front(); + assert(outTiming > 0); + assert(!transmitList.empty()); + if (sendTiming(transmitList.front())) { + transmitList.pop_front(); + outTiming--; + DPRINTF(Bus, "No Longer waiting on retry\n"); + if (!transmitList.empty()) + sendTimingLater(transmitList.front(), 1); } - if (transmitList.size() == 0 && drainEvent) { + + if (transmitList.empty() && drainEvent) { drainEvent->process(); drainEvent = NULL; } @@ -73,18 +91,28 @@ SimpleTimingPort::recvRetry() void SimpleTimingPort::SendEvent::process() { - port->outTiming--; - assert(port->outTiming >= 0); - if (port->sendTiming(packet)) { - // send successfule - if (port->transmitList.size() == 0 && port->drainEvent) { + assert(port->outTiming > 0); + if (!port->transmitList.empty() && port->transmitList.front() != packet) { + //We are not the head of the list + port->transmitList.push_back(packet); + } else if (port->sendTiming(packet)) { + // send successful + if (port->transmitList.size()) { + port->transmitList.pop_front(); + port->outTiming--; + if (!port->transmitList.empty()) + port->sendTimingLater(port->transmitList.front(), 1); + } + if (port->transmitList.empty() && port->drainEvent) { port->drainEvent->process(); port->drainEvent = NULL; } } else { // send unsuccessful (due to flow control). Will get retry - // callback later; save for then. - port->transmitList.push_back(packet); + // callback later; save for then if not already + DPRINTF(Bus, "Waiting on retry\n"); + if (!(port->transmitList.front() == packet)) + port->transmitList.push_back(packet); } } diff --git a/src/python/m5/objects/Bus.py b/src/python/m5/objects/Bus.py index f6828a0d5..6710111e5 100644 --- a/src/python/m5/objects/Bus.py +++ b/src/python/m5/objects/Bus.py @@ -6,3 +6,5 @@ class Bus(MemObject): port = VectorPort("vector port for connecting devices") default = Port("Default port for requests that aren't handeled by a device.") bus_id = Param.Int(0, "blah") + clock = Param.Clock("1GHz", "bus clock speed") + width = Param.Int(64, "bus width (bytes)") diff --git a/src/python/m5/objects/FUPool.py b/src/python/m5/objects/FUPool.py index 4b4be79a6..916183bd7 100644 --- a/src/python/m5/objects/FUPool.py +++ b/src/python/m5/objects/FUPool.py @@ -1,6 +1,12 @@ from m5.SimObject import SimObject from m5.params import * +from FuncUnit import * +from FuncUnitConfig import * class FUPool(SimObject): type = 'FUPool' FUList = VectorParam.FUDesc("list of FU's for this pool") + +class DefaultFUPool(FUPool): + FUList = [ IntALU(), IntMultDiv(), FP_ALU(), FP_MultDiv(), ReadPort(), + WritePort(), RdWrPort(), IprPort() ] diff --git a/src/python/m5/objects/FuncUnitConfig.py b/src/python/m5/objects/FuncUnitConfig.py new file mode 100644 index 000000000..43d7a4bb7 --- /dev/null +++ b/src/python/m5/objects/FuncUnitConfig.py @@ -0,0 +1,41 @@ +from m5.SimObject import SimObject +from m5.params import * +from FuncUnit import * + +class IntALU(FUDesc): + opList = [ OpDesc(opClass='IntAlu') ] + count = 6 + +class IntMultDiv(FUDesc): + opList = [ OpDesc(opClass='IntMult', opLat=3), + OpDesc(opClass='IntDiv', opLat=20, issueLat=19) ] + count=2 + +class FP_ALU(FUDesc): + opList = [ OpDesc(opClass='FloatAdd', opLat=2), + OpDesc(opClass='FloatCmp', opLat=2), + OpDesc(opClass='FloatCvt', opLat=2) ] + count = 4 + +class FP_MultDiv(FUDesc): + opList = [ OpDesc(opClass='FloatMult', opLat=4), + OpDesc(opClass='FloatDiv', opLat=12, issueLat=12), + OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24) ] + count = 2 + +class ReadPort(FUDesc): + opList = [ OpDesc(opClass='MemRead') ] + count = 0 + +class WritePort(FUDesc): + opList = [ OpDesc(opClass='MemWrite') ] + count = 0 + +class RdWrPort(FUDesc): + opList = [ OpDesc(opClass='MemRead'), OpDesc(opClass='MemWrite') ] + count = 4 + +class IprPort(FUDesc): + opList = [ OpDesc(opClass='IprAccess', opLat = 3, issueLat = 3) ] + count = 1 + diff --git a/src/python/m5/objects/MemTest.py b/src/python/m5/objects/MemTest.py index 18aff03f4..83399be80 100644 --- a/src/python/m5/objects/MemTest.py +++ b/src/python/m5/objects/MemTest.py @@ -6,6 +6,7 @@ from m5 import build_env class MemTest(SimObject): type = 'MemTest' max_loads = Param.Counter("number of loads to execute") + atomic = Param.Bool(False, "Execute tester in atomic mode? (or timing)\n") memory_size = Param.Int(65536, "memory size") percent_dest_unaligned = Param.Percent(50, "percent of copy dest address that are unaligned") diff --git a/src/python/m5/objects/O3CPU.py b/src/python/m5/objects/O3CPU.py index 59b40c6e8..20eef383f 100644 --- a/src/python/m5/objects/O3CPU.py +++ b/src/python/m5/objects/O3CPU.py @@ -3,6 +3,7 @@ from m5.proxy import * from m5 import build_env from BaseCPU import BaseCPU from Checker import O3Checker +from FUPool import * class DerivO3CPU(BaseCPU): type = 'DerivO3CPU' @@ -14,11 +15,13 @@ class DerivO3CPU(BaseCPU): if build_env['USE_CHECKER']: if not build_env['FULL_SYSTEM']: checker = Param.BaseCPU(O3Checker(workload=Parent.workload, - exitOnError=True, + exitOnError=False, + updateOnError=True, warnOnlyOnLoadError=False), "checker") else: - checker = Param.BaseCPU(O3Checker(exitOnError=True, warnOnlyOnLoadError=False), "checker") + checker = Param.BaseCPU(O3Checker(exitOnError=False, updateOnError=True, + warnOnlyOnLoadError=False), "checker") checker.itb = Parent.itb checker.dtb = Parent.dtb @@ -57,7 +60,7 @@ class DerivO3CPU(BaseCPU): issueWidth = Param.Unsigned(8, "Issue width") wbWidth = Param.Unsigned(8, "Writeback width") wbDepth = Param.Unsigned(1, "Writeback depth") - fuPool = Param.FUPool("Functional Unit pool") + fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool") iewToCommitDelay = Param.Unsigned(1, "Issue/Execute/Writeback to commit " "delay") @@ -77,7 +80,7 @@ class DerivO3CPU(BaseCPU): localHistoryBits = Param.Unsigned(11, "Bits for the local history") globalPredictorSize = Param.Unsigned(8192, "Size of global predictor") globalCtrBits = Param.Unsigned(2, "Bits per counter") - globalHistoryBits = Param.Unsigned(4096, "Bits of history") + globalHistoryBits = Param.Unsigned(13, "Bits of history") choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor") choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") |