Diffstat (limited to 'src')
58 files changed, 976 insertions, 397 deletions
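The arch/*/locked_mem.hh headers introduced below implement load-locked/store-conditional (LL/SC) handling as free functions templated on an execution-context type XC. The contract XC must satisfy is implicit; a minimal mock that would compile against the Alpha helpers, inferred from the calls they make (the class itself is hypothetical and not part of this change; SimpleThread and the O3/Ozone contexts are what the CPU models actually pass in), looks like:

#include <cstdint>
#include <map>

// Hypothetical stand-in for the duck-typed XC template parameter.
struct MockXC
{
    std::map<int, uint64_t> miscRegs;  // keyed by Lock_Addr/Lock_Flag dep tags
    int stCondFailures = 0;

    void setMiscReg(int tag, uint64_t val) { miscRegs[tag] = val; }
    uint64_t readMiscReg(int tag) { return miscRegs[tag]; }
    int readStCondFailures() const { return stCondFailures; }
    void setStCondFailures(int n) { stCondFailures = n; }
    int readCpuId() const { return 0; }
};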
diff --git a/src/arch/SConscript b/src/arch/SConscript index 59cea6211..dda1dea53 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -50,6 +50,7 @@ isa_switch_hdrs = Split(''' arguments.hh faults.hh isa_traits.hh + locked_mem.hh process.hh regfile.hh stacktrace.hh diff --git a/src/arch/alpha/locked_mem.hh b/src/arch/alpha/locked_mem.hh new file mode 100644 index 000000000..368ea2895 --- /dev/null +++ b/src/arch/alpha/locked_mem.hh @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + */ + +#ifndef __ARCH_ALPHA_LOCKED_MEM_HH__ +#define __ARCH_ALPHA_LOCKED_MEM_HH__ + +/** + * @file + * + * ISA-specific helper functions for locked memory accesses. 
+ */ + +#include "arch/isa_traits.hh" +#include "base/misc.hh" +#include "mem/request.hh" + + +namespace AlphaISA +{ +template <class XC> +inline void +handleLockedRead(XC *xc, Request *req) +{ + xc->setMiscReg(Lock_Addr_DepTag, req->getPaddr() & ~0xf); + xc->setMiscReg(Lock_Flag_DepTag, true); +} + + +template <class XC> +inline bool +handleLockedWrite(XC *xc, Request *req) +{ + if (req->isUncacheable()) { + // Funky Turbolaser mailbox access...don't update + // result register (see stq_c in decoder.isa) + req->setScResult(2); + } else { + // standard store conditional + bool lock_flag = xc->readMiscReg(Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(Lock_Addr_DepTag); + if (!lock_flag || (req->getPaddr() & ~0xf) != lock_addr) { + // Lock flag not set or addr mismatch in CPU; + // don't even bother sending to memory system + req->setScResult(0); + xc->setMiscReg(Lock_Flag_DepTag, false); + // the rest of this code is not architectural; + // it's just a debugging aid to help detect + // livelock by warning on long sequences of failed + // store conditionals + int stCondFailures = xc->readStCondFailures(); + stCondFailures++; + xc->setStCondFailures(stCondFailures); + if (stCondFailures % 100000 == 0) { + warn("cpu %d: %d consecutive " + "store conditional failures\n", + xc->readCpuId(), stCondFailures); + } + + // store conditional failed already, so don't issue it to mem + return false; + } + } + + return true; +} + + +} // namespace AlphaISA + +#endif diff --git a/src/arch/mips/locked_mem.hh b/src/arch/mips/locked_mem.hh new file mode 100644 index 000000000..363cf1e90 --- /dev/null +++ b/src/arch/mips/locked_mem.hh @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + */ + +#ifndef __ARCH_MIPS_LOCKED_MEM_HH__ +#define __ARCH_MIPS_LOCKED_MEM_HH__ + +/** + * @file + * + * ISA-specific helper functions for locked memory accesses. 
+ */ + +#include "mem/request.hh" + + +namespace MipsISA +{ +template <class XC> +inline void +handleLockedRead(XC *xc, Request *req) +{ +} + + +template <class XC> +inline bool +handleLockedWrite(XC *xc, Request *req) +{ + return true; +} + + +} // namespace MipsISA + +#endif diff --git a/src/arch/sparc/locked_mem.hh b/src/arch/sparc/locked_mem.hh new file mode 100644 index 000000000..291b2f422 --- /dev/null +++ b/src/arch/sparc/locked_mem.hh @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + */ + +#ifndef __ARCH_SPARC_LOCKED_MEM_HH__ +#define __ARCH_SPARC_LOCKED_MEM_HH__ + +/** + * @file + * + * ISA-specific helper functions for locked memory accesses. 
+ */ + +#include "mem/request.hh" + + +namespace SparcISA +{ +template <class XC> +inline void +handleLockedRead(XC *xc, Request *req) +{ +} + + +template <class XC> +inline bool +handleLockedWrite(XC *xc, Request *req) +{ + return true; +} + + +} // namespace SparcISA + +#endif diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 8e8153b68..274407be5 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -112,6 +112,7 @@ baseFlags = [ 'IdeDisk', 'InstExec', 'Interrupt', + 'LLSC', 'LSQ', 'LSQUnit', 'Loader', diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 2bb9a2399..5771a7904 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -158,6 +158,7 @@ if 'O3CPU' in env['CPU_MODELS']: o3/scoreboard.cc o3/store_set.cc ''') + sources += Split('memtest/memtest.cc') if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') else: diff --git a/src/cpu/base.hh b/src/cpu/base.hh index e02527371..75e0d86af 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -140,8 +140,8 @@ class BaseCPU : public MemObject bool functionTrace; Tick functionTraceStart; System *system; -#if FULL_SYSTEM int cpu_id; +#if FULL_SYSTEM Tick profile; #endif Tick progress_interval; diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index f2109e88d..d6cdff5c5 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -193,7 +193,7 @@ BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags) // note this is a local, not BaseDynInst::fault Fault trans_fault = cpu->translateDataReadReq(req); - if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) { + if (trans_fault == NoFault && !(req->isUncacheable())) { // It's a valid address to cacheable space. Record key MemReq // parameters so we can generate another one just like it for // the timing access without calling translate() again (which diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index 1540a6b94..f6d56eef6 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -175,7 +175,7 @@ CheckerCPU::read(Addr addr, T &data, unsigned flags) pkt->dataStatic(&data); - if (!(memReq->getFlags() & UNCACHEABLE)) { + if (!(memReq->isUncacheable())) { // Access memory to see if we have the same data dcachePort->sendFunctional(pkt); } else { @@ -251,9 +251,9 @@ CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) // This is because the LSQ would have to be snooped in the CPU to // verify this data. 
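The conversions above and below replace raw flag tests like (req->getFlags() & UNCACHEABLE) with predicate accessors whose definitions are not part of this diff; a plausible minimal form on Request, assuming the existing flag bits (sketch only; the real accessors live in mem/request.hh), is:

bool isUncacheable() const { return flags & UNCACHEABLE; }
bool isLocked() const { return flags & LOCKED; }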
if (unverifiedReq && - !(unverifiedReq->getFlags() & UNCACHEABLE) && - (!(unverifiedReq->getFlags() & LOCKED) || - ((unverifiedReq->getFlags() & LOCKED) && + !(unverifiedReq->isUncacheable()) && + (!(unverifiedReq->isLocked()) || + ((unverifiedReq->isLocked()) && unverifiedReq->getScResult() == 1))) { T inst_data; /* diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index e3a47f10a..127cad414 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -38,16 +38,19 @@ #include "base/misc.hh" #include "base/statistics.hh" -#include "cpu/simple_thread.hh" +//#include "cpu/simple_thread.hh" #include "cpu/memtest/memtest.hh" //#include "mem/cache/base_cache.hh" -#include "mem/physical.hh" +//#include "mem/physical.hh" #include "sim/builder.hh" #include "sim/sim_events.hh" #include "sim/stats.hh" +#include "mem/packet.hh" +#include "mem/request.hh" +#include "mem/port.hh" +#include "mem/mem_object.hh" using namespace std; -using namespace TheISA; int TESTER_ALLOCATOR=0; @@ -68,7 +71,8 @@ MemTest::CpuPort::recvAtomic(Packet *pkt) void MemTest::CpuPort::recvFunctional(Packet *pkt) { - memtest->completeRequest(pkt); + //Do nothing if we see one come through + return; } void @@ -86,11 +90,10 @@ MemTest::CpuPort::recvRetry() memtest->doRetry(); } - MemTest::MemTest(const string &name, // MemInterface *_cache_interface, - PhysicalMemory *main_mem, - PhysicalMemory *check_mem, +// PhysicalMemory *main_mem, +// PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, // unsigned _percentCopies, @@ -102,10 +105,11 @@ MemTest::MemTest(const string &name, Counter _max_loads) : MemObject(name), tickEvent(this), - cachePort("dcache", this), + cachePort("test", this), + funcPort("functional", this), retryPkt(NULL), - mainMem(main_mem), - checkMem(check_mem), +// mainMem(main_mem), +// checkMem(check_mem), size(_memorySize), percentReads(_percentReads), // percentCopies(_percentCopies), @@ -119,7 +123,7 @@ MemTest::MemTest(const string &name, vector<string> cmd; cmd.push_back("/bin/ls"); vector<string> null_vec; - thread = new SimpleThread(NULL, 0, NULL, 0, mainMem); + // thread = new SimpleThread(NULL, 0, NULL, 0, mainMem); curTick = 0; // Needs to be masked off once we know the block size. @@ -134,16 +138,18 @@ MemTest::MemTest(const string &name, tickEvent.schedule(0); id = TESTER_ALLOCATOR++; + if (TESTER_ALLOCATOR > 8) + panic("False sharing memtester only allows up to 8 testers"); + + accessRetry = false; } Port * MemTest::getPort(const std::string &if_name, int idx) { - // ***** NOTE TO RON: I'm not sure what it should do if these get ports - // are called on it. - if (if_name == "dcache_port") - return &cachePort; - else if (if_name == "icache_port") + if (if_name == "functional") + return &funcPort; + else if (if_name == "test") return &cachePort; else panic("No Such Port\n"); @@ -157,29 +163,14 @@ MemTest::init() blockAddrMask = blockSize - 1; traceBlockAddr = blockAddr(traceBlockAddr); - //setup data storage with interesting values - uint8_t *data1 = new uint8_t[size]; - uint8_t *data2 = new uint8_t[size]; - uint8_t *data3 = new uint8_t[size]; - memset(data1, 1, size); - memset(data2, 2, size); - memset(data3, 3, size); - // set up intial memory contents here - // ***** NOTE FOR RON: I'm not sure how to setup initial memory - // contents. 
- Kevin -/* - mainMem->prot_write(baseAddr1, data1, size); - checkMem->prot_write(baseAddr1, data1, size); - mainMem->prot_write(baseAddr2, data2, size); - checkMem->prot_write(baseAddr2, data2, size); - mainMem->prot_write(uncacheAddr, data3, size); - checkMem->prot_write(uncacheAddr, data3, size); -*/ - delete [] data1; - delete [] data2; - delete [] data3; + cachePort.memsetBlob(baseAddr1, 1, size); + funcPort.memsetBlob(baseAddr1, 1, size); + cachePort.memsetBlob(baseAddr2, 2, size); + funcPort.memsetBlob(baseAddr2, 2, size); + cachePort.memsetBlob(uncacheAddr, 3, size); + funcPort.memsetBlob(uncacheAddr, 3, size); } static void @@ -209,7 +200,7 @@ MemTest::completeRequest(Packet *pkt) outstandingAddrs.erase(removeAddr); switch (pkt->cmd) { - case Packet::ReadReq: + case Packet::ReadResp: if (memcmp(pkt_data, data, pkt->getSize()) != 0) { cerr << name() << ": on read of 0x" << hex << req->getPaddr() @@ -236,7 +227,7 @@ MemTest::completeRequest(Packet *pkt) exitSimLoop("Maximum number of loads reached!"); break; - case Packet::WriteReq: + case Packet::WriteResp: numWritesStat++; break; /* @@ -319,20 +310,14 @@ MemTest::tick() //If we aren't doing copies, use id as offset, and do a false sharing //mem tester - // ***** NOTE FOR RON: We're not doing copies, but I'm not sure if this - // code should be used. -/* - if (percentCopies == 0) { - //We can eliminate the lower bits of the offset, and then use the id - //to offset within the blks - offset &= ~63; //Not the low order bits - offset += id; - access_size = 0; - } -*/ + //We can eliminate the lower bits of the offset, and then use the id + //to offset within the blks + offset &= ~63; //Not the low order bits + offset += id; + access_size = 0; Request *req = new Request(); - uint32_t flags = req->getFlags(); + uint32_t flags = 0; Addr paddr; if (cacheable < percentUncacheable) { @@ -341,11 +326,12 @@ MemTest::tick() } else { paddr = ((base) ? baseAddr1 : baseAddr2) + offset; } - // bool probe = (random() % 2 == 1) && !req->isUncacheable(); + //bool probe = (random() % 2 == 1) && !req->isUncacheable(); bool probe = false; paddr &= ~((1 << access_size) - 1); req->setPhys(paddr, 1 << access_size, flags); + req->setThreadContext(id,0); uint8_t *result = new uint8_t[8]; @@ -359,7 +345,8 @@ MemTest::tick() else outstandingAddrs.insert(paddr); // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin -// checkMem->access(Read, req->getPaddr(), result, req->size); + funcPort.readBlob(req->getPaddr(), result, req->getSize()); + if (blockAddr(paddr) == traceBlockAddr) { cerr << name() << ": initiating read " @@ -377,10 +364,8 @@ MemTest::tick() pkt->senderState = state; if (probe) { - // ***** NOTE FOR RON: Send functional access? It used to - // be a probeAndUpdate access. - Kevin cachePort.sendFunctional(pkt); -// completeRequest(pkt, result); + completeRequest(pkt); } else { // req->completionEvent = new MemCompleteEvent(req, result, this); if (!cachePort.sendTiming(pkt)) { @@ -397,8 +382,6 @@ MemTest::tick() if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return; else outstandingAddrs.insert(paddr); - // ***** NOTE FOR RON: Not sure how to access memory. 
-// checkMem->access(Write, req->paddr, req->data, req->size); /* if (blockAddr(req->getPaddr()) == traceBlockAddr) { cerr << name() << ": initiating write " @@ -419,9 +402,9 @@ MemTest::tick() MemTestSenderState *state = new MemTestSenderState(result); pkt->senderState = state; + funcPort.writeBlob(req->getPaddr(), pkt_data, req->getSize()); + if (probe) { - // ***** NOTE FOR RON: Send functional access? It used to - // be a probe access. - Kevin cachePort.sendFunctional(pkt); // completeRequest(req, NULL); } else { @@ -490,8 +473,8 @@ MemTest::doRetry() BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest) // SimObjectParam<BaseCache *> cache; - SimObjectParam<PhysicalMemory *> main_mem; - SimObjectParam<PhysicalMemory *> check_mem; +// SimObjectParam<PhysicalMemory *> main_mem; +// SimObjectParam<PhysicalMemory *> check_mem; Param<unsigned> memory_size; Param<unsigned> percent_reads; // Param<unsigned> percent_copies; @@ -508,8 +491,8 @@ END_DECLARE_SIM_OBJECT_PARAMS(MemTest) BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) // INIT_PARAM(cache, "L1 cache"), - INIT_PARAM(main_mem, "hierarchical memory"), - INIT_PARAM(check_mem, "check memory"), +// INIT_PARAM(main_mem, "hierarchical memory"), +// INIT_PARAM(check_mem, "check memory"), INIT_PARAM(memory_size, "memory size"), INIT_PARAM(percent_reads, "target read percentage"), // INIT_PARAM(percent_copies, "target copy percentage"), @@ -527,8 +510,8 @@ END_INIT_SIM_OBJECT_PARAMS(MemTest) CREATE_SIM_OBJECT(MemTest) { - return new MemTest(getInstanceName(), /*cache->getInterface(),*/ main_mem, - check_mem, memory_size, percent_reads, /*percent_copies,*/ + return new MemTest(getInstanceName(), /*cache->getInterface(),*/ /*main_mem,*/ + /*check_mem,*/ memory_size, percent_reads, /*percent_copies,*/ percent_uncacheable, progress_interval, percent_source_unaligned, percent_dest_unaligned, trace_addr, max_loads); diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index a0e58126b..87ecc6de3 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -41,17 +41,18 @@ #include "sim/sim_exit.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" +#include "mem/mem_object.hh" +#include "mem/port.hh" -class PhysicalMemory; -class ThreadContext; +class Packet; class MemTest : public MemObject { public: MemTest(const std::string &name, // MemInterface *_cache_interface, - PhysicalMemory *main_mem, - PhysicalMemory *check_mem, +// PhysicalMemory *main_mem, +// PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, // unsigned _percentCopies, @@ -112,10 +113,11 @@ class MemTest : public MemObject virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } }; CpuPort cachePort; + CpuPort funcPort; class MemTestSenderState : public Packet::SenderState { @@ -132,9 +134,9 @@ class MemTest : public MemObject // Request *dataReq; Packet *retryPkt; // MemInterface *cacheInterface; - PhysicalMemory *mainMem; - PhysicalMemory *checkMem; - SimpleThread *thread; +// PhysicalMemory *mainMem; +// PhysicalMemory *checkMem; +// SimpleThread *thread; bool accessRetry; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 1a2ca32a4..280bf0e71 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -96,7 +96,7 @@ class DefaultFetch /** Returns the address ranges of this device. 
*/ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles setting fetch to the * proper status to start fetching. */ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index a01784f8f..32210f1cd 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -63,7 +63,7 @@ template<class Impl> void DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvFunctional callback!"); + warn("Default fetch doesn't update its state from a functional call."); } template<class Impl> @@ -599,7 +599,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid if (fault == NoFault) { #if 0 if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) || - memReq[tid]->flags & UNCACHEABLE) { + memReq[tid]->isUncacheable()) { DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a " "misspeculating path)!", memReq[tid]->paddr); diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 190734dc2..6b12d75b4 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -311,7 +311,7 @@ class LSQ { /** Returns the address ranges of this device. */ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles writing back and * completing the load or store that has returned from diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 2bbab71f0..7b7d1eb8e 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -46,7 +46,7 @@ template <class Impl> void LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) { - panic("O3CPU doesn't expect recvFunctional callback!"); + warn("O3CPU doesn't update things on a recvFunctional."); } template <class Impl> diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 7b65ef556..11a02e7c7 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -492,7 +492,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // A bit of a hackish way to get uncached accesses to work only if they're // at the head of the LSQ and are ready to commit (at the head of the ROB // too). - if (req->getFlags() & UNCACHEABLE && + if (req->isUncacheable() && (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; @@ -509,7 +509,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) load_idx, store_idx, storeHead, req->getPaddr()); #if FULL_SYSTEM - if (req->getFlags() & LOCKED) { + if (req->isLocked()) { cpu->lockAddr = req->getPaddr(); cpu->lockFlag = true; } diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index dc1a99d87..3f9db912f 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -416,7 +416,7 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) // realizes there is activity. // Mark it as executed unless it is an uncached load that // needs to hit the head of commit. - if (!(inst->req->getFlags() & UNCACHEABLE) || inst->isAtCommit()) { + if (!(inst->req->isUncacheable()) || inst->isAtCommit()) { inst->setExecuted(); } iewStage->instToCommit(inst); @@ -613,8 +613,8 @@ LSQUnit<Impl>::writebackStores() inst->seqNum); // @todo: Remove this SC hack once the memory system handles it.
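The store-conditional result values that this hack and the Alpha helpers above traffic in follow one convention, collected here as a sketch (the enum and its names are hypothetical; only the raw values 0, 1, and 2 appear in the source):

enum ScResult
{
    ScFailed    = 0,  // lock flag clear or address mismatch: result reg gets 0
    ScSucceeded = 1,  // store performed: result reg gets 1
    ScSkipped   = 2   // uncacheable (Turbolaser mailbox): leave the result
                      // register untouched (see stq_c in decoder.isa)
};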
- if (req->getFlags() & LOCKED) { - if (req->getFlags() & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->setScResult(2); } else { if (cpu->lockFlag) { diff --git a/src/cpu/ozone/back_end.hh b/src/cpu/ozone/back_end.hh index 9bab6a964..8debd277d 100644 --- a/src/cpu/ozone/back_end.hh +++ b/src/cpu/ozone/back_end.hh @@ -493,7 +493,7 @@ BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx) } */ /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Read"); */ return LSQ.read(req, data, load_idx); @@ -534,7 +534,7 @@ BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx) *res = memReq->result; */ /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Write"); */ return LSQ.write(req, data, store_idx); diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh index ac3218c02..4078699fe 100644 --- a/src/cpu/ozone/back_end_impl.hh +++ b/src/cpu/ozone/back_end_impl.hh @@ -1256,7 +1256,7 @@ BackEnd<Impl>::executeInsts() // ++iewExecStoreInsts; - if (!(inst->req->flags & LOCKED)) { + if (!(inst->req->isLocked())) { inst->setExecuted(); instToCommit(inst); diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index 8c5be9424..70ec1d101 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -455,12 +455,12 @@ class OzoneCPU : public BaseCPU { #if 0 #if FULL_SYSTEM && defined(TARGET_ALPHA) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } #endif - if (req->flags & LOCKED) { + if (req->isLocked()) { lockAddrList.insert(req->paddr); lockFlag = true; } @@ -489,10 +489,10 @@ class OzoneCPU : public BaseCPU ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? [RGD] @@ -532,8 +532,8 @@ class OzoneCPU : public BaseCPU #endif - if (req->flags & LOCKED) { - if (req->flags & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->result = 2; } else { if (this->lockFlag) { diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index 5ffd3666e..59cf9785c 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -92,7 +92,7 @@ class FrontEnd /** Returns the address ranges of this device. */ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles setting fetch to the * proper status to start fetching. 
*/ diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index d34716de6..c814ff9c7 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -59,7 +59,7 @@ template<class Impl> void FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt) { - panic("FrontEnd doesn't expect recvFunctional callback!"); + warn("FrontEnd doesn't update state from functional calls"); } template<class Impl> @@ -493,7 +493,7 @@ FrontEnd<Impl>::fetchCacheLine() if (fault == NoFault) { #if 0 if (cpu->system->memctrl->badaddr(memReq->paddr) || - memReq->flags & UNCACHEABLE) { + memReq->isUncacheable()) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " "misspeculating path!", memReq->paddr); diff --git a/src/cpu/ozone/inorder_back_end.hh b/src/cpu/ozone/inorder_back_end.hh index ffdba2f6c..76eef6fad 100644 --- a/src/cpu/ozone/inorder_back_end.hh +++ b/src/cpu/ozone/inorder_back_end.hh @@ -231,7 +231,7 @@ InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags) } } /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Read"); */ return fault; @@ -243,7 +243,7 @@ Fault InorderBackEnd<Impl>::read(MemReqPtr &req, T &data) { #if FULL_SYSTEM && defined(TARGET_ALPHA) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } @@ -291,7 +291,7 @@ InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) if (res && (fault == NoFault)) *res = memReq->result; /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Write"); */ return fault; @@ -306,10 +306,10 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data) ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? [RGD] @@ -391,7 +391,7 @@ InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx) } /* - if (!dcacheInterface && (req->flags & UNCACHEABLE)) + if (!dcacheInterface && (req->isUncacheable())) recordEvent("Uncached Read"); */ return NoFault; @@ -455,8 +455,8 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx) } } /* - if (req->flags & LOCKED) { - if (req->flags & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; } else { @@ -469,7 +469,7 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx) *res = req->result; */ /* - if (!dcacheInterface && (req->flags & UNCACHEABLE)) + if (!dcacheInterface && (req->isUncacheable())) recordEvent("Uncached Write"); */ return NoFault; diff --git a/src/cpu/ozone/lsq_unit.hh b/src/cpu/ozone/lsq_unit.hh index 38c1c09a2..056c79521 100644 --- a/src/cpu/ozone/lsq_unit.hh +++ b/src/cpu/ozone/lsq_unit.hh @@ -426,7 +426,7 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). // @todo: Fix uncached accesses. 
- if (req->flags & UNCACHEABLE && + if (req->isUncacheable() && (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) { return TheISA::genMachineCheckFault(); diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh index ee0804036..c46eb90be 100644 --- a/src/cpu/ozone/lsq_unit_impl.hh +++ b/src/cpu/ozone/lsq_unit_impl.hh @@ -577,7 +577,7 @@ OzoneLSQ<Impl>::writebackStores() MemAccessResult result = dcacheInterface->access(req); //@todo temp fix for LL/SC (works fine for 1 CPU) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->result=1; panic("LL/SC! oh no no support!!!"); } @@ -596,7 +596,7 @@ OzoneLSQ<Impl>::writebackStores() Event *wb = NULL; /* typename IEW::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port transaction. req->result=0; wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, @@ -630,7 +630,7 @@ OzoneLSQ<Impl>::writebackStores() // DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", // storeQueue[storeWBIdx].inst->seqNum); - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port transaction. req->result=1; typename BackEnd::LdWritebackEvent *wb = diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index 6640a9f34..9b93ce74f 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -260,7 +260,7 @@ class OzoneLWLSQ { virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } virtual bool recvTiming(PacketPtr pkt); @@ -507,7 +507,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). // @todo: Fix uncached accesses. - if (req->getFlags() & UNCACHEABLE && + if (req->isUncacheable() && (inst != loadQueue.back() || !inst->isAtCommit())) { DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " "commit/LSQ!\n", @@ -659,7 +659,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) return NoFault; } - if (req->getFlags() & LOCKED) { + if (req->isLocked()) { cpu->lockFlag = true; } diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index 4c96ad149..e523712da 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -72,7 +72,7 @@ template <class Impl> void OzoneLWLSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) { - panic("O3CPU doesn't expect recvFunctional callback!"); + warn("O3CPU doesn't update things on a recvFunctional"); } template <class Impl> @@ -394,7 +394,7 @@ OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst) // Actually probably want the oldest faulting load if (load_fault != NoFault) { DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum); - if (!(inst->req->getFlags() & UNCACHEABLE && !inst->isAtCommit())) { + if (!(inst->req->isUncacheable() && !inst->isAtCommit())) { inst->setExecuted(); } // Maybe just set it as can commit here, although that might cause @@ -605,8 +605,8 @@ OzoneLWLSQ<Impl>::writebackStores() inst->seqNum); // @todo: Remove this SC hack once the memory system handles it.
- if (req->getFlags() & LOCKED) { - if (req->getFlags() & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->setScResult(2); } else { if (cpu->lockFlag) { @@ -663,7 +663,7 @@ OzoneLWLSQ<Impl>::writebackStores() if (result != MA_HIT && dcacheInterface->doEvents()) { store_event->miss = true; typename BackEnd::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { + if (req->isLocked()) { wb = new typename BackEnd::LdWritebackEvent(inst, be); store_event->wbEvent = wb; @@ -690,7 +690,7 @@ OzoneLWLSQ<Impl>::writebackStores() // DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", // inst->seqNum); - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port // transaction in the 21264, but that might be // hard to accomplish in this model. diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 88698bfee..490be20ae 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -28,6 +28,7 @@ * Authors: Steve Reinhardt */ +#include "arch/locked_mem.hh" #include "arch/utility.hh" #include "cpu/exetrace.hh" #include "cpu/simple/atomic.hh" @@ -93,7 +94,7 @@ AtomicSimpleCPU::init() bool AtomicSimpleCPU::CpuPort::recvTiming(Packet *pkt) { - panic("AtomicSimpleCPU doesn't expect recvAtomic callback!"); + panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); return true; } @@ -107,7 +108,8 @@ AtomicSimpleCPU::CpuPort::recvAtomic(Packet *pkt) void AtomicSimpleCPU::CpuPort::recvFunctional(Packet *pkt) { - panic("AtomicSimpleCPU doesn't expect recvFunctional callback!"); + //No internal storage to update, just return + return; } void @@ -133,20 +135,19 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) { _status = Idle; - // @todo fix me and get the real cpu id & thread number!!! 
ifetch_req = new Request(); - ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); ifetch_pkt->dataStatic(&inst); data_read_req = new Request(); - data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too data_read_pkt = new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); data_read_pkt->dataStatic(&dataReg); data_write_req = new Request(); - data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too data_write_pkt = new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); } @@ -161,6 +162,8 @@ AtomicSimpleCPU::serialize(ostream &os) { SimObject::State so_state = SimObject::getState(); SERIALIZE_ENUM(so_state); + Status _status = status(); + SERIALIZE_ENUM(_status); BaseSimpleCPU::serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); @@ -171,6 +174,7 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { SimObject::State so_state; UNSERIALIZE_ENUM(so_state); + UNSERIALIZE_ENUM(_status); BaseSimpleCPU::unserialize(cp, section); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } @@ -253,29 +257,36 @@ template <class T> Fault AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) { - data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + // use the CPU's statically allocated read request and packet objects + Request *req = data_read_req; + Packet *pkt = data_read_pkt; + + req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { traceData->setAddr(addr); } // translate to physical address - Fault fault = thread->translateDataReadReq(data_read_req); + Fault fault = thread->translateDataReadReq(req); // Now do the access. if (fault == NoFault) { - data_read_pkt->reinitFromRequest(); + pkt->reinitFromRequest(); - dcache_latency = dcachePort.sendAtomic(data_read_pkt); + dcache_latency = dcachePort.sendAtomic(pkt); dcache_access = true; - assert(data_read_pkt->result == Packet::Success); - data = data_read_pkt->get<T>(); + assert(pkt->result == Packet::Success); + data = pkt->get<T>(); + if (req->isLocked()) { + TheISA::handleLockedRead(thread, req); + } } // This will need a new way to tell if it has a dcache attached. - if (data_read_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Read"); return fault; @@ -328,33 +339,52 @@ template <class T> Fault AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { - data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + // use the CPU's statically allocated write request and packet objects + Request *req = data_write_req; + Packet *pkt = data_write_pkt; + + req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { traceData->setAddr(addr); } // translate to physical address - Fault fault = thread->translateDataWriteReq(data_write_req); + Fault fault = thread->translateDataWriteReq(req); // Now do the access. 
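In outline, the rewritten hunk below gates the cache access on the ISA's store-conditional check (sketch, Alpha case; not a literal excerpt). When handleLockedWrite() returns false the SC has already failed, an SC result of 0 is already recorded in the request, and the cache is never touched:

bool do_access = true;                    // suppress the access on SC failure
if (req->isLocked())
    do_access = TheISA::handleLockedWrite(thread, req);
if (do_access) {
    // htog(), reinitFromRequest(), dataStatic(), sendAtomic() as below
}
if (req->isLocked() && res)
    *res = req->getScResult();            // 0 = failed, 1 = succeeded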
if (fault == NoFault) { - data = htog(data); - data_write_pkt->reinitFromRequest(); - data_write_pkt->dataStatic(&data); + bool do_access = true; // flag to suppress cache access - dcache_latency = dcachePort.sendAtomic(data_write_pkt); - dcache_access = true; + if (req->isLocked()) { + do_access = TheISA::handleLockedWrite(thread, req); + } + + if (do_access) { + data = htog(data); + pkt->reinitFromRequest(); + pkt->dataStatic(&data); - assert(data_write_pkt->result == Packet::Success); + dcache_latency = dcachePort.sendAtomic(pkt); + dcache_access = true; - if (res && data_write_req->getFlags() & LOCKED) { - *res = data_write_req->getScResult(); + assert(pkt->result == Packet::Success); + } + + if (req->isLocked()) { + uint64_t scResult = req->getScResult(); + if (scResult != 0) { + // clear failure counter + thread->setStCondFailures(0); + } + if (res) { + *res = req->getScResult(); + } } } // This will need a new way to tell if it's hooked up to a cache or not. - if (data_write_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Write"); // If the write needs to have a fault on the access, consider calling @@ -467,11 +497,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) Param<Tick> progress_interval; SimObjectParam<MemObject *> mem; SimObjectParam<System *> system; + Param<int> cpu_id; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - Param<int> cpu_id; Param<Tick> profile; #else SimObjectParam<Process *> workload; @@ -500,11 +530,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) INIT_PARAM(progress_interval, "Progress interval"), INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else INIT_PARAM(workload, "processes to run"), @@ -538,11 +568,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->simulate_stalls = simulate_stalls; params->mem = mem; params->system = system; + params->cpu_id = cpu_id; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->cpu_id = cpu_id; params->profile = profile; #else params->process = workload; diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index b602af558..52afd76ef 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -104,9 +104,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - }; + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } + }; CpuPort icachePort; CpuPort dcachePort; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 9bed5dab1..33f673cbc 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -28,6 +28,7 @@ * Authors: Steve Reinhardt */ +#include "arch/locked_mem.hh" #include "arch/utility.hh" #include "cpu/exetrace.hh" #include "cpu/simple/timing.hh" @@ -73,7 +74,8 @@ TimingSimpleCPU::CpuPort::recvAtomic(Packet *pkt) void TimingSimpleCPU::CpuPort::recvFunctional(Packet *pkt) { - panic("TimingSimpleCPU doesn't expect recvFunctional callback!"); + //No internal storage to update, just return + return; } void @@ -94,7 +96,8 @@ TimingSimpleCPU::CpuPort::TickEvent::schedule(Packet *_pkt, Tick t) } TimingSimpleCPU::TimingSimpleCPU(Params *p) : BaseSimpleCPU(p), icachePort(this, p->clock),
dcachePort(this, p->clock), + cpu_id(p->cpu_id) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; @@ -251,35 +254,35 @@ template <class T> Fault TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) { - // need to fill in CPU & thread IDs here - Request *data_read_req = new Request(); - data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE - data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + Request *req = + new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(), + cpu_id, /* thread ID */ 0); if (traceData) { - traceData->setAddr(data_read_req->getVaddr()); + traceData->setAddr(req->getVaddr()); } // translate to physical address - Fault fault = thread->translateDataReadReq(data_read_req); + Fault fault = thread->translateDataReadReq(req); // Now do the access. if (fault == NoFault) { - Packet *data_read_pkt = - new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); - data_read_pkt->dataDynamic<T>(new T); + Packet *pkt = + new Packet(req, Packet::ReadReq, Packet::Broadcast); + pkt->dataDynamic<T>(new T); - if (!dcachePort.sendTiming(data_read_pkt)) { + if (!dcachePort.sendTiming(pkt)) { _status = DcacheRetry; - dcache_pkt = data_read_pkt; + dcache_pkt = pkt; } else { _status = DcacheWaitResponse; + // memory system takes ownership of packet dcache_pkt = NULL; } } // This will need a new way to tell if it has a dcache attached. - if (data_read_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Read"); return fault; @@ -332,31 +335,39 @@ template <class T> Fault TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { - // need to fill in CPU & thread IDs here - Request *data_write_req = new Request(); - data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE - data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + Request *req = + new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(), + cpu_id, /* thread ID */ 0); // translate to physical address - Fault fault = thread->translateDataWriteReq(data_write_req); + Fault fault = thread->translateDataWriteReq(req); + // Now do the access. if (fault == NoFault) { - Packet *data_write_pkt = - new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); - data_write_pkt->allocate(); - data_write_pkt->set(data); + assert(dcache_pkt == NULL); + dcache_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + dcache_pkt->allocate(); + dcache_pkt->set(data); - if (!dcachePort.sendTiming(data_write_pkt)) { - _status = DcacheRetry; - dcache_pkt = data_write_pkt; - } else { - _status = DcacheWaitResponse; - dcache_pkt = NULL; + bool do_access = true; // flag to suppress cache access + + if (req->isLocked()) { + do_access = TheISA::handleLockedWrite(thread, req); + } + + if (do_access) { + if (!dcachePort.sendTiming(dcache_pkt)) { + _status = DcacheRetry; + } else { + _status = DcacheWaitResponse; + // memory system takes ownership of packet + dcache_pkt = NULL; + } } } // This will need a new way to tell if it's hooked up to a cache or not. 
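A note on the ownership rule the new comments above and below spell out: a packet handed to sendTiming() belongs to the memory system once the call returns true; on false the CPU keeps it and resends from recvRetry(). The pattern both read() and write() follow here, in outline (sketch, not a literal excerpt):

if (!dcachePort.sendTiming(dcache_pkt)) {
    _status = DcacheRetry;         // we still own dcache_pkt; resend on retry
} else {
    _status = DcacheWaitResponse;  // memory system takes ownership of packet
    dcache_pkt = NULL;
}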
- if (data_write_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Write"); // If the write needs to have a fault on the access, consider calling @@ -416,9 +427,8 @@ TimingSimpleCPU::fetch() { checkForInterrupts(); - // need to fill in CPU & thread IDs here Request *ifetch_req = new Request(); - ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + ifetch_req->setThreadContext(cpu_id, /* thread ID */ 0); Fault fault = setupFetchRequest(ifetch_req); ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); @@ -483,12 +493,20 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) { // load or store: just send to dcache Fault fault = curStaticInst->initiateAcc(this, traceData); - if (fault == NoFault) { - // successfully initiated access: instruction will - // complete in dcache response callback - assert(_status == DcacheWaitResponse); + if (_status != Running) { + // instruction will complete in dcache response callback + assert(_status == DcacheWaitResponse || _status == DcacheRetry); + assert(fault == NoFault); } else { - // fault: complete now to invoke fault handler + if (fault == NoFault) { + // early fail on store conditional: complete now + assert(dcache_pkt != NULL); + fault = curStaticInst->completeAcc(dcache_pkt, this, + traceData); + delete dcache_pkt->req; + delete dcache_pkt; + dcache_pkt = NULL; + } postExecute(); advanceInst(fault); } @@ -509,8 +527,7 @@ TimingSimpleCPU::IcachePort::ITickEvent::process() bool TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt) { - // These next few lines could be replaced with something faster - // who knows what though + // delay processing of returned data until next CPU clock edge Tick time = pkt->req->getTime(); while (time < curTick) time += lat; @@ -551,6 +568,10 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) Fault fault = curStaticInst->completeAcc(pkt, this, traceData); + if (pkt->isRead() && pkt->req->isLocked()) { + TheISA::handleLockedRead(thread, pkt->req); + } + delete pkt->req; delete pkt; @@ -578,6 +599,7 @@ TimingSimpleCPU::completeDrain() bool TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) { + // delay processing of returned data until next CPU clock edge Tick time = pkt->req->getTime(); while (time < curTick) time += lat; @@ -606,6 +628,7 @@ TimingSimpleCPU::DcachePort::recvRetry() Packet *tmp = cpu->dcache_pkt; if (sendTiming(tmp)) { cpu->_status = DcacheWaitResponse; + // memory system takes ownership of packet cpu->dcache_pkt = NULL; } } @@ -624,11 +647,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) Param<Tick> progress_interval; SimObjectParam<MemObject *> mem; SimObjectParam<System *> system; + Param<int> cpu_id; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - Param<int> cpu_id; Param<Tick> profile; #else SimObjectParam<Process *> workload; @@ -657,11 +680,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) INIT_PARAM(progress_interval, "Progress interval"), INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else INIT_PARAM(workload, "processes to run"), @@ -693,11 +716,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->functionTraceStart = function_trace_start; params->mem = mem; params->system = system; + params->cpu_id = cpu_id; #if FULL_SYSTEM params->itb = 
itb; params->dtb = dtb; - params->cpu_id = cpu_id; params->profile = profile; #else params->process = workload; diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 8a20d1cfe..988ddeded 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -92,7 +92,7 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } struct TickEvent : public Event { @@ -166,6 +166,7 @@ class TimingSimpleCPU : public BaseSimpleCPU Packet *ifetch_pkt; Packet *dcache_pkt; + int cpu_id; Tick previousTick; public: diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 242cfd0e1..6fa6500bd 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -237,7 +237,7 @@ class SimpleThread : public ThreadState Fault read(RequestPtr &req, T &data) { #if FULL_SYSTEM && THE_ISA == ALPHA_ISA - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } @@ -256,10 +256,10 @@ class SimpleThread : public ThreadState ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? [RGD] diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc index e8d7f4817..8007fda5e 100644 --- a/src/dev/ide_ctrl.cc +++ b/src/dev/ide_ctrl.cc @@ -742,7 +742,6 @@ IdeController::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_SCALAR(bm_enabled); UNSERIALIZE_ARRAY(cmd_in_progress, sizeof(cmd_in_progress) / sizeof(cmd_in_progress[0])); - pioPort->sendStatusChange(Port::RangeChange); } #ifndef DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index c3b83f448..b16ddb31a 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -302,6 +302,8 @@ PciDev::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_ARRAY(BARAddrs, sizeof(BARAddrs) / sizeof(BARAddrs[0])); UNSERIALIZE_ARRAY(config.data, sizeof(config.data) / sizeof(config.data[0])); + pioPort->sendStatusChange(Port::RangeChange); + } #ifndef DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/kern/tru64/tru64.hh b/src/kern/tru64/tru64.hh index 87ac88007..18671c364 100644 --- a/src/kern/tru64/tru64.hh +++ b/src/kern/tru64/tru64.hh @@ -588,16 +588,26 @@ class Tru64 : public OperatingSystem argp.copyIn(tc->getMemPort()); + int stack_size = + gtoh(argp->rsize) + gtoh(argp->ysize) + gtoh(argp->gsize); + // if the user chose an address, just let them have it. Otherwise // pick one for them. 
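The page-rounding helpers used by the replacement code below are not shown in this diff; plausible definitions, assuming VMPageSize is a power of two (sketched here from m5's math utilities, so treat the exact form as an assumption), are:

#include <cstdint>
typedef uint64_t Addr;

inline Addr roundDown(Addr val, Addr align) { return val & ~(align - 1); }
inline Addr roundUp(Addr val, Addr align) { return (val + align - 1) & ~(align - 1); }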
- if (htog(argp->address) == 0) { - argp->address = htog(process->next_thread_stack_base); - int stack_size = (htog(argp->rsize) + htog(argp->ysize) + - htog(argp->gsize)); + Addr stack_base = gtoh(argp->address); + + if (stack_base == 0) { + stack_base = process->next_thread_stack_base; process->next_thread_stack_base -= stack_size; - argp.copyOut(tc->getMemPort()); } + stack_base = roundDown(stack_base, VMPageSize); + + // map memory + process->pTable->allocate(stack_base, roundUp(stack_size, VMPageSize)); + + argp->address = gtoh(stack_base); + argp.copyOut(tc->getMemPort()); + return 0; } @@ -633,7 +643,7 @@ class Tru64 : public OperatingSystem abort(); } - const Addr base_addr = 0x12000; // was 0x3f0000000LL; + Addr base_addr = 0x12000; // was 0x3f0000000LL; Addr cur_addr = base_addr; // next addresses to use // first comes the config_info struct Addr config_addr = cur_addr; @@ -659,8 +669,6 @@ class Tru64 : public OperatingSystem config->nxm_slot_state = htog(slot_state_addr); config->nxm_rad[0] = htog(rad_state_addr); - config.copyOut(tc->getMemPort()); - // initialize the slot_state array and copy it out TypedBufferArg<Tru64::nxm_slot_state_t> slot_state(slot_state_addr, slot_state_size); @@ -672,8 +680,6 @@ class Tru64 : public OperatingSystem (i == 0) ? Tru64::NXM_SLOT_BOUND : Tru64::NXM_SLOT_AVAIL; } - slot_state.copyOut(tc->getMemPort()); - // same for the per-RAD "shared" struct. Note that we need to // allocate extra bytes for the per-VP array which is embedded at // the end. @@ -706,17 +712,20 @@ class Tru64 : public OperatingSystem } } - rad_state.copyOut(tc->getMemPort()); - // // copy pointer to shared config area out to user // *configptr_ptr = htog(config_addr); - configptr_ptr.copyOut(tc->getMemPort()); // Register this as a valid address range with the process - process->nxm_start = base_addr; - process->nxm_end = cur_addr; + base_addr = roundDown(base_addr, VMPageSize); + int size = cur_addr - base_addr; + process->pTable->allocate(base_addr, roundUp(size, VMPageSize)); + + config.copyOut(tc->getMemPort()); + slot_state.copyOut(tc->getMemPort()); + rad_state.copyOut(tc->getMemPort()); + configptr_ptr.copyOut(tc->getMemPort()); return 0; } diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 3c5283a77..7584ffffd 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -67,6 +67,47 @@ Bus::init() (*intIter)->sendStatusChange(Port::RangeChange); } +Bus::BusFreeEvent::BusFreeEvent(Bus *_bus) : Event(&mainEventQueue), bus(_bus) +{ + assert(!scheduled()); +} + +void Bus::BusFreeEvent::process() +{ + bus->recvRetry(0); +} + +const char * Bus::BusFreeEvent::description() +{ + return "bus became available"; +} + +void +Bus::occupyBus(int numCycles) +{ + //Move up when the bus will next be free + //We avoid the use of divide by adding repeatedly + //This should be faster if the value is updated frequently, but +may be slower otherwise. + + //Bring tickNextIdle up to the present tick + //There is some potential ambiguity where a cycle starts, which might make + //a difference when devices are acting right around a cycle boundary. Using + //a < allows things which happen exactly on a cycle boundary to take up only + //the following cycle. Anything that happens later will have to "wait" for the + //end of that cycle, and then start using the bus after that. + while (tickNextIdle < curTick) + tickNextIdle += clock; + //Advance it numCycles bus cycles. + //XXX Should this use the repeating add trick as well?
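For reference, the catch-up loop above (the "avoid divide by adding repeatedly" trick) is equivalent to a single divide-based rounding step; a sketch that preserves the strict <, so an event exactly on a cycle boundary still takes only the following cycle:

typedef uint64_t Tick;

Tick catchUp(Tick tickNextIdle, Tick curTick, Tick clock)
{
    if (tickNextIdle < curTick) {
        // smallest multiple of clock that brings tickNextIdle up to >= curTick
        Tick behind = curTick - tickNextIdle;
        tickNextIdle += ((behind + clock - 1) / clock) * clock;
    }
    return tickNextIdle;
}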
+ tickNextIdle += (numCycles * clock); + if (!busIdle.scheduled()) { + busIdle.schedule(tickNextIdle); + } else { + busIdle.reschedule(tickNextIdle); + } + DPRINTF(Bus, "The bus is now occupied from tick %d to %d\n", curTick, tickNextIdle); +} /** Function called by the port when the bus is receiving a Timing * transaction.*/ @@ -77,17 +118,26 @@ Bus::recvTiming(Packet *pkt) DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); + Port *pktPort = interfaces[pkt->getSrc()]; + short dest = pkt->getDest(); if (dest == Packet::Broadcast) { - if ( timingSnoopPhase1(pkt) ) - { - timingSnoopPhase2(pkt); + if (timingSnoop(pkt)) { + pkt->flags |= SNOOP_COMMIT; + bool success = timingSnoop(pkt); + assert(success); + if (pkt->flags & SATISFIED) { + //Cache-Cache transfer occurring + if (retryingPort) { + retryList.pop_front(); + retryingPort = NULL; + } + return true; + } port = findPort(pkt->getAddr(), pkt->getSrc()); - } - else - { + } else { //Snoop didn't succeed - retryList.push_back(interfaces[pkt->getSrc()]); + addToRetryList(pktPort); return false; } } else { @@ -95,35 +145,61 @@ assert(dest != pkt->getSrc()); // catch infinite loops port = interfaces[dest]; } + + // The packet will be sent. Figure out how long it occupies the bus. + int numCycles = 0; + // Requests need one cycle to send an address + if (pkt->isRequest()) + numCycles++; + else if (pkt->isResponse() || pkt->hasData()) { + // If a packet has data, it needs ceil(size/width) cycles to send it + // We're using the "adding instead of dividing" trick again here + if (pkt->hasData()) { + int dataSize = pkt->getSize(); + for (int transmitted = 0; transmitted < dataSize; + transmitted += width) { + numCycles++; + } + } else { + // If the packet didn't have data, it must have been a response. + // Those use the bus for one cycle to send their data. + numCycles++; + } + } + + occupyBus(numCycles); + if (port->sendTiming(pkt)) { - // packet was successfully sent, just return true. + // Packet was successfully sent. Return true. + // Also take care of retries + if (retryingPort) { + retryList.pop_front(); + retryingPort = NULL; + } return true; } - // packet not successfully sent - retryList.push_back(interfaces[pkt->getSrc()]); + // Packet not successfully sent. Leave or put it on the retry list. + addToRetryList(pktPort); return false; } void Bus::recvRetry(int id) { - // Go through all the elements on the list calling sendRetry on each - // This is not very efficient at all but it works. Ultimately we should end - // up with something that is more intelligent. - int initialSize = retryList.size(); - int i; - Port *p; - - for (i = 0; i < initialSize; i++) { - assert(retryList.size() > 0); - p = retryList.front(); - retryList.pop_front(); - p->sendRetry(); + // If there's anything waiting...
 void
 Bus::recvRetry(int id)
 {
-    // Go through all the elements on the list calling sendRetry on each
-    // This is not very efficient at all but it works. Ultimately we should
-    // end up with something that is more intelligent.
-    int initialSize = retryList.size();
-    int i;
-    Port *p;
-
-    for (i = 0; i < initialSize; i++) {
-        assert(retryList.size() > 0);
-        p = retryList.front();
-        retryList.pop_front();
-        p->sendRetry();
+    // If there's anything waiting...
+    if (retryList.size()) {
+        retryingPort = retryList.front();
+        retryingPort->sendRetry();
+        // If the retryingPort pointer isn't null, sendTiming wasn't called
+        if (retryingPort) {
+            warn("sendRetry didn't call sendTiming\n");
+            retryList.pop_front();
+            retryingPort = NULL;
+        }
     }
 }

-
 Port *
 Bus::findPort(Addr addr, int id)
 {
@@ -194,44 +270,34 @@ Bus::atomicSnoop(Packet *pkt)
     }
 }

-bool
-Bus::timingSnoopPhase1(Packet *pkt)
+void
+Bus::functionalSnoop(Packet *pkt)
 {
     std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
-    bool success = true;
-    while (!ports.empty() && success)
+    while (!ports.empty())
     {
-        snoopCallbacks.push_back(ports.back());
-        success = interfaces[ports.back()]->sendTiming(pkt);
+        interfaces[ports.back()]->sendFunctional(pkt);
         ports.pop_back();
     }
-    if (!success)
-    {
-        while (!snoopCallbacks.empty())
-        {
-            interfaces[snoopCallbacks.back()]->sendStatusChange(Port::SnoopSquash);
-            snoopCallbacks.pop_back();
-        }
-        return false;
-    }
-    return true;
 }

-void
-Bus::timingSnoopPhase2(Packet *pkt)
+bool
+Bus::timingSnoop(Packet *pkt)
 {
-    bool success;
-    pkt->flags |= SNOOP_COMMIT;
-    while (!snoopCallbacks.empty())
+    std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
+    bool success = true;
+
+    while (!ports.empty() && success)
     {
-        success = interfaces[snoopCallbacks.back()]->sendTiming(pkt);
-        //We should not fail on snoop callbacks
-        assert(success);
-        snoopCallbacks.pop_back();
+        success = interfaces[ports.back()]->sendTiming(pkt);
+        ports.pop_back();
     }
+
+    return success;
 }

+
 /** Function called by the port when the bus is receiving an Atomic
  * transaction.*/
 Tick
@@ -252,7 +318,7 @@ Bus::recvFunctional(Packet *pkt)
     DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
-    atomicSnoop(pkt);
+    functionalSnoop(pkt);
     findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt);
 }

@@ -381,16 +447,20 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id)

 BEGIN_DECLARE_SIM_OBJECT_PARAMS(Bus)

     Param<int> bus_id;
+    Param<int> clock;
+    Param<int> width;

 END_DECLARE_SIM_OBJECT_PARAMS(Bus)

 BEGIN_INIT_SIM_OBJECT_PARAMS(Bus)

-    INIT_PARAM(bus_id, "a globally unique bus id")
+    INIT_PARAM(bus_id, "a globally unique bus id"),
+    INIT_PARAM(clock, "bus clock speed"),
+    INIT_PARAM(width, "width of the bus (bytes)")

 END_INIT_SIM_OBJECT_PARAMS(Bus)

 CREATE_SIM_OBJECT(Bus)
 {
-    return new Bus(getInstanceName(), bus_id);
+    return new Bus(getInstanceName(), bus_id, clock, width);
 }

 REGISTER_SIM_OBJECT("Bus", Bus)
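The snoopCallbacks/SnoopSquash machinery is gone: timingSnoop() is now simply called twice, first as a side-effect-free query and then with SNOOP_COMMIT set, at which point it must not fail. A toy model of that contract (the types here are invented for illustration):

    #include <cassert>
    #include <vector>

    // Pass 1 (commit == false) must be side-effect free and may fail;
    // pass 2 (commit == true) only runs after a clean pass 1 and must succeed.
    struct Snooper { bool wouldBlock; };

    bool timingSnoop(std::vector<Snooper> &caches, bool commit)
    {
        for (const Snooper &c : caches)
            if (!commit && c.wouldBlock)
                return false;        // report the conflict without acting
        return true;
    }

    int main()
    {
        std::vector<Snooper> caches = {{false}, {true}};
        assert(!timingSnoop(caches, false));     // pass 1 vetoed
        caches[1].wouldBlock = false;
        if (timingSnoop(caches, false)) {        // pass 1 clean, so...
            bool ok = timingSnoop(caches, true); // ...the commit pass cannot fail
            assert(ok);
        }
        return 0;
    }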
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index 941389296..a89738775 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -46,11 +46,18 @@
 #include "mem/packet.hh"
 #include "mem/port.hh"
 #include "mem/request.hh"
+#include "sim/eventq.hh"

 class Bus : public MemObject
 {
     /** a globally unique id for this bus. */
     int busId;
+    /** the clock speed for the bus */
+    int clock;
+    /** the width of the bus in bytes */
+    int width;
+    /** the next tick at which the bus will be idle */
+    Tick tickNextIdle;

     static const int defaultId = -1;
@@ -62,9 +69,6 @@ class Bus : public MemObject
     AddrRangeList defaultRange;
     std::vector<DevMap> portSnoopList;

-    std::vector<int> snoopCallbacks;
-
-
     /** Function called by the port when the bus is receiving a Timing
         transaction.*/
     bool recvTiming(Packet *pkt);
@@ -105,16 +109,14 @@ class Bus : public MemObject
     /** Snoop all relevant ports atomically. */
     void atomicSnoop(Packet *pkt);

-    /** Snoop for NACK and Blocked in phase 1
-     * @return True if it succeeds.
-     */
-    bool timingSnoopPhase1(Packet *pkt);
+    /** Snoop all relevant ports functionally. */
+    void functionalSnoop(Packet *pkt);

-    /** @todo Don't need to commit all snoops just those that need it
-     *(register somehow). */
-    /** Commit all snoops now that we know if any of them would have blocked.
+    /** Call snoop on caches; be sure to set the SNOOP_COMMIT bit if you
+     * want the snoop to actually happen.
+     * @return True if it succeeds.
      */
-    void timingSnoopPhase2(Packet *pkt);
+    bool timingSnoop(Packet *pkt);

     /** Process address range request.
      * @param resp addresses that we can respond to
@@ -181,6 +183,22 @@
     };

+    class BusFreeEvent : public Event
+    {
+        Bus * bus;
+
+      public:
+        BusFreeEvent(Bus * _bus);
+        void process();
+        const char *description();
+    };
+
+    BusFreeEvent busIdle;
+
+    void occupyBus(int numCycles);
+
+    Port * retryingPort;
+
     /** An array of pointers to the peer port interfaces
         connected to this bus.*/
     std::vector<Port*> interfaces;

@@ -189,6 +207,23 @@ class Bus : public MemObject
      * original send failed for whatever reason.*/
     std::list<Port*> retryList;

+    void addToRetryList(Port * port)
+    {
+        if (!retryingPort) {
+            // The device wasn't retrying a packet, or wasn't retrying at an
+            // appropriate time.
+            retryList.push_back(port);
+        } else {
+            // We shouldn't be receiving a packet from one port when a
+            // different one is retrying.
+            assert(port == retryingPort);
+
+            // The device was retrying a packet. It didn't work, so we'll
+            // leave it at the head of the retry list.
+            retryingPort = NULL;
+        }
+    }
+
     /** Port that handles requests that don't match any of the interfaces.*/
     Port *defaultPort;

@@ -199,8 +234,14 @@ class Bus : public MemObject

     virtual void init();

-    Bus(const std::string &n, int bus_id)
-        : MemObject(n), busId(bus_id), defaultPort(NULL) {}
+    Bus(const std::string &n, int bus_id, int _clock, int _width)
+        : MemObject(n), busId(bus_id), clock(_clock), width(_width),
+          tickNextIdle(0), busIdle(this), retryingPort(NULL), defaultPort(NULL)
+    {
+        //Both the width and clock period must be positive
+        assert(width);
+        assert(clock);
+    }
 };
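The retryingPort/retryList handshake declared above admits a compact standalone model (Port is stubbed here to a dummy type; note the assert must run before the pointer is cleared, otherwise the check would be vacuous):

    #include <cassert>
    #include <list>

    struct Port { int id; };            // stand-in for the real Port class

    std::list<Port*> retryList;
    Port *retryingPort = nullptr;

    void addToRetryList(Port *port)
    {
        if (!retryingPort) {
            retryList.push_back(port);      // fresh failure: queue the port
        } else {
            assert(port == retryingPort);   // only the retrying port may fail now
            retryingPort = nullptr;         // it stays at the head of the list
        }
    }

    int main()
    {
        Port a{0}, b{1};
        addToRetryList(&a);                 // a failed to send: queued
        retryingPort = retryList.front();   // bus grants a retry to a
        addToRetryList(&a);                 // the retry failed: a stays queued
        assert(retryList.front() == &a && retryingPort == nullptr);
        addToRetryList(&b);                 // b fails while nothing is retrying
        assert(retryList.size() == 2);
        return 0;
    }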
"succesful" : "unsuccesful"); - cache->sendResult(pkt, success); + cache->sendResult(pkt, mshr, success); if (success && cache->doMasterRequest()) { //Still more to issue, rerequest in 1 cycle @@ -123,7 +123,9 @@ BaseCache::CachePort::recvRetry() } else { - pkt = cache->getCoherencePacket(); + //pkt = cache->getCoherencePacket(); + //We save the packet, no reordering on CSHRS + pkt = cshrRetry; bool success = sendTiming(pkt); if (success && cache->doSlaveRequest()) { @@ -182,10 +184,11 @@ BaseCache::CacheEvent::process() { //MSHR pkt = cachePort->cache->getPacket(); + MSHR* mshr = (MSHR*) pkt->senderState; bool success = cachePort->sendTiming(pkt); DPRINTF(Cache, "Address %x was %s in sending the timing request\n", pkt->getAddr(), success ? "succesful" : "unsuccesful"); - cachePort->cache->sendResult(pkt, success); + cachePort->cache->sendResult(pkt, mshr, success); if (success && cachePort->cache->doMasterRequest()) { //Still more to issue, rerequest in 1 cycle @@ -198,7 +201,11 @@ BaseCache::CacheEvent::process() //CSHR pkt = cachePort->cache->getCoherencePacket(); bool success = cachePort->sendTiming(pkt); - if (success && cachePort->cache->doSlaveRequest()) + if (!success) { + //Need to send on a retry + cachePort->cshrRetry = pkt; + } + else if (cachePort->cache->doSlaveRequest()) { //Still more to issue, rerequest in 1 cycle pkt = NULL; @@ -209,7 +216,10 @@ BaseCache::CacheEvent::process() } //Response //Know the packet to send - pkt->result = Packet::Success; + if (pkt->flags & NACKED_LINE) + pkt->result = Packet::Nacked; + else + pkt->result = Packet::Success; pkt->makeTimingResponse(); if (!cachePort->drainList.empty()) { //Already blocked waiting for bus, just append diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index c69fb7fd5..c45f3b71b 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -72,6 +72,7 @@ enum RequestCause{ Request_PF }; +class MSHR; /** * A basic cache interface. Implements some common functions for speed. */ @@ -112,6 +113,8 @@ class BaseCache : public MemObject bool isCpuSide; std::list<Packet *> drainList; + + Packet *cshrRetry; }; struct CacheEvent : public Event @@ -156,7 +159,7 @@ class BaseCache : public MemObject if (status == Port::RangeChange){ if (!isCpuSide) { cpuSidePort->sendStatusChange(Port::RangeChange); - if (topLevelCache && !snoopRangesSent) { + if (!snoopRangesSent) { snoopRangesSent = true; memSidePort->sendStatusChange(Port::RangeChange); } @@ -165,10 +168,6 @@ class BaseCache : public MemObject memSidePort->sendStatusChange(Port::RangeChange); } } - else if (status == Port::SnoopSquash) { - assert(snoopPhase2); - snoopPhase2 = false; - } } virtual Packet *getPacket() @@ -181,7 +180,7 @@ class BaseCache : public MemObject fatal("No implementation"); } - virtual void sendResult(Packet* &pkt, bool success) + virtual void sendResult(Packet* &pkt, MSHR* mshr, bool success) { fatal("No implementation"); @@ -215,9 +214,6 @@ class BaseCache : public MemObject bool topLevelCache; - /** True if we are now in phase 2 of the snoop process. */ - bool snoopPhase2; - /** Stores time the cache blocked for statistics. 
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index c69fb7fd5..c45f3b71b 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -72,6 +72,7 @@ enum RequestCause{
     Request_PF
 };

+class MSHR;
 /**
  * A basic cache interface. Implements some common functions for speed.
  */
@@ -112,6 +113,8 @@ class BaseCache : public MemObject
         bool isCpuSide;

         std::list<Packet *> drainList;
+
+        Packet *cshrRetry;
     };

     struct CacheEvent : public Event
@@ -156,7 +159,7 @@ class BaseCache : public MemObject
         if (status == Port::RangeChange){
             if (!isCpuSide)
             {
                 cpuSidePort->sendStatusChange(Port::RangeChange);
-                if (topLevelCache && !snoopRangesSent) {
+                if (!snoopRangesSent) {
                     snoopRangesSent = true;
                     memSidePort->sendStatusChange(Port::RangeChange);
                 }
@@ -165,10 +168,6 @@ class BaseCache : public MemObject
                 memSidePort->sendStatusChange(Port::RangeChange);
             }
         }
-        else if (status == Port::SnoopSquash) {
-            assert(snoopPhase2);
-            snoopPhase2 = false;
-        }
     }

     virtual Packet *getPacket()
@@ -181,7 +180,7 @@ class BaseCache : public MemObject
         fatal("No implementation");
     }

-    virtual void sendResult(Packet* &pkt, bool success)
+    virtual void sendResult(Packet* &pkt, MSHR* mshr, bool success)
     {
         fatal("No implementation");

@@ -215,9 +214,6 @@ class BaseCache : public MemObject

     bool topLevelCache;

-    /** True if we are now in phase 2 of the snoop process. */
-    bool snoopPhase2;
-
     /** Stores time the cache blocked for statistics. */
     Tick blockedCycle;

@@ -523,8 +519,10 @@ class BaseCache : public MemObject
      */
     void respond(Packet *pkt, Tick time)
     {
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
     }

     /**
@@ -537,8 +535,10 @@ class BaseCache : public MemObject
         if (!pkt->req->isUncacheable()) {
             missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += time - pkt->time;
         }
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
     }

     /**
@@ -549,6 +549,7 @@ class BaseCache : public MemObject
     {
 //        assert("Implement\n" && 0);
 //        mi->respond(pkt,curTick + hitLatency);
+        assert (pkt->needsResponse());
         CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
         reqMem->schedule(time);
     }
@@ -570,14 +571,14 @@ class BaseCache : public MemObject
     {
         //This is where snoops get updated
         AddrRangeList dummy;
-        if (!topLevelCache)
-        {
+//        if (!topLevelCache)
+//        {
             cpuSidePort->getPeerAddressRanges(dummy, snoop);
-        }
-        else
-        {
-            snoop.push_back(RangeSize(0,-1));
-        }
+//        }
+//        else
+//        {
+//            snoop.push_back(RangeSize(0,-1));
+//        }
         return;
     }

diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 4b8870c95..923bf8255 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -175,7 +175,7 @@ class Cache : public BaseCache
      * @param pkt The request.
      * @param success True if the request was sent successfully.
      */
-    virtual void sendResult(Packet * &pkt, bool success);
+    virtual void sendResult(Packet * &pkt, MSHR* mshr, bool success);

     /**
      * Handles a response (cache line fill/write ack) from the bus.
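The signature change threaded through these files replaces reads of pkt->senderState inside markInService() with an MSHR pointer captured before the packet is handed to the bus. A sketch of the calling pattern (the rationale is an assumption: senderState may no longer be trustworthy once the packet has been sent):

    // sketch of the new calling convention, as used in base_cache.cc above
    Packet *pkt = cache->getPacket();
    MSHR *mshr = (MSHR *)pkt->senderState;   // capture before handing off
    bool success = sendTiming(pkt);          // bus/peer may rewrite the packet
    cache->sendResult(pkt, mshr, success);   // bookkeeping uses the capture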
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 46f4b0ebe..c3c1c0881 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -60,28 +60,20 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
 {
     if (isCpuSide)
     {
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
-        if (!(pkt->flags & SATISFIED)) {
-            access(pkt);
-        }
+        access(pkt);
+
     }
     else
     {
         if (pkt->isResponse())
             handleResponse(pkt);
         else {
-            //Check if we are in phase1
-            if (!snoopPhase2) {
-                snoopPhase2 = true;
-            }
-            else {
-                //Check if we should do the snoop
-                if (pkt->flags && SNOOP_COMMIT)
-                    snoop(pkt);
-                snoopPhase2 = false;
-            }
+            //Check if we should do the snoop
+            if (pkt->flags & SNOOP_COMMIT)
+                snoop(pkt);
         }
     }
     return true;
@@ -95,7 +87,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
     if (isCpuSide)
     {
         //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             pkt->req->setScResult(1);
         }
@@ -125,7 +117,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
         pkt->req->setThreadContext(0,0);

         //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
             assert("Can't handle LL/SC on functional path\n");
         }
@@ -211,9 +203,8 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
                 pkt->getAddr() & (((ULL(1))<<48)-1),
                 pkt->getAddr() & ~((Addr)blkSize - 1));

-        //@todo Should this return latency have the hit latency in it?
-//        respond(pkt,curTick+lat);
         pkt->flags |= SATISFIED;
+        //Invalidates/Upgrades need no response if they get the bus
 //        return MA_HIT; //@todo, return values
         return true;
     }
@@ -243,9 +234,9 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
         missQueue->doWriteback(writebacks.front());
         writebacks.pop_front();
     }
-    DPRINTF(Cache, "%s %x %s blk_addr: %x pc %x\n", pkt->cmdString(),
+    DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(),
             pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
-            pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC());
+            pkt->getAddr() & ~((Addr)blkSize - 1));
     if (blk) {
         // Hit
         hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
@@ -294,10 +285,10 @@ Cache<TagStore,Buffering,Coherence>::getPacket()

 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, bool success)
+Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, bool success)
 {
     if (success) {
-        missQueue->markInService(pkt);
+        missQueue->markInService(pkt, mshr);
         //Temp Hack for UPGRADES
         if (pkt->cmd == Packet::UpgradeReq) {
             handleResponse(pkt);
@@ -313,6 +304,13 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
 {
     BlkType *blk = NULL;
     if (pkt->senderState) {
+        if (pkt->result == Packet::Nacked) {
+            pkt->reinitFromRequest();
+            panic("Unimplemented NACK of packet\n");
+        }
+        if (pkt->result == Packet::BadAddress) {
+            //Make the response a Bad address and send it
+        }
 //        MemDebug::cacheResponse(pkt);
         DPRINTF(Cache, "Handling response to %x, blk addr: %x\n",
                 pkt->getAddr(), pkt->getAddr() & (((ULL(1))<<48)-1));
@@ -321,9 +319,11 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
             blk = tags->findBlock(pkt);
             CacheBlk::State old_state = (blk) ? blk->status : 0;
             PacketList writebacks;
+            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
+            DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n",
+                    pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state);
             blk = tags->handleFill(blk, (MSHR*)pkt->senderState,
-                                   coherence->getNewState(pkt,old_state),
-                                   writebacks, pkt);
+                                   new_state, writebacks, pkt);
             while (!writebacks.empty()) {
                 missQueue->doWriteback(writebacks.front());
                 writebacks.pop_front();
             }
@@ -394,9 +394,9 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
             //If the outstanding request was an invalidate (upgrade,readex,..)
             //Then we need to ACK the request until we get the data
             //Also NACK if the outstanding request is not a cachefill (writeback)
+            assert(!(pkt->flags & SATISFIED));
             pkt->flags |= SATISFIED;
             pkt->flags |= NACKED_LINE;
-            assert("Don't detect these on the other side yet\n");
             respondToSnoop(pkt, curTick + hitLatency);
             return;
         }
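The NACK path above can be summarized in a few lines; this is a simplified restatement, not new behavior: a snoop that hits an MSHR with a pending invalidate is marked both SATISFIED and NACKED_LINE so the requester retries once the data actually arrives.

    // sketch, assuming the flag constants from packet.hh
    void nackSnoop(Packet *pkt)
    {
        assert(!(pkt->flags & SATISFIED));     // a snoop is satisfied only once
        pkt->flags |= SATISFIED | NACKED_LINE;
        // respondToSnoop(pkt, curTick + hitLatency) then turns this into a
        // timing response carrying Packet::Nacked (see base_cache.cc above).
    }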
@@ -410,7 +410,7 @@
                 //@todo Make it so that a read to a pending read can't be exclusive now.

                 //Set the address so find match works
-                assert("Don't have invalidates yet\n");
+                panic("Don't have invalidates yet\n");
                 invalidatePkt->addrOverride(pkt->getAddr());

                 //Append the invalidate on
@@ -433,6 +433,7 @@
             if (pkt->isRead()) {
                 //Only Upgrades don't get here
                 //Supply the data
+                assert(!(pkt->flags & SATISFIED));
                 pkt->flags |= SATISFIED;

                 //If we are in an exclusive protocol, make it ask again
@@ -451,7 +452,7 @@
                 if (pkt->isInvalidate()) {
                     //This must be an upgrade or other cache will take ownership
-                    missQueue->markInService(mshr->pkt);
+                    missQueue->markInService(mshr->pkt, mshr);
                 }
                 return;
             }
         }
@@ -461,10 +462,16 @@
     CacheBlk::State new_state;
     bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
     if (satisfy) {
+        DPRINTF(Cache, "Cache snooped a %s request and now supplying data, "
+                "new state is %i\n",
+                pkt->cmdString(), new_state);
+
         tags->handleSnoop(blk, new_state, pkt);
         respondToSnoop(pkt, curTick + hitLatency);
         return;
     }
+    if (blk) DPRINTF(Cache, "Cache snooped a %s request, new state is %i\n",
+            pkt->cmdString(), new_state);
     tags->handleSnoop(blk, new_state);
 }

@@ -610,7 +617,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
         // update the cache state and statistics
         if (mshr || !writes.empty()){
             // Can't handle it, return request unsatisfied.
-            return 0;
+            panic("Atomic access ran into outstanding MSHR's or WB's!");
         }
         if (!pkt->req->isUncacheable()) {
             // Fetch the cache block to fill
@@ -627,7 +634,9 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
             lat = memSidePort->sendAtomic(busPkt);

             //Be sure to flip the response to a request for coherence
-            busPkt->makeAtomicResponse();
+            if (busPkt->needsResponse()) {
+                busPkt->makeAtomicResponse();
+            }
 /*
             if (!(busPkt->flags & SATISFIED)) {
                 // blocked at a higher level, just return
@@ -662,7 +671,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
         } else if (pkt->isWrite()) {
             // Still need to change data in all locations.
-            return otherSidePort->sendAtomic(pkt);
+            otherSidePort->sendFunctional(pkt);
         }
         return curTick + lat;
     }
@@ -680,9 +689,15 @@ Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt)
     CacheBlk::State new_state = 0;
     bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
     if (satisfy) {
+        DPRINTF(Cache, "Cache snooped a %s request and now supplying data, "
+                "new state is %i\n",
+                pkt->cmdString(), new_state);
+
         tags->handleSnoop(blk, new_state, pkt);
         return hitLatency;
     }
+    if (blk) DPRINTF(Cache, "Cache snooped a %s request, new state is %i\n",
+            pkt->cmdString(), new_state);
     tags->handleSnoop(blk, new_state);
     return 0;
 }
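One subtle change in probe() above: a write that hits in atomic mode still has to update stale copies on the other side, but doing that with sendAtomic() returned early and, presumably, charged the access twice. The new code propagates the data over the functional path and keeps the latency computed by this cache. A sketch of the intent (simplified; not the full probe() logic):

    // sketch, assumed signatures from the code above
    Tick atomicWriteHit(Packet *&pkt, Port *otherSidePort, Tick lat)
    {
        // keep the other side's copies coherent without re-charging latency
        otherSidePort->sendFunctional(pkt);
        return curTick + lat;   // latency comes from this cache's own lookup
    }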
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index bcf3ce9c5..e28dda3dc 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -271,7 +271,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     }

     Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq;
-    Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp;
+    Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeReq : Packet::ReadExResp;

     //@todo add in hardware prefetch to this list
     if (protocol == "msi") {
diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc
index 7a6ea9133..f7aacff89 100644
--- a/src/mem/cache/miss/blocking_buffer.cc
+++ b/src/mem/cache/miss/blocking_buffer.cc
@@ -123,12 +123,12 @@ BlockingBuffer::restoreOrigCmd(Packet * &pkt)
 }

 void
-BlockingBuffer::markInService(Packet * &pkt)
+BlockingBuffer::markInService(Packet * &pkt, MSHR* mshr)
 {
     if (!pkt->isCacheFill() && pkt->isWrite()) {
         // Forwarding a write/ writeback, don't need to change
         // the command
-        assert((MSHR*)pkt->senderState == &wb);
+        assert(mshr == &wb);
         cache->clearMasterRequest(Request_WB);
         if (!pkt->needsResponse()) {
             assert(wb.getNumTargets() == 0);
@@ -138,7 +138,7 @@ BlockingBuffer::markInService(Packet * &pkt)
             wb.inService = true;
         }
     } else {
-        assert((MSHR*)pkt->senderState == &miss);
+        assert(mshr == &miss);
         cache->clearMasterRequest(Request_MSHR);
         if (!pkt->needsResponse()) {
             assert(miss.getNumTargets() == 0);
diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh
index 641d5a798..f7069696c 100644
--- a/src/mem/cache/miss/blocking_buffer.hh
+++ b/src/mem/cache/miss/blocking_buffer.hh
@@ -152,7 +152,7 @@ public:
      * are successfully sent.
      * @param pkt The request that was sent on the bus.
      */
-    void markInService(Packet * &pkt);
+    void markInService(Packet * &pkt, MSHR* mshr);

     /**
      * Frees the resources of the request and unblocks the cache.
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
index 273b6587f..bdb7a39c8 100644
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -372,7 +372,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time)
 MSHR*
 MissQueue::allocateWrite(Packet * &pkt, int size, Tick time)
 {
-    MSHR* mshr = wb.allocate(pkt,blkSize);
+    MSHR* mshr = wb.allocate(pkt,size);
     mshr->order = order++;

     //REMOVING COMPRESSION FOR NOW
@@ -446,7 +446,7 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time)
         /**
          * @todo Add write merging here.
          */
-        mshr = allocateWrite(pkt, blkSize, time);
+        mshr = allocateWrite(pkt, pkt->getSize(), time);
         return;
     }

@@ -526,9 +526,8 @@ MissQueue::restoreOrigCmd(Packet * &pkt)
 }

 void
-MissQueue::markInService(Packet * &pkt)
+MissQueue::markInService(Packet * &pkt, MSHR* mshr)
 {
-    assert(pkt->senderState != 0);
     bool unblock = false;
     BlockedCause cause = NUM_BLOCKED_CAUSES;

@@ -540,7 +539,7 @@
         // Forwarding a write/ writeback, don't need to change
         // the command
         unblock = wb.isFull();
-        wb.markInService((MSHR*)pkt->senderState);
+        wb.markInService(mshr);
         if (!wb.havePending()){
             cache->clearMasterRequest(Request_WB);
         }
@@ -551,11 +550,11 @@
         }
     } else {
         unblock = mq.isFull();
-        mq.markInService((MSHR*)pkt->senderState);
+        mq.markInService(mshr);
         if (!mq.havePending()){
             cache->clearMasterRequest(Request_MSHR);
         }
-        if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) {
+        if (mshr->originalCmd == Packet::HardPFReq) {
             DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
                     cache->name());
             //Also clear pending if need be
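The allocateWrite() fix above matters most for uncacheable stores: the write buffer used to reserve a full cache block regardless of how many bytes the packet actually carried. A worked example (sizes assumed for illustration):

    #include <cassert>

    int main()
    {
        const int blkSize = 64;        // cache line size
        const int pktSize = 8;         // an uncacheable 8-byte store

        int oldReservation = blkSize;  // wb.allocate(pkt, blkSize)
        int newReservation = pktSize;  // wb.allocate(pkt, pkt->getSize())

        // the old call over-reserved by 56 bytes per store like this one
        assert(oldReservation - newReservation == 56);
        return 0;
    }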
     */
-    void markInService(Packet * &pkt);
+    void markInService(Packet * &pkt, MSHR* mshr);

     /**
      * Collect statistics and free resources of a satisfied request.
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index be9bf5f57..4d57aee75 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -58,10 +58,6 @@ typedef std::list<PacketPtr> PacketList;
 #define NO_ALLOCATE     1 << 5
 #define SNOOP_COMMIT    1 << 6

-//For statistics we need max number of commands, hard code it at
-//20 for now. @todo fix later
-#define NUM_MEM_CMDS    1 << 9
-
 /**
  * A Packet is used to encapsulate a transfer between two objects in
  * the memory system (e.g., the L1 and L2 cache). (In contrast, a
@@ -164,6 +160,8 @@ class Packet
   private:
     /** List of command attributes. */
+    // If you add a new CommandAttribute, make sure to increase NUM_MEM_CMDS
+    // as well.
     enum CommandAttribute
     {
         IsRead          = 1 << 0,
@@ -174,30 +172,37 @@ class Packet
         IsResponse      = 1 << 5,
         NeedsResponse   = 1 << 6,
         IsSWPrefetch    = 1 << 7,
-        IsHWPrefetch    = 1 << 8
+        IsHWPrefetch    = 1 << 8,
+        HasData         = 1 << 9
     };

+//For statistics we need the max number of commands; hard-code it at
+//1 << 10 for now. @todo fix later
+#define NUM_MEM_CMDS    1 << 10
+
   public:
     /** List of all commands associated with a packet. */
     enum Command
     {
         InvalidCmd      = 0,
         ReadReq         = IsRead | IsRequest | NeedsResponse,
-        WriteReq        = IsWrite | IsRequest | NeedsResponse,
-        WriteReqNoAck   = IsWrite | IsRequest,
-        ReadResp        = IsRead | IsResponse | NeedsResponse,
+        WriteReq        = IsWrite | IsRequest | NeedsResponse | HasData,
+        WriteReqNoAck   = IsWrite | IsRequest | HasData,
+        ReadResp        = IsRead | IsResponse | NeedsResponse | HasData,
         WriteResp       = IsWrite | IsResponse | NeedsResponse,
-        Writeback       = IsWrite | IsRequest,
+        Writeback       = IsWrite | IsRequest | HasData,
         SoftPFReq       = IsRead | IsRequest | IsSWPrefetch | NeedsResponse,
         HardPFReq       = IsRead | IsRequest | IsHWPrefetch | NeedsResponse,
-        SoftPFResp      = IsRead | IsResponse | IsSWPrefetch | NeedsResponse,
-        HardPFResp      = IsRead | IsResponse | IsHWPrefetch | NeedsResponse,
+        SoftPFResp      = IsRead | IsResponse | IsSWPrefetch
+                                | NeedsResponse | HasData,
+        HardPFResp      = IsRead | IsResponse | IsHWPrefetch
+                                | NeedsResponse | HasData,
         InvalidateReq   = IsInvalidate | IsRequest,
-        WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest,
-        UpgradeReq      = IsInvalidate | IsRequest | NeedsResponse,
-        UpgradeResp     = IsInvalidate | IsResponse | NeedsResponse,
+        WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest | HasData,
+        UpgradeReq      = IsInvalidate | IsRequest,
         ReadExReq       = IsRead | IsInvalidate | IsRequest | NeedsResponse,
-        ReadExResp      = IsRead | IsInvalidate | IsResponse | NeedsResponse
+        ReadExResp      = IsRead | IsInvalidate | IsResponse
+                                | NeedsResponse | HasData
     };

     /** Return the string name of the cmd field (for debugging and
@@ -219,6 +224,7 @@ class Packet
     bool isResponse()    { return (cmd & IsResponse)    != 0; }
     bool needsResponse() { return (cmd & NeedsResponse) != 0; }
     bool isInvalidate()  { return (cmd & IsInvalidate)  != 0; }
+    bool hasData()       { return (cmd & HasData)       != 0; }

     bool isCacheFill()   { return (flags & CACHE_LINE_FILL) != 0; }
     bool isNoAllocate()  { return (flags & NO_ALLOCATE) != 0; }
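The new HasData attribute lets the bus ask one question (pkt->hasData()) instead of switching on command codes. The bit values for IsRead, IsResponse, NeedsResponse, and HasData below are copied from the enum above; IsWrite and IsRequest are elided in the hunk and their positions are assumed here:

    #include <cassert>
    #include <cstdint>

    enum : uint32_t {
        IsRead        = 1 << 0,
        IsWrite       = 1 << 1,   // assumed position
        IsRequest     = 1 << 4,   // assumed position
        IsResponse    = 1 << 5,
        NeedsResponse = 1 << 6,
        HasData       = 1 << 9,
    };

    enum : uint32_t {
        WriteReq  = IsWrite | IsRequest  | NeedsResponse | HasData,
        WriteResp = IsWrite | IsResponse | NeedsResponse,          // ack only
    };

    int main()
    {
        assert((WriteReq & HasData) != 0);    // carries the store data
        assert((WriteResp & HasData) == 0);   // the ack takes one bus cycle
        return 0;
    }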
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 0580954de..7303f278e 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -110,28 +110,112 @@ PhysicalMemory::calculateLatency(Packet *pkt)
     return lat;
 }

+
+// Add load-locked to tracking list. Should only be called if the
+// operation is a load and the LOCKED flag is set.
+void
+PhysicalMemory::trackLoadLocked(Request *req)
+{
+    Addr paddr = LockedAddr::mask(req->getPaddr());
+
+    // first we check if we already have a locked addr for this
+    // xc. Since each xc only gets one, we just update the
+    // existing record with the new address.
+    list<LockedAddr>::iterator i;
+
+    for (i = lockedAddrList.begin(); i != lockedAddrList.end(); ++i) {
+        if (i->matchesContext(req)) {
+            DPRINTF(LLSC, "Modifying lock record: cpu %d thread %d addr %#x\n",
+                    req->getCpuNum(), req->getThreadNum(), paddr);
+            i->addr = paddr;
+            return;
+        }
+    }
+
+    // no record for this xc: need to allocate a new one
+    DPRINTF(LLSC, "Adding lock record: cpu %d thread %d addr %#x\n",
+            req->getCpuNum(), req->getThreadNum(), paddr);
+    lockedAddrList.push_front(LockedAddr(req));
+}
+
+
+// Called on *writes* only... both regular stores and
+// store-conditional operations. Check for conventional stores which
+// conflict with locked addresses, and for success/failure of store
+// conditionals.
+bool
+PhysicalMemory::checkLockedAddrList(Request *req)
+{
+    Addr paddr = LockedAddr::mask(req->getPaddr());
+    bool isLocked = req->isLocked();
+
+    // Initialize return value. Non-conditional stores always
+    // succeed. Assume conditional stores will fail until proven
+    // otherwise.
+    bool success = !isLocked;
+
+    // Iterate over list. Note that there could be multiple matching
+    // records, as more than one context could have done a load locked
+    // to this location.
+    list<LockedAddr>::iterator i = lockedAddrList.begin();
+
+    while (i != lockedAddrList.end()) {
+
+        if (i->addr == paddr) {
+            // we have a matching address
+
+            if (isLocked && i->matchesContext(req)) {
+                // it's a store conditional, and as far as the memory
+                // system can tell, the requesting context's lock is
+                // still valid.
+                DPRINTF(LLSC, "StCond success: cpu %d thread %d addr %#x\n",
+                        req->getCpuNum(), req->getThreadNum(), paddr);
+                success = true;
+            }
+
+            // Get rid of our record of this lock and advance to next
+            DPRINTF(LLSC, "Erasing lock record: cpu %d thread %d addr %#x\n",
+                    i->cpuNum, i->threadNum, paddr);
+            i = lockedAddrList.erase(i);
+        }
+        else {
+            // no match: advance to next record
+            ++i;
+        }
+    }
+
+    if (isLocked) {
+        req->setScResult(success ? 1 : 0);
+    }
+
+    return success;
+}
+
 void
 PhysicalMemory::doFunctionalAccess(Packet *pkt)
 {
     assert(pkt->getAddr() + pkt->getSize() <= params()->addrRange.size());

-    switch (pkt->cmd) {
-      case Packet::ReadReq:
+    if (pkt->isRead()) {
+        if (pkt->req->isLocked()) {
+            trackLoadLocked(pkt->req);
+        }
         memcpy(pkt->getPtr<uint8_t>(),
                pmemAddr + pkt->getAddr() - params()->addrRange.start,
                pkt->getSize());
-        break;
-      case Packet::WriteReq:
-        memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start,
-               pkt->getPtr<uint8_t>(),
-               pkt->getSize());
-        // temporary hack: will need to add real LL/SC implementation
-        // for cacheless systems later.
-        if (pkt->req->getFlags() & LOCKED) {
-            pkt->req->setScResult(1);
+    }
+    else if (pkt->isWrite()) {
+        if (writeOK(pkt->req)) {
+            memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start,
+                   pkt->getPtr<uint8_t>(), pkt->getSize());
         }
-        break;
-      default:
+    }
+    else if (pkt->isInvalidate()) {
+        //upgrade or invalidate
+        pkt->flags |= SATISFIED;
+    }
+    else {
         panic("unimplemented");
     }

@@ -147,7 +231,7 @@ PhysicalMemory::getPort(const std::string &if_name, int idx)
         port = new MemoryPort(name() + "-port", this);
         return port;
     } else if (if_name == "functional") {
-        /* special port for functional writes at startup. */
+        /* special port for functional writes at startup. And for memtester */
        return new MemoryPort(name() + "-funcport", this);
     } else {
         panic("PhysicalMemory::getPort: unknown port %s requested", if_name);
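The two routines above carry the whole LL/SC policy for physical memory. The rules are easier to see in a self-contained model (simplified here to one thread per CPU, with the 16-byte lock granularity used on Alpha):

    #include <cassert>
    #include <cstdint>
    #include <list>

    typedef uint64_t Addr;
    static Addr mask(Addr a) { return a & ~Addr(0xf); }   // 16-byte blocks

    struct Lock { Addr addr; int cpu; };
    std::list<Lock> locks;

    void loadLocked(int cpu, Addr a)
    {
        for (Lock &l : locks)
            if (l.cpu == cpu) { l.addr = mask(a); return; } // one lock per cpu
        locks.push_front({mask(a), cpu});
    }

    // returns true if the store should actually be performed
    bool store(int cpu, Addr a, bool conditional)
    {
        bool success = !conditional;       // plain stores always proceed
        for (auto i = locks.begin(); i != locks.end(); ) {
            if (i->addr == mask(a)) {
                if (conditional && i->cpu == cpu)
                    success = true;        // our lock still stands: SC succeeds
                i = locks.erase(i);        // any store to the block kills locks
            } else {
                ++i;
            }
        }
        return success;
    }

    int main()
    {
        loadLocked(0, 0x1000);             // cpu 0: LL
        loadLocked(1, 0x1008);             // cpu 1: LL, same 16-byte block
        assert(store(1, 0x1004, false));   // cpu 1 plain store clears both locks
        assert(!store(0, 0x1000, true));   // cpu 0's SC now fails
        loadLocked(0, 0x2000);
        assert(store(0, 0x2000, true));    // an undisturbed SC succeeds
        return 0;
    }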
diff --git a/src/mem/physical.hh b/src/mem/physical.hh
index 02308b2ef..97bea2ec4 100644
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -78,6 +78,68 @@ class PhysicalMemory : public MemObject
     const PhysicalMemory &operator=(const PhysicalMemory &specmem);

   protected:
+
+    class LockedAddr {
+      public:
+        // on alpha, minimum LL/SC granularity is 16 bytes, so lower
+        // bits need to be masked off.
+        static const Addr Addr_Mask = 0xf;
+
+        static Addr mask(Addr paddr) { return (paddr & ~Addr_Mask); }
+
+        Addr addr;      // locked address
+        int cpuNum;     // locking CPU
+        int threadNum;  // locking thread ID within CPU
+
+        // check for matching execution context
+        bool matchesContext(Request *req)
+        {
+            return (cpuNum == req->getCpuNum() &&
+                    threadNum == req->getThreadNum());
+        }
+
+        LockedAddr(Request *req)
+            : addr(mask(req->getPaddr())),
+              cpuNum(req->getCpuNum()),
+              threadNum(req->getThreadNum())
+        {
+        }
+    };
+
+    std::list<LockedAddr> lockedAddrList;
+
+    // helper function for writeOK(): we really want to inline a quick
+    // check for an empty locked addr list (hopefully the common case),
+    // and do the full list search (if necessary) in this out-of-line
+    // function
+    bool checkLockedAddrList(Request *req);
+
+    // Record the address of a load-locked operation so that we can
+    // clear the execution context's lock flag if a matching store is
+    // performed
+    void trackLoadLocked(Request *req);
+
+    // Compare a store address with any locked addresses so we can
+    // clear the lock flag appropriately. Return value set to 'false'
+    // if store operation should be suppressed (because it was a
+    // conditional store and the address was no longer locked by the
+    // requesting execution context), 'true' otherwise. Note that
+    // this method must be called on *all* stores since even
+    // non-conditional stores must clear any matching lock addresses.
+    bool writeOK(Request *req) {
+        if (lockedAddrList.empty()) {
+            // no locked addrs: nothing to check, store_conditional fails
+            bool isLocked = req->isLocked();
+            if (isLocked) {
+                req->setScResult(0);
+            }
+            return !isLocked; // only do write if not an sc
+        } else {
+            // iterate over list...
+            return checkLockedAddrList(req);
+        }
+    }
+
     uint8_t *pmemAddr;
     MemoryPort *port;
     int pagePtr;
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 6b4184043..bb3bc1b1b 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -106,8 +106,7 @@ class Port
     /** Holds the ports status. Currently just that a range recomputation
      * needs to be done. */
     enum Status {
-        RangeChange,
-        SnoopSquash
+        RangeChange
     };

     void setName(const std::string &name)
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 6acd7526c..e54984fcd 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -232,9 +232,11 @@ class Request
     Addr getPC() { assert(validPC); return pc; }

     /** Accessor Function to Check Cacheability. */
-    bool isUncacheable() { return getFlags() & UNCACHEABLE; }
+    bool isUncacheable() { return (getFlags() & UNCACHEABLE) != 0; }

-    bool isInstRead() { return getFlags() & INST_READ; }
+    bool isInstRead() { return (getFlags() & INST_READ) != 0; }
+
+    bool isLocked() { return (getFlags() & LOCKED) != 0; }

     friend class Packet;
 };
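The Request accessors now compare against zero explicitly. One plausible motivation (an assumption; the change itself doesn't say): if the flags word is ever wider than the implicit conversion target, narrowing can silently drop a high flag bit, whereas testing against zero first always preserves the answer.

    #include <cstdint>

    static const uint64_t HIGH_FLAG = uint64_t(1) << 40;  // hypothetical flag

    // narrowing through int drops bit 40 on typical platforms
    bool viaInt(uint64_t flags)  { return int(flags & HIGH_FLAG); }
    bool viaTest(uint64_t flags) { return (flags & HIGH_FLAG) != 0; }

    int main()
    {
        // viaTest(HIGH_FLAG) is true; viaInt(HIGH_FLAG) is false
        return (viaTest(HIGH_FLAG) && !viaInt(HIGH_FLAG)) ? 0 : 1;
    }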
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 55c301c87..cef7a2a5b 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -47,9 +47,11 @@ SimpleTimingPort::recvTiming(Packet *pkt)
     // if we ever added it back.
     assert(pkt->result != Packet::Nacked);
     Tick latency = recvAtomic(pkt);
-    // turn packet around to go back to requester
-    pkt->makeTimingResponse();
-    sendTimingLater(pkt, latency);
+    // turn packet around to go back to requester if response expected
+    if (pkt->needsResponse()) {
+        pkt->makeTimingResponse();
+        sendTimingLater(pkt, latency);
+    }
     return true;
 }

diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py
index 0b887cceb..b6dc08e46 100644
--- a/src/python/m5/objects/BaseCPU.py
+++ b/src/python/m5/objects/BaseCPU.py
@@ -11,10 +11,11 @@ class BaseCPU(SimObject):
     mem = Param.MemObject("memory")
     system = Param.System(Parent.any, "system object")

+    cpu_id = Param.Int("CPU identifier")
+
     if build_env['FULL_SYSTEM']:
         dtb = Param.AlphaDTB(AlphaDTB(), "Data TLB")
         itb = Param.AlphaITB(AlphaITB(), "Instruction TLB")
-        cpu_id = Param.Int(-1, "CPU identifier")
     else:
         workload = VectorParam.Process("processes to run")

diff --git a/src/python/m5/objects/Bus.py b/src/python/m5/objects/Bus.py
index f6828a0d5..6710111e5 100644
--- a/src/python/m5/objects/Bus.py
+++ b/src/python/m5/objects/Bus.py
@@ -6,3 +6,5 @@ class Bus(MemObject):
     port = VectorPort("vector port for connecting devices")
     default = Port("Default port for requests that aren't handled by a device.")
     bus_id = Param.Int(0, "blah")
+    clock = Param.Clock("1GHz", "bus clock speed")
+    width = Param.Int(64, "bus width (bytes)")
diff --git a/src/python/m5/objects/MemTest.py b/src/python/m5/objects/MemTest.py
index 97600768f..18aff03f4 100644
--- a/src/python/m5/objects/MemTest.py
+++ b/src/python/m5/objects/MemTest.py
@@ -1,13 +1,12 @@
 from m5.SimObject import SimObject
 from m5.params import *
+from m5.proxy import *
+from m5 import build_env
+
 class MemTest(SimObject):
     type = 'MemTest'
-    cache = Param.BaseCache("L1 cache")
-    check_mem = Param.FunctionalMemory("check memory")
-    main_mem = Param.FunctionalMemory("hierarchical memory")
     max_loads = Param.Counter("number of loads to execute")
     memory_size = Param.Int(65536, "memory size")
-    percent_copies = Param.Percent(0, "target copy percentage")
     percent_dest_unaligned = Param.Percent(50, "percent of copy dest address that are unaligned")
     percent_reads = Param.Percent(65, "target read percentage")
@@ -18,3 +17,6 @@ class MemTest(SimObject):
     progress_interval = Param.Counter(1000000, "progress report interval (in accesses)")
     trace_addr = Param.Addr(0, "address to trace")
+
+    test = Port("Port to the memory system to test")
+    functional = Port("Port to the functional memory used for verification")
diff --git a/src/python/m5/objects/PhysicalMemory.py b/src/python/m5/objects/PhysicalMemory.py
index dd3ffd651..4e097543d 100644
--- a/src/python/m5/objects/PhysicalMemory.py
+++ b/src/python/m5/objects/PhysicalMemory.py
@@ -5,6 +5,7 @@ from MemObject import *
 class PhysicalMemory(MemObject):
     type = 'PhysicalMemory'
     port = Port("the access port")
+    functional = Port("Functional Access Port")
     range = Param.AddrRange(AddrRange('128MB'), "Device Address")
     file = Param.String('', "memory mapped file")
     latency = Param.Latency(Parent.clock, "latency of an access")
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index cbbd23004..93d784181 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -804,7 +804,7 @@ class PortRef(object):
         newRef.simobj = simobj
         assert(isSimObject(newRef.simobj))
         if self.peer and not proxy.isproxy(self.peer):
-            peerObj = memo[self.peer.simobj]
+            peerObj = self.peer.simobj(_memo=memo)
             newRef.peer = self.peer.clone(peerObj, memo)
             assert(not isinstance(newRef.peer, VectorPortRef))
         return newRef
diff --git a/src/python/m5/proxy.py b/src/python/m5/proxy.py
index 7ebc0ae19..e539f14ee 100644
--- a/src/python/m5/proxy.py
+++ b/src/python/m5/proxy.py
@@ -33,6 +33,8 @@
 #
 #####################################################################

+import copy
+
 class BaseProxy(object):
     def __init__(self, search_self, search_up):
         self._search_self = search_self
@@ -129,15 +131,22 @@ class AttrProxy(BaseProxy):
             return super(AttrProxy, self).__getattr__(self, attr)
         if hasattr(self, '_pdesc'):
             raise AttributeError, "Attribute reference on bound proxy"
-        self._modifiers.append(attr)
-        return self
+        # Return a copy of self rather than modifying self in place
+        # since self could be an indirect reference via a variable or
+        # parameter
+        new_self = copy.deepcopy(self)
+        new_self._modifiers.append(attr)
+        return new_self

     # support indexing on proxies (e.g., Self.cpu[0])
     def __getitem__(self, key):
         if not isinstance(key, int):
             raise TypeError, "Proxy object requires integer index"
-        self._modifiers.append(key)
-        return self
+        if hasattr(self, '_pdesc'):
+            raise AttributeError, "Index operation on bound proxy"
+        new_self = copy.deepcopy(self)
+        new_self._modifiers.append(key)
+        return new_self

     def find(self, obj):
         try: