diff options
author | Gabe Black <gblack@eecs.umich.edu> | 2006-10-12 10:58:45 -0400 |
---|---|---|
committer | Gabe Black <gblack@eecs.umich.edu> | 2006-10-12 10:58:45 -0400 |
commit | 866cfaf9dc596d8547e14bc2133fb962776572a7 (patch) | |
tree | 19b82a8021533e8bc2e35f14fb0b6a0440756814 /src/cpu | |
parent | 6a31898a88a9ecced399ccf50636831c21d4a75e (diff) | |
parent | 78aec04b660544ea7af80d76912b4422c4426602 (diff) | |
download | gem5-866cfaf9dc596d8547e14bc2133fb962776572a7.tar.xz |
Merge zizzer.eecs.umich.edu:/bk/newmem
into zeep.eecs.umich.edu:/home/gblack/m5/newmem
--HG--
extra : convert_revision : 30b2475ba034550376455e1bc0e52e19a200fd5a
Diffstat (limited to 'src/cpu')
34 files changed, 657 insertions, 360 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 2bb9a2399..5771a7904 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -158,6 +158,7 @@ if 'O3CPU' in env['CPU_MODELS']: o3/scoreboard.cc o3/store_set.cc ''') + sources += Split('memtest/memtest.cc') if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') else: diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 513dd7c55..ea4b03bf2 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -41,6 +41,7 @@ #include "cpu/cpuevent.hh" #include "cpu/thread_context.hh" #include "cpu/profile.hh" +#include "sim/sim_exit.hh" #include "sim/param.hh" #include "sim/process.hh" #include "sim/sim_events.hh" @@ -125,8 +126,9 @@ BaseCPU::BaseCPU(Params *p) // if (p->max_insts_any_thread != 0) for (int i = 0; i < number_of_threads; ++i) - new SimLoopExitEvent(comInstEventQueue[i], p->max_insts_any_thread, - "a thread reached the max instruction count"); + schedExitSimLoop("a thread reached the max instruction count", + p->max_insts_any_thread, 0, + comInstEventQueue[i]); if (p->max_insts_all_threads != 0) { // allocate & initialize shared downcounter: each event will @@ -150,8 +152,9 @@ BaseCPU::BaseCPU(Params *p) // if (p->max_loads_any_thread != 0) for (int i = 0; i < number_of_threads; ++i) - new SimLoopExitEvent(comLoadEventQueue[i], p->max_loads_any_thread, - "a thread reached the max load count"); + schedExitSimLoop("a thread reached the max load count", + p->max_loads_any_thread, 0, + comLoadEventQueue[i]); if (p->max_loads_all_threads != 0) { // allocate & initialize shared downcounter: each event will diff --git a/src/cpu/base.hh b/src/cpu/base.hh index e02527371..75e0d86af 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -140,8 +140,8 @@ class BaseCPU : public MemObject bool functionTrace; Tick functionTraceStart; System *system; -#if FULL_SYSTEM int cpu_id; +#if FULL_SYSTEM Tick profile; #endif Tick progress_interval; diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index f2109e88d..d6cdff5c5 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -193,7 +193,7 @@ BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags) // note this is a local, not BaseDynInst::fault Fault trans_fault = cpu->translateDataReadReq(req); - if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) { + if (trans_fault == NoFault && !(req->isUncacheable())) { // It's a valid address to cacheable space. Record key MemReq // parameters so we can generate another one just like it for // the timing access without calling translate() again (which diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index 1540a6b94..f6d56eef6 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -175,7 +175,7 @@ CheckerCPU::read(Addr addr, T &data, unsigned flags) pkt->dataStatic(&data); - if (!(memReq->getFlags() & UNCACHEABLE)) { + if (!(memReq->isUncacheable())) { // Access memory to see if we have the same data dcachePort->sendFunctional(pkt); } else { @@ -251,9 +251,9 @@ CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) // This is because the LSQ would have to be snooped in the CPU to // verify this data. if (unverifiedReq && - !(unverifiedReq->getFlags() & UNCACHEABLE) && - (!(unverifiedReq->getFlags() & LOCKED) || - ((unverifiedReq->getFlags() & LOCKED) && + !(unverifiedReq->isUncacheable()) && + (!(unverifiedReq->isLocked()) || + ((unverifiedReq->isLocked()) && unverifiedReq->getScResult() == 1))) { T inst_data; /* diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index 8c0186dae..b2806d40b 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -133,7 +133,7 @@ class CheckerThreadContext : public ThreadContext void takeOverFrom(ThreadContext *oldContext) { actualTC->takeOverFrom(oldContext); - checkerTC->takeOverFrom(oldContext); + checkerTC->copyState(oldContext); } void regStats(const std::string &name) { actualTC->regStats(name); } diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 7ea9eaefc..f42f0f8e2 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -38,86 +38,158 @@ #include "base/misc.hh" #include "base/statistics.hh" -#include "cpu/simple_thread.hh" +//#include "cpu/simple_thread.hh" #include "cpu/memtest/memtest.hh" -#include "mem/cache/base_cache.hh" +//#include "mem/cache/base_cache.hh" +//#include "mem/physical.hh" #include "sim/builder.hh" #include "sim/sim_events.hh" #include "sim/stats.hh" +#include "mem/packet.hh" +#include "mem/request.hh" +#include "mem/port.hh" +#include "mem/mem_object.hh" using namespace std; -using namespace TheISA; int TESTER_ALLOCATOR=0; +bool +MemTest::CpuPort::recvTiming(Packet *pkt) +{ + memtest->completeRequest(pkt); + return true; +} + +Tick +MemTest::CpuPort::recvAtomic(Packet *pkt) +{ + panic("MemTest doesn't expect recvAtomic callback!"); + return curTick; +} + +void +MemTest::CpuPort::recvFunctional(Packet *pkt) +{ + //Do nothing if we see one come through + if (curTick != 0)//Supress warning durring initialization + warn("Functional Writes not implemented in MemTester\n"); + //Need to find any response values that intersect and update + return; +} + +void +MemTest::CpuPort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("MemTest doesn't expect recvStatusChange callback!"); +} + +void +MemTest::CpuPort::recvRetry() +{ + memtest->doRetry(); +} + +void +MemTest::sendPkt(Packet *pkt) { + if (atomic) { + cachePort.sendAtomic(pkt); + pkt->makeAtomicResponse(); + completeRequest(pkt); + } + else if (!cachePort.sendTiming(pkt)) { + accessRetry = true; + retryPkt = pkt; + } + +} + MemTest::MemTest(const string &name, - MemInterface *_cache_interface, - FunctionalMemory *main_mem, - FunctionalMemory *check_mem, +// MemInterface *_cache_interface, +// PhysicalMemory *main_mem, +// PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, - unsigned _percentCopies, +// unsigned _percentCopies, unsigned _percentUncacheable, unsigned _progressInterval, unsigned _percentSourceUnaligned, unsigned _percentDestUnaligned, Addr _traceAddr, - Counter _max_loads) - : SimObject(name), + Counter _max_loads, + bool _atomic) + : MemObject(name), tickEvent(this), - cacheInterface(_cache_interface), - mainMem(main_mem), - checkMem(check_mem), + cachePort("test", this), + funcPort("functional", this), + retryPkt(NULL), +// mainMem(main_mem), +// checkMem(check_mem), size(_memorySize), percentReads(_percentReads), - percentCopies(_percentCopies), +// percentCopies(_percentCopies), percentUncacheable(_percentUncacheable), progressInterval(_progressInterval), nextProgressMessage(_progressInterval), percentSourceUnaligned(_percentSourceUnaligned), percentDestUnaligned(percentDestUnaligned), - maxLoads(_max_loads) + maxLoads(_max_loads), + atomic(_atomic) { vector<string> cmd; cmd.push_back("/bin/ls"); vector<string> null_vec; - thread = new SimpleThread(NULL, 0, mainMem, 0); - - blockSize = cacheInterface->getBlockSize(); - blockAddrMask = blockSize - 1; - traceBlockAddr = blockAddr(_traceAddr); - - //setup data storage with interesting values - uint8_t *data1 = new uint8_t[size]; - uint8_t *data2 = new uint8_t[size]; - uint8_t *data3 = new uint8_t[size]; - memset(data1, 1, size); - memset(data2, 2, size); - memset(data3, 3, size); + // thread = new SimpleThread(NULL, 0, NULL, 0, mainMem); curTick = 0; + // Needs to be masked off once we know the block size. + traceBlockAddr = _traceAddr; baseAddr1 = 0x100000; baseAddr2 = 0x400000; uncacheAddr = 0x800000; - // set up intial memory contents here - mainMem->prot_write(baseAddr1, data1, size); - checkMem->prot_write(baseAddr1, data1, size); - mainMem->prot_write(baseAddr2, data2, size); - checkMem->prot_write(baseAddr2, data2, size); - mainMem->prot_write(uncacheAddr, data3, size); - checkMem->prot_write(uncacheAddr, data3, size); - - delete [] data1; - delete [] data2; - delete [] data3; - // set up counters noResponseCycles = 0; numReads = 0; tickEvent.schedule(0); id = TESTER_ALLOCATOR++; + if (TESTER_ALLOCATOR > 8) + panic("False sharing memtester only allows up to 8 testers"); + + accessRetry = false; +} + +Port * +MemTest::getPort(const std::string &if_name, int idx) +{ + if (if_name == "functional") + return &funcPort; + else if (if_name == "test") + return &cachePort; + else + panic("No Such Port\n"); +} + +void +MemTest::init() +{ + // By the time init() is called, the ports should be hooked up. + blockSize = cachePort.peerBlockSize(); + blockAddrMask = blockSize - 1; + traceBlockAddr = blockAddr(traceBlockAddr); + + // set up intial memory contents here + + cachePort.memsetBlob(baseAddr1, 1, size); + funcPort.memsetBlob(baseAddr1, 1, size); + cachePort.memsetBlob(baseAddr2, 2, size); + funcPort.memsetBlob(baseAddr2, 2, size); + cachePort.memsetBlob(uncacheAddr, 3, size); + funcPort.memsetBlob(uncacheAddr, 3, size); } static void @@ -132,23 +204,31 @@ printData(ostream &os, uint8_t *data, int nbytes) } void -MemTest::completeRequest(MemReqPtr &req, uint8_t *data) +MemTest::completeRequest(Packet *pkt) { + MemTestSenderState *state = + dynamic_cast<MemTestSenderState *>(pkt->senderState); + + uint8_t *data = state->data; + uint8_t *pkt_data = pkt->getPtr<uint8_t>(); + Request *req = pkt->req; + //Remove the address from the list of outstanding - std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->paddr); + std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->getPaddr()); assert(removeAddr != outstandingAddrs.end()); outstandingAddrs.erase(removeAddr); - switch (req->cmd) { - case Read: - if (memcmp(req->data, data, req->size) != 0) { - cerr << name() << ": on read of 0x" << hex << req->paddr - << " (0x" << hex << blockAddr(req->paddr) << ")" + switch (pkt->cmd) { + case Packet::ReadResp: + + if (memcmp(pkt_data, data, pkt->getSize()) != 0) { + cerr << name() << ": on read of 0x" << hex << req->getPaddr() + << " (0x" << hex << blockAddr(req->getPaddr()) << ")" << "@ cycle " << dec << curTick << ", cache returns 0x"; - printData(cerr, req->data, req->size); + printData(cerr, pkt_data, pkt->getSize()); cerr << ", expected 0x"; - printData(cerr, data, req->size); + printData(cerr, data, pkt->getSize()); cerr << endl; fatal(""); } @@ -163,13 +243,13 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data) } if (numReads >= maxLoads) - SimExit(curTick, "Maximum number of loads reached!"); + exitSimLoop("Maximum number of loads reached!"); break; - case Write: + case Packet::WriteResp: numWritesStat++; break; - +/* case Copy: //Also remove dest from outstanding list removeAddr = outstandingAddrs.find(req->dest); @@ -177,36 +257,37 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data) outstandingAddrs.erase(removeAddr); numCopiesStat++; break; - +*/ default: panic("invalid command"); } - if (blockAddr(req->paddr) == traceBlockAddr) { + if (blockAddr(req->getPaddr()) == traceBlockAddr) { cerr << name() << ": completed " - << (req->cmd.isWrite() ? "write" : "read") + << (pkt->isWrite() ? "write" : "read") << " access of " - << dec << req->size << " bytes at address 0x" - << hex << req->paddr - << " (0x" << hex << blockAddr(req->paddr) << ")" + << dec << pkt->getSize() << " bytes at address 0x" + << hex << req->getPaddr() + << " (0x" << hex << blockAddr(req->getPaddr()) << ")" << ", value = 0x"; - printData(cerr, req->data, req->size); + printData(cerr, pkt_data, pkt->getSize()); cerr << " @ cycle " << dec << curTick; cerr << endl; } noResponseCycles = 0; + delete state; delete [] data; + delete pkt->req; + delete pkt; } - void MemTest::regStats() { using namespace Stats; - numReadsStat .name(name() + ".num_reads") .desc("number of read accesses completed") @@ -234,7 +315,7 @@ MemTest::tick() fatal(""); } - if (cacheInterface->isBlocked()) { + if (accessRetry) { return; } @@ -248,30 +329,30 @@ MemTest::tick() //If we aren't doing copies, use id as offset, and do a false sharing //mem tester - if (percentCopies == 0) { - //We can eliminate the lower bits of the offset, and then use the id - //to offset within the blks - offset &= ~63; //Not the low order bits - offset += id; - access_size = 0; - } + //We can eliminate the lower bits of the offset, and then use the id + //to offset within the blks + offset &= ~63; //Not the low order bits + offset += id; + access_size = 0; - MemReqPtr req = new MemReq(); + Request *req = new Request(); + uint32_t flags = 0; + Addr paddr; if (cacheable < percentUncacheable) { - req->flags |= UNCACHEABLE; - req->paddr = uncacheAddr + offset; + flags |= UNCACHEABLE; + paddr = uncacheAddr + offset; } else { - req->paddr = ((base) ? baseAddr1 : baseAddr2) + offset; + paddr = ((base) ? baseAddr1 : baseAddr2) + offset; } - // bool probe = (random() % 2 == 1) && !req->isUncacheable(); + //bool probe = (random() % 2 == 1) && !req->isUncacheable(); bool probe = false; - req->size = 1 << access_size; - req->data = new uint8_t[req->size]; - req->paddr &= ~(req->size - 1); - req->time = curTick; - req->xc = thread->getProxy(); + paddr &= ~((1 << access_size) - 1); + req->setPhys(paddr, 1 << access_size, flags); + req->setThreadContext(id,0); + + uint8_t *result = new uint8_t[8]; if (cmd < percentReads) { // read @@ -279,60 +360,75 @@ MemTest::tick() //For now we only allow one outstanding request per addreess per tester //This means we assume CPU does write forwarding to reads that alias something //in the cpu store buffer. - if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; - else outstandingAddrs.insert(req->paddr); + if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(paddr); + + // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin + funcPort.readBlob(req->getPaddr(), result, req->getSize()); - req->cmd = Read; - uint8_t *result = new uint8_t[8]; - checkMem->access(Read, req->paddr, result, req->size); - if (blockAddr(req->paddr) == traceBlockAddr) { + if (blockAddr(paddr) == traceBlockAddr) { cerr << name() << ": initiating read " << ((probe) ? "probe of " : "access of ") - << dec << req->size << " bytes from addr 0x" - << hex << req->paddr - << " (0x" << hex << blockAddr(req->paddr) << ")" + << dec << req->getSize() << " bytes from addr 0x" + << hex << paddr + << " (0x" << hex << blockAddr(paddr) << ")" << " at cycle " << dec << curTick << endl; } + + Packet *pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + pkt->dataDynamicArray(new uint8_t[req->getSize()]); + MemTestSenderState *state = new MemTestSenderState(result); + pkt->senderState = state; + if (probe) { - cacheInterface->probeAndUpdate(req); - completeRequest(req, result); + cachePort.sendFunctional(pkt); + completeRequest(pkt); } else { - req->completionEvent = new MemCompleteEvent(req, result, this); - cacheInterface->access(req); +// req->completionEvent = new MemCompleteEvent(req, result, this); + sendPkt(pkt); } - } else if (cmd < (100 - percentCopies)){ + } else { // write //For now we only allow one outstanding request per addreess per tester //This means we assume CPU does write forwarding to reads that alias something //in the cpu store buffer. - if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return; - else outstandingAddrs.insert(req->paddr); + if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return; + else outstandingAddrs.insert(paddr); - req->cmd = Write; - memcpy(req->data, &data, req->size); - checkMem->access(Write, req->paddr, req->data, req->size); - if (blockAddr(req->paddr) == traceBlockAddr) { +/* + if (blockAddr(req->getPaddr()) == traceBlockAddr) { cerr << name() << ": initiating write " << ((probe)?"probe of ":"access of ") - << dec << req->size << " bytes (value = 0x"; - printData(cerr, req->data, req->size); + << dec << req->getSize() << " bytes (value = 0x"; + printData(cerr, data_pkt->getPtr(), req->getSize()); cerr << ") to addr 0x" - << hex << req->paddr - << " (0x" << hex << blockAddr(req->paddr) << ")" + << hex << req->getPaddr() + << " (0x" << hex << blockAddr(req->getPaddr()) << ")" << " at cycle " << dec << curTick << endl; } +*/ + Packet *pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + uint8_t *pkt_data = new uint8_t[req->getSize()]; + pkt->dataDynamicArray(pkt_data); + memcpy(pkt_data, &data, req->getSize()); + MemTestSenderState *state = new MemTestSenderState(result); + pkt->senderState = state; + + funcPort.writeBlob(req->getPaddr(), pkt_data, req->getSize()); + if (probe) { - cacheInterface->probeAndUpdate(req); - completeRequest(req, NULL); + cachePort.sendFunctional(pkt); + completeRequest(pkt); } else { - req->completionEvent = new MemCompleteEvent(req, NULL, this); - cacheInterface->access(req); +// req->completionEvent = new MemCompleteEvent(req, NULL, this); + sendPkt(pkt); } - } else { + } +/* else { // copy unsigned source_align = random() % 100; unsigned dest_align = random() % 100; @@ -369,56 +465,51 @@ MemTest::tick() << " (0x" << hex << blockAddr(dest) << ")" << " at cycle " << dec << curTick << endl; - } + }* cacheInterface->access(req); uint8_t result[blockSize]; checkMem->access(Read, source, &result, blockSize); checkMem->access(Write, dest, &result, blockSize); } +*/ } - void -MemCompleteEvent::process() -{ - tester->completeRequest(req, data); - delete this; -} - - -const char * -MemCompleteEvent::description() +MemTest::doRetry() { - return "memory access completion"; + if (cachePort.sendTiming(retryPkt)) { + accessRetry = false; + retryPkt = NULL; + } } - BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest) - SimObjectParam<BaseCache *> cache; - SimObjectParam<FunctionalMemory *> main_mem; - SimObjectParam<FunctionalMemory *> check_mem; +// SimObjectParam<BaseCache *> cache; +// SimObjectParam<PhysicalMemory *> main_mem; +// SimObjectParam<PhysicalMemory *> check_mem; Param<unsigned> memory_size; Param<unsigned> percent_reads; - Param<unsigned> percent_copies; +// Param<unsigned> percent_copies; Param<unsigned> percent_uncacheable; Param<unsigned> progress_interval; Param<unsigned> percent_source_unaligned; Param<unsigned> percent_dest_unaligned; Param<Addr> trace_addr; Param<Counter> max_loads; + Param<bool> atomic; END_DECLARE_SIM_OBJECT_PARAMS(MemTest) BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) - INIT_PARAM(cache, "L1 cache"), - INIT_PARAM(main_mem, "hierarchical memory"), - INIT_PARAM(check_mem, "check memory"), +// INIT_PARAM(cache, "L1 cache"), +// INIT_PARAM(main_mem, "hierarchical memory"), +// INIT_PARAM(check_mem, "check memory"), INIT_PARAM(memory_size, "memory size"), INIT_PARAM(percent_reads, "target read percentage"), - INIT_PARAM(percent_copies, "target copy percentage"), +// INIT_PARAM(percent_copies, "target copy percentage"), INIT_PARAM(percent_uncacheable, "target uncacheable percentage"), INIT_PARAM(progress_interval, "progress report interval (in accesses)"), INIT_PARAM(percent_source_unaligned, @@ -426,18 +517,19 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest) INIT_PARAM(percent_dest_unaligned, "percent of copy dest address that are unaligned"), INIT_PARAM(trace_addr, "address to trace"), - INIT_PARAM(max_loads, "terminate when we have reached this load count") + INIT_PARAM(max_loads, "terminate when we have reached this load count"), + INIT_PARAM(atomic, "Is the tester testing atomic mode (or timing)") END_INIT_SIM_OBJECT_PARAMS(MemTest) CREATE_SIM_OBJECT(MemTest) { - return new MemTest(getInstanceName(), cache->getInterface(), main_mem, - check_mem, memory_size, percent_reads, percent_copies, + return new MemTest(getInstanceName(), /*cache->getInterface(),*/ /*main_mem,*/ + /*check_mem,*/ memory_size, percent_reads, /*percent_copies,*/ percent_uncacheable, progress_interval, percent_source_unaligned, percent_dest_unaligned, - trace_addr, max_loads); + trace_addr, max_loads, atomic); } REGISTER_SIM_OBJECT("MemTest", MemTest) diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 42fb235db..5de41f0d8 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -35,31 +35,36 @@ #include <set> #include "base/statistics.hh" -#include "mem/functional/functional.hh" -#include "mem/mem_interface.hh" +//#include "mem/functional/functional.hh" +//#include "mem/mem_interface.hh" #include "sim/eventq.hh" #include "sim/sim_exit.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" +#include "mem/mem_object.hh" +#include "mem/port.hh" -class ThreadContext; -class MemTest : public SimObject +class Packet; +class MemTest : public MemObject { public: MemTest(const std::string &name, - MemInterface *_cache_interface, - FunctionalMemory *main_mem, - FunctionalMemory *check_mem, +// MemInterface *_cache_interface, +// PhysicalMemory *main_mem, +// PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, - unsigned _percentCopies, +// unsigned _percentCopies, unsigned _percentUncacheable, unsigned _progressInterval, unsigned _percentSourceUnaligned, unsigned _percentDestUnaligned, Addr _traceAddr, - Counter _max_loads); + Counter _max_loads, + bool _atomic); + + virtual void init(); // register statistics virtual void regStats(); @@ -69,6 +74,8 @@ class MemTest : public SimObject // main simulation loop (one cycle) void tick(); + virtual Port *getPort(const std::string &if_name, int idx = -1); + protected: class TickEvent : public Event { @@ -82,16 +89,62 @@ class MemTest : public SimObject }; TickEvent tickEvent; + class CpuPort : public Port + { + + MemTest *memtest; + + public: + + CpuPort(const std::string &_name, MemTest *_memtest) + : Port(_name), memtest(_memtest) + { } + + protected: + + virtual bool recvTiming(Packet *pkt); - MemInterface *cacheInterface; - FunctionalMemory *mainMem; - FunctionalMemory *checkMem; - SimpleThread *thread; + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual void recvRetry(); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } + }; + + CpuPort cachePort; + CpuPort funcPort; + + class MemTestSenderState : public Packet::SenderState + { + public: + /** Constructor. */ + MemTestSenderState(uint8_t *_data) + : data(_data) + { } + + // Hold onto data pointer + uint8_t *data; + }; + +// Request *dataReq; + Packet *retryPkt; +// MemInterface *cacheInterface; +// PhysicalMemory *mainMem; +// PhysicalMemory *checkMem; +// SimpleThread *thread; + + bool accessRetry; unsigned size; // size of testing memory region unsigned percentReads; // target percentage of read accesses - unsigned percentCopies; // target percentage of copy accesses +// unsigned percentCopies; // target percentage of copy accesses unsigned percentUncacheable; int id; @@ -123,34 +176,21 @@ class MemTest : public SimObject uint64_t numReads; uint64_t maxLoads; + + bool atomic; + Stats::Scalar<> numReadsStat; Stats::Scalar<> numWritesStat; Stats::Scalar<> numCopiesStat; // called by MemCompleteEvent::process() - void completeRequest(MemReqPtr &req, uint8_t *data); + void completeRequest(Packet *pkt); - friend class MemCompleteEvent; -}; + void sendPkt(Packet *pkt); + void doRetry(); -class MemCompleteEvent : public Event -{ - MemReqPtr req; - uint8_t *data; - MemTest *tester; - - public: - - MemCompleteEvent(MemReqPtr &_req, uint8_t *_data, MemTest *_tester) - : Event(&mainEventQueue), - req(_req), data(_data), tester(_tester) - { - } - - void process(); - - virtual const char *description(); + friend class MemCompleteEvent; }; #endif // __CPU_MEMTEST_MEMTEST_HH__ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index c80e4d8c1..ecf6ed632 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -342,12 +342,6 @@ DefaultCommit<Impl>::drain() { drainPending = true; - // If it's already drained, return true. - if (rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalDrained(); - return true; - } - return false; } @@ -1218,16 +1212,16 @@ DefaultCommit<Impl>::skidInsert() for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) { DynInstPtr inst = fromRename->insts[inst_num]; - int tid = inst->threadNumber; if (!inst->isSquashed()) { DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ", - "skidBuffer.\n", inst->readPC(), inst->seqNum, tid); + "skidBuffer.\n", inst->readPC(), inst->seqNum, + inst->threadNumber); skidBuffer.push(inst); } else { DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " "squashed, skipping.\n", - inst->readPC(), inst->seqNum, tid); + inst->readPC(), inst->seqNum, inst->threadNumber); } } } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 7386dfadd..4c9a8e91f 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -88,7 +88,7 @@ FullO3CPU<Impl>::TickEvent::description() template <class Impl> FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent() - : Event(&mainEventQueue, CPU_Tick_Pri) + : Event(&mainEventQueue, CPU_Switch_Pri) { } @@ -135,7 +135,8 @@ void FullO3CPU<Impl>::DeallocateContextEvent::process() { cpu->deactivateThread(tid); - cpu->removeThread(tid); + if (remove) + cpu->removeThread(tid); } template <class Impl> @@ -191,7 +192,11 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) deferRegistration(params->deferRegistration), numThreads(number_of_threads) { - _status = Idle; + if (!deferRegistration) { + _status = Running; + } else { + _status = Idle; + } checker = NULL; @@ -304,6 +309,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params) tid, bindRegs); + + activateThreadEvent[tid].init(tid, this); + deallocateContextEvent[tid].init(tid, this); } rename.setRenameMap(renameMap); @@ -447,13 +455,16 @@ FullO3CPU<Impl>::tick() if (!tickEvent.scheduled()) { if (_status == SwitchedOut || getState() == SimObject::Drained) { + DPRINTF(O3CPU, "Switched out!\n"); // increment stat lastRunningCycle = curTick; - } else if (!activityRec.active()) { + } else if (!activityRec.active() || _status == Idle) { + DPRINTF(O3CPU, "Idle!\n"); lastRunningCycle = curTick; timesIdled++; } else { tickEvent.schedule(curTick + cycles(1)); + DPRINTF(O3CPU, "Scheduling next tick!\n"); } } @@ -512,6 +523,8 @@ FullO3CPU<Impl>::activateThread(unsigned tid) list<unsigned>::iterator isActive = find( activeThreads.begin(), activeThreads.end(), tid); + DPRINTF(O3CPU, "[tid:%i]: Calling activate thread.\n", tid); + if (isActive == activeThreads.end()) { DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", tid); @@ -528,6 +541,8 @@ FullO3CPU<Impl>::deactivateThread(unsigned tid) list<unsigned>::iterator thread_it = find(activeThreads.begin(), activeThreads.end(), tid); + DPRINTF(O3CPU, "[tid:%i]: Calling deactivate thread.\n", tid); + if (thread_it != activeThreads.end()) { DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", tid); @@ -548,7 +563,7 @@ FullO3CPU<Impl>::activateContext(int tid, int delay) activateThread(tid); } - if(lastActivatedCycle < curTick) { + if (lastActivatedCycle < curTick) { scheduleTickEvent(delay); // Be sure to signal that there's some activity so the CPU doesn't @@ -563,17 +578,20 @@ FullO3CPU<Impl>::activateContext(int tid, int delay) } template <class Impl> -void -FullO3CPU<Impl>::deallocateContext(int tid, int delay) +bool +FullO3CPU<Impl>::deallocateContext(int tid, bool remove, int delay) { // Schedule removal of thread data from CPU if (delay){ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " "on cycle %d\n", tid, curTick + cycles(delay)); - scheduleDeallocateContextEvent(tid, delay); + scheduleDeallocateContextEvent(tid, remove, delay); + return false; } else { deactivateThread(tid); - removeThread(tid); + if (remove) + removeThread(tid); + return true; } } @@ -582,8 +600,9 @@ void FullO3CPU<Impl>::suspendContext(int tid) { DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); - deactivateThread(tid); - if (activeThreads.size() == 0) + bool deallocated = deallocateContext(tid, false, 1); + // If this was the last thread then unschedule the tick event. + if ((activeThreads.size() == 1 && !deallocated) || activeThreads.size() == 0) unscheduleTickEvent(); _status = Idle; } @@ -594,7 +613,7 @@ FullO3CPU<Impl>::haltContext(int tid) { //For now, this is the same as deallocate DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid); - deallocateContext(tid, 1); + deallocateContext(tid, true, 1); } template <class Impl> @@ -682,10 +701,17 @@ FullO3CPU<Impl>::removeThread(unsigned tid) assert(iew.ldstQueue.getCount(tid) == 0); // Reset ROB/IQ/LSQ Entries + + // Commented out for now. This should be possible to do by + // telling all the pipeline stages to drain first, and then + // checking until the drain completes. Once the pipeline is + // drained, call resetEntries(). - 10-09-06 ktlim +/* if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } +*/ } @@ -824,7 +850,9 @@ template <class Impl> void FullO3CPU<Impl>::resume() { +#if FULL_SYSTEM assert(system->getMemoryMode() == System::Timing); +#endif fetch.resume(); decode.resume(); rename.resume(); @@ -935,6 +963,25 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) } if (!tickEvent.scheduled()) tickEvent.schedule(curTick); + + Port *peer; + Port *icachePort = fetch.getIcachePort(); + if (icachePort->getPeer() == NULL) { + peer = oldCPU->getPort("icache_port")->getPeer(); + icachePort->setPeer(peer); + } else { + peer = icachePort->getPeer(); + } + peer->setPeer(icachePort); + + Port *dcachePort = iew.getDcachePort(); + if (dcachePort->getPeer() == NULL) { + peer = oldCPU->getPort("dcache_port")->getPeer(); + dcachePort->setPeer(peer); + } else { + peer = dcachePort->getPeer(); + } + peer->setPeer(dcachePort); } template <class Impl> diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index dcdcd1fe6..fe510519c 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -202,9 +202,12 @@ class FullO3CPU : public BaseO3CPU class DeallocateContextEvent : public Event { private: - /** Number of Thread to Activate */ + /** Number of Thread to deactivate */ int tid; + /** Should the thread be removed from the CPU? */ + bool remove; + /** Pointer to the CPU. */ FullO3CPU<Impl> *cpu; @@ -218,12 +221,15 @@ class FullO3CPU : public BaseO3CPU /** Processes the event, calling activateThread() on the CPU. */ void process(); + /** Sets whether the thread should also be removed from the CPU. */ + void setRemove(bool _remove) { remove = _remove; } + /** Returns the description of the event. */ const char *description(); }; /** Schedule cpu to deallocate thread context.*/ - void scheduleDeallocateContextEvent(int tid, int delay) + void scheduleDeallocateContextEvent(int tid, bool remove, int delay) { // Schedule thread to activate, regardless of its current state. if (deallocateContextEvent[tid].squashed()) @@ -296,9 +302,9 @@ class FullO3CPU : public BaseO3CPU void suspendContext(int tid); /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. + * Possibly Remove Thread Context from CPU. */ - void deallocateContext(int tid, int delay = 1); + bool deallocateContext(int tid, bool remove, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. @@ -626,11 +632,6 @@ class FullO3CPU : public BaseO3CPU /** Pointers to all of the threads in the CPU. */ std::vector<Thread *> thread; - /** Pointer to the icache interface. */ - MemInterface *icacheInterface; - /** Pointer to the dcache interface. */ - MemInterface *dcacheInterface; - /** Whether or not the CPU should defer its registration. */ bool deferRegistration; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 1a2ca32a4..280bf0e71 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -96,7 +96,7 @@ class DefaultFetch /** Returns the address ranges of this device. */ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles setting fetch to the * proper status to start fetching. */ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 3c47c39fa..072580af7 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -63,7 +63,7 @@ template<class Impl> void DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvFunctional callback!"); + warn("Default fetch doesn't update it's state from a functional call."); } template<class Impl> @@ -599,7 +599,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid if (fault == NoFault) { #if 0 if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) || - memReq[tid]->flags & UNCACHEABLE) { + memReq[tid]->isUncacheable()) { DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a " "misspeculating path)!", memReq[tid]->paddr); @@ -623,6 +623,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Now do the timing access to see whether or not the instruction // exists within the cache. if (!icachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + fault = TheISA::genMachineCheckFault(); + delete mem_req; + memReq[tid] = NULL; + } assert(retryPkt == NULL); assert(retryTid == -1); DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index b2baae296..ba5260fe2 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -600,6 +600,11 @@ template<class Impl> void DefaultIEW<Impl>::instToCommit(DynInstPtr &inst) { + // This function should not be called after writebackInsts in a + // single cycle. That will cause problems with an instruction + // being added to the queue to commit without being processed by + // writebackInsts prior to being sent to commit. + // First check the time slot that this instruction will write // to. If there are free write ports at the time, then go ahead // and write the instruction to that time. If there are not, @@ -1286,6 +1291,7 @@ DefaultIEW<Impl>::executeInsts() } else if (fault != NoFault) { // If the instruction faulted, then we need to send it along to commit // without the instruction completing. + DPRINTF(IEW, "Store has fault! [sn:%lli]\n", inst->seqNum); // Send this instruction to commit, also make sure iew stage // realizes there is activity. diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 190734dc2..6b12d75b4 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -311,7 +311,7 @@ class LSQ { /** Returns the address ranges of this device. */ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles writing back and * completing the load or store that has returned from diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 2bbab71f0..7b7d1eb8e 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -46,7 +46,7 @@ template <class Impl> void LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) { - panic("O3CPU doesn't expect recvFunctional callback!"); + warn("O3CPU doesn't update things on a recvFunctional."); } template <class Impl> diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 90d1a3d53..11a02e7c7 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -492,7 +492,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) // A bit of a hackish way to get uncached accesses to work only if they're // at the head of the LSQ and are ready to commit (at the head of the ROB // too). - if (req->getFlags() & UNCACHEABLE && + if (req->isUncacheable() && (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; @@ -509,7 +509,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) load_idx, store_idx, storeHead, req->getPaddr()); #if FULL_SYSTEM - if (req->getFlags() & LOCKED) { + if (req->isLocked()) { cpu->lockAddr = req->getPaddr(); cpu->lockFlag = true; } @@ -626,18 +626,30 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) ++usedPorts; - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(load_inst->memData); - - LSQSenderState *state = new LSQSenderState; - state->isLoad = true; - state->idx = load_idx; - state->inst = load_inst; - data_pkt->senderState = state; - // if we the cache is not blocked, do cache access if (!lsq->cacheBlocked()) { + PacketPtr data_pkt = + new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(load_inst->memData); + + LSQSenderState *state = new LSQSenderState; + state->isLoad = true; + state->idx = load_idx; + state->inst = load_inst; + data_pkt->senderState = state; + if (!dcachePort->sendTiming(data_pkt)) { + Packet::Result result = data_pkt->result; + + // Delete state and data packet because a load retry + // initiates a pipeline restart; it does not retry. + delete state; + delete data_pkt; + + if (result == Packet::BadAddress) { + return TheISA::genMachineCheckFault(); + } + // If the access didn't succeed, tell the LSQ by setting // the retry thread id. lsq->setRetryTid(lsqID); @@ -664,16 +676,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx) return NoFault; } - if (data_pkt->result != Packet::Success) { - DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); - DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", - load_inst->seqNum); - } else { - DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); - DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", - load_inst->seqNum); - } - return NoFault; } diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 98bea74fb..3f9db912f 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -416,7 +416,7 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst) // realizes there is activity. // Mark it as executed unless it is an uncached load that // needs to hit the head of commit. - if (!(inst->req->getFlags() & UNCACHEABLE) || inst->isAtCommit()) { + if (!(inst->req->isUncacheable()) || inst->isAtCommit()) { inst->setExecuted(); } iewStage->instToCommit(inst); @@ -608,21 +608,30 @@ LSQUnit<Impl>::writebackStores() DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", - storeWBIdx, storeQueue[storeWBIdx].inst->readPC(), + storeWBIdx, inst->readPC(), req->getPaddr(), *(inst->memData), - storeQueue[storeWBIdx].inst->seqNum); + inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. - if (req->getFlags() & LOCKED) { - if (req->getFlags() & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->setScResult(2); } else { if (cpu->lockFlag) { req->setScResult(1); + DPRINTF(LSQUnit, "Store conditional [sn:%lli] succeeded.", + inst->seqNum); } else { req->setScResult(0); // Hack: Instantly complete this store. - completeDataAccess(data_pkt); +// completeDataAccess(data_pkt); + DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. " + "Instantly completing it.\n", + inst->seqNum); + WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); + wb->schedule(curTick + 1); + delete state; + completeStore(storeWBIdx); incrStIdx(storeWBIdx); continue; } @@ -633,7 +642,13 @@ LSQUnit<Impl>::writebackStores() } if (!dcachePort->sendTiming(data_pkt)) { + if (data_pkt->result == Packet::BadAddress) { + panic("LSQ sent out a bad address for a completed store!"); + } // Need to handle becoming blocked on a store. + DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will" + "retry later\n", + inst->seqNum); isStoreBlocked = true; ++lsqCacheBlocked; assert(retryPkt == NULL); @@ -880,6 +895,9 @@ LSQUnit<Impl>::recvRetry() assert(retryPkt != NULL); if (dcachePort->sendTiming(retryPkt)) { + if (retryPkt->result == Packet::BadAddress) { + panic("LSQ sent out a bad address for a completed store!"); + } storePostSend(retryPkt); retryPkt = NULL; isStoreBlocked = false; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index 25e1db21c..2bc194d53 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -165,14 +165,14 @@ template <class Impl> void O3ThreadContext<Impl>::deallocate(int delay) { - DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", - getThreadNum()); + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d delay %d\n", + getThreadNum(), delay); if (thread->status() == ThreadContext::Unallocated) return; thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid(), delay); + cpu->deallocateContext(thread->readTid(), true, delay); } template <class Impl> diff --git a/src/cpu/ozone/back_end.hh b/src/cpu/ozone/back_end.hh index 9bab6a964..8debd277d 100644 --- a/src/cpu/ozone/back_end.hh +++ b/src/cpu/ozone/back_end.hh @@ -493,7 +493,7 @@ BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx) } */ /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Read"); */ return LSQ.read(req, data, load_idx); @@ -534,7 +534,7 @@ BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx) *res = memReq->result; */ /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Write"); */ return LSQ.write(req, data, store_idx); diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh index ac3218c02..4078699fe 100644 --- a/src/cpu/ozone/back_end_impl.hh +++ b/src/cpu/ozone/back_end_impl.hh @@ -1256,7 +1256,7 @@ BackEnd<Impl>::executeInsts() // ++iewExecStoreInsts; - if (!(inst->req->flags & LOCKED)) { + if (!(inst->req->isLocked())) { inst->setExecuted(); instToCommit(inst); diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index 8c5be9424..70ec1d101 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -455,12 +455,12 @@ class OzoneCPU : public BaseCPU { #if 0 #if FULL_SYSTEM && defined(TARGET_ALPHA) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } #endif - if (req->flags & LOCKED) { + if (req->isLocked()) { lockAddrList.insert(req->paddr); lockFlag = true; } @@ -489,10 +489,10 @@ class OzoneCPU : public BaseCPU ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? [RGD] @@ -532,8 +532,8 @@ class OzoneCPU : public BaseCPU #endif - if (req->flags & LOCKED) { - if (req->flags & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->result = 2; } else { if (this->lockFlag) { diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index 5ffd3666e..59cf9785c 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -92,7 +92,7 @@ class FrontEnd /** Returns the address ranges of this device. */ virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } /** Timing version of receive. Handles setting fetch to the * proper status to start fetching. */ diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 9a00aefbf..9eff8619d 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -59,7 +59,7 @@ template<class Impl> void FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt) { - panic("FrontEnd doesn't expect recvFunctional callback!"); + warn("FrontEnd doesn't update state from functional calls"); } template<class Impl> @@ -493,7 +493,7 @@ FrontEnd<Impl>::fetchCacheLine() if (fault == NoFault) { #if 0 if (cpu->system->memctrl->badaddr(memReq->paddr) || - memReq->flags & UNCACHEABLE) { + memReq->isUncacheable()) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " "misspeculating path!", memReq->paddr); diff --git a/src/cpu/ozone/inorder_back_end.hh b/src/cpu/ozone/inorder_back_end.hh index ffdba2f6c..76eef6fad 100644 --- a/src/cpu/ozone/inorder_back_end.hh +++ b/src/cpu/ozone/inorder_back_end.hh @@ -231,7 +231,7 @@ InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags) } } /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Read"); */ return fault; @@ -243,7 +243,7 @@ Fault InorderBackEnd<Impl>::read(MemReqPtr &req, T &data) { #if FULL_SYSTEM && defined(TARGET_ALPHA) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } @@ -291,7 +291,7 @@ InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) if (res && (fault == NoFault)) *res = memReq->result; /* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + if (!dcacheInterface && (memReq->isUncacheable())) recordEvent("Uncached Write"); */ return fault; @@ -306,10 +306,10 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data) ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? [RGD] @@ -391,7 +391,7 @@ InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx) } /* - if (!dcacheInterface && (req->flags & UNCACHEABLE)) + if (!dcacheInterface && (req->isUncacheable())) recordEvent("Uncached Read"); */ return NoFault; @@ -455,8 +455,8 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx) } } /* - if (req->flags & LOCKED) { - if (req->flags & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; } else { @@ -469,7 +469,7 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx) *res = req->result; */ /* - if (!dcacheInterface && (req->flags & UNCACHEABLE)) + if (!dcacheInterface && (req->isUncacheable())) recordEvent("Uncached Write"); */ return NoFault; diff --git a/src/cpu/ozone/lsq_unit.hh b/src/cpu/ozone/lsq_unit.hh index 38c1c09a2..056c79521 100644 --- a/src/cpu/ozone/lsq_unit.hh +++ b/src/cpu/ozone/lsq_unit.hh @@ -426,7 +426,7 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). // @todo: Fix uncached accesses. - if (req->flags & UNCACHEABLE && + if (req->isUncacheable() && (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) { return TheISA::genMachineCheckFault(); diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh index ee0804036..c46eb90be 100644 --- a/src/cpu/ozone/lsq_unit_impl.hh +++ b/src/cpu/ozone/lsq_unit_impl.hh @@ -577,7 +577,7 @@ OzoneLSQ<Impl>::writebackStores() MemAccessResult result = dcacheInterface->access(req); //@todo temp fix for LL/SC (works fine for 1 CPU) - if (req->flags & LOCKED) { + if (req->isLocked()) { req->result=1; panic("LL/SC! oh no no support!!!"); } @@ -596,7 +596,7 @@ OzoneLSQ<Impl>::writebackStores() Event *wb = NULL; /* typename IEW::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port transaction. req->result=0; wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, @@ -630,7 +630,7 @@ OzoneLSQ<Impl>::writebackStores() // DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", // storeQueue[storeWBIdx].inst->seqNum); - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port transaction. req->result=1; typename BackEnd::LdWritebackEvent *wb = diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index 6640a9f34..9b93ce74f 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -260,7 +260,7 @@ class OzoneLWLSQ { virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1); } virtual bool recvTiming(PacketPtr pkt); @@ -507,7 +507,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). // @todo: Fix uncached accesses. - if (req->getFlags() & UNCACHEABLE && + if (req->isUncacheable() && (inst != loadQueue.back() || !inst->isAtCommit())) { DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " "commit/LSQ!\n", @@ -659,7 +659,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx) return NoFault; } - if (req->getFlags() & LOCKED) { + if (req->isLocked()) { cpu->lockFlag = true; } diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index 4c96ad149..e523712da 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -72,7 +72,7 @@ template <class Impl> void OzoneLWLSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt) { - panic("O3CPU doesn't expect recvFunctional callback!"); + warn("O3CPU doesn't update things on a recvFunctional"); } template <class Impl> @@ -394,7 +394,7 @@ OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst) // Actually probably want the oldest faulting load if (load_fault != NoFault) { DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum); - if (!(inst->req->getFlags() & UNCACHEABLE && !inst->isAtCommit())) { + if (!(inst->req->isUncacheable() && !inst->isAtCommit())) { inst->setExecuted(); } // Maybe just set it as can commit here, although that might cause @@ -605,8 +605,8 @@ OzoneLWLSQ<Impl>::writebackStores() inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. - if (req->getFlags() & LOCKED) { - if (req->getFlags() & UNCACHEABLE) { + if (req->isLocked()) { + if (req->isUncacheable()) { req->setScResult(2); } else { if (cpu->lockFlag) { @@ -663,7 +663,7 @@ OzoneLWLSQ<Impl>::writebackStores() if (result != MA_HIT && dcacheInterface->doEvents()) { store_event->miss = true; typename BackEnd::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { + if (req->isLocked()) { wb = new typename BackEnd::LdWritebackEvent(inst, be); store_event->wbEvent = wb; @@ -690,7 +690,7 @@ OzoneLWLSQ<Impl>::writebackStores() // DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", // inst->seqNum); - if (req->flags & LOCKED) { + if (req->isLocked()) { // Stx_C does not generate a system port // transaction in the 21264, but that might be // hard to accomplish in this model. diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 7ba1b7df1..490be20ae 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -28,6 +28,7 @@ * Authors: Steve Reinhardt */ +#include "arch/locked_mem.hh" #include "arch/utility.hh" #include "cpu/exetrace.hh" #include "cpu/simple/atomic.hh" @@ -93,7 +94,7 @@ AtomicSimpleCPU::init() bool AtomicSimpleCPU::CpuPort::recvTiming(Packet *pkt) { - panic("AtomicSimpleCPU doesn't expect recvAtomic callback!"); + panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); return true; } @@ -107,7 +108,8 @@ AtomicSimpleCPU::CpuPort::recvAtomic(Packet *pkt) void AtomicSimpleCPU::CpuPort::recvFunctional(Packet *pkt) { - panic("AtomicSimpleCPU doesn't expect recvFunctional callback!"); + //No internal storage to update, just return + return; } void @@ -133,20 +135,19 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) { _status = Idle; - // @todo fix me and get the real cpu id & thread number!!! ifetch_req = new Request(); - ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); ifetch_pkt->dataStatic(&inst); data_read_req = new Request(); - data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too data_read_pkt = new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); data_read_pkt->dataStatic(&dataReg); data_write_req = new Request(); - data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too data_write_pkt = new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); } @@ -161,9 +162,11 @@ AtomicSimpleCPU::serialize(ostream &os) { SimObject::State so_state = SimObject::getState(); SERIALIZE_ENUM(so_state); + Status _status = status(); + SERIALIZE_ENUM(_status); + BaseSimpleCPU::serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); - BaseSimpleCPU::serialize(os); } void @@ -171,8 +174,9 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { SimObject::State so_state; UNSERIALIZE_ENUM(so_state); - tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + UNSERIALIZE_ENUM(_status); BaseSimpleCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } void @@ -253,29 +257,36 @@ template <class T> Fault AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) { - data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + // use the CPU's statically allocated read request and packet objects + Request *req = data_read_req; + Packet *pkt = data_read_pkt; + + req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { traceData->setAddr(addr); } // translate to physical address - Fault fault = thread->translateDataReadReq(data_read_req); + Fault fault = thread->translateDataReadReq(req); // Now do the access. if (fault == NoFault) { - data_read_pkt->reinitFromRequest(); + pkt->reinitFromRequest(); - dcache_latency = dcachePort.sendAtomic(data_read_pkt); + dcache_latency = dcachePort.sendAtomic(pkt); dcache_access = true; - assert(data_read_pkt->result == Packet::Success); - data = data_read_pkt->get<T>(); + assert(pkt->result == Packet::Success); + data = pkt->get<T>(); + if (req->isLocked()) { + TheISA::handleLockedRead(thread, req); + } } // This will need a new way to tell if it has a dcache attached. - if (data_read_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Read"); return fault; @@ -328,33 +339,52 @@ template <class T> Fault AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { - data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + // use the CPU's statically allocated write request and packet objects + Request *req = data_write_req; + Packet *pkt = data_write_pkt; + + req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { traceData->setAddr(addr); } // translate to physical address - Fault fault = thread->translateDataWriteReq(data_write_req); + Fault fault = thread->translateDataWriteReq(req); // Now do the access. if (fault == NoFault) { - data = htog(data); - data_write_pkt->reinitFromRequest(); - data_write_pkt->dataStatic(&data); + bool do_access = true; // flag to suppress cache access - dcache_latency = dcachePort.sendAtomic(data_write_pkt); - dcache_access = true; + if (req->isLocked()) { + do_access = TheISA::handleLockedWrite(thread, req); + } + + if (do_access) { + data = htog(data); + pkt->reinitFromRequest(); + pkt->dataStatic(&data); - assert(data_write_pkt->result == Packet::Success); + dcache_latency = dcachePort.sendAtomic(pkt); + dcache_access = true; - if (res && data_write_req->getFlags() & LOCKED) { - *res = data_write_req->getScResult(); + assert(pkt->result == Packet::Success); + } + + if (req->isLocked()) { + uint64_t scResult = req->getScResult(); + if (scResult != 0) { + // clear failure counter + thread->setStCondFailures(0); + } + if (res) { + *res = req->getScResult(); + } } } // This will need a new way to tell if it's hooked up to a cache or not. - if (data_write_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Write"); // If the write needs to have a fault on the access, consider calling @@ -467,11 +497,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) Param<Tick> progress_interval; SimObjectParam<MemObject *> mem; SimObjectParam<System *> system; + Param<int> cpu_id; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - Param<int> cpu_id; Param<Tick> profile; #else SimObjectParam<Process *> workload; @@ -500,11 +530,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) INIT_PARAM(progress_interval, "Progress interval"), INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else INIT_PARAM(workload, "processes to run"), @@ -538,11 +568,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->simulate_stalls = simulate_stalls; params->mem = mem; params->system = system; + params->cpu_id = cpu_id; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->cpu_id = cpu_id; params->profile = profile; #else params->process = workload; diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index b602af558..52afd76ef 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -104,9 +104,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - }; + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } + }; CpuPort icachePort; CpuPort dcachePort; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 03ee27e04..33f673cbc 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -28,6 +28,7 @@ * Authors: Steve Reinhardt */ +#include "arch/locked_mem.hh" #include "arch/utility.hh" #include "cpu/exetrace.hh" #include "cpu/simple/timing.hh" @@ -73,7 +74,8 @@ TimingSimpleCPU::CpuPort::recvAtomic(Packet *pkt) void TimingSimpleCPU::CpuPort::recvFunctional(Packet *pkt) { - panic("TimingSimpleCPU doesn't expect recvFunctional callback!"); + //No internal storage to update, jusst return + return; } void @@ -94,12 +96,14 @@ TimingSimpleCPU::CpuPort::TickEvent::schedule(Packet *_pkt, Tick t) } TimingSimpleCPU::TimingSimpleCPU(Params *p) - : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock) + : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock), + cpu_id(p->cpu_id) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; drainEvent = NULL; fetchEvent = NULL; + previousTick = 0; changeState(SimObject::Running); } @@ -158,6 +162,7 @@ TimingSimpleCPU::resume() assert(system->getMemoryMode() == System::Timing); changeState(SimObject::Running); + previousTick = curTick; } void @@ -165,6 +170,7 @@ TimingSimpleCPU::switchOut() { assert(status() == Running || status() == Idle); _status = SwitchedOut; + numCycles += curTick - previousTick; // If we've been scheduled to resume but are then told to switch out, // we'll need to cancel it. @@ -187,6 +193,27 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU) break; } } + + if (_status != Running) { + _status = Idle; + } + + Port *peer; + if (icachePort.getPeer() == NULL) { + peer = oldCPU->getPort("icache_port")->getPeer(); + icachePort.setPeer(peer); + } else { + peer = icachePort.getPeer(); + } + peer->setPeer(&icachePort); + + if (dcachePort.getPeer() == NULL) { + peer = oldCPU->getPort("dcache_port")->getPeer(); + dcachePort.setPeer(peer); + } else { + peer = dcachePort.getPeer(); + } + peer->setPeer(&dcachePort); } @@ -227,35 +254,35 @@ template <class T> Fault TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) { - // need to fill in CPU & thread IDs here - Request *data_read_req = new Request(); - data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE - data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + Request *req = + new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(), + cpu_id, /* thread ID */ 0); if (traceData) { - traceData->setAddr(data_read_req->getVaddr()); + traceData->setAddr(req->getVaddr()); } // translate to physical address - Fault fault = thread->translateDataReadReq(data_read_req); + Fault fault = thread->translateDataReadReq(req); // Now do the access. if (fault == NoFault) { - Packet *data_read_pkt = - new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); - data_read_pkt->dataDynamic<T>(new T); + Packet *pkt = + new Packet(req, Packet::ReadReq, Packet::Broadcast); + pkt->dataDynamic<T>(new T); - if (!dcachePort.sendTiming(data_read_pkt)) { + if (!dcachePort.sendTiming(pkt)) { _status = DcacheRetry; - dcache_pkt = data_read_pkt; + dcache_pkt = pkt; } else { _status = DcacheWaitResponse; + // memory system takes ownership of packet dcache_pkt = NULL; } } // This will need a new way to tell if it has a dcache attached. - if (data_read_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Read"); return fault; @@ -308,31 +335,39 @@ template <class T> Fault TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { - // need to fill in CPU & thread IDs here - Request *data_write_req = new Request(); - data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE - data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + Request *req = + new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(), + cpu_id, /* thread ID */ 0); // translate to physical address - Fault fault = thread->translateDataWriteReq(data_write_req); + Fault fault = thread->translateDataWriteReq(req); + // Now do the access. if (fault == NoFault) { - Packet *data_write_pkt = - new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); - data_write_pkt->allocate(); - data_write_pkt->set(data); + assert(dcache_pkt == NULL); + dcache_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + dcache_pkt->allocate(); + dcache_pkt->set(data); - if (!dcachePort.sendTiming(data_write_pkt)) { - _status = DcacheRetry; - dcache_pkt = data_write_pkt; - } else { - _status = DcacheWaitResponse; - dcache_pkt = NULL; + bool do_access = true; // flag to suppress cache access + + if (req->isLocked()) { + do_access = TheISA::handleLockedWrite(thread, req); + } + + if (do_access) { + if (!dcachePort.sendTiming(dcache_pkt)) { + _status = DcacheRetry; + } else { + _status = DcacheWaitResponse; + // memory system takes ownership of packet + dcache_pkt = NULL; + } } } // This will need a new way to tell if it's hooked up to a cache or not. - if (data_write_req->getFlags() & UNCACHEABLE) + if (req->isUncacheable()) recordEvent("Uncached Write"); // If the write needs to have a fault on the access, consider calling @@ -392,9 +427,8 @@ TimingSimpleCPU::fetch() { checkForInterrupts(); - // need to fill in CPU & thread IDs here Request *ifetch_req = new Request(); - ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE + ifetch_req->setThreadContext(cpu_id, /* thread ID */ 0); Fault fault = setupFetchRequest(ifetch_req); ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); @@ -414,6 +448,9 @@ TimingSimpleCPU::fetch() // fetch fault: advance directly to next instruction (fault handler) advanceInst(fault); } + + numCycles += curTick - previousTick; + previousTick = curTick; } @@ -444,6 +481,9 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) delete pkt->req; delete pkt; + numCycles += curTick - previousTick; + previousTick = curTick; + if (getState() == SimObject::Draining) { completeDrain(); return; @@ -453,12 +493,20 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) { // load or store: just send to dcache Fault fault = curStaticInst->initiateAcc(this, traceData); - if (fault == NoFault) { - // successfully initiated access: instruction will - // complete in dcache response callback - assert(_status == DcacheWaitResponse); + if (_status != Running) { + // instruction will complete in dcache response callback + assert(_status == DcacheWaitResponse || _status == DcacheRetry); + assert(fault == NoFault); } else { - // fault: complete now to invoke fault handler + if (fault == NoFault) { + // early fail on store conditional: complete now + assert(dcache_pkt != NULL); + fault = curStaticInst->completeAcc(dcache_pkt, this, + traceData); + delete dcache_pkt->req; + delete dcache_pkt; + dcache_pkt = NULL; + } postExecute(); advanceInst(fault); } @@ -479,8 +527,7 @@ TimingSimpleCPU::IcachePort::ITickEvent::process() bool TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt) { - // These next few lines could be replaced with something faster - // who knows what though + // delay processing of returned data until next CPU clock edge Tick time = pkt->req->getTime(); while (time < curTick) time += lat; @@ -516,21 +563,27 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; - if (getState() == SimObject::Draining) { - completeDrain(); + numCycles += curTick - previousTick; + previousTick = curTick; - delete pkt->req; - delete pkt; + Fault fault = curStaticInst->completeAcc(pkt, this, traceData); - return; + if (pkt->isRead() && pkt->req->isLocked()) { + TheISA::handleLockedRead(thread, pkt->req); } - Fault fault = curStaticInst->completeAcc(pkt, this, traceData); - delete pkt->req; delete pkt; postExecute(); + + if (getState() == SimObject::Draining) { + advancePC(fault); + completeDrain(); + + return; + } + advanceInst(fault); } @@ -546,6 +599,7 @@ TimingSimpleCPU::completeDrain() bool TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) { + // delay processing of returned data until next CPU clock edge Tick time = pkt->req->getTime(); while (time < curTick) time += lat; @@ -574,6 +628,7 @@ TimingSimpleCPU::DcachePort::recvRetry() Packet *tmp = cpu->dcache_pkt; if (sendTiming(tmp)) { cpu->_status = DcacheWaitResponse; + // memory system takes ownership of packet cpu->dcache_pkt = NULL; } } @@ -592,11 +647,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) Param<Tick> progress_interval; SimObjectParam<MemObject *> mem; SimObjectParam<System *> system; + Param<int> cpu_id; #if FULL_SYSTEM SimObjectParam<AlphaITB *> itb; SimObjectParam<AlphaDTB *> dtb; - Param<int> cpu_id; Param<Tick> profile; #else SimObjectParam<Process *> workload; @@ -625,11 +680,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) INIT_PARAM(progress_interval, "Progress interval"), INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else INIT_PARAM(workload, "processes to run"), @@ -661,11 +716,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->functionTraceStart = function_trace_start; params->mem = mem; params->system = system; + params->cpu_id = cpu_id; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->cpu_id = cpu_id; params->profile = profile; #else params->process = workload; diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index d03fa4bc0..988ddeded 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -92,7 +92,7 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } + { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); } struct TickEvent : public Event { @@ -166,6 +166,9 @@ class TimingSimpleCPU : public BaseSimpleCPU Packet *ifetch_pkt; Packet *dcache_pkt; + int cpu_id; + Tick previousTick; + public: virtual Port *getPort(const std::string &if_name, int idx = -1); diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 242cfd0e1..6fa6500bd 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -237,7 +237,7 @@ class SimpleThread : public ThreadState Fault read(RequestPtr &req, T &data) { #if FULL_SYSTEM && THE_ISA == ALPHA_ISA - if (req->flags & LOCKED) { + if (req->isLocked()) { req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } @@ -256,10 +256,10 @@ class SimpleThread : public ThreadState ExecContext *xc; // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { + if (req->isLocked()) { xc = req->xc; - if (req->flags & UNCACHEABLE) { + if (req->isUncacheable()) { // Don't update result register (see stq_c in isa_desc) req->result = 2; xc->setStCondFailures(0);//Needed? [RGD] |