summaryrefslogtreecommitdiff
path: root/src/cpu
diff options
context:
space:
mode:
authorGabe Black <gblack@eecs.umich.edu>2006-10-12 10:58:45 -0400
committerGabe Black <gblack@eecs.umich.edu>2006-10-12 10:58:45 -0400
commit866cfaf9dc596d8547e14bc2133fb962776572a7 (patch)
tree19b82a8021533e8bc2e35f14fb0b6a0440756814 /src/cpu
parent6a31898a88a9ecced399ccf50636831c21d4a75e (diff)
parent78aec04b660544ea7af80d76912b4422c4426602 (diff)
downloadgem5-866cfaf9dc596d8547e14bc2133fb962776572a7.tar.xz
Merge zizzer.eecs.umich.edu:/bk/newmem
into zeep.eecs.umich.edu:/home/gblack/m5/newmem --HG-- extra : convert_revision : 30b2475ba034550376455e1bc0e52e19a200fd5a
Diffstat (limited to 'src/cpu')
-rw-r--r--src/cpu/SConscript1
-rw-r--r--src/cpu/base.cc11
-rw-r--r--src/cpu/base.hh2
-rw-r--r--src/cpu/base_dyn_inst_impl.hh2
-rw-r--r--src/cpu/checker/cpu.cc8
-rw-r--r--src/cpu/checker/thread_context.hh2
-rw-r--r--src/cpu/memtest/memtest.cc356
-rw-r--r--src/cpu/memtest/memtest.hh108
-rw-r--r--src/cpu/o3/commit_impl.hh12
-rw-r--r--src/cpu/o3/cpu.cc71
-rw-r--r--src/cpu/o3/cpu.hh19
-rw-r--r--src/cpu/o3/fetch.hh2
-rw-r--r--src/cpu/o3/fetch_impl.hh9
-rw-r--r--src/cpu/o3/iew_impl.hh6
-rw-r--r--src/cpu/o3/lsq.hh2
-rw-r--r--src/cpu/o3/lsq_impl.hh2
-rw-r--r--src/cpu/o3/lsq_unit.hh44
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh30
-rwxr-xr-xsrc/cpu/o3/thread_context_impl.hh6
-rw-r--r--src/cpu/ozone/back_end.hh4
-rw-r--r--src/cpu/ozone/back_end_impl.hh2
-rw-r--r--src/cpu/ozone/cpu.hh12
-rw-r--r--src/cpu/ozone/front_end.hh2
-rw-r--r--src/cpu/ozone/front_end_impl.hh4
-rw-r--r--src/cpu/ozone/inorder_back_end.hh18
-rw-r--r--src/cpu/ozone/lsq_unit.hh2
-rw-r--r--src/cpu/ozone/lsq_unit_impl.hh6
-rw-r--r--src/cpu/ozone/lw_lsq.hh6
-rw-r--r--src/cpu/ozone/lw_lsq_impl.hh12
-rw-r--r--src/cpu/simple/atomic.cc88
-rw-r--r--src/cpu/simple/atomic.hh4
-rw-r--r--src/cpu/simple/timing.cc153
-rw-r--r--src/cpu/simple/timing.hh5
-rw-r--r--src/cpu/simple_thread.hh6
34 files changed, 657 insertions, 360 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index 2bb9a2399..5771a7904 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -158,6 +158,7 @@ if 'O3CPU' in env['CPU_MODELS']:
o3/scoreboard.cc
o3/store_set.cc
''')
+ sources += Split('memtest/memtest.cc')
if env['USE_CHECKER']:
sources += Split('o3/checker_builder.cc')
else:
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 513dd7c55..ea4b03bf2 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -41,6 +41,7 @@
#include "cpu/cpuevent.hh"
#include "cpu/thread_context.hh"
#include "cpu/profile.hh"
+#include "sim/sim_exit.hh"
#include "sim/param.hh"
#include "sim/process.hh"
#include "sim/sim_events.hh"
@@ -125,8 +126,9 @@ BaseCPU::BaseCPU(Params *p)
//
if (p->max_insts_any_thread != 0)
for (int i = 0; i < number_of_threads; ++i)
- new SimLoopExitEvent(comInstEventQueue[i], p->max_insts_any_thread,
- "a thread reached the max instruction count");
+ schedExitSimLoop("a thread reached the max instruction count",
+ p->max_insts_any_thread, 0,
+ comInstEventQueue[i]);
if (p->max_insts_all_threads != 0) {
// allocate & initialize shared downcounter: each event will
@@ -150,8 +152,9 @@ BaseCPU::BaseCPU(Params *p)
//
if (p->max_loads_any_thread != 0)
for (int i = 0; i < number_of_threads; ++i)
- new SimLoopExitEvent(comLoadEventQueue[i], p->max_loads_any_thread,
- "a thread reached the max load count");
+ schedExitSimLoop("a thread reached the max load count",
+ p->max_loads_any_thread, 0,
+ comLoadEventQueue[i]);
if (p->max_loads_all_threads != 0) {
// allocate & initialize shared downcounter: each event will
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index e02527371..75e0d86af 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -140,8 +140,8 @@ class BaseCPU : public MemObject
bool functionTrace;
Tick functionTraceStart;
System *system;
-#if FULL_SYSTEM
int cpu_id;
+#if FULL_SYSTEM
Tick profile;
#endif
Tick progress_interval;
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index f2109e88d..d6cdff5c5 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -193,7 +193,7 @@ BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags)
// note this is a local, not BaseDynInst::fault
Fault trans_fault = cpu->translateDataReadReq(req);
- if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) {
+ if (trans_fault == NoFault && !(req->isUncacheable())) {
// It's a valid address to cacheable space. Record key MemReq
// parameters so we can generate another one just like it for
// the timing access without calling translate() again (which
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index 1540a6b94..f6d56eef6 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -175,7 +175,7 @@ CheckerCPU::read(Addr addr, T &data, unsigned flags)
pkt->dataStatic(&data);
- if (!(memReq->getFlags() & UNCACHEABLE)) {
+ if (!(memReq->isUncacheable())) {
// Access memory to see if we have the same data
dcachePort->sendFunctional(pkt);
} else {
@@ -251,9 +251,9 @@ CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
// This is because the LSQ would have to be snooped in the CPU to
// verify this data.
if (unverifiedReq &&
- !(unverifiedReq->getFlags() & UNCACHEABLE) &&
- (!(unverifiedReq->getFlags() & LOCKED) ||
- ((unverifiedReq->getFlags() & LOCKED) &&
+ !(unverifiedReq->isUncacheable()) &&
+ (!(unverifiedReq->isLocked()) ||
+ ((unverifiedReq->isLocked()) &&
unverifiedReq->getScResult() == 1))) {
T inst_data;
/*
diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh
index 8c0186dae..b2806d40b 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -133,7 +133,7 @@ class CheckerThreadContext : public ThreadContext
void takeOverFrom(ThreadContext *oldContext)
{
actualTC->takeOverFrom(oldContext);
- checkerTC->takeOverFrom(oldContext);
+ checkerTC->copyState(oldContext);
}
void regStats(const std::string &name) { actualTC->regStats(name); }
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 7ea9eaefc..f42f0f8e2 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -38,86 +38,158 @@
#include "base/misc.hh"
#include "base/statistics.hh"
-#include "cpu/simple_thread.hh"
+//#include "cpu/simple_thread.hh"
#include "cpu/memtest/memtest.hh"
-#include "mem/cache/base_cache.hh"
+//#include "mem/cache/base_cache.hh"
+//#include "mem/physical.hh"
#include "sim/builder.hh"
#include "sim/sim_events.hh"
#include "sim/stats.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
+#include "mem/port.hh"
+#include "mem/mem_object.hh"
using namespace std;
-using namespace TheISA;
int TESTER_ALLOCATOR=0;
+bool
+MemTest::CpuPort::recvTiming(Packet *pkt)
+{
+ memtest->completeRequest(pkt);
+ return true;
+}
+
+Tick
+MemTest::CpuPort::recvAtomic(Packet *pkt)
+{
+ panic("MemTest doesn't expect recvAtomic callback!");
+ return curTick;
+}
+
+void
+MemTest::CpuPort::recvFunctional(Packet *pkt)
+{
+ //Do nothing if we see one come through
+ if (curTick != 0)//Supress warning durring initialization
+ warn("Functional Writes not implemented in MemTester\n");
+ //Need to find any response values that intersect and update
+ return;
+}
+
+void
+MemTest::CpuPort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("MemTest doesn't expect recvStatusChange callback!");
+}
+
+void
+MemTest::CpuPort::recvRetry()
+{
+ memtest->doRetry();
+}
+
+void
+MemTest::sendPkt(Packet *pkt) {
+ if (atomic) {
+ cachePort.sendAtomic(pkt);
+ pkt->makeAtomicResponse();
+ completeRequest(pkt);
+ }
+ else if (!cachePort.sendTiming(pkt)) {
+ accessRetry = true;
+ retryPkt = pkt;
+ }
+
+}
+
MemTest::MemTest(const string &name,
- MemInterface *_cache_interface,
- FunctionalMemory *main_mem,
- FunctionalMemory *check_mem,
+// MemInterface *_cache_interface,
+// PhysicalMemory *main_mem,
+// PhysicalMemory *check_mem,
unsigned _memorySize,
unsigned _percentReads,
- unsigned _percentCopies,
+// unsigned _percentCopies,
unsigned _percentUncacheable,
unsigned _progressInterval,
unsigned _percentSourceUnaligned,
unsigned _percentDestUnaligned,
Addr _traceAddr,
- Counter _max_loads)
- : SimObject(name),
+ Counter _max_loads,
+ bool _atomic)
+ : MemObject(name),
tickEvent(this),
- cacheInterface(_cache_interface),
- mainMem(main_mem),
- checkMem(check_mem),
+ cachePort("test", this),
+ funcPort("functional", this),
+ retryPkt(NULL),
+// mainMem(main_mem),
+// checkMem(check_mem),
size(_memorySize),
percentReads(_percentReads),
- percentCopies(_percentCopies),
+// percentCopies(_percentCopies),
percentUncacheable(_percentUncacheable),
progressInterval(_progressInterval),
nextProgressMessage(_progressInterval),
percentSourceUnaligned(_percentSourceUnaligned),
percentDestUnaligned(percentDestUnaligned),
- maxLoads(_max_loads)
+ maxLoads(_max_loads),
+ atomic(_atomic)
{
vector<string> cmd;
cmd.push_back("/bin/ls");
vector<string> null_vec;
- thread = new SimpleThread(NULL, 0, mainMem, 0);
-
- blockSize = cacheInterface->getBlockSize();
- blockAddrMask = blockSize - 1;
- traceBlockAddr = blockAddr(_traceAddr);
-
- //setup data storage with interesting values
- uint8_t *data1 = new uint8_t[size];
- uint8_t *data2 = new uint8_t[size];
- uint8_t *data3 = new uint8_t[size];
- memset(data1, 1, size);
- memset(data2, 2, size);
- memset(data3, 3, size);
+ // thread = new SimpleThread(NULL, 0, NULL, 0, mainMem);
curTick = 0;
+ // Needs to be masked off once we know the block size.
+ traceBlockAddr = _traceAddr;
baseAddr1 = 0x100000;
baseAddr2 = 0x400000;
uncacheAddr = 0x800000;
- // set up intial memory contents here
- mainMem->prot_write(baseAddr1, data1, size);
- checkMem->prot_write(baseAddr1, data1, size);
- mainMem->prot_write(baseAddr2, data2, size);
- checkMem->prot_write(baseAddr2, data2, size);
- mainMem->prot_write(uncacheAddr, data3, size);
- checkMem->prot_write(uncacheAddr, data3, size);
-
- delete [] data1;
- delete [] data2;
- delete [] data3;
-
// set up counters
noResponseCycles = 0;
numReads = 0;
tickEvent.schedule(0);
id = TESTER_ALLOCATOR++;
+ if (TESTER_ALLOCATOR > 8)
+ panic("False sharing memtester only allows up to 8 testers");
+
+ accessRetry = false;
+}
+
+Port *
+MemTest::getPort(const std::string &if_name, int idx)
+{
+ if (if_name == "functional")
+ return &funcPort;
+ else if (if_name == "test")
+ return &cachePort;
+ else
+ panic("No Such Port\n");
+}
+
+void
+MemTest::init()
+{
+ // By the time init() is called, the ports should be hooked up.
+ blockSize = cachePort.peerBlockSize();
+ blockAddrMask = blockSize - 1;
+ traceBlockAddr = blockAddr(traceBlockAddr);
+
+ // set up intial memory contents here
+
+ cachePort.memsetBlob(baseAddr1, 1, size);
+ funcPort.memsetBlob(baseAddr1, 1, size);
+ cachePort.memsetBlob(baseAddr2, 2, size);
+ funcPort.memsetBlob(baseAddr2, 2, size);
+ cachePort.memsetBlob(uncacheAddr, 3, size);
+ funcPort.memsetBlob(uncacheAddr, 3, size);
}
static void
@@ -132,23 +204,31 @@ printData(ostream &os, uint8_t *data, int nbytes)
}
void
-MemTest::completeRequest(MemReqPtr &req, uint8_t *data)
+MemTest::completeRequest(Packet *pkt)
{
+ MemTestSenderState *state =
+ dynamic_cast<MemTestSenderState *>(pkt->senderState);
+
+ uint8_t *data = state->data;
+ uint8_t *pkt_data = pkt->getPtr<uint8_t>();
+ Request *req = pkt->req;
+
//Remove the address from the list of outstanding
- std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->paddr);
+ std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->getPaddr());
assert(removeAddr != outstandingAddrs.end());
outstandingAddrs.erase(removeAddr);
- switch (req->cmd) {
- case Read:
- if (memcmp(req->data, data, req->size) != 0) {
- cerr << name() << ": on read of 0x" << hex << req->paddr
- << " (0x" << hex << blockAddr(req->paddr) << ")"
+ switch (pkt->cmd) {
+ case Packet::ReadResp:
+
+ if (memcmp(pkt_data, data, pkt->getSize()) != 0) {
+ cerr << name() << ": on read of 0x" << hex << req->getPaddr()
+ << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
<< "@ cycle " << dec << curTick
<< ", cache returns 0x";
- printData(cerr, req->data, req->size);
+ printData(cerr, pkt_data, pkt->getSize());
cerr << ", expected 0x";
- printData(cerr, data, req->size);
+ printData(cerr, data, pkt->getSize());
cerr << endl;
fatal("");
}
@@ -163,13 +243,13 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data)
}
if (numReads >= maxLoads)
- SimExit(curTick, "Maximum number of loads reached!");
+ exitSimLoop("Maximum number of loads reached!");
break;
- case Write:
+ case Packet::WriteResp:
numWritesStat++;
break;
-
+/*
case Copy:
//Also remove dest from outstanding list
removeAddr = outstandingAddrs.find(req->dest);
@@ -177,36 +257,37 @@ MemTest::completeRequest(MemReqPtr &req, uint8_t *data)
outstandingAddrs.erase(removeAddr);
numCopiesStat++;
break;
-
+*/
default:
panic("invalid command");
}
- if (blockAddr(req->paddr) == traceBlockAddr) {
+ if (blockAddr(req->getPaddr()) == traceBlockAddr) {
cerr << name() << ": completed "
- << (req->cmd.isWrite() ? "write" : "read")
+ << (pkt->isWrite() ? "write" : "read")
<< " access of "
- << dec << req->size << " bytes at address 0x"
- << hex << req->paddr
- << " (0x" << hex << blockAddr(req->paddr) << ")"
+ << dec << pkt->getSize() << " bytes at address 0x"
+ << hex << req->getPaddr()
+ << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
<< ", value = 0x";
- printData(cerr, req->data, req->size);
+ printData(cerr, pkt_data, pkt->getSize());
cerr << " @ cycle " << dec << curTick;
cerr << endl;
}
noResponseCycles = 0;
+ delete state;
delete [] data;
+ delete pkt->req;
+ delete pkt;
}
-
void
MemTest::regStats()
{
using namespace Stats;
-
numReadsStat
.name(name() + ".num_reads")
.desc("number of read accesses completed")
@@ -234,7 +315,7 @@ MemTest::tick()
fatal("");
}
- if (cacheInterface->isBlocked()) {
+ if (accessRetry) {
return;
}
@@ -248,30 +329,30 @@ MemTest::tick()
//If we aren't doing copies, use id as offset, and do a false sharing
//mem tester
- if (percentCopies == 0) {
- //We can eliminate the lower bits of the offset, and then use the id
- //to offset within the blks
- offset &= ~63; //Not the low order bits
- offset += id;
- access_size = 0;
- }
+ //We can eliminate the lower bits of the offset, and then use the id
+ //to offset within the blks
+ offset &= ~63; //Not the low order bits
+ offset += id;
+ access_size = 0;
- MemReqPtr req = new MemReq();
+ Request *req = new Request();
+ uint32_t flags = 0;
+ Addr paddr;
if (cacheable < percentUncacheable) {
- req->flags |= UNCACHEABLE;
- req->paddr = uncacheAddr + offset;
+ flags |= UNCACHEABLE;
+ paddr = uncacheAddr + offset;
} else {
- req->paddr = ((base) ? baseAddr1 : baseAddr2) + offset;
+ paddr = ((base) ? baseAddr1 : baseAddr2) + offset;
}
- // bool probe = (random() % 2 == 1) && !req->isUncacheable();
+ //bool probe = (random() % 2 == 1) && !req->isUncacheable();
bool probe = false;
- req->size = 1 << access_size;
- req->data = new uint8_t[req->size];
- req->paddr &= ~(req->size - 1);
- req->time = curTick;
- req->xc = thread->getProxy();
+ paddr &= ~((1 << access_size) - 1);
+ req->setPhys(paddr, 1 << access_size, flags);
+ req->setThreadContext(id,0);
+
+ uint8_t *result = new uint8_t[8];
if (cmd < percentReads) {
// read
@@ -279,60 +360,75 @@ MemTest::tick()
//For now we only allow one outstanding request per addreess per tester
//This means we assume CPU does write forwarding to reads that alias something
//in the cpu store buffer.
- if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return;
- else outstandingAddrs.insert(req->paddr);
+ if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return;
+ else outstandingAddrs.insert(paddr);
+
+ // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin
+ funcPort.readBlob(req->getPaddr(), result, req->getSize());
- req->cmd = Read;
- uint8_t *result = new uint8_t[8];
- checkMem->access(Read, req->paddr, result, req->size);
- if (blockAddr(req->paddr) == traceBlockAddr) {
+ if (blockAddr(paddr) == traceBlockAddr) {
cerr << name()
<< ": initiating read "
<< ((probe) ? "probe of " : "access of ")
- << dec << req->size << " bytes from addr 0x"
- << hex << req->paddr
- << " (0x" << hex << blockAddr(req->paddr) << ")"
+ << dec << req->getSize() << " bytes from addr 0x"
+ << hex << paddr
+ << " (0x" << hex << blockAddr(paddr) << ")"
<< " at cycle "
<< dec << curTick << endl;
}
+
+ Packet *pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ pkt->dataDynamicArray(new uint8_t[req->getSize()]);
+ MemTestSenderState *state = new MemTestSenderState(result);
+ pkt->senderState = state;
+
if (probe) {
- cacheInterface->probeAndUpdate(req);
- completeRequest(req, result);
+ cachePort.sendFunctional(pkt);
+ completeRequest(pkt);
} else {
- req->completionEvent = new MemCompleteEvent(req, result, this);
- cacheInterface->access(req);
+// req->completionEvent = new MemCompleteEvent(req, result, this);
+ sendPkt(pkt);
}
- } else if (cmd < (100 - percentCopies)){
+ } else {
// write
//For now we only allow one outstanding request per addreess per tester
//This means we assume CPU does write forwarding to reads that alias something
//in the cpu store buffer.
- if (outstandingAddrs.find(req->paddr) != outstandingAddrs.end()) return;
- else outstandingAddrs.insert(req->paddr);
+ if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) return;
+ else outstandingAddrs.insert(paddr);
- req->cmd = Write;
- memcpy(req->data, &data, req->size);
- checkMem->access(Write, req->paddr, req->data, req->size);
- if (blockAddr(req->paddr) == traceBlockAddr) {
+/*
+ if (blockAddr(req->getPaddr()) == traceBlockAddr) {
cerr << name() << ": initiating write "
<< ((probe)?"probe of ":"access of ")
- << dec << req->size << " bytes (value = 0x";
- printData(cerr, req->data, req->size);
+ << dec << req->getSize() << " bytes (value = 0x";
+ printData(cerr, data_pkt->getPtr(), req->getSize());
cerr << ") to addr 0x"
- << hex << req->paddr
- << " (0x" << hex << blockAddr(req->paddr) << ")"
+ << hex << req->getPaddr()
+ << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
<< " at cycle "
<< dec << curTick << endl;
}
+*/
+ Packet *pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+ uint8_t *pkt_data = new uint8_t[req->getSize()];
+ pkt->dataDynamicArray(pkt_data);
+ memcpy(pkt_data, &data, req->getSize());
+ MemTestSenderState *state = new MemTestSenderState(result);
+ pkt->senderState = state;
+
+ funcPort.writeBlob(req->getPaddr(), pkt_data, req->getSize());
+
if (probe) {
- cacheInterface->probeAndUpdate(req);
- completeRequest(req, NULL);
+ cachePort.sendFunctional(pkt);
+ completeRequest(pkt);
} else {
- req->completionEvent = new MemCompleteEvent(req, NULL, this);
- cacheInterface->access(req);
+// req->completionEvent = new MemCompleteEvent(req, NULL, this);
+ sendPkt(pkt);
}
- } else {
+ }
+/* else {
// copy
unsigned source_align = random() % 100;
unsigned dest_align = random() % 100;
@@ -369,56 +465,51 @@ MemTest::tick()
<< " (0x" << hex << blockAddr(dest) << ")"
<< " at cycle "
<< dec << curTick << endl;
- }
+ }*
cacheInterface->access(req);
uint8_t result[blockSize];
checkMem->access(Read, source, &result, blockSize);
checkMem->access(Write, dest, &result, blockSize);
}
+*/
}
-
void
-MemCompleteEvent::process()
-{
- tester->completeRequest(req, data);
- delete this;
-}
-
-
-const char *
-MemCompleteEvent::description()
+MemTest::doRetry()
{
- return "memory access completion";
+ if (cachePort.sendTiming(retryPkt)) {
+ accessRetry = false;
+ retryPkt = NULL;
+ }
}
-
BEGIN_DECLARE_SIM_OBJECT_PARAMS(MemTest)
- SimObjectParam<BaseCache *> cache;
- SimObjectParam<FunctionalMemory *> main_mem;
- SimObjectParam<FunctionalMemory *> check_mem;
+// SimObjectParam<BaseCache *> cache;
+// SimObjectParam<PhysicalMemory *> main_mem;
+// SimObjectParam<PhysicalMemory *> check_mem;
Param<unsigned> memory_size;
Param<unsigned> percent_reads;
- Param<unsigned> percent_copies;
+// Param<unsigned> percent_copies;
Param<unsigned> percent_uncacheable;
Param<unsigned> progress_interval;
Param<unsigned> percent_source_unaligned;
Param<unsigned> percent_dest_unaligned;
Param<Addr> trace_addr;
Param<Counter> max_loads;
+ Param<bool> atomic;
END_DECLARE_SIM_OBJECT_PARAMS(MemTest)
BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest)
- INIT_PARAM(cache, "L1 cache"),
- INIT_PARAM(main_mem, "hierarchical memory"),
- INIT_PARAM(check_mem, "check memory"),
+// INIT_PARAM(cache, "L1 cache"),
+// INIT_PARAM(main_mem, "hierarchical memory"),
+// INIT_PARAM(check_mem, "check memory"),
INIT_PARAM(memory_size, "memory size"),
INIT_PARAM(percent_reads, "target read percentage"),
- INIT_PARAM(percent_copies, "target copy percentage"),
+// INIT_PARAM(percent_copies, "target copy percentage"),
INIT_PARAM(percent_uncacheable, "target uncacheable percentage"),
INIT_PARAM(progress_interval, "progress report interval (in accesses)"),
INIT_PARAM(percent_source_unaligned,
@@ -426,18 +517,19 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(MemTest)
INIT_PARAM(percent_dest_unaligned,
"percent of copy dest address that are unaligned"),
INIT_PARAM(trace_addr, "address to trace"),
- INIT_PARAM(max_loads, "terminate when we have reached this load count")
+ INIT_PARAM(max_loads, "terminate when we have reached this load count"),
+ INIT_PARAM(atomic, "Is the tester testing atomic mode (or timing)")
END_INIT_SIM_OBJECT_PARAMS(MemTest)
CREATE_SIM_OBJECT(MemTest)
{
- return new MemTest(getInstanceName(), cache->getInterface(), main_mem,
- check_mem, memory_size, percent_reads, percent_copies,
+ return new MemTest(getInstanceName(), /*cache->getInterface(),*/ /*main_mem,*/
+ /*check_mem,*/ memory_size, percent_reads, /*percent_copies,*/
percent_uncacheable, progress_interval,
percent_source_unaligned, percent_dest_unaligned,
- trace_addr, max_loads);
+ trace_addr, max_loads, atomic);
}
REGISTER_SIM_OBJECT("MemTest", MemTest)
diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index 42fb235db..5de41f0d8 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -35,31 +35,36 @@
#include <set>
#include "base/statistics.hh"
-#include "mem/functional/functional.hh"
-#include "mem/mem_interface.hh"
+//#include "mem/functional/functional.hh"
+//#include "mem/mem_interface.hh"
#include "sim/eventq.hh"
#include "sim/sim_exit.hh"
#include "sim/sim_object.hh"
#include "sim/stats.hh"
+#include "mem/mem_object.hh"
+#include "mem/port.hh"
-class ThreadContext;
-class MemTest : public SimObject
+class Packet;
+class MemTest : public MemObject
{
public:
MemTest(const std::string &name,
- MemInterface *_cache_interface,
- FunctionalMemory *main_mem,
- FunctionalMemory *check_mem,
+// MemInterface *_cache_interface,
+// PhysicalMemory *main_mem,
+// PhysicalMemory *check_mem,
unsigned _memorySize,
unsigned _percentReads,
- unsigned _percentCopies,
+// unsigned _percentCopies,
unsigned _percentUncacheable,
unsigned _progressInterval,
unsigned _percentSourceUnaligned,
unsigned _percentDestUnaligned,
Addr _traceAddr,
- Counter _max_loads);
+ Counter _max_loads,
+ bool _atomic);
+
+ virtual void init();
// register statistics
virtual void regStats();
@@ -69,6 +74,8 @@ class MemTest : public SimObject
// main simulation loop (one cycle)
void tick();
+ virtual Port *getPort(const std::string &if_name, int idx = -1);
+
protected:
class TickEvent : public Event
{
@@ -82,16 +89,62 @@ class MemTest : public SimObject
};
TickEvent tickEvent;
+ class CpuPort : public Port
+ {
+
+ MemTest *memtest;
+
+ public:
+
+ CpuPort(const std::string &_name, MemTest *_memtest)
+ : Port(_name), memtest(_memtest)
+ { }
+
+ protected:
+
+ virtual bool recvTiming(Packet *pkt);
- MemInterface *cacheInterface;
- FunctionalMemory *mainMem;
- FunctionalMemory *checkMem;
- SimpleThread *thread;
+ virtual Tick recvAtomic(Packet *pkt);
+
+ virtual void recvFunctional(Packet *pkt);
+
+ virtual void recvStatusChange(Status status);
+
+ virtual void recvRetry();
+
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
+ };
+
+ CpuPort cachePort;
+ CpuPort funcPort;
+
+ class MemTestSenderState : public Packet::SenderState
+ {
+ public:
+ /** Constructor. */
+ MemTestSenderState(uint8_t *_data)
+ : data(_data)
+ { }
+
+ // Hold onto data pointer
+ uint8_t *data;
+ };
+
+// Request *dataReq;
+ Packet *retryPkt;
+// MemInterface *cacheInterface;
+// PhysicalMemory *mainMem;
+// PhysicalMemory *checkMem;
+// SimpleThread *thread;
+
+ bool accessRetry;
unsigned size; // size of testing memory region
unsigned percentReads; // target percentage of read accesses
- unsigned percentCopies; // target percentage of copy accesses
+// unsigned percentCopies; // target percentage of copy accesses
unsigned percentUncacheable;
int id;
@@ -123,34 +176,21 @@ class MemTest : public SimObject
uint64_t numReads;
uint64_t maxLoads;
+
+ bool atomic;
+
Stats::Scalar<> numReadsStat;
Stats::Scalar<> numWritesStat;
Stats::Scalar<> numCopiesStat;
// called by MemCompleteEvent::process()
- void completeRequest(MemReqPtr &req, uint8_t *data);
+ void completeRequest(Packet *pkt);
- friend class MemCompleteEvent;
-};
+ void sendPkt(Packet *pkt);
+ void doRetry();
-class MemCompleteEvent : public Event
-{
- MemReqPtr req;
- uint8_t *data;
- MemTest *tester;
-
- public:
-
- MemCompleteEvent(MemReqPtr &_req, uint8_t *_data, MemTest *_tester)
- : Event(&mainEventQueue),
- req(_req), data(_data), tester(_tester)
- {
- }
-
- void process();
-
- virtual const char *description();
+ friend class MemCompleteEvent;
};
#endif // __CPU_MEMTEST_MEMTEST_HH__
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index c80e4d8c1..ecf6ed632 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -342,12 +342,6 @@ DefaultCommit<Impl>::drain()
{
drainPending = true;
- // If it's already drained, return true.
- if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
- cpu->signalDrained();
- return true;
- }
-
return false;
}
@@ -1218,16 +1212,16 @@ DefaultCommit<Impl>::skidInsert()
for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) {
DynInstPtr inst = fromRename->insts[inst_num];
- int tid = inst->threadNumber;
if (!inst->isSquashed()) {
DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ",
- "skidBuffer.\n", inst->readPC(), inst->seqNum, tid);
+ "skidBuffer.\n", inst->readPC(), inst->seqNum,
+ inst->threadNumber);
skidBuffer.push(inst);
} else {
DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
"squashed, skipping.\n",
- inst->readPC(), inst->seqNum, tid);
+ inst->readPC(), inst->seqNum, inst->threadNumber);
}
}
}
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 7386dfadd..4c9a8e91f 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -88,7 +88,7 @@ FullO3CPU<Impl>::TickEvent::description()
template <class Impl>
FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent()
- : Event(&mainEventQueue, CPU_Tick_Pri)
+ : Event(&mainEventQueue, CPU_Switch_Pri)
{
}
@@ -135,7 +135,8 @@ void
FullO3CPU<Impl>::DeallocateContextEvent::process()
{
cpu->deactivateThread(tid);
- cpu->removeThread(tid);
+ if (remove)
+ cpu->removeThread(tid);
}
template <class Impl>
@@ -191,7 +192,11 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
deferRegistration(params->deferRegistration),
numThreads(number_of_threads)
{
- _status = Idle;
+ if (!deferRegistration) {
+ _status = Running;
+ } else {
+ _status = Idle;
+ }
checker = NULL;
@@ -304,6 +309,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
tid,
bindRegs);
+
+ activateThreadEvent[tid].init(tid, this);
+ deallocateContextEvent[tid].init(tid, this);
}
rename.setRenameMap(renameMap);
@@ -447,13 +455,16 @@ FullO3CPU<Impl>::tick()
if (!tickEvent.scheduled()) {
if (_status == SwitchedOut ||
getState() == SimObject::Drained) {
+ DPRINTF(O3CPU, "Switched out!\n");
// increment stat
lastRunningCycle = curTick;
- } else if (!activityRec.active()) {
+ } else if (!activityRec.active() || _status == Idle) {
+ DPRINTF(O3CPU, "Idle!\n");
lastRunningCycle = curTick;
timesIdled++;
} else {
tickEvent.schedule(curTick + cycles(1));
+ DPRINTF(O3CPU, "Scheduling next tick!\n");
}
}
@@ -512,6 +523,8 @@ FullO3CPU<Impl>::activateThread(unsigned tid)
list<unsigned>::iterator isActive = find(
activeThreads.begin(), activeThreads.end(), tid);
+ DPRINTF(O3CPU, "[tid:%i]: Calling activate thread.\n", tid);
+
if (isActive == activeThreads.end()) {
DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
tid);
@@ -528,6 +541,8 @@ FullO3CPU<Impl>::deactivateThread(unsigned tid)
list<unsigned>::iterator thread_it =
find(activeThreads.begin(), activeThreads.end(), tid);
+ DPRINTF(O3CPU, "[tid:%i]: Calling deactivate thread.\n", tid);
+
if (thread_it != activeThreads.end()) {
DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
tid);
@@ -548,7 +563,7 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
activateThread(tid);
}
- if(lastActivatedCycle < curTick) {
+ if (lastActivatedCycle < curTick) {
scheduleTickEvent(delay);
// Be sure to signal that there's some activity so the CPU doesn't
@@ -563,17 +578,20 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
}
template <class Impl>
-void
-FullO3CPU<Impl>::deallocateContext(int tid, int delay)
+bool
+FullO3CPU<Impl>::deallocateContext(int tid, bool remove, int delay)
{
// Schedule removal of thread data from CPU
if (delay){
DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate "
"on cycle %d\n", tid, curTick + cycles(delay));
- scheduleDeallocateContextEvent(tid, delay);
+ scheduleDeallocateContextEvent(tid, remove, delay);
+ return false;
} else {
deactivateThread(tid);
- removeThread(tid);
+ if (remove)
+ removeThread(tid);
+ return true;
}
}
@@ -582,8 +600,9 @@ void
FullO3CPU<Impl>::suspendContext(int tid)
{
DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
- deactivateThread(tid);
- if (activeThreads.size() == 0)
+ bool deallocated = deallocateContext(tid, false, 1);
+ // If this was the last thread then unschedule the tick event.
+ if ((activeThreads.size() == 1 && !deallocated) || activeThreads.size() == 0)
unscheduleTickEvent();
_status = Idle;
}
@@ -594,7 +613,7 @@ FullO3CPU<Impl>::haltContext(int tid)
{
//For now, this is the same as deallocate
DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid);
- deallocateContext(tid, 1);
+ deallocateContext(tid, true, 1);
}
template <class Impl>
@@ -682,10 +701,17 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
assert(iew.ldstQueue.getCount(tid) == 0);
// Reset ROB/IQ/LSQ Entries
+
+ // Commented out for now. This should be possible to do by
+ // telling all the pipeline stages to drain first, and then
+ // checking until the drain completes. Once the pipeline is
+ // drained, call resetEntries(). - 10-09-06 ktlim
+/*
if (activeThreads.size() >= 1) {
commit.rob->resetEntries();
iew.resetEntries();
}
+*/
}
@@ -824,7 +850,9 @@ template <class Impl>
void
FullO3CPU<Impl>::resume()
{
+#if FULL_SYSTEM
assert(system->getMemoryMode() == System::Timing);
+#endif
fetch.resume();
decode.resume();
rename.resume();
@@ -935,6 +963,25 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
}
if (!tickEvent.scheduled())
tickEvent.schedule(curTick);
+
+ Port *peer;
+ Port *icachePort = fetch.getIcachePort();
+ if (icachePort->getPeer() == NULL) {
+ peer = oldCPU->getPort("icache_port")->getPeer();
+ icachePort->setPeer(peer);
+ } else {
+ peer = icachePort->getPeer();
+ }
+ peer->setPeer(icachePort);
+
+ Port *dcachePort = iew.getDcachePort();
+ if (dcachePort->getPeer() == NULL) {
+ peer = oldCPU->getPort("dcache_port")->getPeer();
+ dcachePort->setPeer(peer);
+ } else {
+ peer = dcachePort->getPeer();
+ }
+ peer->setPeer(dcachePort);
}
template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index dcdcd1fe6..fe510519c 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -202,9 +202,12 @@ class FullO3CPU : public BaseO3CPU
class DeallocateContextEvent : public Event
{
private:
- /** Number of Thread to Activate */
+ /** Number of Thread to deactivate */
int tid;
+ /** Should the thread be removed from the CPU? */
+ bool remove;
+
/** Pointer to the CPU. */
FullO3CPU<Impl> *cpu;
@@ -218,12 +221,15 @@ class FullO3CPU : public BaseO3CPU
/** Processes the event, calling activateThread() on the CPU. */
void process();
+ /** Sets whether the thread should also be removed from the CPU. */
+ void setRemove(bool _remove) { remove = _remove; }
+
/** Returns the description of the event. */
const char *description();
};
/** Schedule cpu to deallocate thread context.*/
- void scheduleDeallocateContextEvent(int tid, int delay)
+ void scheduleDeallocateContextEvent(int tid, bool remove, int delay)
{
// Schedule thread to activate, regardless of its current state.
if (deallocateContextEvent[tid].squashed())
@@ -296,9 +302,9 @@ class FullO3CPU : public BaseO3CPU
void suspendContext(int tid);
/** Remove Thread from Active Threads List &&
- * Remove Thread Context from CPU.
+ * Possibly Remove Thread Context from CPU.
*/
- void deallocateContext(int tid, int delay = 1);
+ bool deallocateContext(int tid, bool remove, int delay = 1);
/** Remove Thread from Active Threads List &&
* Remove Thread Context from CPU.
@@ -626,11 +632,6 @@ class FullO3CPU : public BaseO3CPU
/** Pointers to all of the threads in the CPU. */
std::vector<Thread *> thread;
- /** Pointer to the icache interface. */
- MemInterface *icacheInterface;
- /** Pointer to the dcache interface. */
- MemInterface *dcacheInterface;
-
/** Whether or not the CPU should defer its registration. */
bool deferRegistration;
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 1a2ca32a4..280bf0e71 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -96,7 +96,7 @@ class DefaultFetch
/** Returns the address ranges of this device. */
virtual void getDeviceAddressRanges(AddrRangeList &resp,
AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
+ { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
/** Timing version of receive. Handles setting fetch to the
* proper status to start fetching. */
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 3c47c39fa..072580af7 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -63,7 +63,7 @@ template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
{
- panic("DefaultFetch doesn't expect recvFunctional callback!");
+ warn("Default fetch doesn't update it's state from a functional call.");
}
template<class Impl>
@@ -599,7 +599,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
if (fault == NoFault) {
#if 0
if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
- memReq[tid]->flags & UNCACHEABLE) {
+ memReq[tid]->isUncacheable()) {
DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
"misspeculating path)!",
memReq[tid]->paddr);
@@ -623,6 +623,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Now do the timing access to see whether or not the instruction
// exists within the cache.
if (!icachePort->sendTiming(data_pkt)) {
+ if (data_pkt->result == Packet::BadAddress) {
+ fault = TheISA::genMachineCheckFault();
+ delete mem_req;
+ memReq[tid] = NULL;
+ }
assert(retryPkt == NULL);
assert(retryTid == -1);
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index b2baae296..ba5260fe2 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -600,6 +600,11 @@ template<class Impl>
void
DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
{
+ // This function should not be called after writebackInsts in a
+ // single cycle. That will cause problems with an instruction
+ // being added to the queue to commit without being processed by
+ // writebackInsts prior to being sent to commit.
+
// First check the time slot that this instruction will write
// to. If there are free write ports at the time, then go ahead
// and write the instruction to that time. If there are not,
@@ -1286,6 +1291,7 @@ DefaultIEW<Impl>::executeInsts()
} else if (fault != NoFault) {
// If the instruction faulted, then we need to send it along to commit
// without the instruction completing.
+ DPRINTF(IEW, "Store has fault! [sn:%lli]\n", inst->seqNum);
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 190734dc2..6b12d75b4 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -311,7 +311,7 @@ class LSQ {
/** Returns the address ranges of this device. */
virtual void getDeviceAddressRanges(AddrRangeList &resp,
AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
+ { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
/** Timing version of receive. Handles writing back and
* completing the load or store that has returned from
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 2bbab71f0..7b7d1eb8e 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -46,7 +46,7 @@ template <class Impl>
void
LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
{
- panic("O3CPU doesn't expect recvFunctional callback!");
+ warn("O3CPU doesn't update things on a recvFunctional.");
}
template <class Impl>
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 90d1a3d53..11a02e7c7 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -492,7 +492,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
// A bit of a hackish way to get uncached accesses to work only if they're
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
- if (req->getFlags() & UNCACHEABLE &&
+ if (req->isUncacheable() &&
(load_idx != loadHead || !load_inst->isAtCommit())) {
iewStage->rescheduleMemInst(load_inst);
++lsqRescheduledLoads;
@@ -509,7 +509,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
load_idx, store_idx, storeHead, req->getPaddr());
#if FULL_SYSTEM
- if (req->getFlags() & LOCKED) {
+ if (req->isLocked()) {
cpu->lockAddr = req->getPaddr();
cpu->lockFlag = true;
}
@@ -626,18 +626,30 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
++usedPorts;
- PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
- data_pkt->dataStatic(load_inst->memData);
-
- LSQSenderState *state = new LSQSenderState;
- state->isLoad = true;
- state->idx = load_idx;
- state->inst = load_inst;
- data_pkt->senderState = state;
-
// if we the cache is not blocked, do cache access
if (!lsq->cacheBlocked()) {
+ PacketPtr data_pkt =
+ new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(load_inst->memData);
+
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = true;
+ state->idx = load_idx;
+ state->inst = load_inst;
+ data_pkt->senderState = state;
+
if (!dcachePort->sendTiming(data_pkt)) {
+ Packet::Result result = data_pkt->result;
+
+ // Delete state and data packet because a load retry
+ // initiates a pipeline restart; it does not retry.
+ delete state;
+ delete data_pkt;
+
+ if (result == Packet::BadAddress) {
+ return TheISA::genMachineCheckFault();
+ }
+
// If the access didn't succeed, tell the LSQ by setting
// the retry thread id.
lsq->setRetryTid(lsqID);
@@ -664,16 +676,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
return NoFault;
}
- if (data_pkt->result != Packet::Success) {
- DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
- DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
- load_inst->seqNum);
- } else {
- DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
- DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
- load_inst->seqNum);
- }
-
return NoFault;
}
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 98bea74fb..3f9db912f 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -416,7 +416,7 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
// realizes there is activity.
// Mark it as executed unless it is an uncached load that
// needs to hit the head of commit.
- if (!(inst->req->getFlags() & UNCACHEABLE) || inst->isAtCommit()) {
+ if (!(inst->req->isUncacheable()) || inst->isAtCommit()) {
inst->setExecuted();
}
iewStage->instToCommit(inst);
@@ -608,21 +608,30 @@ LSQUnit<Impl>::writebackStores()
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
"to Addr:%#x, data:%#x [sn:%lli]\n",
- storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
+ storeWBIdx, inst->readPC(),
req->getPaddr(), *(inst->memData),
- storeQueue[storeWBIdx].inst->seqNum);
+ inst->seqNum);
// @todo: Remove this SC hack once the memory system handles it.
- if (req->getFlags() & LOCKED) {
- if (req->getFlags() & UNCACHEABLE) {
+ if (req->isLocked()) {
+ if (req->isUncacheable()) {
req->setScResult(2);
} else {
if (cpu->lockFlag) {
req->setScResult(1);
+ DPRINTF(LSQUnit, "Store conditional [sn:%lli] succeeded.",
+ inst->seqNum);
} else {
req->setScResult(0);
// Hack: Instantly complete this store.
- completeDataAccess(data_pkt);
+// completeDataAccess(data_pkt);
+ DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
+ "Instantly completing it.\n",
+ inst->seqNum);
+ WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
+ wb->schedule(curTick + 1);
+ delete state;
+ completeStore(storeWBIdx);
incrStIdx(storeWBIdx);
continue;
}
@@ -633,7 +642,13 @@ LSQUnit<Impl>::writebackStores()
}
if (!dcachePort->sendTiming(data_pkt)) {
+ if (data_pkt->result == Packet::BadAddress) {
+ panic("LSQ sent out a bad address for a completed store!");
+ }
// Need to handle becoming blocked on a store.
+ DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
+ "retry later\n",
+ inst->seqNum);
isStoreBlocked = true;
++lsqCacheBlocked;
assert(retryPkt == NULL);
@@ -880,6 +895,9 @@ LSQUnit<Impl>::recvRetry()
assert(retryPkt != NULL);
if (dcachePort->sendTiming(retryPkt)) {
+ if (retryPkt->result == Packet::BadAddress) {
+ panic("LSQ sent out a bad address for a completed store!");
+ }
storePostSend(retryPkt);
retryPkt = NULL;
isStoreBlocked = false;
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index 25e1db21c..2bc194d53 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -165,14 +165,14 @@ template <class Impl>
void
O3ThreadContext<Impl>::deallocate(int delay)
{
- DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n",
- getThreadNum());
+ DPRINTF(O3CPU, "Calling deallocate on Thread Context %d delay %d\n",
+ getThreadNum(), delay);
if (thread->status() == ThreadContext::Unallocated)
return;
thread->setStatus(ThreadContext::Unallocated);
- cpu->deallocateContext(thread->readTid(), delay);
+ cpu->deallocateContext(thread->readTid(), true, delay);
}
template <class Impl>
diff --git a/src/cpu/ozone/back_end.hh b/src/cpu/ozone/back_end.hh
index 9bab6a964..8debd277d 100644
--- a/src/cpu/ozone/back_end.hh
+++ b/src/cpu/ozone/back_end.hh
@@ -493,7 +493,7 @@ BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
}
*/
/*
- if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ if (!dcacheInterface && (memReq->isUncacheable()))
recordEvent("Uncached Read");
*/
return LSQ.read(req, data, load_idx);
@@ -534,7 +534,7 @@ BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
*res = memReq->result;
*/
/*
- if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ if (!dcacheInterface && (memReq->isUncacheable()))
recordEvent("Uncached Write");
*/
return LSQ.write(req, data, store_idx);
diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh
index ac3218c02..4078699fe 100644
--- a/src/cpu/ozone/back_end_impl.hh
+++ b/src/cpu/ozone/back_end_impl.hh
@@ -1256,7 +1256,7 @@ BackEnd<Impl>::executeInsts()
// ++iewExecStoreInsts;
- if (!(inst->req->flags & LOCKED)) {
+ if (!(inst->req->isLocked())) {
inst->setExecuted();
instToCommit(inst);
diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh
index 8c5be9424..70ec1d101 100644
--- a/src/cpu/ozone/cpu.hh
+++ b/src/cpu/ozone/cpu.hh
@@ -455,12 +455,12 @@ class OzoneCPU : public BaseCPU
{
#if 0
#if FULL_SYSTEM && defined(TARGET_ALPHA)
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
}
#endif
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
lockAddrList.insert(req->paddr);
lockFlag = true;
}
@@ -489,10 +489,10 @@ class OzoneCPU : public BaseCPU
ExecContext *xc;
// If this is a store conditional, act appropriately
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
xc = req->xc;
- if (req->flags & UNCACHEABLE) {
+ if (req->isUncacheable()) {
// Don't update result register (see stq_c in isa_desc)
req->result = 2;
xc->setStCondFailures(0);//Needed? [RGD]
@@ -532,8 +532,8 @@ class OzoneCPU : public BaseCPU
#endif
- if (req->flags & LOCKED) {
- if (req->flags & UNCACHEABLE) {
+ if (req->isLocked()) {
+ if (req->isUncacheable()) {
req->result = 2;
} else {
if (this->lockFlag) {
diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh
index 5ffd3666e..59cf9785c 100644
--- a/src/cpu/ozone/front_end.hh
+++ b/src/cpu/ozone/front_end.hh
@@ -92,7 +92,7 @@ class FrontEnd
/** Returns the address ranges of this device. */
virtual void getDeviceAddressRanges(AddrRangeList &resp,
AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
+ { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
/** Timing version of receive. Handles setting fetch to the
* proper status to start fetching. */
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
index 9a00aefbf..9eff8619d 100644
--- a/src/cpu/ozone/front_end_impl.hh
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -59,7 +59,7 @@ template<class Impl>
void
FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
{
- panic("FrontEnd doesn't expect recvFunctional callback!");
+ warn("FrontEnd doesn't update state from functional calls");
}
template<class Impl>
@@ -493,7 +493,7 @@ FrontEnd<Impl>::fetchCacheLine()
if (fault == NoFault) {
#if 0
if (cpu->system->memctrl->badaddr(memReq->paddr) ||
- memReq->flags & UNCACHEABLE) {
+ memReq->isUncacheable()) {
DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
"misspeculating path!",
memReq->paddr);
diff --git a/src/cpu/ozone/inorder_back_end.hh b/src/cpu/ozone/inorder_back_end.hh
index ffdba2f6c..76eef6fad 100644
--- a/src/cpu/ozone/inorder_back_end.hh
+++ b/src/cpu/ozone/inorder_back_end.hh
@@ -231,7 +231,7 @@ InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags)
}
}
/*
- if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ if (!dcacheInterface && (memReq->isUncacheable()))
recordEvent("Uncached Read");
*/
return fault;
@@ -243,7 +243,7 @@ Fault
InorderBackEnd<Impl>::read(MemReqPtr &req, T &data)
{
#if FULL_SYSTEM && defined(TARGET_ALPHA)
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
}
@@ -291,7 +291,7 @@ InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
if (res && (fault == NoFault))
*res = memReq->result;
/*
- if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ if (!dcacheInterface && (memReq->isUncacheable()))
recordEvent("Uncached Write");
*/
return fault;
@@ -306,10 +306,10 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data)
ExecContext *xc;
// If this is a store conditional, act appropriately
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
xc = req->xc;
- if (req->flags & UNCACHEABLE) {
+ if (req->isUncacheable()) {
// Don't update result register (see stq_c in isa_desc)
req->result = 2;
xc->setStCondFailures(0);//Needed? [RGD]
@@ -391,7 +391,7 @@ InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
}
/*
- if (!dcacheInterface && (req->flags & UNCACHEABLE))
+ if (!dcacheInterface && (req->isUncacheable()))
recordEvent("Uncached Read");
*/
return NoFault;
@@ -455,8 +455,8 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
}
}
/*
- if (req->flags & LOCKED) {
- if (req->flags & UNCACHEABLE) {
+ if (req->isLocked()) {
+ if (req->isUncacheable()) {
// Don't update result register (see stq_c in isa_desc)
req->result = 2;
} else {
@@ -469,7 +469,7 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
*res = req->result;
*/
/*
- if (!dcacheInterface && (req->flags & UNCACHEABLE))
+ if (!dcacheInterface && (req->isUncacheable()))
recordEvent("Uncached Write");
*/
return NoFault;
diff --git a/src/cpu/ozone/lsq_unit.hh b/src/cpu/ozone/lsq_unit.hh
index 38c1c09a2..056c79521 100644
--- a/src/cpu/ozone/lsq_unit.hh
+++ b/src/cpu/ozone/lsq_unit.hh
@@ -426,7 +426,7 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
// @todo: Fix uncached accesses.
- if (req->flags & UNCACHEABLE &&
+ if (req->isUncacheable() &&
(load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) {
return TheISA::genMachineCheckFault();
diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh
index ee0804036..c46eb90be 100644
--- a/src/cpu/ozone/lsq_unit_impl.hh
+++ b/src/cpu/ozone/lsq_unit_impl.hh
@@ -577,7 +577,7 @@ OzoneLSQ<Impl>::writebackStores()
MemAccessResult result = dcacheInterface->access(req);
//@todo temp fix for LL/SC (works fine for 1 CPU)
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
req->result=1;
panic("LL/SC! oh no no support!!!");
}
@@ -596,7 +596,7 @@ OzoneLSQ<Impl>::writebackStores()
Event *wb = NULL;
/*
typename IEW::LdWritebackEvent *wb = NULL;
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
// Stx_C does not generate a system port transaction.
req->result=0;
wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
@@ -630,7 +630,7 @@ OzoneLSQ<Impl>::writebackStores()
// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
// storeQueue[storeWBIdx].inst->seqNum);
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
// Stx_C does not generate a system port transaction.
req->result=1;
typename BackEnd::LdWritebackEvent *wb =
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index 6640a9f34..9b93ce74f 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -260,7 +260,7 @@ class OzoneLWLSQ {
virtual void getDeviceAddressRanges(AddrRangeList &resp,
AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
+ { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1); }
virtual bool recvTiming(PacketPtr pkt);
@@ -507,7 +507,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
// @todo: Fix uncached accesses.
- if (req->getFlags() & UNCACHEABLE &&
+ if (req->isUncacheable() &&
(inst != loadQueue.back() || !inst->isAtCommit())) {
DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
"commit/LSQ!\n",
@@ -659,7 +659,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
return NoFault;
}
- if (req->getFlags() & LOCKED) {
+ if (req->isLocked()) {
cpu->lockFlag = true;
}
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index 4c96ad149..e523712da 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -72,7 +72,7 @@ template <class Impl>
void
OzoneLWLSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
{
- panic("O3CPU doesn't expect recvFunctional callback!");
+ warn("O3CPU doesn't update things on a recvFunctional");
}
template <class Impl>
@@ -394,7 +394,7 @@ OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
// Actually probably want the oldest faulting load
if (load_fault != NoFault) {
DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
- if (!(inst->req->getFlags() & UNCACHEABLE && !inst->isAtCommit())) {
+ if (!(inst->req->isUncacheable() && !inst->isAtCommit())) {
inst->setExecuted();
}
// Maybe just set it as can commit here, although that might cause
@@ -605,8 +605,8 @@ OzoneLWLSQ<Impl>::writebackStores()
inst->seqNum);
// @todo: Remove this SC hack once the memory system handles it.
- if (req->getFlags() & LOCKED) {
- if (req->getFlags() & UNCACHEABLE) {
+ if (req->isLocked()) {
+ if (req->isUncacheable()) {
req->setScResult(2);
} else {
if (cpu->lockFlag) {
@@ -663,7 +663,7 @@ OzoneLWLSQ<Impl>::writebackStores()
if (result != MA_HIT && dcacheInterface->doEvents()) {
store_event->miss = true;
typename BackEnd::LdWritebackEvent *wb = NULL;
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
wb = new typename BackEnd::LdWritebackEvent(inst,
be);
store_event->wbEvent = wb;
@@ -690,7 +690,7 @@ OzoneLWLSQ<Impl>::writebackStores()
// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
// inst->seqNum);
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
// Stx_C does not generate a system port
// transaction in the 21264, but that might be
// hard to accomplish in this model.
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 7ba1b7df1..490be20ae 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -28,6 +28,7 @@
* Authors: Steve Reinhardt
*/
+#include "arch/locked_mem.hh"
#include "arch/utility.hh"
#include "cpu/exetrace.hh"
#include "cpu/simple/atomic.hh"
@@ -93,7 +94,7 @@ AtomicSimpleCPU::init()
bool
AtomicSimpleCPU::CpuPort::recvTiming(Packet *pkt)
{
- panic("AtomicSimpleCPU doesn't expect recvAtomic callback!");
+ panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
return true;
}
@@ -107,7 +108,8 @@ AtomicSimpleCPU::CpuPort::recvAtomic(Packet *pkt)
void
AtomicSimpleCPU::CpuPort::recvFunctional(Packet *pkt)
{
- panic("AtomicSimpleCPU doesn't expect recvFunctional callback!");
+ //No internal storage to update, just return
+ return;
}
void
@@ -133,20 +135,19 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
{
_status = Idle;
- // @todo fix me and get the real cpu id & thread number!!!
ifetch_req = new Request();
- ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
+ ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT
ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast);
ifetch_pkt->dataStatic(&inst);
data_read_req = new Request();
- data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
+ data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too
data_read_pkt = new Packet(data_read_req, Packet::ReadReq,
Packet::Broadcast);
data_read_pkt->dataStatic(&dataReg);
data_write_req = new Request();
- data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
+ data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too
data_write_pkt = new Packet(data_write_req, Packet::WriteReq,
Packet::Broadcast);
}
@@ -161,9 +162,11 @@ AtomicSimpleCPU::serialize(ostream &os)
{
SimObject::State so_state = SimObject::getState();
SERIALIZE_ENUM(so_state);
+ Status _status = status();
+ SERIALIZE_ENUM(_status);
+ BaseSimpleCPU::serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
- BaseSimpleCPU::serialize(os);
}
void
@@ -171,8 +174,9 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
SimObject::State so_state;
UNSERIALIZE_ENUM(so_state);
- tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+ UNSERIALIZE_ENUM(_status);
BaseSimpleCPU::unserialize(cp, section);
+ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}
void
@@ -253,29 +257,36 @@ template <class T>
Fault
AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
{
- data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
+ // use the CPU's statically allocated read request and packet objects
+ Request *req = data_read_req;
+ Packet *pkt = data_read_pkt;
+
+ req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
if (traceData) {
traceData->setAddr(addr);
}
// translate to physical address
- Fault fault = thread->translateDataReadReq(data_read_req);
+ Fault fault = thread->translateDataReadReq(req);
// Now do the access.
if (fault == NoFault) {
- data_read_pkt->reinitFromRequest();
+ pkt->reinitFromRequest();
- dcache_latency = dcachePort.sendAtomic(data_read_pkt);
+ dcache_latency = dcachePort.sendAtomic(pkt);
dcache_access = true;
- assert(data_read_pkt->result == Packet::Success);
- data = data_read_pkt->get<T>();
+ assert(pkt->result == Packet::Success);
+ data = pkt->get<T>();
+ if (req->isLocked()) {
+ TheISA::handleLockedRead(thread, req);
+ }
}
// This will need a new way to tell if it has a dcache attached.
- if (data_read_req->getFlags() & UNCACHEABLE)
+ if (req->isUncacheable())
recordEvent("Uncached Read");
return fault;
@@ -328,33 +339,52 @@ template <class T>
Fault
AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
{
- data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
+ // use the CPU's statically allocated write request and packet objects
+ Request *req = data_write_req;
+ Packet *pkt = data_write_pkt;
+
+ req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
if (traceData) {
traceData->setAddr(addr);
}
// translate to physical address
- Fault fault = thread->translateDataWriteReq(data_write_req);
+ Fault fault = thread->translateDataWriteReq(req);
// Now do the access.
if (fault == NoFault) {
- data = htog(data);
- data_write_pkt->reinitFromRequest();
- data_write_pkt->dataStatic(&data);
+ bool do_access = true; // flag to suppress cache access
- dcache_latency = dcachePort.sendAtomic(data_write_pkt);
- dcache_access = true;
+ if (req->isLocked()) {
+ do_access = TheISA::handleLockedWrite(thread, req);
+ }
+
+ if (do_access) {
+ data = htog(data);
+ pkt->reinitFromRequest();
+ pkt->dataStatic(&data);
- assert(data_write_pkt->result == Packet::Success);
+ dcache_latency = dcachePort.sendAtomic(pkt);
+ dcache_access = true;
- if (res && data_write_req->getFlags() & LOCKED) {
- *res = data_write_req->getScResult();
+ assert(pkt->result == Packet::Success);
+ }
+
+ if (req->isLocked()) {
+ uint64_t scResult = req->getScResult();
+ if (scResult != 0) {
+ // clear failure counter
+ thread->setStCondFailures(0);
+ }
+ if (res) {
+ *res = req->getScResult();
+ }
}
}
// This will need a new way to tell if it's hooked up to a cache or not.
- if (data_write_req->getFlags() & UNCACHEABLE)
+ if (req->isUncacheable())
recordEvent("Uncached Write");
// If the write needs to have a fault on the access, consider calling
@@ -467,11 +497,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
Param<Tick> progress_interval;
SimObjectParam<MemObject *> mem;
SimObjectParam<System *> system;
+ Param<int> cpu_id;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
- Param<int> cpu_id;
Param<Tick> profile;
#else
SimObjectParam<Process *> workload;
@@ -500,11 +530,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
INIT_PARAM(progress_interval, "Progress interval"),
INIT_PARAM(mem, "memory"),
INIT_PARAM(system, "system object"),
+ INIT_PARAM(cpu_id, "processor ID"),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
INIT_PARAM(dtb, "Data TLB"),
- INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "processes to run"),
@@ -538,11 +568,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU)
params->simulate_stalls = simulate_stalls;
params->mem = mem;
params->system = system;
+ params->cpu_id = cpu_id;
#if FULL_SYSTEM
params->itb = itb;
params->dtb = dtb;
- params->cpu_id = cpu_id;
params->profile = profile;
#else
params->process = workload;
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index b602af558..52afd76ef 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -104,9 +104,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU
virtual void getDeviceAddressRanges(AddrRangeList &resp,
AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
- };
+ { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
+ };
CpuPort icachePort;
CpuPort dcachePort;
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 03ee27e04..33f673cbc 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -28,6 +28,7 @@
* Authors: Steve Reinhardt
*/
+#include "arch/locked_mem.hh"
#include "arch/utility.hh"
#include "cpu/exetrace.hh"
#include "cpu/simple/timing.hh"
@@ -73,7 +74,8 @@ TimingSimpleCPU::CpuPort::recvAtomic(Packet *pkt)
void
TimingSimpleCPU::CpuPort::recvFunctional(Packet *pkt)
{
- panic("TimingSimpleCPU doesn't expect recvFunctional callback!");
+ //No internal storage to update, jusst return
+ return;
}
void
@@ -94,12 +96,14 @@ TimingSimpleCPU::CpuPort::TickEvent::schedule(Packet *_pkt, Tick t)
}
TimingSimpleCPU::TimingSimpleCPU(Params *p)
- : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock)
+ : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock),
+ cpu_id(p->cpu_id)
{
_status = Idle;
ifetch_pkt = dcache_pkt = NULL;
drainEvent = NULL;
fetchEvent = NULL;
+ previousTick = 0;
changeState(SimObject::Running);
}
@@ -158,6 +162,7 @@ TimingSimpleCPU::resume()
assert(system->getMemoryMode() == System::Timing);
changeState(SimObject::Running);
+ previousTick = curTick;
}
void
@@ -165,6 +170,7 @@ TimingSimpleCPU::switchOut()
{
assert(status() == Running || status() == Idle);
_status = SwitchedOut;
+ numCycles += curTick - previousTick;
// If we've been scheduled to resume but are then told to switch out,
// we'll need to cancel it.
@@ -187,6 +193,27 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
break;
}
}
+
+ if (_status != Running) {
+ _status = Idle;
+ }
+
+ Port *peer;
+ if (icachePort.getPeer() == NULL) {
+ peer = oldCPU->getPort("icache_port")->getPeer();
+ icachePort.setPeer(peer);
+ } else {
+ peer = icachePort.getPeer();
+ }
+ peer->setPeer(&icachePort);
+
+ if (dcachePort.getPeer() == NULL) {
+ peer = oldCPU->getPort("dcache_port")->getPeer();
+ dcachePort.setPeer(peer);
+ } else {
+ peer = dcachePort.getPeer();
+ }
+ peer->setPeer(&dcachePort);
}
@@ -227,35 +254,35 @@ template <class T>
Fault
TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
{
- // need to fill in CPU & thread IDs here
- Request *data_read_req = new Request();
- data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
- data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
+ Request *req =
+ new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
+ cpu_id, /* thread ID */ 0);
if (traceData) {
- traceData->setAddr(data_read_req->getVaddr());
+ traceData->setAddr(req->getVaddr());
}
// translate to physical address
- Fault fault = thread->translateDataReadReq(data_read_req);
+ Fault fault = thread->translateDataReadReq(req);
// Now do the access.
if (fault == NoFault) {
- Packet *data_read_pkt =
- new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast);
- data_read_pkt->dataDynamic<T>(new T);
+ Packet *pkt =
+ new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ pkt->dataDynamic<T>(new T);
- if (!dcachePort.sendTiming(data_read_pkt)) {
+ if (!dcachePort.sendTiming(pkt)) {
_status = DcacheRetry;
- dcache_pkt = data_read_pkt;
+ dcache_pkt = pkt;
} else {
_status = DcacheWaitResponse;
+ // memory system takes ownership of packet
dcache_pkt = NULL;
}
}
// This will need a new way to tell if it has a dcache attached.
- if (data_read_req->getFlags() & UNCACHEABLE)
+ if (req->isUncacheable())
recordEvent("Uncached Read");
return fault;
@@ -308,31 +335,39 @@ template <class T>
Fault
TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
{
- // need to fill in CPU & thread IDs here
- Request *data_write_req = new Request();
- data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
- data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
+ Request *req =
+ new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
+ cpu_id, /* thread ID */ 0);
// translate to physical address
- Fault fault = thread->translateDataWriteReq(data_write_req);
+ Fault fault = thread->translateDataWriteReq(req);
+
// Now do the access.
if (fault == NoFault) {
- Packet *data_write_pkt =
- new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast);
- data_write_pkt->allocate();
- data_write_pkt->set(data);
+ assert(dcache_pkt == NULL);
+ dcache_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+ dcache_pkt->allocate();
+ dcache_pkt->set(data);
- if (!dcachePort.sendTiming(data_write_pkt)) {
- _status = DcacheRetry;
- dcache_pkt = data_write_pkt;
- } else {
- _status = DcacheWaitResponse;
- dcache_pkt = NULL;
+ bool do_access = true; // flag to suppress cache access
+
+ if (req->isLocked()) {
+ do_access = TheISA::handleLockedWrite(thread, req);
+ }
+
+ if (do_access) {
+ if (!dcachePort.sendTiming(dcache_pkt)) {
+ _status = DcacheRetry;
+ } else {
+ _status = DcacheWaitResponse;
+ // memory system takes ownership of packet
+ dcache_pkt = NULL;
+ }
}
}
// This will need a new way to tell if it's hooked up to a cache or not.
- if (data_write_req->getFlags() & UNCACHEABLE)
+ if (req->isUncacheable())
recordEvent("Uncached Write");
// If the write needs to have a fault on the access, consider calling
@@ -392,9 +427,8 @@ TimingSimpleCPU::fetch()
{
checkForInterrupts();
- // need to fill in CPU & thread IDs here
Request *ifetch_req = new Request();
- ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE
+ ifetch_req->setThreadContext(cpu_id, /* thread ID */ 0);
Fault fault = setupFetchRequest(ifetch_req);
ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast);
@@ -414,6 +448,9 @@ TimingSimpleCPU::fetch()
// fetch fault: advance directly to next instruction (fault handler)
advanceInst(fault);
}
+
+ numCycles += curTick - previousTick;
+ previousTick = curTick;
}
@@ -444,6 +481,9 @@ TimingSimpleCPU::completeIfetch(Packet *pkt)
delete pkt->req;
delete pkt;
+ numCycles += curTick - previousTick;
+ previousTick = curTick;
+
if (getState() == SimObject::Draining) {
completeDrain();
return;
@@ -453,12 +493,20 @@ TimingSimpleCPU::completeIfetch(Packet *pkt)
if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) {
// load or store: just send to dcache
Fault fault = curStaticInst->initiateAcc(this, traceData);
- if (fault == NoFault) {
- // successfully initiated access: instruction will
- // complete in dcache response callback
- assert(_status == DcacheWaitResponse);
+ if (_status != Running) {
+ // instruction will complete in dcache response callback
+ assert(_status == DcacheWaitResponse || _status == DcacheRetry);
+ assert(fault == NoFault);
} else {
- // fault: complete now to invoke fault handler
+ if (fault == NoFault) {
+ // early fail on store conditional: complete now
+ assert(dcache_pkt != NULL);
+ fault = curStaticInst->completeAcc(dcache_pkt, this,
+ traceData);
+ delete dcache_pkt->req;
+ delete dcache_pkt;
+ dcache_pkt = NULL;
+ }
postExecute();
advanceInst(fault);
}
@@ -479,8 +527,7 @@ TimingSimpleCPU::IcachePort::ITickEvent::process()
bool
TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt)
{
- // These next few lines could be replaced with something faster
- // who knows what though
+ // delay processing of returned data until next CPU clock edge
Tick time = pkt->req->getTime();
while (time < curTick)
time += lat;
@@ -516,21 +563,27 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt)
assert(_status == DcacheWaitResponse);
_status = Running;
- if (getState() == SimObject::Draining) {
- completeDrain();
+ numCycles += curTick - previousTick;
+ previousTick = curTick;
- delete pkt->req;
- delete pkt;
+ Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
- return;
+ if (pkt->isRead() && pkt->req->isLocked()) {
+ TheISA::handleLockedRead(thread, pkt->req);
}
- Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
-
delete pkt->req;
delete pkt;
postExecute();
+
+ if (getState() == SimObject::Draining) {
+ advancePC(fault);
+ completeDrain();
+
+ return;
+ }
+
advanceInst(fault);
}
@@ -546,6 +599,7 @@ TimingSimpleCPU::completeDrain()
bool
TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt)
{
+ // delay processing of returned data until next CPU clock edge
Tick time = pkt->req->getTime();
while (time < curTick)
time += lat;
@@ -574,6 +628,7 @@ TimingSimpleCPU::DcachePort::recvRetry()
Packet *tmp = cpu->dcache_pkt;
if (sendTiming(tmp)) {
cpu->_status = DcacheWaitResponse;
+ // memory system takes ownership of packet
cpu->dcache_pkt = NULL;
}
}
@@ -592,11 +647,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU)
Param<Tick> progress_interval;
SimObjectParam<MemObject *> mem;
SimObjectParam<System *> system;
+ Param<int> cpu_id;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
- Param<int> cpu_id;
Param<Tick> profile;
#else
SimObjectParam<Process *> workload;
@@ -625,11 +680,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU)
INIT_PARAM(progress_interval, "Progress interval"),
INIT_PARAM(mem, "memory"),
INIT_PARAM(system, "system object"),
+ INIT_PARAM(cpu_id, "processor ID"),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
INIT_PARAM(dtb, "Data TLB"),
- INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "processes to run"),
@@ -661,11 +716,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU)
params->functionTraceStart = function_trace_start;
params->mem = mem;
params->system = system;
+ params->cpu_id = cpu_id;
#if FULL_SYSTEM
params->itb = itb;
params->dtb = dtb;
- params->cpu_id = cpu_id;
params->profile = profile;
#else
params->process = workload;
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index d03fa4bc0..988ddeded 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -92,7 +92,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
virtual void getDeviceAddressRanges(AddrRangeList &resp,
AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
+ { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
struct TickEvent : public Event
{
@@ -166,6 +166,9 @@ class TimingSimpleCPU : public BaseSimpleCPU
Packet *ifetch_pkt;
Packet *dcache_pkt;
+ int cpu_id;
+ Tick previousTick;
+
public:
virtual Port *getPort(const std::string &if_name, int idx = -1);
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 242cfd0e1..6fa6500bd 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -237,7 +237,7 @@ class SimpleThread : public ThreadState
Fault read(RequestPtr &req, T &data)
{
#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
}
@@ -256,10 +256,10 @@ class SimpleThread : public ThreadState
ExecContext *xc;
// If this is a store conditional, act appropriately
- if (req->flags & LOCKED) {
+ if (req->isLocked()) {
xc = req->xc;
- if (req->flags & UNCACHEABLE) {
+ if (req->isUncacheable()) {
// Don't update result register (see stq_c in isa_desc)
req->result = 2;
xc->setStCondFailures(0);//Needed? [RGD]