summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kevin Lim <ktlim@umich.edu> 2006-06-05 18:14:39 -0400
committer Kevin Lim <ktlim@umich.edu> 2006-06-05 18:14:39 -0400
commit 090496bf2d4c0f55f7f5869a374b4ec3826bccbc (patch)
tree 4be899992389661b5cd60f2f067e39e719577430
parent 295c7a908cfeecc7276f559ff53282a177f4eb66 (diff)
download gem5-090496bf2d4c0f55f7f5869a374b4ec3826bccbc.tar.xz
Fixes to get new CPU model working for simple test case. The CPU does not yet support retrying accesses.
src/cpu/base_dyn_inst.cc:
    Delete the allocated data in destructor.
src/cpu/base_dyn_inst.hh:
    Only copy the addresses if the translation succeeded.
src/cpu/o3/alpha_cpu.hh:
    Return actual translating port. Don't panic on setNextNPC() as it's always called, regardless of the architecture, when the process initializes.
src/cpu/o3/alpha_cpu_impl.hh:
    Pass in memobject to the thread state in SE mode.
src/cpu/o3/commit_impl.hh:
    Initialize all variables.
src/cpu/o3/decode_impl.hh:
    Handle early resolution of branches properly.
src/cpu/o3/fetch.hh:
    Switch structure back to requests.
src/cpu/o3/fetch_impl.hh:
    Initialize all variables, create/delete requests properly.
src/cpu/o3/lsq_unit.hh:
    Include sender state along with the packet. Also include a more generic writeback event that's only used for stores forwarding data to loads.
src/cpu/o3/lsq_unit_impl.hh:
    Redo writeback code to support the response path of the memory system.
src/cpu/o3/mem_dep_unit.cc:
src/cpu/o3/mem_dep_unit_impl.hh:
    Wrap variables in #ifdefs.
src/cpu/o3/store_set.cc:
    Include base/misc.hh to get panic() function.
src/cpu/o3/thread_state.hh:
    Create with MemObject as well.
src/cpu/thread_state.hh:
    Have a translating port in the thread state object.
src/python/m5/objects/AlphaFullCPU.py:
    Mem parameter no longer needed.
--HG--
extra : convert_revision : a99381fb25cb183322882ce20935a6f3d1f2b64d
-rw-r--r-- src/cpu/base_dyn_inst.cc 8
-rw-r--r-- src/cpu/base_dyn_inst.hh 15
-rw-r--r-- src/cpu/o3/alpha_cpu.hh 4
-rw-r--r-- src/cpu/o3/alpha_cpu_impl.hh 5
-rw-r--r-- src/cpu/o3/commit_impl.hh 2
-rw-r--r-- src/cpu/o3/decode_impl.hh 5
-rw-r--r-- src/cpu/o3/fetch.hh 4
-rw-r--r-- src/cpu/o3/fetch_impl.hh 30
-rw-r--r-- src/cpu/o3/lsq_unit.hh 61
-rw-r--r-- src/cpu/o3/lsq_unit_impl.hh 164
-rw-r--r-- src/cpu/o3/mem_dep_unit.cc 2
-rw-r--r-- src/cpu/o3/mem_dep_unit_impl.hh 14
-rw-r--r-- src/cpu/o3/store_set.cc 1
-rw-r--r-- src/cpu/o3/thread_state.hh 11
-rw-r--r-- src/cpu/thread_state.hh 26
-rw-r--r-- src/python/m5/objects/AlphaFullCPU.py 3
16 files changed, 229 insertions, 126 deletions
diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc
index 66e425d5c..a62930560 100644
--- a/src/cpu/base_dyn_inst.cc
+++ b/src/cpu/base_dyn_inst.cc
@@ -96,12 +96,14 @@ void
BaseDynInst<Impl>::initVars()
{
req = NULL;
+ memData = NULL;
effAddr = 0;
physEffAddr = 0;
storeSize = 0;
readyRegs = 0;
+ // May want to turn this into a bit vector or something.
completed = false;
resultReady = false;
canIssue = false;
@@ -161,7 +163,11 @@ template <class Impl>
BaseDynInst<Impl>::~BaseDynInst()
{
if (req) {
- req = NULL;
+ delete req;
+ }
+
+ if (memData) {
+ delete [] memData;
}
if (traceData) {
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index a250427ce..1f2b44e02 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -660,11 +660,11 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
fault = cpu->translateDataReadReq(req);
- effAddr = req->getVaddr();
- physEffAddr = req->getPaddr();
- memReqFlags = req->getFlags();
-
if (fault == NoFault) {
+ effAddr = req->getVaddr();
+ physEffAddr = req->getPaddr();
+ memReqFlags = req->getFlags();
+
#if FULL_SYSTEM
if (cpu->system->memctrl->badaddr(physEffAddr)) {
fault = TheISA::genMachineCheckFault();
@@ -715,11 +715,10 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
fault = cpu->translateDataWriteReq(req);
- effAddr = req->getVaddr();
- physEffAddr = req->getPaddr();
- memReqFlags = req->getFlags();
-
if (fault == NoFault) {
+ effAddr = req->getVaddr();
+ physEffAddr = req->getPaddr();
+ memReqFlags = req->getFlags();
#if FULL_SYSTEM
if (cpu->system->memctrl->badaddr(physEffAddr)) {
fault = TheISA::genMachineCheckFault();
diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh
index 2e5c856a8..3c16c3b2e 100644
--- a/src/cpu/o3/alpha_cpu.hh
+++ b/src/cpu/o3/alpha_cpu.hh
@@ -96,7 +96,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
/** Reads this CPU's ID. */
virtual int readCpuId() { return cpu->cpu_id; }
- virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; }
+ virtual TranslatingPort *getMemPort() { return thread->port; }
#if FULL_SYSTEM
/** Returns a pointer to the system. */
@@ -226,7 +226,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
}
virtual void setNextNPC(uint64_t val)
- { panic("Alpha has no NextNPC!"); }
+ { }
/** Reads a miscellaneous register. */
virtual MiscReg readMiscReg(int misc_reg)
diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh
index ad4401f7e..7c136638d 100644
--- a/src/cpu/o3/alpha_cpu_impl.hh
+++ b/src/cpu/o3/alpha_cpu_impl.hh
@@ -73,7 +73,8 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
if (i < params->workload.size()) {
DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x",
i, this->thread[i]);
- this->thread[i] = new Thread(this, i, params->workload[i], i);
+ this->thread[i] = new Thread(this, i, params->workload[i],
+ i, params->mem);
this->thread[i]->setStatus(ExecContext::Suspended);
//usedTids[i] = true;
@@ -83,7 +84,7 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
//when scheduling threads to CPU
Process* dummy_proc = NULL;
- this->thread[i] = new Thread(this, i, dummy_proc, i);
+ this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem);
//usedTids[i] = false;
}
#endif // !FULL_SYSTEM
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index f8a252b87..629acb310 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -75,6 +75,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
iewWidth(params->executeWidth),
commitWidth(params->commitWidth),
numThreads(params->numberOfThreads),
+ switchPending(false),
switchedOut(false),
trapLatency(params->trapLatency),
fetchTrapLatency(params->fetchTrapLatency)
@@ -115,6 +116,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
changedROBNumEntries[i] = false;
trapSquash[i] = false;
xcSquash[i] = false;
+ PC[i] = nextPC[i] = 0;
}
fetchFaultTick = 0;
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 64b04bc3d..8a6ea6626 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -280,7 +280,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].predIncorrect = true;
toFetch->decodeInfo[tid].squash = true;
- toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
+ toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
toFetch->decodeInfo[tid].branchTaken =
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
@@ -723,9 +723,8 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Go ahead and compute any PC-relative branches.
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
++decodeBranchResolved;
- inst->setNextPC(inst->branchTarget());
- if (inst->mispredicted()) {
+ if (inst->branchTarget() != inst->readPredTarg()) {
++decodeBranchMispred;
// Might want to set some sort of boolean and just do
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 23328c534..9e8aeb8fb 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -323,8 +323,8 @@ class DefaultFetch
/** Per-thread next PC. */
Addr nextPC[Impl::MaxThreads];
- /** Memory packet used to access cache. */
- PacketPtr memPkt[Impl::MaxThreads];
+ /** Memory request used to access cache. */
+ RequestPtr memReq[Impl::MaxThreads];
/** Variable that tracks if fetch has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 69c43a6a2..84f2c3b7e 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -105,7 +105,8 @@ DefaultFetch<Impl>::IcachePort::recvRetry()
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(Params *params)
- : branchPred(params),
+ : mem(params->mem),
+ branchPred(params),
decodeToFetchDelay(params->decodeToFetchDelay),
renameToFetchDelay(params->renameToFetchDelay),
iewToFetchDelay(params->iewToFetchDelay),
@@ -113,7 +114,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
fetchWidth(params->fetchWidth),
numThreads(params->numberOfThreads),
numFetchingThreads(params->smtNumFetchingThreads),
- interruptPending(false)
+ interruptPending(false),
+ switchedOut(false)
{
if (numThreads > Impl::MaxThreads)
fatal("numThreads is not a valid value\n");
@@ -161,7 +163,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
priorityList.push_back(tid);
- memPkt[tid] = NULL;
+ memReq[tid] = NULL;
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
@@ -283,6 +285,10 @@ DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr)
// Name is finally available, so create the port.
icachePort = new IcachePort(this);
+ Port *mem_dport = mem->getPort("");
+ icachePort->setPeer(mem_dport);
+ mem_dport->setPeer(icachePort);
+
// Fetch needs to start fetching instructions at the very beginning,
// so it must start up in active state.
switchToActive();
@@ -355,10 +361,12 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
// Only change the status if it's still waiting on the icache access
// to return.
if (fetchStatus[tid] != IcacheWaitResponse ||
- pkt != memPkt[tid] ||
+ pkt->req != memReq[tid] ||
isSwitchedOut()) {
++fetchIcacheSquashes;
+ delete pkt->req;
delete pkt;
+ memReq[tid] = NULL;
return;
}
@@ -383,7 +391,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
// Reset the mem req to NULL.
delete pkt->req;
delete pkt;
- memPkt[tid] = NULL;
+ memReq[tid] = NULL;
}
template <class Impl>
@@ -514,7 +522,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags,
fetch_PC, cpu->readCpuId(), tid);
- memPkt[tid] = NULL;
+ memReq[tid] = mem_req;
// Translate the instruction request.
//#if FULL_SYSTEM
@@ -565,6 +573,9 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
"response.\n", tid);
fetchStatus[tid] = IcacheWaitResponse;
+ } else {
+ delete mem_req;
+ memReq[tid] = NULL;
}
ret_fault = fault;
@@ -585,8 +596,9 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
if (fetchStatus[tid] == IcacheWaitResponse) {
DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
tid);
- delete memPkt[tid];
- memPkt[tid] = NULL;
+ // Should I delete this here or when it comes back from the cache?
+// delete memReq[tid];
+ memReq[tid] = NULL;
}
fetchStatus[tid] = Squashing;
@@ -1083,7 +1095,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#else // !FULL_SYSTEM
- fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
+ warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#endif // FULL_SYSTEM
}
}
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 393d8947d..414309679 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -130,8 +130,6 @@ class LSQUnit {
void completeDataAccess(PacketPtr pkt);
- void completeStoreDataAccess(DynInstPtr &inst);
-
// @todo: Include stats in the LSQ unit.
//void regStats();
@@ -206,10 +204,12 @@ class LSQUnit {
/** Returns if the LSQ unit will writeback on this cycle. */
bool willWB() { return storeQueue[storeWBIdx].canWB &&
- !storeQueue[storeWBIdx].completed/* &&
- !dcacheInterface->isBlocked()*/; }
+ !storeQueue[storeWBIdx].completed &&
+ !isStoreBlocked; }
private:
+ void writeback(DynInstPtr &inst, PacketPtr pkt);
+
/** Completes the store at the specified index. */
void completeStore(int store_idx);
@@ -265,9 +265,43 @@ class LSQUnit {
/** Pointer to the D-cache. */
DcachePort *dcachePort;
+ class LSQSenderState : public Packet::SenderState
+ {
+ public:
+ LSQSenderState()
+ : noWB(false)
+ { }
+
+// protected:
+ DynInstPtr inst;
+ bool isLoad;
+ int idx;
+ bool noWB;
+ };
+
/** Pointer to the page table. */
// PageTable *pTable;
+ class WritebackEvent : public Event {
+ public:
+ /** Constructs a writeback event. */
+ WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
+
+ /** Processes the writeback event. */
+ void process();
+
+ /** Returns the description of this event. */
+ const char *description();
+
+ private:
+ DynInstPtr inst;
+
+ PacketPtr pkt;
+
+ /** The pointer to the LSQ unit that issued the store. */
+ LSQUnit<Impl> *lsqPtr;
+ };
+
public:
struct SQEntry {
/** Constructs an empty store queue entry. */
@@ -362,6 +396,8 @@ class LSQUnit {
/** The index of the above store. */
int stallingLoadIdx;
+ bool isStoreBlocked;
+
/** Whether or not a load is blocked due to the memory system. */
bool isLoadBlocked;
@@ -521,16 +557,17 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
"addr %#x, data %#x\n",
store_idx, req->getVaddr(), *(load_inst->memData));
-/*
- typename LdWritebackEvent *wb =
- new typename LdWritebackEvent(load_inst,
- iewStage);
+
+ PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(load_inst->memData);
+
+ WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
// We'll say this has a 1 cycle load-store forwarding latency
// for now.
// @todo: Need to make this a parameter.
wb->schedule(curTick);
-*/
+
// Should keep track of stat for forwarded data
return NoFault;
} else if ((store_has_lower_limit && lower_load_has_store_part) ||
@@ -585,6 +622,12 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
data_pkt->dataStatic(load_inst->memData);
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = true;
+ state->idx = load_idx;
+ state->inst = load_inst;
+ data_pkt->senderState = state;
+
// if we have a cache, do cache access too
if (!dcachePort->sendTiming(data_pkt)) {
// There's an older load that's already going to squash.
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 1ad561dc0..5398426e2 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -32,65 +32,57 @@
#include "mem/request.hh"
template<class Impl>
-void
-LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
+LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
+ LSQUnit *lsq_ptr)
+ : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
{
-/*
- DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
- DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
-
- //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
-
- if (iewStage->isSwitchedOut()) {
- inst = NULL;
- return;
- } else if (inst->isSquashed()) {
- iewStage->wakeCPU();
- inst = NULL;
- return;
- }
-
- iewStage->wakeCPU();
-
- if (!inst->isExecuted()) {
- inst->setExecuted();
+ this->setFlags(Event::AutoDelete);
+}
- // Complete access to copy data to proper place.
- inst->completeAcc();
+template<class Impl>
+void
+LSQUnit<Impl>::WritebackEvent::process()
+{
+ if (!lsqPtr->isSwitchedOut()) {
+ lsqPtr->writeback(inst, pkt);
}
+ delete pkt;
+}
- // Need to insert instruction into queue to commit
- iewStage->instToCommit(inst);
-
- iewStage->activityThisCycle();
-
- inst = NULL;
-*/
+template<class Impl>
+const char *
+LSQUnit<Impl>::WritebackEvent::description()
+{
+ return "Store writeback event";
}
template<class Impl>
void
-LSQUnit<Impl>::completeStoreDataAccess(DynInstPtr &inst)
+LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
{
-/*
- DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx);
- DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx);
+ LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
+ DynInstPtr inst = state->inst;
+ DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum);
+// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
- //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
-
- if (lsqPtr->isSwitchedOut()) {
- if (wbEvent)
- delete wbEvent;
+ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+ if (isSwitchedOut() || inst->isSquashed()) {
+ delete state;
+ delete pkt;
return;
- }
+ } else {
+ if (!state->noWB) {
+ writeback(inst, pkt);
+ }
- lsqPtr->cpu->wakeCPU();
+ if (inst->isStore()) {
+ completeStore(state->idx);
+ }
+ }
- if (wb)
- lsqPtr->completeDataAccess(storeIdx);
- lsqPtr->completeStore(storeIdx);
-*/
+ delete state;
+ delete pkt;
}
template <class Impl>
@@ -146,7 +138,8 @@ LSQUnit<Impl>::DcachePort::recvRetry()
template <class Impl>
LSQUnit<Impl>::LSQUnit()
- : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+ : loads(0), stores(0), storesToWB(0), stalled(false),
+ isStoreBlocked(false), isLoadBlocked(false),
loadBlockedHandled(false)
{
}
@@ -176,9 +169,7 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
usedPorts = 0;
cachePorts = params->cachePorts;
- Port *mem_dport = params->mem->getPort("");
- dcachePort->setPeer(mem_dport);
- mem_dport->setPeer(dcachePort);
+ mem = params->mem;
memDepViolator = NULL;
@@ -191,6 +182,10 @@ LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr)
{
cpu = cpu_ptr;
dcachePort = new DcachePort(cpu, this);
+
+ Port *mem_dport = mem->getPort("");
+ dcachePort->setPeer(mem_dport);
+ mem_dport->setPeer(dcachePort);
}
template<class Impl>
@@ -446,7 +441,6 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
int load_idx = store_inst->lqIdx;
Fault store_fault = store_inst->initiateAcc();
-// Fault store_fault = store_inst->execute();
if (storeQueue[store_idx].size == 0) {
DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
@@ -562,6 +556,12 @@ LSQUnit<Impl>::writebackStores()
storeQueue[storeWBIdx].canWB &&
usedPorts < cachePorts) {
+ if (isStoreBlocked) {
+ DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
+ " is blocked!\n");
+ break;
+ }
+
// Store didn't write any data so no need to write it back to
// memory.
if (storeQueue[storeWBIdx].size == 0) {
@@ -571,13 +571,7 @@ LSQUnit<Impl>::writebackStores()
continue;
}
-/*
- if (dcacheInterface && dcacheInterface->isBlocked()) {
- DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
- " is blocked!\n");
- break;
- }
-*/
+
++usedPorts;
if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
@@ -596,11 +590,18 @@ LSQUnit<Impl>::writebackStores()
assert(!inst->memData);
inst->memData = new uint8_t[64];
- memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, req->getSize());
+ memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data,
+ req->getSize());
PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
data_pkt->dataStatic(inst->memData);
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = false;
+ state->idx = storeWBIdx;
+ state->inst = inst;
+ data_pkt->senderState = state;
+
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
"to Addr:%#x, data:%#x [sn:%lli]\n",
storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
@@ -609,11 +610,8 @@ LSQUnit<Impl>::writebackStores()
if (!dcachePort->sendTiming(data_pkt)) {
// Need to handle becoming blocked on a store.
+ isStoreBlocked = true;
} else {
- /*
- StoreCompletionEvent *store_event = new
- StoreCompletionEvent(storeWBIdx, NULL, this);
- */
if (isStalled() &&
storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
@@ -623,18 +621,13 @@ LSQUnit<Impl>::writebackStores()
stallingStoreIsn = 0;
iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
}
-/*
- typename LdWritebackEvent *wb = NULL;
- if (req->flags & LOCKED) {
- // Stx_C should not generate a system port transaction
- // if it misses in the cache, but that might be hard
- // to accomplish without explicit cache support.
- wb = new typename
- LdWritebackEvent(storeQueue[storeWBIdx].inst,
- iewStage);
- store_event->wbEvent = wb;
+
+ if (!(req->getFlags() & LOCKED)) {
+ assert(!storeQueue[storeWBIdx].inst->isStoreConditional());
+ // Non-store conditionals do not need a writeback.
+ state->noWB = true;
}
-*/
+
if (data_pkt->result != Packet::Success) {
DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
storeWBIdx);
@@ -761,6 +754,31 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
template <class Impl>
void
+LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
+{
+ iewStage->wakeCPU();
+
+ // Squashed instructions do not need to complete their access.
+ if (inst->isSquashed()) {
+ assert(!inst->isStore());
+ return;
+ }
+
+ if (!inst->isExecuted()) {
+ inst->setExecuted();
+
+ // Complete access to copy data to proper place.
+ inst->completeAcc(pkt);
+ }
+
+ // Need to insert instruction into queue to commit
+ iewStage->instToCommit(inst);
+
+ iewStage->activityThisCycle();
+}
+
+template <class Impl>
+void
LSQUnit<Impl>::completeStore(int store_idx)
{
assert(storeQueue[store_idx].inst);
diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc
index 1284361cc..a95103266 100644
--- a/src/cpu/o3/mem_dep_unit.cc
+++ b/src/cpu/o3/mem_dep_unit.cc
@@ -37,6 +37,7 @@
// AlphaSimpleImpl.
template class MemDepUnit<StoreSet, AlphaSimpleImpl>;
+#ifdef DEBUG
template <>
int
MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0;
@@ -46,3 +47,4 @@ MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0;
template <>
int
MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0;
+#endif
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index 50ad1e2c8..16f67a4e0 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -61,7 +61,9 @@ MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
}
}
+#ifdef DEBUG
assert(MemDepEntry::memdep_count == 0);
+#endif
}
template <class MemDepPred, class Impl>
@@ -143,7 +145,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
// Add the MemDepEntry to the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
instList[tid].push_back(inst);
@@ -229,7 +233,9 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
// Insert the MemDepEntry into the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
// Add the instruction to the list.
instList[tid].push_back(inst);
@@ -277,7 +283,9 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
// Add the MemDepEntry to the hash.
memDepHash.insert(
std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
+#ifdef DEBUG
MemDepEntry::memdep_insert++;
+#endif
// Add the instruction to the instruction list.
instList[tid].push_back(barr_inst);
@@ -377,7 +385,9 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
+#ifdef DEBUG
MemDepEntry::memdep_erase++;
+#endif
}
template <class MemDepPred, class Impl>
@@ -472,7 +482,9 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
(*hash_it).second = NULL;
memDepHash.erase(hash_it);
+#ifdef DEBUG
MemDepEntry::memdep_erase++;
+#endif
instList[tid].erase(squash_it--);
}
@@ -553,5 +565,7 @@ MemDepUnit<MemDepPred, Impl>::dumpLists()
cprintf("Memory dependence hash size: %i\n", memDepHash.size());
+#ifdef DEBUG
cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
+#endif
}
diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc
index 0023cee36..2d28b617f 100644
--- a/src/cpu/o3/store_set.cc
+++ b/src/cpu/o3/store_set.cc
@@ -29,6 +29,7 @@
*/
#include "base/intmath.hh"
+#include "base/misc.hh"
#include "base/trace.hh"
#include "cpu/o3/store_set.hh"
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
index dfb1530d0..3fa60f093 100644
--- a/src/cpu/o3/thread_state.hh
+++ b/src/cpu/o3/thread_state.hh
@@ -86,14 +86,9 @@ struct O3ThreadState : public ThreadState {
inSyscall(0), trapPending(0)
{ }
#else
- O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
- : ThreadState(-1, _thread_num, NULL, _process, _asid),
- cpu(_cpu), inSyscall(0), trapPending(0)
- { }
-
- O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
- int _asid)
- : ThreadState(-1, _thread_num, _mem, NULL, _asid),
+ O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid,
+ MemObject *mem)
+ : ThreadState(-1, _thread_num, mem, _process, _asid),
cpu(_cpu), inSyscall(0), trapPending(0)
{ }
#endif
diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh
index e09cb12fd..a96884d5b 100644
--- a/src/cpu/thread_state.hh
+++ b/src/cpu/thread_state.hh
@@ -31,6 +31,10 @@
#include "cpu/exec_context.hh"
+#if !FULL_SYSTEM
+#include "mem/translating_port.hh"
+#endif
+
#if FULL_SYSTEM
class EndQuiesceEvent;
class FunctionProfile;
@@ -51,17 +55,27 @@ class Process;
*/
struct ThreadState {
#if FULL_SYSTEM
- ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem)
- : cpuId(_cpuId), tid(_tid), mem(_mem), lastActivate(0), lastSuspend(0),
+ ThreadState(int _cpuId, int _tid)
+ : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0),
profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL)
#else
- ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem,
+ ThreadState(int _cpuId, int _tid, MemObject *mem,
Process *_process, short _asid)
- : cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid)
+ : cpuId(_cpuId), tid(_tid), process(_process), asid(_asid)
#endif
{
funcExeInst = 0;
storeCondFailures = 0;
+#if !FULL_SYSTEM
+ /* Use this port for syscall emulation writes to memory. */
+ Port *mem_port;
+ port = new TranslatingPort(csprintf("%d-funcport",
+ tid),
+ process->pTable, false);
+ mem_port = mem->getPort("functional");
+ mem_port->setPeer(port);
+ port->setPeer(mem_port);
+#endif
}
ExecContext::Status status;
@@ -79,8 +93,6 @@ struct ThreadState {
Counter numLoad;
Counter startNumLoad;
- FunctionalMemory *mem; // functional storage for process address space
-
#if FULL_SYSTEM
Tick lastActivate;
Tick lastSuspend;
@@ -93,6 +105,8 @@ struct ThreadState {
Kernel::Statistics *kernelStats;
#else
+ TranslatingPort *port;
+
Process *process;
// Address space ID. Note that this is used for TIMING cache
diff --git a/src/python/m5/objects/AlphaFullCPU.py b/src/python/m5/objects/AlphaFullCPU.py
index 043c3c08f..7c772d3f2 100644
--- a/src/python/m5/objects/AlphaFullCPU.py
+++ b/src/python/m5/objects/AlphaFullCPU.py
@@ -6,9 +6,6 @@ class DerivAlphaFullCPU(BaseCPU):
activity = Param.Unsigned("Initial count")
numThreads = Param.Unsigned("number of HW thread contexts")
- if not build_env['FULL_SYSTEM']:
- mem = Param.FunctionalMemory(NULL, "memory")
-
checker = Param.BaseCPU(NULL, "checker")
cachePorts = Param.Unsigned("Cache Ports")