summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/cpu/o3/cpu.cc 16
-rw-r--r-- src/cpu/o3/cpu.hh 2
-rw-r--r-- src/cpu/o3/fetch.hh 3
-rw-r--r-- src/cpu/o3/fetch_impl.hh 8
-rw-r--r-- src/cpu/o3/lsq.hh 59
-rw-r--r-- src/cpu/o3/lsq_impl.hh 53
-rw-r--r-- src/cpu/o3/lsq_unit.hh 88
-rw-r--r-- src/cpu/o3/lsq_unit_impl.hh 51
8 files changed, 158 insertions, 122 deletions
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 7d2727401..6e9b425c0 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -441,7 +441,7 @@ FullO3CPU<Impl>::tick()
if (!tickEvent.scheduled()) {
if (_status == SwitchedOut ||
- getState() == SimObject::DrainedTiming) {
+ getState() == SimObject::Drained) {
// increment stat
lastRunningCycle = curTick;
} else if (!activityRec.active()) {
@@ -803,7 +803,7 @@ FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
}
template <class Impl>
-bool
+unsigned int
FullO3CPU<Impl>::drain(Event *drain_event)
{
drainCount = 0;
@@ -815,7 +815,7 @@ FullO3CPU<Impl>::drain(Event *drain_event)
// Wake the CPU and record activity so everything can drain out if
// the CPU was not able to immediately drain.
- if (getState() != SimObject::DrainedTiming) {
+ if (getState() != SimObject::Drained) {
// A bit of a hack...set the drainEvent after all the drain()
// calls have been made, that way if all of the stages drain
// immediately, the signalDrained() function knows not to call
@@ -825,9 +825,9 @@ FullO3CPU<Impl>::drain(Event *drain_event)
wakeCPU();
activityRec.activity();
- return false;
+ return 1;
} else {
- return true;
+ return 0;
}
}
@@ -835,19 +835,21 @@ template <class Impl>
void
FullO3CPU<Impl>::resume()
{
+ assert(system->getMemoryMode() == System::Timing);
fetch.resume();
decode.resume();
rename.resume();
iew.resume();
commit.resume();
+ changeState(SimObject::Running);
+
if (_status == SwitchedOut || _status == Idle)
return;
if (!tickEvent.scheduled())
tickEvent.schedule(curTick);
_status = Running;
- changeState(SimObject::Timing);
}
template <class Impl>
@@ -858,7 +860,7 @@ FullO3CPU<Impl>::signalDrained()
if (tickEvent.scheduled())
tickEvent.squash();
- changeState(SimObject::DrainedTiming);
+ changeState(SimObject::Drained);
if (drainEvent) {
drainEvent->process();
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 2fbd013ac..83cb966e3 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -330,7 +330,7 @@ class FullO3CPU : public BaseO3CPU
/** Starts draining the CPU's pipeline of all instructions in
* order to stop all memory accesses. */
- virtual bool drain(Event *drain_event);
+ virtual unsigned int drain(Event *drain_event);
/** Resumes execution after a drain. */
virtual void resume();
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 0331cf07f..931919af8 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -407,6 +407,9 @@ class DefaultFetch
/** The PC of the cacheline that has been loaded. */
Addr cacheDataPC[Impl::MaxThreads];
+ /** Whether or not the cache data is valid. */
+ bool cacheDataValid[Impl::MaxThreads];
+
/** Size of instructions. */
int instSize;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 4045492ca..4184e1867 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -162,6 +162,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
+ cacheDataPC[tid] = 0;
+ cacheDataValid[tid] = false;
stalls[tid].decode = 0;
stalls[tid].rename = 0;
@@ -358,6 +360,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
}
memcpy(cacheData[tid], pkt->getPtr<uint8_t *>(), cacheBlkSize);
+ cacheDataValid[tid] = true;
if (!drainPending) {
// Wake up the CPU (if it went to sleep and was waiting on
@@ -520,7 +523,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
fetch_PC = icacheBlockAlignPC(fetch_PC);
// If we've already got the block, no need to try to fetch it again.
- if (fetch_PC == cacheDataPC[tid]) {
+ if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
return true;
}
@@ -555,9 +558,10 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
// Build packet here.
PacketPtr data_pkt = new Packet(mem_req,
Packet::ReadReq, Packet::Broadcast);
- data_pkt->dataDynamic(new uint8_t[cacheBlkSize]);
+ data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
cacheDataPC[tid] = fetch_PC;
+ cacheDataValid[tid] = false;
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index d5890950f..190734dc2 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -70,7 +70,7 @@ class LSQ {
* to work. For now it just returns the port from one of the
* threads.
*/
- Port *getDcachePort() { return thread[0].getDcachePort(); }
+ Port *getDcachePort() { return &dcachePort; }
/** Sets the pointer to the list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr);
@@ -258,6 +258,15 @@ class LSQ {
bool willWB(unsigned tid)
{ return thread[tid].willWB(); }
+ /** Returns if the cache is currently blocked. */
+ bool cacheBlocked()
+ { return retryTid != -1; }
+
+ /** Sets the retry thread id, indicating that one of the LSQUnits
+ * tried to access the cache but the cache was blocked. */
+ void setRetryTid(int tid)
+ { retryTid = tid; }
+
/** Debugging function to print out all instructions. */
void dumpInsts();
/** Debugging function to print out instructions from a specific thread. */
@@ -274,7 +283,49 @@ class LSQ {
template <class T>
Fault write(RequestPtr req, T &data, int store_idx);
- private:
+ /** DcachePort class for this LSQ. Handles doing the
+ * communication with the cache/memory.
+ */
+ class DcachePort : public Port
+ {
+ protected:
+ /** Pointer to LSQ. */
+ LSQ *lsq;
+
+ public:
+ /** Constructs the port with a pointer back to its LSQ. */
+ DcachePort(LSQ *_lsq)
+ : lsq(_lsq)
+ { }
+
+ protected:
+ /** Atomic version of receive. Panics. */
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ /** Functional version of receive. Panics. */
+ virtual void recvFunctional(PacketPtr pkt);
+
+ /** Receives status change. Other than range changing, panics. */
+ virtual void recvStatusChange(Status status);
+
+ /** Returns the address ranges of this device. */
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ /** Timing version of receive. Handles writing back and
+ * completing the load or store that has returned from
+ * memory. */
+ virtual bool recvTiming(PacketPtr pkt);
+
+ /** Handles doing a retry of the previous send. */
+ virtual void recvRetry();
+ };
+
+ /** D-cache port. */
+ DcachePort dcachePort;
+
+ protected:
/** The LSQ policy for SMT mode. */
LSQPolicy lsqPolicy;
@@ -303,6 +354,10 @@ class LSQ {
/** Number of Threads. */
unsigned numThreads;
+
+ /** The thread id of the LSQ Unit that is currently waiting for a
+ * retry. */
+ int retryTid;
};
template <class Impl>
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 89fd1a71d..4e3957029 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -36,9 +36,53 @@
using namespace std;
template <class Impl>
+Tick
+LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("O3CPU model does not work with atomic mode!");
+ return curTick;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("O3CPU doesn't expect recvFunctional callback!");
+}
+
+/**
+ * Status-change callback for the LSQ's dcache port. A RangeChange is
+ * accepted and ignored; any other status is a panic.
+ */
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvStatusChange(Status status)
+{
+    // Range changes are the only notification tolerated here.
+    if (status != RangeChange) {
+        panic("O3CPU doesn't expect recvStatusChange callback!");
+    }
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+ lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
+ return true;
+}
+
+/**
+ * Forwards a retry from the cache to the LSQUnit that is waiting on it.
+ *
+ * retryTid is cleared *before* the unit retries: LSQUnit::recvRetry()
+ * calls setRetryTid() again when its access is still blocked, and
+ * clearing afterwards would overwrite that and make cacheBlocked()
+ * report false while a retry is still pending.
+ */
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvRetry()
+{
+    // Remember which unit to wake, then speculatively clear the tid.
+    const int retry_tid = lsq->retryTid;
+    lsq->retryTid = -1;
+
+    lsq->thread[retry_tid].recvRetry();
+}
+
+template <class Impl>
LSQ<Impl>::LSQ(Params *params)
- : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
- numThreads(params->numberOfThreads)
+ : dcachePort(this), LQEntries(params->LQEntries),
+ SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
+ retryTid(-1)
{
DPRINTF(LSQ, "Creating LSQ object.\n");
@@ -94,7 +138,8 @@ LSQ<Impl>::LSQ(Params *params)
//Initialize LSQs
for (int tid=0; tid < numThreads; tid++) {
- thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
+ thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid);
+ thread[tid].setDcachePort(&dcachePort);
}
}
@@ -130,6 +175,8 @@ LSQ<Impl>::setCPU(O3CPU *cpu_ptr)
{
cpu = cpu_ptr;
+ dcachePort.setName(name());
+
for (int tid=0; tid < numThreads; tid++) {
thread[tid].setCPU(cpu_ptr);
}
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 4d7a8350b..a76a73f0c 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -64,6 +64,7 @@ class LSQUnit {
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::LSQ LSQ;
typedef typename Impl::CPUPol::IssueStruct IssueStruct;
public:
@@ -71,17 +72,12 @@ class LSQUnit {
LSQUnit();
/** Initializes the LSQ unit with the specified number of entries. */
- void init(Params *params, unsigned maxLQEntries,
+ void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id);
/** Returns the name of the LSQ unit. */
std::string name() const;
- /** Returns the dcache port.
- * @todo: Remove this once the port moves up to the LSQ level.
- */
- Port *getDcachePort() { return dcachePort; }
-
/** Registers statistics. */
void regStats();
@@ -92,6 +88,10 @@ class LSQUnit {
void setIEW(IEW *iew_ptr)
{ iewStage = iew_ptr; }
+ /** Sets the pointer to the dcache port. */
+ void setDcachePort(Port *dcache_port)
+ { dcachePort = dcache_port; }
+
/** Switches out LSQ unit. */
void switchOut();
@@ -211,6 +211,9 @@ class LSQUnit {
!storeQueue[storeWBIdx].completed &&
!isStoreBlocked; }
+ /** Handles doing the retry. */
+ void recvRetry();
+
private:
/** Writes back the instruction, sending it to IEW. */
void writeback(DynInstPtr &inst, PacketPtr pkt);
@@ -221,9 +224,6 @@ class LSQUnit {
/** Completes the store at the specified index. */
void completeStore(int store_idx);
- /** Handles doing the retry. */
- void recvRetry();
-
/** Increments the given store index (circular queue). */
inline void incrStIdx(int &store_idx);
/** Decrements the given store index (circular queue). */
@@ -244,54 +244,11 @@ class LSQUnit {
/** Pointer to the IEW stage. */
IEW *iewStage;
- /** Pointer to memory object. */
- MemObject *mem;
+ /** Pointer to the LSQ. */
+ LSQ *lsq;
- /** DcachePort class for this LSQ Unit. Handles doing the
- * communication with the cache/memory.
- * @todo: Needs to be moved to the LSQ level and have some sort
- * of arbitration.
- */
- class DcachePort : public Port
- {
- protected:
- /** Pointer to CPU. */
- O3CPU *cpu;
- /** Pointer to LSQ. */
- LSQUnit *lsq;
-
- public:
- /** Default constructor. */
- DcachePort(O3CPU *_cpu, LSQUnit *_lsq)
- : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq)
- { }
-
- protected:
- /** Atomic version of receive. Panics. */
- virtual Tick recvAtomic(PacketPtr pkt);
-
- /** Functional version of receive. Panics. */
- virtual void recvFunctional(PacketPtr pkt);
-
- /** Receives status change. Other than range changing, panics. */
- virtual void recvStatusChange(Status status);
-
- /** Returns the address ranges of this device. */
- virtual void getDeviceAddressRanges(AddrRangeList &resp,
- AddrRangeList &snoop)
- { resp.clear(); snoop.clear(); }
-
- /** Timing version of receive. Handles writing back and
- * completing the load or store that has returned from
- * memory. */
- virtual bool recvTiming(PacketPtr pkt);
-
- /** Handles doing a retry of the previous send. */
- virtual void recvRetry();
- };
-
- /** Pointer to the D-cache. */
- DcachePort *dcachePort;
+ /** Pointer to the dcache port. Used only for sending. */
+ Port *dcachePort;
/** Derived class to hold any sender state the LSQ needs. */
class LSQSenderState : public Packet::SenderState
@@ -658,7 +615,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
}
// If there's no forwarding case, then go access memory
- DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+ DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
load_inst->seqNum, load_inst->readPC());
assert(!load_inst->memData);
@@ -666,9 +623,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
++usedPorts;
- DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
- load_inst->readPC());
-
PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
data_pkt->dataStatic(load_inst->memData);
@@ -678,8 +632,18 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
state->inst = load_inst;
data_pkt->senderState = state;
- // if we have a cache, do cache access too
- if (!dcachePort->sendTiming(data_pkt)) {
+ // if the cache is not blocked, do cache access
+ if (!lsq->cacheBlocked()) {
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // If the access didn't succeed, tell the LSQ by setting
+ // the retry thread id.
+ lsq->setRetryTid(lsqID);
+ }
+ }
+
+ // If the cache was blocked, or has become blocked due to the access,
+ // handle it.
+ if (lsq->cacheBlocked()) {
++lsqCacheBlocked;
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 8e951534f..85b150cd9 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -31,6 +31,7 @@
#include "config/use_checker.hh"
+#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "base/str.hh"
#include "mem/packet.hh"
@@ -96,46 +97,6 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
template <class Impl>
-Tick
-LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
-{
- panic("O3CPU model does not work with atomic mode!");
- return curTick;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
-{
- panic("O3CPU doesn't expect recvFunctional callback!");
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvStatusChange(Status status)
-{
- if (status == RangeChange)
- return;
-
- panic("O3CPU doesn't expect recvStatusChange callback!");
-}
-
-template <class Impl>
-bool
-LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt)
-{
- lsq->completeDataAccess(pkt);
- return true;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::DcachePort::recvRetry()
-{
- lsq->recvRetry();
-}
-
-template <class Impl>
LSQUnit<Impl>::LSQUnit()
: loads(0), stores(0), storesToWB(0), stalled(false),
isStoreBlocked(false), isLoadBlocked(false),
@@ -145,13 +106,15 @@ LSQUnit<Impl>::LSQUnit()
template<class Impl>
void
-LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
+LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id)
{
DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
switchedOut = false;
+ lsq = lsq_ptr;
+
lsqID = id;
// Add 1 for the sentinel entry (they are circular queues).
@@ -168,8 +131,6 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
usedPorts = 0;
cachePorts = params->cachePorts;
- mem = params->mem;
-
memDepViolator = NULL;
blockedLoadSeqNum = 0;
@@ -180,7 +141,6 @@ void
LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr)
{
cpu = cpu_ptr;
- dcachePort = new DcachePort(cpu, this);
#if USE_CHECKER
if (cpu->checker) {
@@ -588,7 +548,7 @@ LSQUnit<Impl>::writebackStores()
storeQueue[storeWBIdx].canWB &&
usedPorts < cachePorts) {
- if (isStoreBlocked) {
+ if (isStoreBlocked || lsq->cacheBlocked()) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
" is blocked!\n");
break;
@@ -911,6 +871,7 @@ LSQUnit<Impl>::recvRetry()
} else {
// Still blocked!
++lsqCacheBlocked;
+ lsq->setRetryTid(lsqID);
}
} else if (isLoadBlocked) {
DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "