| author | Mitch Hayenga <mitch.hayenga@arm.com> | 2016-07-21 17:19:16 +0100 |
|---|---|---|
| committer | Mitch Hayenga <mitch.hayenga@arm.com> | 2016-07-21 17:19:16 +0100 |
| commit | ff4009ac005be0347015f8ba5a8e37a3aa930e69 (patch) | |
| tree | b80cfa7c70c0e39f54c8c3d78527722cb6658510 /src/cpu/minor/lsq.cc | |
| parent | 8a476d387c84f037d0ccf3cc20dc88870ab45fec (diff) | |
| download | gem5-ff4009ac005be0347015f8ba5a8e37a3aa930e69.tar.xz | |
cpu: Add SMT support to MinorCPU
This patch adds SMT support to the MinorCPU. Currently, either
RoundRobin or Random thread scheduling is supported.
Change-Id: I91faf39ff881af5918cca05051829fc6261f20e3
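The two policies named in the message decide which hardware thread each MinorCPU pipeline stage services in a given cycle: round-robin rotates through the threads, while random picks one each cycle. Below is a minimal sketch of the round-robin ordering only; the `roundRobinOrder` helper and its signature are illustrative assumptions, not the actual MinorCPU interface.

```cpp
#include <cstdint>
#include <vector>

using ThreadID = int16_t;

// Hypothetical helper: list thread IDs starting with the thread after the
// one serviced last, wrapping around. A real scheduler would additionally
// skip threads that have nothing ready to issue this cycle.
std::vector<ThreadID>
roundRobinOrder(ThreadID last_serviced, ThreadID num_threads)
{
    std::vector<ThreadID> order;
    for (ThreadID i = 1; i <= num_threads; i++)
        order.push_back((last_serviced + i) % num_threads);
    return order;
}
```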
Diffstat (limited to 'src/cpu/minor/lsq.cc')
-rw-r--r-- | src/cpu/minor/lsq.cc | 56 |
1 file changed, 46 insertions, 10 deletions
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index b5c0bc974..5995a52c2 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -216,13 +216,14 @@ operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
 void
 LSQ::clearMemBarrier(MinorDynInstPtr inst)
 {
-    bool is_last_barrier = inst->id.execSeqNum >= lastMemBarrier;
+    bool is_last_barrier =
+        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];
 
     DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
         (is_last_barrier ? "last" : "a"), *inst);
 
     if (is_last_barrier)
-        lastMemBarrier = 0;
+        lastMemBarrier[inst->id.threadId] = 0;
 }
 
 void
@@ -676,7 +677,8 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
     while (ret == NoAddrRangeCoverage && i != slots.rend()) {
         LSQRequestPtr slot = *i;
 
-        if (slot->packet) {
+        if (slot->packet &&
+            slot->inst->id.threadId == request->inst->id.threadId) {
             AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
 
             if (coverage != NoAddrRangeCoverage) {
@@ -1042,8 +1044,9 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
                 request->issuedToMemory = true;
             }
 
-            if (tryToSend(request))
+            if (tryToSend(request)) {
                 moveFromRequestsToTransfers(request);
+            }
         } else {
             request->setState(LSQRequest::Complete);
             moveFromRequestsToTransfers(request);
@@ -1145,6 +1148,9 @@ LSQ::tryToSend(LSQRequestPtr request)
         }
     }
 
+    if (ret)
+        threadSnoop(request);
+
     return ret;
 }
 
@@ -1293,7 +1299,7 @@ LSQ::LSQ(std::string name_, std::string dcache_port_name_,
     cpu(cpu_),
     execute(execute_),
     dcachePort(dcache_port_name_, *this, cpu_),
-    lastMemBarrier(0),
+    lastMemBarrier(cpu.numThreads, 0),
     state(MemoryRunning),
     inMemorySystemLimit(in_memory_system_limit),
     lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
@@ -1526,7 +1532,7 @@ LSQ::minorTrace() const
     MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
         " lastMemBarrier=%d\n",
         state, numAccessesInDTLB, numAccessesInMemorySystem,
-        numStoresInTransfers, lastMemBarrier);
+        numStoresInTransfers, lastMemBarrier[0]);
     requests.minorTrace();
     transfers.minorTrace();
     storeBuffer.minorTrace();
@@ -1565,12 +1571,12 @@ void
 LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
 {
     assert(inst->isInst() && inst->staticInst->isMemBarrier());
-    assert(inst->id.execSeqNum > lastMemBarrier);
+    assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);
 
     /* Remember the barrier.  We only have a notion of one
      *  barrier so this may result in some mem refs being
      *  delayed if they are between barriers */
-    lastMemBarrier = inst->id.execSeqNum;
+    lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
 }
 
 void
@@ -1616,10 +1622,40 @@ LSQ::recvTimingSnoopReq(PacketPtr pkt)
     /* LLSC operations in Minor can't be speculative and are executed from
      * the head of the requests queue.  We shouldn't need to do more than
      * this action on snoops. */
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu.wakeup(tid);
+        }
+    }
 
-    /* THREAD */
     if (pkt->isInvalidate() || pkt->isWrite()) {
-        TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
+        for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+            TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
+                                      cacheBlockMask);
+        }
+    }
+}
+
+void
+LSQ::threadSnoop(LSQRequestPtr request)
+{
+    /* LLSC operations in Minor can't be speculative and are executed from
+     * the head of the requests queue.  We shouldn't need to do more than
+     * this action on snoops. */
+    ThreadID req_tid = request->inst->id.threadId;
+    PacketPtr pkt = request->packet;
+
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        if (tid != req_tid) {
+            if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+                cpu.wakeup(tid);
+            }
+
+            if (pkt->isInvalidate() || pkt->isWrite()) {
+                TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
+                                          cacheBlockMask);
+            }
+        }
     }
 }
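Taken together, the hunks above turn the single `lastMemBarrier` sequence number into one entry per hardware thread, restrict store-to-load forwarding to requests from the same thread, and add `threadSnoop` so that a store from one thread is seen by the address monitors and LLSC state of the others. Below is a standalone sketch of just the per-thread barrier bookkeeping, assuming simplified `InstSeqNum`/`ThreadID` typedefs and a hypothetical `BarrierTracker` class rather than the real gem5 types.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

using InstSeqNum = uint64_t;
using ThreadID = int16_t;

// Each SMT thread tracks its own most recent memory barrier, so a barrier
// issued by one thread no longer delays memory references of the others.
class BarrierTracker
{
    std::vector<InstSeqNum> lastMemBarrier;

  public:
    explicit BarrierTracker(unsigned num_threads)
        : lastMemBarrier(num_threads, 0)
    { }

    // Mirrors LSQ::issuedMemBarrierInst: remember the newest barrier.
    void issued(ThreadID tid, InstSeqNum seq_num)
    {
        assert(seq_num > lastMemBarrier[tid]);
        lastMemBarrier[tid] = seq_num;
    }

    // Mirrors LSQ::clearMemBarrier: forget the barrier once it leaves
    // the store buffer, but only if it is still the newest one.
    void cleared(ThreadID tid, InstSeqNum seq_num)
    {
        if (seq_num >= lastMemBarrier[tid])
            lastMemBarrier[tid] = 0;
    }
};
```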