From 2c99e0e616c61bb31c6902fb7d11e10042b5b210 Mon Sep 17 00:00:00 2001 From: Iru Cai Date: Thu, 28 Feb 2019 17:07:16 +0800 Subject: invisispec-1.0 source --- src/cpu/o3/O3CPU.py | 16 +- src/cpu/o3/commit.hh | 17 ++ src/cpu/o3/commit_impl.hh | 62 +++- src/cpu/o3/cpu.cc | 3 + src/cpu/o3/iew_impl.hh | 47 ++- src/cpu/o3/inst_queue_impl.hh | 13 +- src/cpu/o3/lsq.hh | 21 ++ src/cpu/o3/lsq_impl.hh | 50 +++ src/cpu/o3/lsq_unit.hh | 201 +++++++++++- src/cpu/o3/lsq_unit_impl.hh | 697 ++++++++++++++++++++++++++++++++++++++++-- src/cpu/o3/rename_impl.hh | 3 +- src/cpu/o3/rob.hh | 4 + src/cpu/o3/rob_impl.hh | 79 +++++ 13 files changed, 1176 insertions(+), 37 deletions(-) (limited to 'src/cpu/o3') diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index b8152f663..371433eef 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -68,6 +68,10 @@ class DerivO3CPU(BaseCPU): cacheStorePorts = Param.Unsigned(200, "Cache Ports. " "Constrains stores only. Loads are constrained by load FUs.") + # we deal with validation very similar as store writes back + # FIXME: not sure whether it is the correct parameter or not + cacheValidationPorts = Param.Unsigned(200, "Validation Ports. " + "Constrains validations only. Loads are constrained by load FUs.") decodeToFetchDelay = Param.Cycles(1, "Decode to fetch delay") renameToFetchDelay = Param.Cycles(1 ,"Rename to fetch delay") @@ -124,7 +128,7 @@ class DerivO3CPU(BaseCPU): LFSTSize = Param.Unsigned(1024, "Last fetched store table size") SSITSize = Param.Unsigned(1024, "Store set ID table size") - numRobs = Param.Unsigned(1, "Number of Reorder Buffers"); + numRobs = Param.Unsigned(1, "Number of Reorder Buffers") numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers") numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point " @@ -157,10 +161,14 @@ class DerivO3CPU(BaseCPU): smtCommitPolicy = Param.String('RoundRobin', "SMT Commit Policy") branchPred = Param.BranchPredictor(TournamentBP(numThreads = - Parent.numThreads), + Parent.numThreads), "Branch Predictor") - needsTSO = Param.Bool(buildEnv['TARGET_ISA'] == 'x86', - "Enable TSO Memory model") + + # [mengjia] add configuration variables + simulateScheme = Param.String('UnsafeBaseline', + "The scheme specificed for simulation") + needsTSO = Param.Bool(False, "Enable TSO Memory model") + allowSpecBuffHit = Param.Bool(True, "Enable hit/reuse spec buffer entries") def addCheckerCpu(self): if buildEnv['TARGET_ISA'] in ['arm']: diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index f508a372e..7fe4ad731 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -450,6 +450,7 @@ class DefaultCommit /** The sequence number of the last commited instruction. */ InstSeqNum lastCommitedSeqNum[Impl::MaxThreads]; + Tick lastCommitTick; /** Records if there is a trap currently in flight. */ bool trapInFlight[Impl::MaxThreads]; @@ -479,6 +480,9 @@ class DefaultCommit /** Updates commit stats based on this instruction. */ void updateComInstStats(DynInstPtr &inst); + /** [InvisiSpec] Updates squash stats based on this instruction. */ + void updateSquashStats(DynInstPtr &inst); + /** Stat for the total number of squashed instructions discarded by commit. */ Stats::Scalar commitSquashedInsts; @@ -488,6 +492,19 @@ class DefaultCommit Stats::Scalar commitNonSpecStalls; /** Stat for the total number of branch mispredicts that caused a squash. */ Stats::Scalar branchMispredicts; + + // [InvisiSpec] count #squash + /** Stat for the total number of invalidation packets + * that caused a squash. */ + Stats::Scalar loadHitInvalidations; + Stats::Scalar loadHitExternalEvictions; + /** Stat for the total number of failed validations + * that caused a squash. */ + Stats::Scalar loadValidationFails; + // [InvisiSpec] count cycles stall due to waiting for + // validation responses + Stats::Scalar validationStalls; + /** Distribution of the number of committed instructions each cycle. */ Stats::Distribution numCommittedDist; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index d32493cbc..d83011a66 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -143,6 +143,7 @@ DefaultCommit::DefaultCommit(O3CPU *_cpu, DerivO3CPUParams *params) lastCommitedSeqNum[tid] = 0; squashAfterInst[tid] = NULL; } + lastCommitTick = curTick(); interrupt = NoFault; } @@ -183,6 +184,28 @@ DefaultCommit::regStats() .desc("The number of times a branch was mispredicted") .prereq(branchMispredicts); + // [InvisiSpec] stat for squash due to invalidation, failed validation + loadHitInvalidations + .name(name() + ".loadHitInvalidations") + .desc("The number of times a load hits a invalidation"); + //.prereq(loadHitInvalidations); + + loadHitExternalEvictions + .name(name() + ".loadHitExternalEvictions") + .desc("The number of times a load hits an external invalidation"); + //.prereq(loadHitInvalidations); + + loadValidationFails + .name(name() + ".loadValidationFails") + .desc("The number of times a load fails validation"); + //.prereq(loadValidationFails); + + validationStalls + .name(name() + ".validationStalls") + .desc("The number of ticks the commit is stalled due to waiting " + "for validation responses"); + //.prereq(loadValidationFails); + numCommittedDist .init(0,commitWidth,1) .name(name() + ".committed_per_cycle") @@ -579,6 +602,9 @@ DefaultCommit::squashAll(ThreadID tid) toIEW->commitInfo[tid].squashInst = NULL; toIEW->commitInfo[tid].pc = pc[tid]; + + //TODO: send a packet to SpecBuffer to indicate flush + // } template @@ -705,13 +731,21 @@ DefaultCommit::tick() } else if (!rob->isEmpty(tid)) { DynInstPtr inst = rob->readHeadInst(tid); + if (inst->isExecuted() && inst->needPostFetch() + && !inst->isExposeCompleted()){ + //stall due to waiting for validation response + if (curTick()-lastCommitTick > 0){ + validationStalls+= curTick()-lastCommitTick; + } + + } ppCommitStall->notify(inst); DPRINTF(Commit,"[tid:%i]: Can't commit, Instruction [sn:%lli] PC " "%s is head of ROB and not ready\n", tid, inst->seqNum, inst->pcState()); } - + lastCommitTick = curTick(); DPRINTF(Commit, "[tid:%i]: ROB has %d insts & %d free entries.\n", tid, rob->countInsts(tid), rob->numFreeEntries(tid)); } @@ -832,6 +866,7 @@ DefaultCommit::commit() squashFromTrap(tid); } else if (tcSquash[tid]) { assert(commitStatus[tid] != TrapPending); + //TC: thread context. [mengjia] squashFromTC(tid); } else if (commitStatus[tid] == SquashAfterPending) { // A squash from the previous cycle of the commit stage (i.e., @@ -1039,6 +1074,7 @@ DefaultCommit::commitInsts() toIEW->commitInfo[tid].doneSeqNum = head_inst->seqNum; if (tid == 0) { + //maybe we can use this to mask interrupts [mengjia] canHandleInterrupts = (!head_inst->isDelayedCommit()) && ((THE_ISA != ALPHA_ISA) || (!(pc[0].instAddr() & 0x3))); @@ -1219,6 +1255,8 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // execution doesn't generate extra squashes. thread[tid]->noSquashFromTC = true; + // [InvisiSpec] update squash stat for invalidation or validation fails + updateSquashStats(head_inst); // Execute the trap. Although it's slightly unrealistic in // terms of timing (as it doesn't wait for the full timing of // the trap event to complete before updating state), it's @@ -1350,6 +1388,7 @@ DefaultCommit::markCompletedInsts() // Grab completed insts out of the IEW instruction queue, and mark // instructions completed within the ROB. for (int inst_num = 0; inst_num < fromIEW->size; ++inst_num) { + DPRINTF(Commit, "get the inst [num:%d]\n", inst_num); assert(fromIEW->insts[inst_num]); if (!fromIEW->insts[inst_num]->isSquashed()) { DPRINTF(Commit, "[tid:%i]: Marking PC %s, [sn:%lli] ready " @@ -1362,6 +1401,27 @@ DefaultCommit::markCompletedInsts() fromIEW->insts[inst_num]->setCanCommit(); } } + + // [InvisiSpec] + // update load status + // isPrevInstsCompleted; isPrevBrsResolved + rob->updateVisibleState(); +} + +// [InvisiSpec] update squash stat for loads +template +void +DefaultCommit::updateSquashStats(DynInstPtr &inst) +{ + if (inst->hitInvalidation()){ + loadHitInvalidations++; + } + if (inst->validationFail()){ + loadValidationFails++; + } + if (inst->hitExternalEviction()){ + loadHitExternalEvictions++; + } } template diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index c4bc13fb4..27ad78e2e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -593,6 +593,7 @@ FullO3CPU::tick() activityRec.advance(); + DPRINTF(O3CPU, "activityRec.advance() complete\n"); if (removeInstsThisCycle) { cleanUpRemovedInsts(); } @@ -610,6 +611,8 @@ FullO3CPU::tick() schedule(tickEvent, clockEdge(Cycles(1))); DPRINTF(O3CPU, "Scheduling next tick!\n"); } + } else { + DPRINTF(O3CPU, "tickEvent.scheduled == false, %lu", curTick()); } if (!FullSystem) diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 8270a71b5..063394fdd 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1186,9 +1186,16 @@ DefaultIEW::executeInsts() fetchRedirect[tid] = false; } + // [mengjia] Validate/Expose any loads which are ready last cycle + // very tricky, need make the state consistent + // if we successfully commit sth, then we need to activate the stage or somehow + // problems happen when interacting with squash + // NOTE: we always send validations before execute load requests + ldstQueue.exposeLoads(); + // Uncomment this if you want to see all available instructions. // @todo This doesn't actually work anymore, we should fix it. -// printAvailableInsts(); + // printAvailableInsts(); // Execute/writeback any instructions that are available. int insts_to_execute = fromIssue->size; @@ -1235,18 +1242,40 @@ DefaultIEW::executeInsts() DPRINTF(IEW, "Execute: Calculating address for memory " "reference.\n"); + DPRINTF(IEW, "Execute: %s\n", inst->staticInst->getName()); // Tell the LDSTQ to execute this instruction (if it is a load). if (inst->isLoad()) { // Loads will mark themselves as executed, and their writeback // event adds the instruction to the queue to commit + + // [InvisiSpec] a lifetime of a load + // always let it translate --> translation not complete, defer + // if !loadInExec, need to check whether there + // is a virtual fence ahead + // --> if existing virtual fence, defer + if (inst->fenceDelay()){ + DPRINTF(IEW, "Deferring load due to virtual fence.\n"); + inst->onlyWaitForFence(true); + instQueue.deferMemInst(inst); + continue; + } + fault = ldstQueue.executeLoad(inst); - if (inst->isTranslationDelayed() && + // [InvisiSpec] delay the load if there is a virtual fence ahead + if ((inst->isTranslationDelayed() ) && fault == NoFault) { // A hw page table walk is currently going on; the // instruction must be deferred. - DPRINTF(IEW, "Execute: Delayed translation, deferring " - "load.\n"); + DPRINTF(IEW, "Execute: Delayed translation, deferring load.\n"); + instQueue.deferMemInst(inst); + continue; + } + + if ((inst->specTLBMiss() ) && + fault == NoFault) { + DPRINTF(IEW, "Execute: Speculative load gets a TLB miss," + " deferring load.\n"); instQueue.deferMemInst(inst); continue; } @@ -1381,10 +1410,11 @@ DefaultIEW::executeInsts() ++memOrderViolationEvents; } } - } + } // Update and record activity if we processed any instructions. if (inst_num) { + if (exeStatus == Idle) { exeStatus = Running; } @@ -1476,16 +1506,18 @@ DefaultIEW::tick() dispatch(tid); } + ldstQueue.updateVisibleState(); + if (exeStatus != Squashing) { executeInsts(); - + writebackInsts(); // Have the instruction queue try to schedule any ready instructions. // (In actuality, this scheduling is for instructions that will // be executed next cycle.) instQueue.scheduleReadyInsts(); - + // Also should advance its own time buffers if the stage ran. // Not the best place for it, but this works (hopefully). issueToExecQueue.advance(); @@ -1502,6 +1534,7 @@ DefaultIEW::tick() // Writeback any stores using any leftover bandwidth. ldstQueue.writebackStores(); + // Check the committed load/store signals to see if there's a load // or store to commit. Also check if it's being told to execute a diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 84ac5799c..504084165 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -1172,8 +1172,19 @@ InstructionQueue::getDeferredMemInstToExecute() { for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end(); ++it) { - if ((*it)->translationCompleted() || (*it)->isSquashed()) { + // [InvisiSpec] we need to check the FenceDelay + // a load can be delayed due to + // 1. translation delay + // 2. virtual fence ahead + // 3. not ready to expose and gets a TLB miss + // for both (2, 3) we need to restart the translation + if ( (*it)->translationCompleted() + || ((*it)->onlyWaitForFence() && !(*it)->fenceDelay()) + || ((*it)->onlyWaitForExpose() && (*it)->readyToExpose()) + || (*it)->isSquashed()) { DynInstPtr mem_inst = *it; + mem_inst->onlyWaitForFence(false); + mem_inst->onlyWaitForExpose(false); deferredMemInsts.erase(it); return mem_inst; } diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 7c78156d5..518153990 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -136,6 +136,23 @@ class LSQ { /** Same as above, but only for one thread. */ void writebackStores(ThreadID tid); + + /** [mengjia] + * Attempts to validate loads until all cache ports are used or the + * interface becomes blocked. + */ + int exposeLoads(); + /** Same as above, but only for one thread. */ + int exposeLoads(ThreadID tid); + + /** [mengjia] + * attempt to update FenceDelay state for load insts + */ + void updateVisibleState(); + /** Same as above, but only for one thread. */ + void updateVisibleState(ThreadID tid); + + /** * Squash instructions from a thread until the specified sequence number. */ @@ -257,6 +274,10 @@ class LSQ { int numStoresToWB(ThreadID tid) { return thread[tid].numStoresToWB(); } + /** Returns the number of stores a specific thread has to write back. */ + int numLoadsToVLD(ThreadID tid) + { return thread[tid].numLoadsToVLD(); } + /** Returns if the LSQ will write back to memory this cycle. */ bool willWB(); /** Returns if the LSQ of a specific thread will write back to memory this diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 56b95a5b6..1e78f534e 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -310,6 +310,44 @@ LSQ::writebackStores() } } +// [mengjia] +template +int +LSQ::exposeLoads() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + int exposedLoads = 0; + while (threads != end) { + ThreadID tid = *threads++; + + if (numLoadsToVLD(tid) > 0) { + DPRINTF(Writeback,"[tid:%i] Validate loads. %i loads " + "available for Validate.\n", tid, numLoadsToVLD(tid)); + } + + exposedLoads += thread[tid].exposeLoads(); + } + return exposedLoads; +} + + +// [mengjia] +template +void +LSQ::updateVisibleState() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + thread[tid].updateVisibleState(); + } +} + template bool LSQ::violation() @@ -339,6 +377,7 @@ LSQ::recvReqRetry() } } +// [InvisiSpec] Callback function for receiving a response template bool LSQ::recvTimingResp(PacketPtr pkt) @@ -347,6 +386,17 @@ LSQ::recvTimingResp(PacketPtr pkt) DPRINTF(LSQ, "Got error packet back for address: %#X\n", pkt->getAddr()); + // for expose or validate request, + // if the instruction is squashed, maybe the req has been deleted + if (pkt->isValidate() || pkt->isExpose()){ + if (!pkt->req){ + delete pkt; + return true; + } + DPRINTF(LSQ, "Receive an expose/validate response, idx=%d\n", + pkt->reqIdx); + } + thread[cpu->contextToThread(pkt->req->contextId())] .completeDataAccess(pkt); diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index f5b60b2fc..d3dd34ee2 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -136,6 +136,12 @@ class LSQUnit { */ void checkSnoop(PacketPtr pkt); + // [InvisiSpec] check whether current request will hit in the + // spec buffer or not + int checkSpecBuffHit(const RequestPtr req, const int req_idx); + void setSpecBuffState(const RequestPtr req); + + bool checkPrevLoadsExecuted(const int req_idx); /** Executes a load instruction. */ Fault executeLoad(DynInstPtr &inst); @@ -154,6 +160,15 @@ class LSQUnit { /** Writes back stores. */ void writebackStores(); + /** [mengjia] Validate loads. */ + int exposeLoads(); + + /** [mengjia] Update Visbible State. + * In the mode defence relying on fence: setup fenceDelay state. + * In the mode defence relying on invisibleSpec: + * setup readyToExpose*/ + void updateVisibleState(); + /** Completes the data access that has been returned from the * memory system. */ void completeDataAccess(PacketPtr pkt); @@ -219,6 +234,8 @@ class LSQUnit { /** Returns the number of stores to writeback. */ int numStoresToWB() { return storesToWB; } + /** [InvisiSpec] Returns the number of loads to validate. */ + int numLoadsToVLD() { return loadsToVLD; } /** Returns if the LSQ unit will writeback on this cycle. */ bool willWB() { return storeQueue[storeWBIdx].canWB && @@ -235,18 +252,30 @@ class LSQUnit { /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); + // [InvisiSpec] complete Validates + void completeValidate(DynInstPtr &inst, PacketPtr pkt); + /** Writes back a store that couldn't be completed the previous cycle. */ void writebackPendingStore(); + /** Validates a load that couldn't be completed the previous cycle. */ + void validatePendingLoad(); + /** Handles completing the send of a store to memory. */ void storePostSend(PacketPtr pkt); + /** Handles completing the send of a validation to memory. */ + //void validationPostSend(PacketPtr pkt, int loadVLDIdx); + /** Completes the store at the specified index. */ void completeStore(int store_idx); /** Attempts to send a store to the cache. */ bool sendStore(PacketPtr data_pkt); + /** Attempts to send a validation to the cache. */ + //bool sendValidation(PacketPtr data_pkt, int loadVLDIdx); + /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx) const; /** Decrements the given store index (circular queue). */ @@ -409,6 +438,8 @@ class LSQUnit { /** The number of load instructions in the LQ. */ int loads; + /** [mengjia] The number of store instructions in the SQ waiting to writeback. */ + int loadsToVLD; /** The number of store instructions in the SQ. */ int stores; /** The number of store instructions in the SQ waiting to writeback. */ @@ -416,6 +447,10 @@ class LSQUnit { /** The index of the head instruction in the LQ. */ int loadHead; + /** [mengjia] The index of the first instruction that may be ready to be + * validated, and has not yet been validated. + */ + //int pendingLoadVLDIdx; /** The index of the tail instruction in the LQ. */ int loadTail; @@ -432,7 +467,7 @@ class LSQUnit { /** The number of cache ports available each cycle (stores only). */ int cacheStorePorts; - /** The number of used cache ports in this cycle by stores. */ + /** [InvisiSpec] The number of used cache ports in this cycle by stores. */ int usedStorePorts; //list mshrSeqNums; @@ -458,6 +493,9 @@ class LSQUnit { /** Whehter or not a store is blocked due to the memory system. */ bool isStoreBlocked; + /** Whehter or not a validation is blocked due to the memory system. */ + bool isValidationBlocked; + /** Whether or not a store is in flight. */ bool storeInFlight; @@ -471,9 +509,21 @@ class LSQUnit { /** The packet that is pending free cache ports. */ PacketPtr pendingPkt; + /* [mengjia] define scheme variables */ + // Flag for whether issue packets in execution stage + bool loadInExec; + + // Flag for whether to use invisible speculative load + bool isInvisibleSpec; + /** Flag for memory model. */ bool needsTSO; + // Flag for whether defending against spectre attack or future attacks + bool isFuturistic; + bool allowSpecBuffHit; + /* [mengjia] different schemes determine values of 4 variables. */ + // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. @@ -508,6 +558,12 @@ class LSQUnit { /** Number of times the LSQ is blocked due to the cache. */ Stats::Scalar lsqCacheBlocked; + Stats::Scalar specBuffHits; + Stats::Scalar specBuffMisses; + Stats::Scalar numValidates; + Stats::Scalar numExposes; + Stats::Scalar numConvertedExposes; + public: /** Executes the load at the given index. */ Fault read(const RequestPtr &req, @@ -549,6 +605,8 @@ class LSQUnit { bool isStalled() { return stalled; } }; + +// IMPORTANT: the function to issue packets, interact with memory [mengjia] template Fault LSQUnit::read(const RequestPtr &req, @@ -578,6 +636,7 @@ LSQUnit::read(const RequestPtr &req, } // Check the SQ for any previous stores that might lead to forwarding + // why we have store queue index for a load operation? [mengjia] int store_idx = load_inst->sqIdx; int store_size = 0; @@ -587,6 +646,7 @@ LSQUnit::read(const RequestPtr &req, load_idx, store_idx, storeHead, req->getPaddr(), sreqLow ? " split" : ""); + // LLSC: load-link/store-conditional [mengjia] if (req->isLLSC()) { assert(!sreqLow); // Disable recording the result temporarily. Writing to misc @@ -597,12 +657,14 @@ LSQUnit::read(const RequestPtr &req, load_inst->recordResult(true); } + // request to memory mapped register [mengjia] if (req->isMmappedIpr()) { assert(!load_inst->memData); load_inst->memData = new uint8_t[64]; ThreadContext *thread = cpu->tcBase(lsqID); Cycles delay(0); + PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq); data_pkt->dataStatic(load_inst->memData); @@ -750,6 +812,7 @@ LSQUnit::read(const RequestPtr &req, DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n", load_inst->seqNum, load_inst->pcState()); + // Allocate memory if this is the first time a load is issued. if (!load_inst->memData) { load_inst->memData = new uint8_t[req->getSize()]; @@ -757,10 +820,35 @@ LSQUnit::read(const RequestPtr &req, // if we the cache is not blocked, do cache access bool completedFirst = false; - PacketPtr data_pkt = Packet::createRead(req); + + PacketPtr data_pkt = NULL; PacketPtr fst_data_pkt = NULL; PacketPtr snd_data_pkt = NULL; + // According to the isInsivisibleSpec variable to create + // corresponding type of packets [mengjia] + bool sendSpecRead = false; + if(isInvisibleSpec){ + if(!load_inst->readyToExpose()){ + assert(!req->isLLSC()); + assert(!req->isStrictlyOrdered()); + assert(!req->isMmappedIpr()); + sendSpecRead = true; + DPRINTF(LSQUnit, "send a spec read for inst [sn:%lli]\n", + load_inst->seqNum); + } + + } + + assert( !(sendSpecRead && load_inst->isSpecCompleted()) && + "Sending specRead twice for the same load insts"); + + if(sendSpecRead){ + data_pkt = Packet::createReadSpec(req); + }else{ + data_pkt = Packet::createRead(req); + } + data_pkt->dataStatic(load_inst->memData); LSQSenderState *state = new LSQSenderState; @@ -772,17 +860,64 @@ LSQUnit::read(const RequestPtr &req, if (!TheISA::HasUnalignedMemAcc || !sreqLow) { // Point the first packet at the main data packet. fst_data_pkt = data_pkt; + + fst_data_pkt->setFirst(); + if (sendSpecRead){ + int src_idx = checkSpecBuffHit(req, load_idx); + if (src_idx != -1) { + if (allowSpecBuffHit){ + data_pkt->setOnlyAccessSpecBuff(); + } + data_pkt->srcIdx = src_idx; + specBuffHits++; + }else{ + specBuffMisses++; + } + } + fst_data_pkt->reqIdx = load_idx; } else { // Create the split packets. - fst_data_pkt = Packet::createRead(sreqLow); - snd_data_pkt = Packet::createRead(sreqHigh); + if(sendSpecRead){ + + fst_data_pkt = Packet::createReadSpec(sreqLow); + int fst_src_idx = checkSpecBuffHit(sreqLow, load_idx); + if ( fst_src_idx != -1 ) { + if (allowSpecBuffHit){ + fst_data_pkt->setOnlyAccessSpecBuff(); + } + fst_data_pkt->srcIdx = fst_src_idx; + specBuffHits++; + } else { + specBuffMisses++; + } + + snd_data_pkt = Packet::createReadSpec(sreqHigh); + int snd_src_idx = checkSpecBuffHit(sreqHigh, load_idx); + if ( snd_src_idx != -1 ) { + if (allowSpecBuffHit){ + snd_data_pkt->setOnlyAccessSpecBuff(); + } + snd_data_pkt->srcIdx = snd_src_idx; + specBuffHits++; + } else { + specBuffMisses++; + } + }else{ + fst_data_pkt = Packet::createRead(sreqLow); + snd_data_pkt = Packet::createRead(sreqHigh); + } + fst_data_pkt->setFirst(); fst_data_pkt->dataStatic(load_inst->memData); snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize()); fst_data_pkt->senderState = state; snd_data_pkt->senderState = state; + fst_data_pkt->reqIdx = load_idx; + snd_data_pkt->reqIdx = load_idx; + fst_data_pkt->isSplit = true; + snd_data_pkt->isSplit = true; state->isSplit = true; state->outstanding = 2; state->mainPkt = data_pkt; @@ -794,6 +929,8 @@ LSQUnit::read(const RequestPtr &req, // @todo We should account for cache port contention // and arbitrate between loads and stores. bool successful_load = true; + // MARK: here is the place memory request of read is sent [mengjia] + // [InvisiSpec] Sending out a memory request if (!dcachePort->sendTimingReq(fst_data_pkt)) { successful_load = false; } else if (TheISA::HasUnalignedMemAcc && sreqLow) { @@ -850,6 +987,62 @@ LSQUnit::read(const RequestPtr &req, return NoFault; } + DPRINTF(LSQUnit, "successfully sent out packet(s) for inst [sn:%lli]\n", + load_inst->seqNum); + // Set everything ready for expose/validation after the read is + // successfully sent out + if(sendSpecRead){ // sending actual request + + // [mengjia] Here we set the needExposeOnly flag + if (needsTSO && !load_inst->isDataPrefetch()){ + // need to check whether previous load_instructions specComplete or not + if ( checkPrevLoadsExecuted(load_idx) ){ + load_inst->needExposeOnly(true); + DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as " + "needExposeOnly\n", + load_inst->pcState(), load_inst->seqNum); + } else { + DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as " + "needValidation\n", + load_inst->pcState(), load_inst->seqNum); + } + }else{ + //if RC, always only need expose + load_inst->needExposeOnly(true); + DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as needExposeOnly\n", + load_inst->pcState(), load_inst->seqNum); + } + + load_inst->needPostFetch(true); + assert(!req->isMmappedIpr()); + //save expose requestPtr + if (TheISA::HasUnalignedMemAcc && sreqLow) { + load_inst->postSreqLow = std::make_shared(*sreqLow); + load_inst->postSreqHigh = std::make_shared(*sreqHigh); + load_inst->postReq = nullptr; + }else{ + load_inst->postReq = std::make_shared(*req); + load_inst->postSreqLow = nullptr; + load_inst->postSreqHigh = nullptr; + } + load_inst->needDeletePostReq(true); + DPRINTF(LSQUnit, "created validation/expose" + " request for inst [sn:%lli]" + "req=%#x, reqLow=%#x, reqHigh=%#x\n", + load_inst->seqNum, (Addr)(load_inst->postReq), + (Addr)(load_inst->postSreqLow), + (Addr)(load_inst->postSreqHigh)); + } else { + load_inst->setExposeCompleted(); + load_inst->needPostFetch(false); + if (TheISA::HasUnalignedMemAcc && sreqLow) { + setSpecBuffState(sreqLow); + setSpecBuffState(sreqHigh); + } else { + setSpecBuffState(req); + } + } + return NoFault; } diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index c2750be7d..3da248977 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -89,6 +89,9 @@ LSQUnit::WritebackEvent::description() const return "Store writeback"; } + +// [InvisiSpec] This function deals with +// acknowledge response to memory read/write template void LSQUnit::completeDataAccess(PacketPtr pkt) @@ -107,6 +110,17 @@ LSQUnit::completeDataAccess(PacketPtr pkt) return; } + // need to update hit info for corresponding instruction + if (pkt->isL1Hit() && pkt->isSpec() && pkt->isRead()){ + if (state->isSplit && ! pkt->isFirst()){ + inst->setL1HitHigh(); + } else { + inst->setL1HitLow(); + } + } else if (!pkt->isSpec()) { + setSpecBuffState(pkt->req); + } + // If this is a split access, wait until all packets are received. if (TheISA::HasUnalignedMemAcc && !state->complete()) { return; @@ -117,7 +131,9 @@ LSQUnit::completeDataAccess(PacketPtr pkt) if (!state->noWB) { // Only loads and store conditionals perform the writeback // after receving the response from the memory + // [mengjia] validation also needs writeback, expose do not need assert(inst->isLoad() || inst->isStoreConditional()); + if (!TheISA::HasUnalignedMemAcc || !state->isSplit || !state->isLoad) { writeback(inst, pkt); @@ -129,6 +145,10 @@ LSQUnit::completeDataAccess(PacketPtr pkt) if (inst->isStore()) { completeStore(state->idx); } + + if (pkt->isValidate() || pkt->isExpose()) { + completeValidate(inst, pkt); + } } if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) { @@ -136,6 +156,7 @@ LSQUnit::completeDataAccess(PacketPtr pkt) } pkt->req->setAccessLatency(); + // probe point, not sure about the mechanism [mengjia] cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt)); delete state; @@ -143,8 +164,8 @@ LSQUnit::completeDataAccess(PacketPtr pkt) template LSQUnit::LSQUnit() - : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false), - isStoreBlocked(false), storeInFlight(false), hasPendingPkt(false), + : loads(0), loadsToVLD(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false), + isStoreBlocked(false), isValidationBlocked(false), storeInFlight(false), hasPendingPkt(false), pendingPkt(nullptr) { } @@ -178,7 +199,52 @@ LSQUnit::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, depCheckShift = params->LSQDepCheckShift; checkLoads = params->LSQCheckLoads; cacheStorePorts = params->cacheStorePorts; + + // According to the scheme, we need to define actions as follows. + // loadInExec: if False, no packets are sent in execution stage; + // if True, send either readReq or readSpecReq + // isInvisibleSpec: if True, send readSpecReq in execution statge; + // if False, send readReq + // needsTSO: if True, squash read on receiving invalidations, and only allow one outstanding write at a time; + // if False, no squash on receiving invalidaiton, and allow multiple outstanding writes. + // isConservative: if True, react after all preceding instructions complete/no exception; + // if False, react only after all preceding stores/brancehs complete + const std::string scheme = params->simulateScheme; + if (scheme.compare("UnsafeBaseline")==0){ + loadInExec = true; + isInvisibleSpec = false; // send real request + isFuturistic = false; // not relevant in unsafe mode. + }else if (scheme.compare("FuturisticSafeFence")==0){ + // "LFENCE" before every load + loadInExec = false; + isInvisibleSpec = false; // not used since loadInExec is false + isFuturistic = true; // send readReq at head of ROB + }else if (scheme.compare("FuturisticSafeInvisibleSpec")==0){ + // only make load visible when all preceding instructions + // complete and no exception + loadInExec = true; + isInvisibleSpec = true; // send request but not change cache state + isFuturistic = true; // conservative condition to send validations + }else if (scheme.compare("SpectreSafeFence")==0){ + // "LFENCE" after every branch + loadInExec = false; + isInvisibleSpec = false; // not used since loadInExec is false + isFuturistic = false; // commit when preceding branches are resolved + }else if (scheme.compare("SpectreSafeInvisibleSpec")==0){ + // make load visible when all preceiding branches are resolved + loadInExec = true; + isInvisibleSpec = true; // send request but not change cache state + isFuturistic = false; // only deal with spectre attacks + }else { + cprintf("ERROR: unsupported simulation scheme: %s!\n", scheme); + exit(1); + } needsTSO = params->needsTSO; + allowSpecBuffHit = params->allowSpecBuffHit; + cprintf("Info: simulation uses scheme: %s; " + "needsTSO=%d; allowSpecBuffHit=%d\n", + scheme, needsTSO, allowSpecBuffHit); + // [mengjia] end of setting configuration variables resetState(); } @@ -188,7 +254,7 @@ template void LSQUnit::resetState() { - loads = stores = storesToWB = 0; + loads = stores = loadsToVLD = storesToWB = 0; loadHead = loadTail = 0; @@ -258,6 +324,26 @@ LSQUnit::regStats() lsqCacheBlocked .name(name() + ".cacheBlocked") .desc("Number of times an access to memory failed due to the cache being blocked"); + + specBuffHits + .name(name() + ".specBuffHits") + .desc("Number of times an access hits in speculative buffer"); + + specBuffMisses + .name(name() + ".specBuffMisses") + .desc("Number of times an access misses in speculative buffer"); + + numValidates + .name(name() + ".numValidates") + .desc("Number of validates sent to cache"); + + numExposes + .name(name() + ".numExposes") + .desc("Number of exposes sent to cache"); + + numConvertedExposes + .name(name() + ".numConvertedExposes") + .desc("Number of exposes converted from validation"); } template @@ -289,6 +375,7 @@ LSQUnit::drainSanityCheck() const assert(!loadQueue[i]); assert(storesToWB == 0); + assert(loadsToVLD == 0); assert(!retryPkt); } @@ -377,6 +464,7 @@ LSQUnit::insertLoad(DynInstPtr &load_inst) incrLdIdx(loadTail); ++loads; + } template @@ -400,6 +488,7 @@ LSQUnit::insertStore(DynInstPtr &store_inst) ++stores; } +// It is an empty function? why? [mengjia] template typename Impl::DynInstPtr LSQUnit::getMemDepViolator() @@ -460,13 +549,16 @@ LSQUnit::checkSnoop(PacketPtr pkt) Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask; // Check that this snoop didn't just invalidate our lock flag - if (ld_inst->effAddrValid() && (load_addr_low == invalidate_addr - || load_addr_high == invalidate_addr) + // [InvisiSpec] also make sure the instruction has been sent out + // otherwise, we cause unneccessary squash + if (ld_inst->effAddrValid() && !ld_inst->fenceDelay() + && (load_addr_low == invalidate_addr + || load_addr_high == invalidate_addr) && ld_inst->memReqFlags & Request::LLSC) TheISA::handleLockedSnoopHit(ld_inst.get()); } - // If this is the only load in the LSQ we don't care + // If not match any load entry, then do nothing [mengjia] if (load_idx == loadTail) return; @@ -477,7 +569,10 @@ LSQUnit::checkSnoop(PacketPtr pkt) while (load_idx != loadTail) { DynInstPtr ld_inst = loadQueue[load_idx]; - if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) { + // [SafeSpce] check snoop violation when the load has + // been sent out; otherwise, unneccessary squash + if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered() + || ld_inst->fenceDelay()) { incrLdIdx(load_idx); continue; } @@ -495,11 +590,29 @@ LSQUnit::checkSnoop(PacketPtr pkt) // all other loads, this load as well as *all* subsequent loads // need to be squashed to prevent possible load reordering. force_squash = true; + + // [InvisiSpec] in InvisiSpec, we do not need to squash + // the load at the head of LQ, + // as well as the one do not need validation + if (isInvisibleSpec && + (load_idx==loadHead || ld_inst->needExposeOnly())){ + force_squash = false; + } + if (!pkt->isExternalEviction() && isInvisibleSpec){ + force_squash = false; + ld_inst->clearL1HitHigh(); + ld_inst->clearL1HitLow(); + } } if (ld_inst->possibleLoadViolation() || force_squash) { DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n", pkt->getAddr(), ld_inst->seqNum); + //[InvisiSpec] mark the load hit invalidation + ld_inst->hitInvalidation(true); + if (pkt->isExternalEviction()){ + ld_inst->hitExternalEviction(true); + } // Mark the load for re-execution ld_inst->fault = std::make_shared(); } else { @@ -523,6 +636,103 @@ LSQUnit::checkSnoop(PacketPtr pkt) return; } +template +bool +LSQUnit::checkPrevLoadsExecuted(int req_idx) +{ + int load_idx = loadHead; + while (load_idx != req_idx){ + if (!loadQueue[load_idx]->isExecuted()){ + // if at least on load ahead of current load + // does not finish spec access, + // then return false + return false; + } + incrLdIdx(load_idx); + } + + //if all executed, return true + return true; +} + +template +void +LSQUnit::setSpecBuffState(RequestPtr expose_req) +{ + Addr req_eff_addr1 = expose_req->getPaddr() & cacheBlockMask; + + int load_idx = loadHead; + while (load_idx != loadTail){ + DynInstPtr ld_inst = loadQueue[load_idx]; + if (ld_inst->effAddrValid()){ + + Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask; + Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask; + if (ld_eff_addr1 == req_eff_addr1){ + ld_inst->setSpecBuffObsoleteLow(); + } else if (ld_eff_addr2 == req_eff_addr1){ + ld_inst->setSpecBuffObsoleteHigh(); + } + } + incrLdIdx(load_idx); + } +} + + +template +int +LSQUnit::checkSpecBuffHit(RequestPtr req, int req_idx) +{ + + Addr req_eff_addr1 = req->getPaddr() & cacheBlockMask; + //Addr req_eff_addr2 = (req->getPaddr() + req->getSize()-1) & cacheBlockMask; + // the req should be within the same cache line + //assert (req_eff_addr1 == req_eff_addr2); + assert (!loadQueue[req_idx]->isExecuted()); + + int load_idx = loadHead; + + while (load_idx != loadTail){ + DynInstPtr ld_inst = loadQueue[load_idx]; + if (ld_inst->effAddrValid()){ + Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask; + Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask; + + if ((req_eff_addr1 == ld_eff_addr1 && ld_inst->isL1HitLow()) + || (req_eff_addr1 == ld_eff_addr2 && ld_inst->isL1HitHigh())){ + return -1; + //already in L1, do not copy from buffer + } else { + + if (ld_inst->isExecuted() && ld_inst->needPostFetch() + && !ld_inst->isSquashed() && ld_inst->fault==NoFault){ + if (req_eff_addr1 == ld_eff_addr1 && !ld_inst->isL1HitLow() + && !ld_inst->isSpecBuffObsoleteLow()){ + DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] " + "and [sn:%lli] (low) at address %#x\n", + loadQueue[req_idx]->seqNum, ld_inst->seqNum, + req_eff_addr1); + return load_idx; + } else if ( ld_eff_addr2 !=0 && + req_eff_addr1 == ld_eff_addr2 && !ld_inst->isL1HitHigh() + && !ld_inst->isSpecBuffObsoleteHigh()){ + DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] " + "and [sn:%lli] (high) at address %#x\n", + loadQueue[req_idx]->seqNum, ld_inst->seqNum, + req_eff_addr1); + return load_idx; + } + } + } + } + incrLdIdx(load_idx); + } + + return -1; +} + + + template Fault LSQUnit::checkViolations(int load_idx, DynInstPtr &inst) @@ -537,7 +747,10 @@ LSQUnit::checkViolations(int load_idx, DynInstPtr &inst) */ while (load_idx != loadTail) { DynInstPtr ld_inst = loadQueue[load_idx]; - if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) { + // [InvisiSpec] no need to check violation for unsent load + // otherwise, unneccessary squash + if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered() + || ld_inst->fenceDelay()) { incrLdIdx(load_idx); continue; } @@ -616,14 +829,25 @@ LSQUnit::executeLoad(DynInstPtr &inst) assert(!inst->isSquashed()); + // use ISA interface to generate correct access request + // initiateAcc is implemented in dyn_inst_impl.hh + // The interface calls corresponding ISA defined function + // check buld/ARM/arch/generic/memhelper.hh for more info [mengjia] load_fault = inst->initiateAcc(); - if (inst->isTranslationDelayed() && + // if translation delay, deferMem [mengjia] + // in the case it is not the correct time to send the load + // also defer it + if ( (inst->isTranslationDelayed() || inst->fenceDelay() + || inst->specTLBMiss()) && load_fault == NoFault) return load_fault; // If the instruction faulted or predicated false, then we need to send it // along to commit without the instruction completing. + // + // if it is faulty, not execute it, send it to commit, and commit statge will deal with it + // here is signling the ROB, the inst can commit [mengjia] if (load_fault != NoFault || !inst->readPredicate()) { // Send this instruction to commit, also make sure iew stage // realizes there is activity. Mark it as executed unless it @@ -671,6 +895,8 @@ LSQUnit::executeStore(DynInstPtr &store_inst) // address. If so, then we have a memory ordering violation. int load_idx = store_inst->lqIdx; + // TODO: Check whether this store tries to get an exclusive copy + // of target line [mengjia] Fault store_fault = store_inst->initiateAcc(); if (store_inst->isTranslationDelayed() && @@ -769,15 +995,343 @@ LSQUnit::writebackPendingStore() if (hasPendingPkt) { assert(pendingPkt != NULL); - // If the cache is blocked, this will store the packet for retry. - if (sendStore(pendingPkt)) { - storePostSend(pendingPkt); + if(pendingPkt->isWrite()){ + // If the cache is blocked, this will store the packet for retry. + if (sendStore(pendingPkt)) { + storePostSend(pendingPkt); + } + pendingPkt = NULL; + hasPendingPkt = false; } - pendingPkt = NULL; - hasPendingPkt = false; } } + + + +// [InvisiSpec] update FenceDelay State +template +void +LSQUnit::updateVisibleState() +{ + int load_idx = loadHead; + + //iterate all the loads and update its fencedelay state accordingly + while (load_idx != loadTail && loadQueue[load_idx]){ + DynInstPtr inst = loadQueue[load_idx]; + + if (!loadInExec){ + + if ( (isFuturistic && inst->isPrevInstsCommitted()) || + (!isFuturistic && inst->isPrevBrsCommitted())){ + if (inst->fenceDelay()){ + DPRINTF(LSQUnit, "Clear virtual fence for " + "inst [sn:%lli] PC %s\n", + inst->seqNum, inst->pcState()); + } + inst->fenceDelay(false); + }else { + if (!inst->fenceDelay()){ + DPRINTF(LSQUnit, "Deffering an inst [sn:%lli] PC %s" + " due to virtual fence\n", + inst->seqNum, inst->pcState()); + } + inst->fenceDelay(true); + } + inst->readyToExpose(true); + } else if (loadInExec && isInvisibleSpec){ + + if ( (isFuturistic && inst->isPrevInstsCompleted()) || + (!isFuturistic && inst->isPrevBrsResolved())){ + if (!inst->readyToExpose()){ + DPRINTF(LSQUnit, "Set readyToExpose for " + "inst [sn:%lli] PC %s\n", + inst->seqNum, inst->pcState()); + if (inst->needPostFetch()){ + ++loadsToVLD; + } + } + inst->readyToExpose(true); + }else { + if (inst->readyToExpose()){ + DPRINTF(LSQUnit, "The load can not be validated " + "[sn:%lli] PC %s\n", + inst->seqNum, inst->pcState()); + assert(0); + //--loadsToVLD; + } + inst->readyToExpose(false); + } + inst->fenceDelay(false); + } else { + inst->readyToExpose(true); + inst->fenceDelay(false); + } + incrLdIdx(load_idx); + } +} + +// [InvisiSpec] validate loads +template +int +LSQUnit::exposeLoads() +{ + if(!isInvisibleSpec){ + assert(loadsToVLD==0 + && "request validation on Non invisible Spec mode"); + } + + int old_loadsToVLD = loadsToVLD; + + // [InvisiSpec] Note: + // need to iterate from the head every time + // since the load can be exposed out-of-order + int loadVLDIdx = loadHead; + + while (loadsToVLD > 0 && + loadVLDIdx != loadTail && + loadQueue[loadVLDIdx]) { + + if (loadQueue[loadVLDIdx]->isSquashed()){ + incrLdIdx(loadVLDIdx); + continue; + } + // skip the loads that either do not need to expose + // or exposed already + if(!loadQueue[loadVLDIdx]->needPostFetch() + || loadQueue[loadVLDIdx]->isExposeSent() ){ + incrLdIdx(loadVLDIdx); + continue; + } + + DynInstPtr load_inst = loadQueue[loadVLDIdx]; + if (loadQueue[loadVLDIdx]->fault!=NoFault){ + //load is executed, so it wait for expose complete + //to send it to commit, regardless of whether it is ready + //to expose + load_inst->setExposeCompleted(); + load_inst->setExposeSent(); + loadsToVLD--; + if (load_inst->isExecuted()){ + DPRINTF(LSQUnit, "Execute finished and gets violation fault." + "Send inst [sn:%lli] to commit stage.\n", + load_inst->seqNum); + iewStage->instToCommit(load_inst); + iewStage->activityThisCycle(); + } + incrLdIdx(loadVLDIdx); + continue; + } + + // skip the loads that need expose but + // are not ready + if (loadQueue[loadVLDIdx]->needPostFetch() + && !loadQueue[loadVLDIdx]->readyToExpose()){ + incrLdIdx(loadVLDIdx); + continue; + } + + assert(loadQueue[loadVLDIdx]->needPostFetch() + && loadQueue[loadVLDIdx]->readyToExpose() ); + + assert(!load_inst->isCommitted()); + + + RequestPtr req = load_inst->postReq; + RequestPtr sreqLow = load_inst->postSreqLow; + RequestPtr sreqHigh = load_inst->postSreqHigh; + + // we should not have both req and sreqLow not NULL + assert( !(req && sreqLow)); + + DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]" + " PC= %s. req=%#x, reqLow=%#x, reqHigh=%#x\n", + load_inst->seqNum, load_inst->pcState(), + (Addr)(load_inst->postReq), + (Addr)(load_inst->postSreqLow), (Addr)(load_inst->postSreqHigh)); + + PacketPtr data_pkt = NULL; + PacketPtr snd_data_pkt = NULL; + + LSQSenderState *state = new LSQSenderState; + state->isLoad = false; + state->idx = loadVLDIdx; + state->inst = load_inst; + state->noWB = true; + + bool split = false; + if (TheISA::HasUnalignedMemAcc && sreqLow) { + split = true; + } else { + assert(req); + } + + bool onlyExpose = false; + if (!split) { + if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){ + data_pkt = Packet::createExpose(req); + onlyExpose = true; + }else { + data_pkt = Packet::createValidate(req); + if (!load_inst->vldData) + load_inst->vldData = new uint8_t[1]; + data_pkt->dataStatic(load_inst->vldData); + } + data_pkt->senderState = state; + data_pkt->setFirst(); + data_pkt->reqIdx = loadVLDIdx; + DPRINTF(LSQUnit, "contextid = %d\n", req->contextId()); + } else { + // allocate memory if we need at least one validation + if (!load_inst->needExposeOnly() && + (!load_inst->isL1HitLow() || !load_inst->isL1HitHigh())){ + if (!load_inst->vldData) + load_inst->vldData = new uint8_t[2]; + } else { + onlyExpose = true; + } + + // Create the split packets. - first one + if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){ + data_pkt = Packet::createExpose(sreqLow); + }else{ + data_pkt = Packet::createValidate(sreqLow); + assert(load_inst->vldData); + data_pkt->dataStatic(load_inst->vldData); + } + + // Create the split packets. - second one + if (load_inst->needExposeOnly() || load_inst->isL1HitHigh()){ + snd_data_pkt = Packet::createExpose(sreqHigh); + } else { + snd_data_pkt = Packet::createValidate(sreqHigh); + assert(load_inst->vldData); + snd_data_pkt->dataStatic(&(load_inst->vldData[1])); + } + + data_pkt->senderState = state; + data_pkt->setFirst(); + snd_data_pkt->senderState = state; + data_pkt->reqIdx = loadVLDIdx; + snd_data_pkt->reqIdx = loadVLDIdx; + + data_pkt->isSplit = true; + snd_data_pkt->isSplit = true; + state->isSplit = true; + state->outstanding = 2; + state->mainPkt = data_pkt; + + DPRINTF(LSQUnit, "contextid = %d, %d\n", + sreqLow->contextId(), sreqHigh->contextId()); + req = sreqLow; + } + + assert(!req->isStrictlyOrdered()); + assert(!req->isMmappedIpr()); + + DPRINTF(LSQUnit, "D-Cache: Validating/Exposing load idx:%i PC:%s " + "to Addr:%#x, data:%#x [sn:%lli]\n", + loadVLDIdx, load_inst->pcState(), + //FIXME: resultData not memData + req->getPaddr(), (int)*(load_inst->memData), + load_inst->seqNum); + + bool successful_expose = true; + bool completedFirst = false; + + if (!dcachePort->sendTimingReq(data_pkt)){ + DPRINTF(IEW, "D-Cache became blocked when " + "validating [sn:%lli], will retry later\n", + load_inst->seqNum); + successful_expose = false; + } else { + if (split) { + // If split, try to send the second packet too + completedFirst = true; + assert(snd_data_pkt); + + if (!dcachePort->sendTimingReq(snd_data_pkt)){ + state->complete(); + state->cacheBlocked = true; + successful_expose = false; + DPRINTF(IEW, "D-Cache became blocked when validating" + " [sn:%lli] second packet, will retry later\n", + load_inst->seqNum); + } + } + } + + if (!successful_expose){ + if (!split) { + delete state; + delete data_pkt; + }else{ + if (!completedFirst){ + delete state; + delete data_pkt; + delete snd_data_pkt; + } else { + delete snd_data_pkt; + } + } + //cpu->wakeCPU(); // This will cause issue(wrong activity count and affects the memory transactions + ++lsqCacheBlocked; + break; + } else { + // Here is to fix memory leakage + // it is ugly, but we have to do it now. + load_inst->needDeletePostReq(false); + + // if all the packets we sent out is expose, + // we assume the expose is alreay completed + if (onlyExpose) { + load_inst->setExposeCompleted(); + numExposes++; + } else { + numValidates++; + } + if (load_inst->needExposeOnly()){ + numConvertedExposes++; + } + if (load_inst->isExecuted() && load_inst->isExposeCompleted() + && !load_inst->isSquashed()){ + DPRINTF(LSQUnit, "Expose finished. Execution done." + "Send inst [sn:%lli] to commit stage.\n", + load_inst->seqNum); + iewStage->instToCommit(load_inst); + iewStage->activityThisCycle(); + } else{ + DPRINTF(LSQUnit, "Need validation or execution not finishes." + "Need to wait for readResp/validateResp " + "for inst [sn:%lli].\n", + load_inst->seqNum); + } + + load_inst->setExposeSent(); + --loadsToVLD; + incrLdIdx(loadVLDIdx); + if (!split){ + setSpecBuffState(req); + } else { + setSpecBuffState(sreqLow); + setSpecBuffState(sreqHigh); + } + } + } + + DPRINTF(LSQUnit, "Send validate/expose for %d insts. loadsToVLD=%d" + ". loadHead=%d. loadTail=%d.\n", + old_loadsToVLD-loadsToVLD, loadsToVLD, loadHead, + loadTail); + + assert(loads>=0 && loadsToVLD >= 0); + + return old_loadsToVLD-loadsToVLD; +} + + + + template void LSQUnit::writebackStores() @@ -797,7 +1351,7 @@ LSQUnit::writebackStores() if (isStoreBlocked) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" - " is blocked!\n"); + " is blocked on stores!\n"); break; } @@ -1007,6 +1561,12 @@ LSQUnit::squash(const InstSeqNum &squashed_num) stallingLoadIdx = 0; } + if (loadQueue[load_idx]->needPostFetch() && + loadQueue[load_idx]->readyToExpose() && + !loadQueue[load_idx]->isExposeSent()){ + loadsToVLD --; + } + // Clear the smart pointer to make sure it is decremented. loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; @@ -1017,6 +1577,7 @@ LSQUnit::squash(const InstSeqNum &squashed_num) decrLdIdx(load_idx); ++lsqSquashedLoads; + } if (memDepViolator && squashed_num < memDepViolator->seqNum) { @@ -1071,6 +1632,10 @@ LSQUnit::squash(const InstSeqNum &squashed_num) } } + +// after sent, we assume the store is complete +// thus, we can wekeup and forward data +// In TSO, mark inFlightStore as true to block following stores [mengjia] template void LSQUnit::storePostSend(PacketPtr pkt) @@ -1100,9 +1665,58 @@ LSQUnit::storePostSend(PacketPtr pkt) storeInFlight = true; } + DPRINTF(LSQUnit, "Post sending store for inst [sn:%lli]\n", + storeQueue[storeWBIdx].inst->seqNum); incrStIdx(storeWBIdx); } + + +template +void +LSQUnit::completeValidate(DynInstPtr &inst, PacketPtr pkt) +{ + iewStage->wakeCPU(); + // if instruction fault, no need to check value, + // return directly + //assert(!inst->needExposeOnly()); + if (inst->isExposeCompleted() || inst->isSquashed()){ + //assert(inst->fault != NoFault); + //Already sent to commit, do nothing + return; + } + //Check validation result + bool validation_fail = false; + if (!inst->isL1HitLow() && inst->vldData[0]==0) { + validation_fail = true; + } else { + if (pkt->isSplit && !inst->isL1HitHigh() + && inst->vldData[1]==0){ + validation_fail = true; + } + } + if (validation_fail){ + // Mark the load for re-execution + inst->fault = std::make_shared(); + inst->validationFail(true); + DPRINTF(LSQUnit, "Validation failed.\n", + inst->seqNum); + } + + inst->setExposeCompleted(); + if ( inst->isExecuted() && inst->isExposeCompleted() ){ + DPRINTF(LSQUnit, "Validation finished. Execution done." + "Send inst [sn:%lli] to commit stage.\n", + inst->seqNum); + iewStage->instToCommit(inst); + iewStage->activityThisCycle(); + } else{ + DPRINTF(LSQUnit, "Validation done. Execution not finishes." + "Need to wait for readResp for inst [sn:%lli].\n", + inst->seqNum); + } +} + template void LSQUnit::writeback(DynInstPtr &inst, PacketPtr pkt) @@ -1116,6 +1730,11 @@ LSQUnit::writeback(DynInstPtr &inst, PacketPtr pkt) return; } + //DPRINTF(LSQUnit, "write back for inst [sn:%lli]\n", inst->seqNum); + assert(!(inst->isExecuted() && inst->isExposeCompleted() && + inst->fault==NoFault) && + "in this case, we will put it into ROB twice."); + if (!inst->isExecuted()) { inst->setExecuted(); @@ -1135,8 +1754,42 @@ LSQUnit::writeback(DynInstPtr &inst, PacketPtr pkt) } } - // Need to insert instruction into queue to commit - iewStage->instToCommit(inst); + // [mengjia] + // check schemes to decide whether to set load can be committed + // on receiving readResp or readSpecResp + if(!isInvisibleSpec){ + // if not invisibleSpec mode, we only receive readResp + assert(!pkt->isSpec() && !pkt->isValidate() && + "Receiving spec or validation response " + "in non invisibleSpec mode"); + iewStage->instToCommit(inst); + } else if (inst->fault != NoFault){ + inst->setExposeCompleted(); + inst->setExposeSent(); + iewStage->instToCommit(inst); + } else { + //isInvisibleSpec == true + if (pkt->isSpec()) { + inst->setSpecCompleted(); + } + + assert(!pkt->isValidate() && "receiving validation response" + "in invisibleSpec RC mode"); + assert(!pkt->isExpose() && "receiving expose response" + "on write back path"); + + // check whether the instruction can be committed + if ( !inst->isExposeCompleted() && inst->needPostFetch() ){ + DPRINTF(LSQUnit, "Expose not finished. " + "Wait until expose completion" + " to send inst [sn:%lli] to commit stage\n", inst->seqNum); + }else{ + DPRINTF(LSQUnit, "Expose and execution both finished. " + "Send inst [sn:%lli] to commit stage\n", inst->seqNum); + iewStage->instToCommit(inst); + } + + } iewStage->activityThisCycle(); @@ -1144,6 +1797,8 @@ LSQUnit::writeback(DynInstPtr &inst, PacketPtr pkt) iewStage->checkMisprediction(inst); } +// set store to complete [mengjia] +// complete the store after it commits template void LSQUnit::completeStore(int store_idx) @@ -1219,9 +1874,12 @@ LSQUnit::sendStore(PacketPtr data_pkt) retryPkt = data_pkt; return false; } + setSpecBuffState(data_pkt->req); return true; } + + template void LSQUnit::recvRetry() @@ -1229,6 +1887,7 @@ LSQUnit::recvRetry() if (isStoreBlocked) { DPRINTF(LSQUnit, "Receiving retry: store blocked\n"); assert(retryPkt != NULL); + assert(retryPkt->isWrite()); LSQSenderState *state = dynamic_cast(retryPkt->senderState); @@ -1277,7 +1936,7 @@ template inline void LSQUnit::incrLdIdx(int &load_idx) const { - if (++load_idx >= LQEntries) + if ((++load_idx) >= LQEntries) load_idx = 0; } @@ -1285,7 +1944,7 @@ template inline void LSQUnit::decrLdIdx(int &load_idx) const { - if (--load_idx < 0) + if ((--load_idx) < 0) load_idx += LQEntries; } diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index bc024f603..2720ab914 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -1370,7 +1370,8 @@ DefaultRename::serializeAfter(InstQueue &inst_list, ThreadID tid) // Mark a bit to say that I must serialize on the next instruction. serializeOnNextInst[tid] = true; return; - } + } + // Set the next instruction as serializing. inst_list.front()->setSerializeBefore(); diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index 1c3cc2815..7024d9920 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -212,6 +212,10 @@ class ROB /** Updates the tail instruction with the new youngest instruction. */ void updateTail(); + /** [SafeSpce] Updates load instructions visible condition + * set isPrevInstsCompleted and isPrevBrsResolved. */ + void updateVisibleState(); + /** Reads the PC of the oldest head instruction. */ // uint64_t readHeadPC(); diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 5a9dc90f9..ebfbb9754 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -402,6 +402,85 @@ ROB::doSquash(ThreadID tid) } +/* ************************** + * [InvisiSpec] update load insts state + * isPrevInstsCompleted; isPrevBrsResolved + * *************************/ +template +void +ROB::updateVisibleState() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (instList[tid].empty()) + continue; + + InstIt inst_it = instList[tid].begin(); + InstIt tail_inst_it = instList[tid].end(); + + bool prevInstsComplete=true; + bool prevBrsResolved=true; + bool prevInstsCommitted=true; + bool prevBrsCommitted=true; + + while (inst_it != tail_inst_it) { + DynInstPtr inst = *inst_it++; + + assert(inst!=0); + + if (!prevInstsComplete && + !prevBrsResolved) { + break; + } + + if (inst->isLoad()) { + if (prevInstsComplete) { + inst->setPrevInstsCompleted(); + } + if (prevBrsResolved){ + inst->setPrevBrsResolved(); + } + if (prevInstsCommitted) { + inst->setPrevInstsCommitted(); + } + if (prevBrsCommitted) { + inst->setPrevBrsCommitted(); + } + } + + // Update prev control insts state + if (inst->isControl()){ + prevBrsCommitted = false; + if (!inst->readyToCommit() || inst->getFault()!=NoFault + || inst->isSquashed()){ + prevBrsResolved = false; + } + } + + prevInstsCommitted = false; + + // Update prev insts state + if (inst->isNonSpeculative() || inst->isStoreConditional() + || inst->isMemBarrier() || inst->isWriteBarrier() || + (inst->isLoad() && inst->strictlyOrdered())){ + //Some special instructions, directly set canCommit + //when entering ROB + prevInstsComplete = false; + } + if (!inst->readyToCommit() || inst->getFault()!=NoFault + || inst->isSquashed()){ + prevInstsComplete = false; + } + + } + } +} + + template void ROB::updateHead() -- cgit v1.2.3