From 8b9b85e92cde81ef9eb0cf6595be59c96fd13f97 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 15 Nov 2010 19:37:03 -0800 Subject: O3: Make O3 support variably lengthed instructions. --- src/cpu/o3/fetch.hh | 13 ++- src/cpu/o3/fetch_impl.hh | 272 ++++++++++++++++++++++++++--------------------- 2 files changed, 161 insertions(+), 124 deletions(-) (limited to 'src/cpu/o3') diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 22e9e51b4..56f97e463 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -235,13 +235,14 @@ class DefaultFetch * Fetches the cache line that contains fetch_PC. Returns any * fault that happened. Puts the data into the class variable * cacheData. - * @param fetch_PC The PC address that is being fetched from. + * @param vaddr The memory address that is being fetched from. * @param ret_fault The fault reference that will be set to the result of * the icache access. * @param tid Thread id. + * @param pc The actual PC of the current instruction. * @return Any fault that occured. */ - bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, ThreadID tid); + bool fetchCacheLine(Addr vaddr, Fault &ret_fault, ThreadID tid, Addr pc); /** Squashes a specific thread and resets the PC. */ inline void doSquash(const TheISA::PCState &newPC, ThreadID tid); @@ -291,6 +292,10 @@ class DefaultFetch } private: + DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst, + StaticInstPtr curMacroop, TheISA::PCState thisPC, + TheISA::PCState nextPC, bool trace); + /** Handles retrying the fetch access. */ void recvRetry(); @@ -347,6 +352,10 @@ class DefaultFetch TheISA::PCState pc[Impl::MaxThreads]; + Addr fetchOffset[Impl::MaxThreads]; + + StaticInstPtr macroop[Impl::MaxThreads]; + /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index bbd9ce4a2..cca6b7a57 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -317,6 +317,8 @@ DefaultFetch::initStage() // Setup PC and nextPC with initial state. for (ThreadID tid = 0; tid < numThreads; tid++) { pc[tid] = cpu->pcState(tid); + fetchOffset[tid] = 0; + macroop[tid] = NULL; } for (ThreadID tid = 0; tid < numThreads; tid++) { @@ -534,7 +536,8 @@ DefaultFetch::lookupAndUpdateNextPC( template bool -DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, ThreadID tid) +DefaultFetch::fetchCacheLine(Addr vaddr, Fault &ret_fault, ThreadID tid, + Addr pc) { Fault fault = NoFault; @@ -547,7 +550,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, ThreadID tid DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n", tid); return false; - } else if (interruptPending && !(fetch_PC & 0x3)) { + } else if (interruptPending && !(pc & 0x3)) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or @@ -557,8 +560,8 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, ThreadID tid return false; } - // Align the fetch PC so it's at the start of a cache block. - Addr block_PC = icacheBlockAlignPC(fetch_PC); + // Align the fetch address so it's at the start of a cache block. + Addr block_PC = icacheBlockAlignPC(vaddr); // If we've already got the block, no need to try to fetch it again. if (cacheDataValid[tid] && block_PC == cacheDataPC[tid]) { @@ -570,7 +573,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, ThreadID tid // Build request here. RequestPtr mem_req = new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH, - fetch_PC, cpu->thread[tid]->contextId(), tid); + pc, cpu->thread[tid]->contextId(), tid); memReq[tid] = mem_req; @@ -645,6 +648,9 @@ DefaultFetch::doSquash(const TheISA::PCState &newPC, ThreadID tid) tid, newPC); pc[tid] = newPC; + fetchOffset[tid] = 0; + macroop[tid] = NULL; + predecoder.reset(); // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -957,6 +963,53 @@ DefaultFetch::checkSignalsAndUpdate(ThreadID tid) return false; } +template +typename Impl::DynInstPtr +DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst, + StaticInstPtr curMacroop, TheISA::PCState thisPC, + TheISA::PCState nextPC, bool trace) +{ + // Get a sequence number. + InstSeqNum seq = cpu->getAndIncrementInstSeq(); + + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = + new DynInst(staticInst, thisPC, nextPC, seq, cpu); + instruction->setTid(tid); + + instruction->setASID(tid); + + instruction->setThreadState(cpu->thread[tid]); + + DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created " + "[sn:%lli]\n", tid, thisPC.instAddr(), + thisPC.microPC(), seq); + + DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid, + instruction->staticInst-> + disassemble(thisPC.instAddr())); + +#if TRACING_ON + if (trace) { + instruction->traceData = + cpu->getTracer()->getInstRecord(curTick, cpu->tcBase(tid), + instruction->staticInst, thisPC, curMacroop); + } +#else + instruction->traceData = NULL; +#endif + + // Add instruction to the CPU's list of instructions. + instruction->setInstListIt(cpu->addInst(instruction)); + + // Write the instruction to the first slot in the queue + // that heads to decode. + assert(numInst < fetchWidth); + toDecode->insts[toDecode->size++] = instruction; + + return instruction; +} + template void DefaultFetch::fetch(bool &status_change) @@ -977,25 +1030,28 @@ DefaultFetch::fetch(bool &status_change) DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); // The current PC. - TheISA::PCState fetchPC = pc[tid]; + TheISA::PCState thisPC = pc[tid]; // Fault code for memory access. Fault fault = NoFault; + Addr pcOffset = fetchOffset[tid]; + Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + // If returning from the delay of a cache miss, then update the status // to running, otherwise do the cache access. Possibly move this up // to tick() function. if (fetchStatus[tid] == IcacheAccessComplete) { - DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", - tid); + DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",tid); fetchStatus[tid] = Running; status_change = true; } else if (fetchStatus[tid] == Running) { DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " - "instruction, starting at PC %s.\n", tid, fetchPC); + "instruction, starting at PC %#x.\n", tid, fetchAddr); - bool fetch_success = fetchCacheLine(fetchPC.instAddr(), fault, tid); + bool fetch_success = fetchCacheLine(fetchAddr, fault, tid, + thisPC.instAddr()); if (!fetch_success) { if (cacheBlocked) { ++icacheStallCycles; @@ -1033,143 +1089,133 @@ DefaultFetch::fetch(bool &status_change) return; } - TheISA::PCState nextPC = fetchPC; - - InstSeqNum inst_seq; - MachInst inst; - ExtMachInst ext_inst; + TheISA::PCState nextPC = thisPC; StaticInstPtr staticInst = NULL; - StaticInstPtr macroop = NULL; + StaticInstPtr curMacroop = macroop[tid]; if (fault == NoFault) { - //XXX Masking out pal mode bit. This will break x86. Alpha needs - //to pull the pal mode bit ouf ot the instruction address. - unsigned offset = (fetchPC.instAddr() & ~1) - cacheDataPC[tid]; - assert(offset < cacheBlkSize); // If the read of the first instruction was successful, then grab the // instructions from the rest of the cache line and put them into the // queue heading to decode. - DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to " - "decode.\n",tid); + DPRINTF(Fetch, + "[tid:%i]: Adding instructions to queue to decode.\n", tid); // Need to keep track of whether or not a predicted branch // ended this fetch block. - bool predicted_branch = false; + bool predictedBranch = false; - while (offset < cacheBlkSize && - numInst < fetchWidth && - !predicted_branch) { + TheISA::MachInst *cacheInsts = + reinterpret_cast(cacheData[tid]); - // Make sure this is a valid index. - assert(offset <= cacheBlkSize - instSize); + const unsigned numInsts = cacheBlkSize / instSize; + unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; - if (!macroop) { - // Get the instruction from the array of the cache line. - inst = TheISA::gtoh(*reinterpret_cast - (&cacheData[tid][offset])); + // Loop through instruction memory from the cache. + while (blkOffset < numInsts && + numInst < fetchWidth && + !predictedBranch) { + + // If we need to process more memory, do it now. + if (!curMacroop && !predecoder.extMachInstReady()) { + if (ISA_HAS_DELAY_SLOT && pcOffset == 0) { + // Walk past any annulled delay slot instructions. + Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask; + while (fetchAddr != pcAddr && blkOffset < numInsts) { + blkOffset++; + fetchAddr += instSize; + } + if (blkOffset >= numInsts) + break; + } + MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]); predecoder.setTC(cpu->thread[tid]->getTC()); - predecoder.moreBytes(fetchPC, fetchPC.instAddr(), inst); + predecoder.moreBytes(thisPC, fetchAddr, inst); - ext_inst = predecoder.getExtMachInst(fetchPC); - staticInst = StaticInstPtr(ext_inst, fetchPC.instAddr()); - if (staticInst->isMacroop()) - macroop = staticInst; + if (predecoder.needMoreBytes()) { + blkOffset++; + fetchAddr += instSize; + pcOffset += instSize; + } } + + // Extract as many instructions and/or microops as we can from + // the memory we've processed so far. do { - if (macroop) { - staticInst = macroop->fetchMicroop(fetchPC.microPC()); + if (!curMacroop) { + if (predecoder.extMachInstReady()) { + ExtMachInst extMachInst; + + extMachInst = predecoder.getExtMachInst(thisPC); + pcOffset = 0; + staticInst = StaticInstPtr(extMachInst, + thisPC.instAddr()); + + // Increment stat of fetched instructions. + ++fetchedInsts; + + if (staticInst->isMacroop()) + curMacroop = staticInst; + } else { + // We need more bytes for this instruction. + break; + } + } + if (curMacroop) { + staticInst = curMacroop->fetchMicroop(thisPC.microPC()); if (staticInst->isLastMicroop()) - macroop = NULL; + curMacroop = NULL; } - // Get a sequence number. - inst_seq = cpu->getAndIncrementInstSeq(); - - // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(staticInst, - fetchPC, nextPC, - inst_seq, cpu); - instruction->setTid(tid); - - instruction->setASID(tid); - - instruction->setThreadState(cpu->thread[tid]); - - DPRINTF(Fetch, "[tid:%i]: Instruction PC %s (%d) created " - "[sn:%lli]\n", tid, instruction->pcState(), - instruction->microPC(), inst_seq); + DynInstPtr instruction = + buildInst(tid, staticInst, curMacroop, + thisPC, nextPC, true); - //DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst); + numInst++; - DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid, - instruction->staticInst-> - disassemble(fetchPC.instAddr())); - -#if TRACING_ON - instruction->traceData = - cpu->getTracer()->getInstRecord(curTick, cpu->tcBase(tid), - instruction->staticInst, fetchPC, macroop); -#else - instruction->traceData = NULL; -#endif + nextPC = thisPC; // If we're branching after this instruction, quite fetching // from the same block then. - predicted_branch = fetchPC.branching(); - predicted_branch |= + predictedBranch |= thisPC.branching(); + predictedBranch |= lookupAndUpdateNextPC(instruction, nextPC); - if (predicted_branch) { - DPRINTF(Fetch, "Branch detected with PC = %s\n", fetchPC); + if (predictedBranch) { + DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); } - // Add instruction to the CPU's list of instructions. - instruction->setInstListIt(cpu->addInst(instruction)); - - // Write the instruction to the first slot in the queue - // that heads to decode. - toDecode->insts[numInst] = instruction; - - toDecode->size++; - - // Increment stat of fetched instructions. - ++fetchedInsts; - // Move to the next instruction, unless we have a branch. - fetchPC = nextPC; + thisPC = nextPC; if (instruction->isQuiesce()) { - DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!", - curTick); + DPRINTF(Fetch, + "Quiesce instruction encountered, halting fetch!"); fetchStatus[tid] = QuiescePending; - ++numInst; status_change = true; break; } - - ++numInst; - } while (staticInst->isMicroop() && - !staticInst->isLastMicroop() && + } while ((curMacroop || predecoder.extMachInstReady()) && numInst < fetchWidth); - //XXX Masking out pal mode bit. - offset = (fetchPC.instAddr() & ~1) - cacheDataPC[tid]; } - if (predicted_branch) { + if (predictedBranch) { DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " "instruction encountered.\n", tid); } else if (numInst >= fetchWidth) { DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " "for this cycle.\n", tid); - } else if (offset >= cacheBlkSize) { + } else if (blkOffset >= cacheBlkSize) { DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " "block.\n", tid); } } + macroop[tid] = curMacroop; + fetchOffset[tid] = pcOffset; + if (numInst > 0) { wroteToTimeBuffer = true; } @@ -1188,42 +1234,24 @@ DefaultFetch::fetch(bool &status_change) // Send the fault to commit. This thread will not do anything // until commit handles the fault. The only other way it can - // wake up is if a squash comes along and changes the PC. - assert(numInst < fetchWidth); - // Get a sequence number. - inst_seq = cpu->getAndIncrementInstSeq(); - // We will use a nop in order to carry the fault. - ext_inst = TheISA::NoopMachInst; - - // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(ext_inst, fetchPC, nextPC, - inst_seq, cpu); - TheISA::advancePC(nextPC, instruction->staticInst); - instruction->setPredTarg(nextPC); - instruction->setTid(tid); - - instruction->setASID(tid); + // wake up is if a squash comes along and changes the PC. Send the + // fault on a dummy nop. + staticInst = StaticInstPtr(TheISA::NoopMachInst, thisPC.instAddr()); - instruction->setThreadState(cpu->thread[tid]); - - instruction->traceData = NULL; - - instruction->setInstListIt(cpu->addInst(instruction)); + DynInstPtr instruction = + buildInst(tid, staticInst, NULL, thisPC, nextPC, false); + TheISA::advancePC(nextPC, staticInst); + instruction->setPredTarg(nextPC); instruction->fault = fault; - toDecode->insts[numInst] = instruction; - toDecode->size++; - - wroteToTimeBuffer = true; - DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid); fetchStatus[tid] = TrapPending; status_change = true; DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s", - tid, fault->name(), pc[tid]); + tid, fault->name(), thisPC); } } -- cgit v1.2.3