Diffstat (limited to 'src/cpu/trace/trace_cpu.cc')
-rw-r--r-- | src/cpu/trace/trace_cpu.cc | 1454
1 file changed, 1454 insertions(+), 0 deletions(-)
diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc new file mode 100644 index 000000000..2e989f6ff --- /dev/null +++ b/src/cpu/trace/trace_cpu.cc @@ -0,0 +1,1454 @@ +/* + * Copyright (c) 2013 - 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Radhika Jagtap + * Andreas Hansson + * Thomas Grass + */ + +#include "cpu/trace/trace_cpu.hh" + +#include "sim/sim_exit.hh" + +// Declare and initialize the static counter for number of trace CPUs. +int TraceCPU::numTraceCPUs = 0; + +TraceCPU::TraceCPU(TraceCPUParams *params) + : BaseCPU(params), + icachePort(this), + dcachePort(this), + instMasterID(params->system->getMasterId(name() + ".inst")), + dataMasterID(params->system->getMasterId(name() + ".data")), + instTraceFile(params->instTraceFile), + dataTraceFile(params->dataTraceFile), + icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile), + dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile, + params->sizeROB, params->sizeStoreBuffer, + params->sizeLoadBuffer), + icacheNextEvent(this), + dcacheNextEvent(this), + oneTraceComplete(false), + firstFetchTick(0), + execCompleteEvent(nullptr) +{ + // Increment static counter for number of Trace CPUs. + ++TraceCPU::numTraceCPUs; + + // Check that the python parameters for sizes of ROB, store buffer and load + // buffer do not overflow the corresponding C++ variables. + fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the " + "max. 
value of %d.\n", params->sizeROB, UINT16_MAX); + fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d " + "exceeds the max. value of %d.\n", params->sizeROB, + UINT16_MAX); + fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to" + " %d exceeds the max. value of %d.\n", + params->sizeLoadBuffer, UINT16_MAX); +} + +TraceCPU::~TraceCPU() +{ + +} + +TraceCPU* +TraceCPUParams::create() +{ + return new TraceCPU(this); +} + +void +TraceCPU::takeOverFrom(BaseCPU *oldCPU) +{ + // Unbind the ports of the old CPU and bind the ports of the TraceCPU. + assert(!getInstPort().isConnected()); + assert(oldCPU->getInstPort().isConnected()); + BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort(); + oldCPU->getInstPort().unbind(); + getInstPort().bind(inst_peer_port); + + assert(!getDataPort().isConnected()); + assert(oldCPU->getDataPort().isConnected()); + BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort(); + oldCPU->getDataPort().unbind(); + getDataPort().bind(data_peer_port); +} + +void +TraceCPU::init() +{ + DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"." + "\n", instTraceFile); + DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n", + dataTraceFile); + + BaseCPU::init(); + + // Get the send tick of the first instruction read request and schedule + // icacheNextEvent at that tick. + Tick first_icache_tick = icacheGen.init(); + schedule(icacheNextEvent, first_icache_tick); + + // Get the send tick of the first data read/write request and schedule + // dcacheNextEvent at that tick. + Tick first_dcache_tick = dcacheGen.init(); + schedule(dcacheNextEvent, first_dcache_tick); + + // The static counter for number of Trace CPUs is correctly set at this + // point so create an event and pass it. + execCompleteEvent = new CountedExitEvent("end of all traces reached.", + numTraceCPUs); + // Save the first fetch request tick to dump it as tickOffset + firstFetchTick = first_icache_tick; +} + +void +TraceCPU::schedIcacheNext() +{ + DPRINTF(TraceCPUInst, "IcacheGen event.\n"); + + // Try to send the current packet or a retry packet if there is one + bool sched_next = icacheGen.tryNext(); + // If packet sent successfully, schedule next event + if (sched_next) { + DPRINTF(TraceCPUInst, "Scheduling next icacheGen event " + "at %d.\n", curTick() + icacheGen.tickDelta()); + schedule(icacheNextEvent, curTick() + icacheGen.tickDelta()); + ++numSchedIcacheEvent; + } else { + // check if traceComplete. If not, do nothing because sending failed + // and next event will be scheduled via RecvRetry() + if (icacheGen.isTraceComplete()) { + // If this is the first trace to complete, set the variable. If it + // is already set then both traces are complete to exit sim. + checkAndSchedExitEvent(); + } + } + return; +} + +void +TraceCPU::schedDcacheNext() +{ + DPRINTF(TraceCPUData, "DcacheGen event.\n"); + + dcacheGen.execute(); + if (dcacheGen.isExecComplete()) { + checkAndSchedExitEvent(); + } +} + +void +TraceCPU::checkAndSchedExitEvent() +{ + if (!oneTraceComplete) { + oneTraceComplete = true; + } else { + // Schedule event to indicate execution is complete as both + // instruction and data access traces have been played back. 
+ inform("%s: Execution complete.\n", name()); + + // Record stats which are computed at the end of simulation + tickOffset = firstFetchTick; + numCycles = (clockEdge() - firstFetchTick) / clockPeriod(); + numOps = dcacheGen.getMicroOpCount(); + schedule(*execCompleteEvent, curTick()); + } +} + +void +TraceCPU::regStats() +{ + + BaseCPU::regStats(); + + numSchedDcacheEvent + .name(name() + ".numSchedDcacheEvent") + .desc("Number of events scheduled to trigger data request generator") + ; + + numSchedIcacheEvent + .name(name() + ".numSchedIcacheEvent") + .desc("Number of events scheduled to trigger instruction request generator") + ; + + numOps + .name(name() + ".numOps") + .desc("Number of micro-ops simulated by the Trace CPU") + ; + + cpi + .name(name() + ".cpi") + .desc("Cycles per micro-op used as a proxy for CPI") + .precision(6) + ; + cpi = numCycles/numOps; + + tickOffset + .name(name() + ".tickOffset") + .desc("The first execution tick for the root node of elastic traces") + ; + + icacheGen.regStats(); + dcacheGen.regStats(); +} + +void +TraceCPU::ElasticDataGen::regStats() +{ + using namespace Stats; + + maxDependents + .name(name() + ".maxDependents") + .desc("Max number of dependents observed on a node") + ; + + maxReadyListSize + .name(name() + ".maxReadyListSize") + .desc("Max size of the ready list observed") + ; + + numSendAttempted + .name(name() + ".numSendAttempted") + .desc("Number of first attempts to send a request") + ; + + numSendSucceeded + .name(name() + ".numSendSucceeded") + .desc("Number of successful first attempts") + ; + + numSendFailed + .name(name() + ".numSendFailed") + .desc("Number of failed first attempts") + ; + + numRetrySucceeded + .name(name() + ".numRetrySucceeded") + .desc("Number of successful retries") + ; + + numSplitReqs + .name(name() + ".numSplitReqs") + .desc("Number of split requests") + ; + + numSOLoads + .name(name() + ".numSOLoads") + .desc("Number of strictly ordered loads") + ; + + numSOStores + .name(name() + ".numSOStores") + .desc("Number of strictly ordered stores") + ; + + dataLastTick + .name(name() + ".dataLastTick") + .desc("Last tick simulated from the elastic data trace") + ; +} + +Tick +TraceCPU::ElasticDataGen::init() +{ + DPRINTF(TraceCPUData, "Initializing data memory request generator " + "DcacheGen: elastic issue with retry.\n"); + + if (!readNextWindow()) + panic("Trace has %d elements. It must have at least %d elements.\n", + depGraph.size(), 2 * windowSize); + DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n", + depGraph.size()); + + if (!readNextWindow()) + panic("Trace has %d elements. It must have at least %d elements.\n", + depGraph.size(), 2 * windowSize); + DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n", + depGraph.size()); + + // Print readyList + if (DTRACE(TraceCPUData)) { + printReadyList(); + } + auto free_itr = readyList.begin(); + DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli" + " is %d.\n", free_itr->seqNum, free_itr->execTick); + // Return the execute tick of the earliest ready node so that an event + // can be scheduled to call execute() + return (free_itr->execTick); +} + +void +TraceCPU::ElasticDataGen::exit() +{ + trace.reset(); +} + +bool +TraceCPU::ElasticDataGen::readNextWindow() +{ + + // Read and add next window + DPRINTF(TraceCPUData, "Reading next window from file.\n"); + + if (traceComplete) { + // We are at the end of the file, thus we have no more records. + // Return false. 
+ return false; + } + + DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n", + depGraph.size()); + + uint32_t num_read = 0; + while (num_read != windowSize) { + + // Create a new graph node + GraphNode* new_node = new GraphNode; + + // Read the next line to get the next record. If that fails then end of + // trace has been reached and traceComplete needs to be set in addition + // to returning false. + if (!trace.read(new_node)) { + DPRINTF(TraceCPUData, "\tTrace complete!\n"); + traceComplete = true; + return false; + } + + // Annotate the ROB dependencies of the new node onto the parent nodes. + addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep); + // Annotate the register dependencies of the new node onto the parent + // nodes. + addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep); + + num_read++; + // Add to map + depGraph[new_node->seqNum] = new_node; + if (new_node->numRobDep == 0 && new_node->numRegDep == 0) { + // Source dependencies are already complete, check if resources + // are available and issue. The execution time is approximated + // to current time plus the computational delay. + checkAndIssue(new_node); + } + } + + DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n", + depGraph.size()); + return true; +} + +template<typename T> void +TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, + T& dep_array, uint8_t& num_dep) +{ + for (auto& a_dep : dep_array) { + // The convention is to set the dependencies starting with the first + // index in the ROB and register dependency arrays. Thus, when we reach + // a dependency equal to the initialisation value of zero, we know have + // iterated over all dependencies and can break. + if (a_dep == 0) + break; + // We look up the valid dependency, i.e. the parent of this node + auto parent_itr = depGraph.find(a_dep); + if (parent_itr != depGraph.end()) { + // If the parent is found, it is yet to be executed. Append a + // pointer to the new node to the dependents list of the parent + // node. + parent_itr->second->dependents.push_back(new_node); + auto num_depts = parent_itr->second->dependents.size(); + maxDependents = std::max<double>(num_depts, maxDependents.value()); + } else { + // The dependency is not found in the graph. So consider + // the execution of the parent is complete, i.e. remove this + // dependency. + a_dep = 0; + num_dep--; + } + } +} + +void +TraceCPU::ElasticDataGen::execute() +{ + DPRINTF(TraceCPUData, "Execute start occupancy:\n"); + DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " + "depFreeQueue = %d ,", depGraph.size(), readyList.size(), + depFreeQueue.size()); + hwResource.printOccupancy(); + + // Read next window to make sure that dependents of all dep-free nodes + // are in the depGraph + if (nextRead) { + readNextWindow(); + nextRead = false; + } + + // First attempt to issue the pending dependency-free nodes held + // in depFreeQueue. If resources have become available for a node, + // then issue it, i.e. add the node to readyList. + while (!depFreeQueue.empty()) { + if (checkAndIssue(depFreeQueue.front(), false)) { + DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. 
num " + "%lli.\n", (depFreeQueue.front())->seqNum); + depFreeQueue.pop(); + } else { + break; + } + } + // Proceed to execute from readyList + auto graph_itr = depGraph.begin(); + auto free_itr = readyList.begin(); + // Iterate through readyList until the next free node has its execute + // tick later than curTick or the end of readyList is reached + while (free_itr->execTick <= curTick() && free_itr != readyList.end()) { + + // Get pointer to the node to be executed + graph_itr = depGraph.find(free_itr->seqNum); + assert(graph_itr != depGraph.end()); + GraphNode* node_ptr = graph_itr->second; + + // If there is a retryPkt send that else execute the load + if (retryPkt) { + // The retryPkt must be the request that was created by the + // first node in the readyList. + if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) { + panic("Retry packet's seqence number does not match " + "the first node in the readyList.\n"); + } + if (port.sendTimingReq(retryPkt)) { + ++numRetrySucceeded; + retryPkt = nullptr; + } + } else if (node_ptr->isLoad || node_ptr->isStore) { + // If there is no retryPkt, attempt to send a memory request in + // case of a load or store node. If the send fails, executeMemReq() + // returns a packet pointer, which we save in retryPkt. In case of + // a comp node we don't do anything and simply continue as if the + // execution of the comp node succedded. + retryPkt = executeMemReq(node_ptr); + } + // If the retryPkt or a new load/store node failed, we exit from here + // as a retry from cache will bring the control to execute(). The + // first node in readyList then, will be the failed node. + if (retryPkt) { + break; + } + + // Proceed to remove dependencies for the successfully executed node. + // If it is a load which is not strictly ordered and we sent a + // request for it successfully, we do not yet mark any register + // dependencies complete. But as per dependency modelling we need + // to mark ROB dependencies of load and non load/store nodes which + // are based on successful sending of the load as complete. + if (node_ptr->isLoad && !node_ptr->isStrictlyOrdered()) { + // If execute succeeded mark its dependents as complete + DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up " + "dependents..\n", node_ptr->seqNum); + + auto child_itr = (node_ptr->dependents).begin(); + while (child_itr != (node_ptr->dependents).end()) { + // ROB dependency of a store on a load must not be removed + // after load is sent but after response is received + if (!(*child_itr)->isStore && + (*child_itr)->removeRobDep(node_ptr->seqNum)) { + + // Check if the child node has become dependency free + if ((*child_itr)->numRobDep == 0 && + (*child_itr)->numRegDep == 0) { + + // Source dependencies are complete, check if + // resources are available and issue + checkAndIssue(*child_itr); + } + // Remove this child for the sent load and point to new + // location of the element following the erased element + child_itr = node_ptr->dependents.erase(child_itr); + } else { + // This child is not dependency-free, point to the next + // child + child_itr++; + } + } + } else { + // If it is a strictly ordered load mark its dependents as complete + // as we do not send a request for this case. If it is a store or a + // comp node we also mark all its dependents complete. + DPRINTF(TraceCPUData, "Node seq. num %lli done. 
Waking" + " up dependents..\n", node_ptr->seqNum); + + for (auto child : node_ptr->dependents) { + // If the child node is dependency free removeDepOnInst() + // returns true. + if (child->removeDepOnInst(node_ptr->seqNum)) { + // Source dependencies are complete, check if resources + // are available and issue + checkAndIssue(child); + } + } + } + + // After executing the node, remove from readyList and delete node. + readyList.erase(free_itr); + // If it is a cacheable load which was sent, don't delete + // just yet. Delete it in completeMemAccess() after the + // response is received. If it is an strictly ordered + // load, it was not sent and all dependencies were simply + // marked complete. Thus it is safe to delete it. For + // stores and non load/store nodes all dependencies were + // marked complete so it is safe to delete it. + if (!node_ptr->isLoad || node_ptr->isStrictlyOrdered()) { + // Release all resources occupied by the completed node + hwResource.release(node_ptr); + // clear the dynamically allocated set of dependents + (node_ptr->dependents).clear(); + // delete node + delete node_ptr; + // remove from graph + depGraph.erase(graph_itr); + } + // Point to first node to continue to next iteration of while loop + free_itr = readyList.begin(); + } // end of while loop + + // Print readyList, sizes of queues and resource status after updating + if (DTRACE(TraceCPUData)) { + printReadyList(); + DPRINTF(TraceCPUData, "Execute end occupancy:\n"); + DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, " + "depFreeQueue = %d ,", depGraph.size(), readyList.size(), + depFreeQueue.size()); + hwResource.printOccupancy(); + } + + if (retryPkt) { + DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry" + "event from the cache for seq. num %lli.\n", + retryPkt->req->getReqInstSeqNum()); + return; + } + // If the size of the dependency graph is less than the dependency window + // then read from the trace file to populate the graph next time we are in + // execute. + if (depGraph.size() < windowSize && !traceComplete) + nextRead = true; + + // If cache is not blocked, schedule an event for the first execTick in + // readyList else retry from cache will schedule the event. If the ready + // list is empty then check if the next pending node has resources + // available to issue. If yes, then schedule an event for the next cycle. + if (!readyList.empty()) { + Tick next_event_tick = std::max(readyList.begin()->execTick, + curTick()); + DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", + next_event_tick); + owner.schedDcacheNextEvent(next_event_tick); + } else if (readyList.empty() && !depFreeQueue.empty() && + hwResource.isAvailable(depFreeQueue.front())) { + DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", + owner.clockEdge(Cycles(1))); + owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1))); + } + + // If trace is completely read, readyList is empty and depGraph is empty, + // set execComplete to true + if (depGraph.empty() && readyList.empty() && traceComplete && + !hwResource.awaitingResponse()) { + DPRINTF(TraceCPUData, "\tExecution Complete!\n"); + execComplete = true; + dataLastTick = curTick(); + } +} + +PacketPtr +TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) +{ + + DPRINTF(TraceCPUData, "Executing memory request %lli (addr %d, pc %#x, " + "size %d, flags %d).\n", node_ptr->seqNum, node_ptr->addr, + node_ptr->pc, node_ptr->size, node_ptr->flags); + + // If the request is strictly ordered, do not send it. 
Just return nullptr + // as if it was succesfully sent. + if (node_ptr->isStrictlyOrdered()) { + node_ptr->isLoad ? ++numSOLoads : ++numSOStores; + DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n", + node_ptr->seqNum); + return nullptr; + } + + // Check if the request spans two cache lines as this condition triggers + // an assert fail in the L1 cache. If it does then truncate the size to + // access only until the end of that line and ignore the remainder. The + // stat counting this is useful to keep a check on how frequently this + // happens. If required the code could be revised to mimick splitting such + // a request into two. + unsigned blk_size = owner.cacheLineSize(); + Addr blk_offset = (node_ptr->addr & (Addr)(blk_size - 1)); + if (!(blk_offset + node_ptr->size <= blk_size)) { + node_ptr->size = blk_size - blk_offset; + ++numSplitReqs; + } + + // Create a request and the packet containing request + Request* req = new Request(node_ptr->addr, node_ptr->size, node_ptr->flags, + masterID, node_ptr->seqNum, + ContextID(0), ThreadID(0)); + req->setPC(node_ptr->pc); + PacketPtr pkt; + uint8_t* pkt_data = new uint8_t[req->getSize()]; + if (node_ptr->isLoad) { + pkt = Packet::createRead(req); + } else { + pkt = Packet::createWrite(req); + memset(pkt_data, 0xA, req->getSize()); + } + pkt->dataDynamic(pkt_data); + + // Call MasterPort method to send a timing request for this packet + bool success = port.sendTimingReq(pkt); + ++numSendAttempted; + + if (!success) { + // If it fails, return the packet to retry when a retry is signalled by + // the cache + ++numSendFailed; + DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n"); + return pkt; + } else { + // It is succeeds, return nullptr + ++numSendSucceeded; + return nullptr; + } +} + +bool +TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first) +{ + // Assert the node is dependency-free + assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0); + + // If this is the first attempt, print a debug message to indicate this. + if (first) { + DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now" + " dependency free.\n", node_ptr->seqNum, + node_ptr->isLoad ? "L" : (node_ptr->isStore ? "S" : "C"), + node_ptr->robNum); + } + + // Check if resources are available to issue the specific node + if (hwResource.isAvailable(node_ptr)) { + // If resources are free only then add to readyList + DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding" + " to readyList, occupying resources.\n", node_ptr->seqNum); + // Compute the execute tick by adding the compute delay for the node + // and add the ready node to the ready list + addToSortedReadyList(node_ptr->seqNum, + owner.clockEdge() + node_ptr->compDelay); + // Account for the resources taken up by this issued node. + hwResource.occupy(node_ptr); + return true; + + } else { + if (first) { + // Although dependencies are complete, resources are not available. + DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli." + " Adding to depFreeQueue.\n", node_ptr->seqNum); + depFreeQueue.push(node_ptr); + } else { + DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. " + "Still pending issue.\n", node_ptr->seqNum); + } + return false; + } +} + +void +TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt) +{ + // Release the resources for this completed node. + if (pkt->isWrite()) { + // Consider store complete. 
+ hwResource.releaseStoreBuffer(); + // If it is a store response then do nothing since we do not model + // dependencies on store completion in the trace. But if we were + // blocking execution due to store buffer fullness, we need to schedule + // an event and attempt to progress. + } else { + // If it is a load response then release the dependents waiting on it. + // Get pointer to the completed load + auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum()); + assert(graph_itr != depGraph.end()); + GraphNode* node_ptr = graph_itr->second; + + // Release resources occupied by the load + hwResource.release(node_ptr); + + DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up" + " dependents..\n", node_ptr->seqNum); + + for (auto child : node_ptr->dependents) { + if (child->removeDepOnInst(node_ptr->seqNum)) { + checkAndIssue(child); + } + } + + // clear the dynamically allocated set of dependents + (node_ptr->dependents).clear(); + // delete node + delete node_ptr; + // remove from graph + depGraph.erase(graph_itr); + } + + if (DTRACE(TraceCPUData)) { + printReadyList(); + } + + // If the size of the dependency graph is less than the dependency window + // then read from the trace file to populate the graph next time we are in + // execute. + if (depGraph.size() < windowSize && !traceComplete) + nextRead = true; + + // If not waiting for retry, attempt to schedule next event + if (!retryPkt) { + // We might have new dep-free nodes in the list which will have execute + // tick greater than or equal to curTick. But a new dep-free node might + // have its execute tick earlier. Therefore, attempt to reschedule. It + // could happen that the readyList is empty and we got here via a + // last remaining response. So, either the trace is complete or there + // are pending nodes in the depFreeQueue. The checking is done in the + // execute() control flow, so schedule an event to go via that flow. + Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) : + std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1))); + DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n", + next_event_tick); + owner.schedDcacheNextEvent(next_event_tick); + } +} + +void +TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num, + Tick exec_tick) +{ + ReadyNode ready_node; + ready_node.seqNum = seq_num; + ready_node.execTick = exec_tick; + + // Iterator to readyList + auto itr = readyList.begin(); + + // If the readyList is empty, simply insert the new node at the beginning + // and return + if (itr == readyList.end()) { + readyList.insert(itr, ready_node); + maxReadyListSize = std::max<double>(readyList.size(), + maxReadyListSize.value()); + return; + } + + // If the new node has its execution tick equal to the first node in the + // list then go to the next node. If the first node in the list failed + // to execute, its position as the first is thus maintained. + if (retryPkt) + if (retryPkt->req->getReqInstSeqNum() == itr->seqNum) + itr++; + + // Increment the iterator and compare the node pointed to by it to the new + // node till the position to insert the new node is found. 
+ bool found = false; + while (!found && itr != readyList.end()) { + // If the execution tick of the new node is less than the node then + // this is the position to insert + if (exec_tick < itr->execTick) + found = true; + // If the execution tick of the new node is equal to the node then + // sort in ascending order of sequence numbers + else if (exec_tick == itr->execTick) { + // If the sequence number of the new node is less than the node + // then this is the position to insert + if (seq_num < itr->seqNum) + found = true; + // Else go to next node + else + itr++; + } + // If the execution tick of the new node is greater than the node then + // go to the next node + else + itr++; + } + readyList.insert(itr, ready_node); + // Update the stat for max size reached of the readyList + maxReadyListSize = std::max<double>(readyList.size(), + maxReadyListSize.value()); +} + +void +TraceCPU::ElasticDataGen::printReadyList() { + + auto itr = readyList.begin(); + if (itr == readyList.end()) { + DPRINTF(TraceCPUData, "readyList is empty.\n"); + return; + } + DPRINTF(TraceCPUData, "Printing readyList:\n"); + while (itr != readyList.end()) { + auto graph_itr = depGraph.find(itr->seqNum); + GraphNode* node_ptr M5_VAR_USED = graph_itr->second; + DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum, + node_ptr->isLoad ? "L" : (node_ptr->isStore ? "S" : "C"), + itr->execTick); + itr++; + } +} + +TraceCPU::ElasticDataGen::HardwareResource::HardwareResource( + uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) + : sizeROB(max_rob), + sizeStoreBuffer(max_stores), + sizeLoadBuffer(max_loads), + oldestInFlightRobNum(UINT64_MAX), + numInFlightLoads(0), + numInFlightStores(0) +{} + +void +TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node) +{ + // Occupy ROB entry for the issued node + // Merely maintain the oldest node, i.e. numerically least robNum by saving + // it in the variable oldestInFLightRobNum. + inFlightNodes[new_node->seqNum] = new_node->robNum; + oldestInFlightRobNum = inFlightNodes.begin()->second; + + // Occupy Load/Store Buffer entry for the issued node if applicable + if (new_node->isLoad) { + ++numInFlightLoads; + } else if (new_node->isStore) { + ++numInFlightStores; + } // else if it is a non load/store node, no buffer entry is occupied + + printOccupancy(); +} + +void +TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node) +{ + assert(!inFlightNodes.empty()); + DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n", + done_node->seqNum); + + assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end()); + inFlightNodes.erase(done_node->seqNum); + + if (inFlightNodes.empty()) { + // If we delete the only in-flight node and then the + // oldestInFlightRobNum is set to it's initialized (max) value. + oldestInFlightRobNum = UINT64_MAX; + } else { + // Set the oldest in-flight node rob number equal to the first node in + // the inFlightNodes since that will have the numerically least value. + oldestInFlightRobNum = inFlightNodes.begin()->second; + } + + DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, " + "oldestInFlightRobNum = %d\n", inFlightNodes.size(), + oldestInFlightRobNum); + + // A store is considered complete when a request is sent, thus ROB entry is + // freed. But it occupies an entry in the Store Buffer until its response + // is received. A load is considered complete when a response is received, + // thus both ROB and Load Buffer entries can be released. 
+ if (done_node->isLoad) { + assert(numInFlightLoads != 0); + --numInFlightLoads; + } + // For normal writes, we send the requests out and clear a store buffer + // entry on response. For writes which are strictly ordered, for e.g. + // writes to device registers, we do that within release() which is called + // when node is executed and taken off from readyList. + if (done_node->isStore && done_node->isStrictlyOrdered()) { + releaseStoreBuffer(); + } +} + +void +TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer() +{ + assert(numInFlightStores != 0); + --numInFlightStores; +} + +bool +TraceCPU::ElasticDataGen::HardwareResource::isAvailable( + const GraphNode* new_node) const +{ + uint16_t num_in_flight_nodes; + if (inFlightNodes.empty()) { + num_in_flight_nodes = 0; + DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" + " #in-flight nodes = 0", new_node->seqNum); + } else if (new_node->robNum > oldestInFlightRobNum) { + // This is the intuitive case where new dep-free node is younger + // instruction than the oldest instruction in-flight. Thus we make sure + // in_flight_nodes does not overflow. + num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum; + DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:" + " #in-flight nodes = %d - %d = %d", new_node->seqNum, + new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes); + } else { + // This is the case where an instruction older than the oldest in- + // flight instruction becomes dep-free. Thus we must have already + // accounted for the entry in ROB for this new dep-free node. + // Immediately after this check returns true, oldestInFlightRobNum will + // be updated in occupy(). We simply let this node issue now. + num_in_flight_nodes = 0; + DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. 
num %lli:" + " new oldestInFlightRobNum = %d, #in-flight nodes ignored", + new_node->seqNum, new_node->robNum); + } + DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n", + numInFlightLoads, sizeLoadBuffer, + numInFlightStores, sizeStoreBuffer); + // Check if resources are available to issue the specific node + if (num_in_flight_nodes >= sizeROB) { + return false; + } + if (new_node->isLoad && numInFlightLoads >= sizeLoadBuffer) { + return false; + } + if (new_node->isStore && numInFlightStores >= sizeStoreBuffer) { + return false; + } + return true; +} + +bool +TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const { + // Return true if there is at least one read or write request in flight + return (numInFlightStores != 0 || numInFlightLoads != 0); +} + +void +TraceCPU::ElasticDataGen::HardwareResource::printOccupancy() { + DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, " + "LQ = %d/%d, SQ = %d/%d.\n", + oldestInFlightRobNum, + numInFlightLoads, sizeLoadBuffer, + numInFlightStores, sizeStoreBuffer); +} + +void +TraceCPU::FixedRetryGen::regStats() +{ + using namespace Stats; + + numSendAttempted + .name(name() + ".numSendAttempted") + .desc("Number of first attempts to send a request") + ; + + numSendSucceeded + .name(name() + ".numSendSucceeded") + .desc("Number of successful first attempts") + ; + + numSendFailed + .name(name() + ".numSendFailed") + .desc("Number of failed first attempts") + ; + + numRetrySucceeded + .name(name() + ".numRetrySucceeded") + .desc("Number of successful retries") + ; + + instLastTick + .name(name() + ".instLastTick") + .desc("Last tick simulated from the fixed inst trace") + ; +} + +Tick +TraceCPU::FixedRetryGen::init() +{ + DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator" + " IcacheGen: fixed issue with retry.\n"); + + if (nextExecute()) { + DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick); + return currElement.tick; + } else { + panic("Read of first message in the trace failed.\n"); + return MaxTick; + } +} + +bool +TraceCPU::FixedRetryGen::tryNext() +{ + // If there is a retry packet, try to send it + if (retryPkt) { + + DPRINTF(TraceCPUInst, "Trying to send retry packet.\n"); + + if (!port.sendTimingReq(retryPkt)) { + // Still blocked! This should never occur. + DPRINTF(TraceCPUInst, "Retry packet sending failed.\n"); + return false; + } + ++numRetrySucceeded; + } else { + + DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n"); + + // try sending current element + assert(currElement.isValid()); + + ++numSendAttempted; + + if (!send(currElement.addr, currElement.blocksize, + currElement.cmd, currElement.flags, currElement.pc)) { + DPRINTF(TraceCPUInst, "currElement sending failed.\n"); + ++numSendFailed; + // return false to indicate not to schedule next event + return false; + } else { + ++numSendSucceeded; + } + } + // If packet was sent successfully, either retryPkt or currElement, return + // true to indicate to schedule event at current Tick plus delta. If packet + // was sent successfully and there is no next packet to send, return false. 
+ DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next " + "element.\n"); + retryPkt = nullptr; + // Read next element into currElement, currElement gets cleared so save the + // tick to calculate delta + Tick last_tick = currElement.tick; + if (nextExecute()) { + assert(currElement.tick >= last_tick); + delta = currElement.tick - last_tick; + } + return !traceComplete; +} + +void +TraceCPU::FixedRetryGen::exit() +{ + trace.reset(); +} + +bool +TraceCPU::FixedRetryGen::nextExecute() +{ + if (traceComplete) + // We are at the end of the file, thus we have no more messages. + // Return false. + return false; + + + //Reset the currElement to the default values + currElement.clear(); + + // Read the next line to get the next message. If that fails then end of + // trace has been reached and traceComplete needs to be set in addition + // to returning false. If successful then next message is in currElement. + if (!trace.read(&currElement)) { + traceComplete = true; + instLastTick = curTick(); + return false; + } + + DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n", + currElement.cmd.isRead() ? 'r' : 'w', + currElement.addr, + currElement.pc, + currElement.blocksize, + currElement.tick); + + return true; +} + +bool +TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd, + Request::FlagsType flags, Addr pc) +{ + + // Create new request + Request* req = new Request(addr, size, flags, masterID); + req->setPC(pc); + + // If this is not done it triggers assert in L1 cache for invalid contextId + req->setThreadContext(ContextID(0), ThreadID(0)); + + // Embed it in a packet + PacketPtr pkt = new Packet(req, cmd); + + uint8_t* pkt_data = new uint8_t[req->getSize()]; + pkt->dataDynamic(pkt_data); + + if (cmd.isWrite()) { + memset(pkt_data, 0xA, req->getSize()); + } + + // Call MasterPort method to send a timing request for this packet + bool success = port.sendTimingReq(pkt); + if (!success) { + // If it fails, save the packet to retry when a retry is signalled by + // the cache + retryPkt = pkt; + } + return success; +} + +void +TraceCPU::icacheRetryRecvd() +{ + // Schedule an event to go through the control flow in the same tick as + // retry is received + DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen" + " event @%lli.\n", curTick()); + schedule(icacheNextEvent, curTick()); +} + +void +TraceCPU::dcacheRetryRecvd() +{ + // Schedule an event to go through the execute flow in the same tick as + // retry is received + DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen" + " event @%lli.\n", curTick()); + schedule(dcacheNextEvent, curTick()); +} + +void +TraceCPU::schedDcacheNextEvent(Tick when) +{ + if (!dcacheNextEvent.scheduled()) { + DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n", + when); + schedule(dcacheNextEvent, when); + ++numSchedDcacheEvent; + } else if (when < dcacheNextEvent.when()) { + DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli" + " to %lli.\n", dcacheNextEvent.when(), when); + reschedule(dcacheNextEvent, when); + } + +} + +bool +TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt) +{ + // All responses on the instruction fetch side are ignored. 
Simply delete + // the request and packet to free allocated memory + delete pkt->req; + delete pkt; + + return true; +} + +void +TraceCPU::IcachePort::recvReqRetry() +{ + owner->icacheRetryRecvd(); +} + +void +TraceCPU::dcacheRecvTimingResp(PacketPtr pkt) +{ + DPRINTF(TraceCPUData, "Received timing response from Dcache.\n"); + dcacheGen.completeMemAccess(pkt); +} + +bool +TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt) +{ + // Handle the responses for data memory requests which is done inside the + // elastic data generator + owner->dcacheRecvTimingResp(pkt); + // After processing the response delete the request and packet to free + // memory + delete pkt->req; + delete pkt; + + return true; +} + +void +TraceCPU::DcachePort::recvReqRetry() +{ + owner->dcacheRetryRecvd(); +} + +TraceCPU::ElasticDataGen::InputStream::InputStream(const std::string& filename) + : trace(filename), + microOpCount(0) +{ + // Create a protobuf message for the header and read it from the stream + ProtoMessage::InstDepRecordHeader header_msg; + if (!trace.read(header_msg)) { + panic("Failed to read packet header from %s\n", filename); + + if (header_msg.tick_freq() != SimClock::Frequency) { + panic("Trace %s was recorded with a different tick frequency %d\n", + header_msg.tick_freq()); + } + } else { + // Assign window size equal to the field in the trace that was recorded + // when the data dependency trace was captured in the o3cpu model + windowSize = header_msg.window_size(); + } +} + +void +TraceCPU::ElasticDataGen::InputStream::reset() +{ + trace.reset(); +} + +bool +TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element) +{ + ProtoMessage::InstDepRecord pkt_msg; + if (trace.read(pkt_msg)) { + // Required fields + element->seqNum = pkt_msg.seq_num(); + element->isLoad = pkt_msg.load(); + element->isStore = pkt_msg.store(); + element->compDelay = pkt_msg.comp_delay(); + + // Repeated field robDepList + element->clearRobDep(); + assert((pkt_msg.rob_dep()).size() <= element->maxRobDep); + for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) { + element->robDep[element->numRobDep] = pkt_msg.rob_dep(i); + element->numRobDep += 1; + } + + // Repeated field + element->clearRegDep(); + assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs); + for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) { + // There is a possibility that an instruction has both, a register + // and order dependency on an instruction. 
In such a case, the + // register dependency is omitted + bool duplicate = false; + for (int j = 0; j < element->numRobDep; j++) { + duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]); + } + if (!duplicate) { + element->regDep[element->numRegDep] = pkt_msg.reg_dep(i); + element->numRegDep += 1; + } + } + + // Optional fields + if (pkt_msg.has_addr()) + element->addr = pkt_msg.addr(); + else + element->addr = 0; + + if (pkt_msg.has_size()) + element->size = pkt_msg.size(); + else + element->size = 0; + + if (pkt_msg.has_flags()) + element->flags = pkt_msg.flags(); + else + element->flags = 0; + + if (pkt_msg.has_pc()) + element->pc = pkt_msg.pc(); + else + element->pc = 0; + + // ROB occupancy number + ++microOpCount; + if (pkt_msg.has_weight()) { + microOpCount += pkt_msg.weight(); + } + element->robNum = microOpCount; + return true; + } + + // We have reached the end of the file + return false; +} + +bool +TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep) +{ + for (auto& own_reg_dep : regDep) { + if (own_reg_dep == reg_dep) { + // If register dependency is found, make it zero and return true + own_reg_dep = 0; + --numRegDep; + assert(numRegDep >= 0); + DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli " + "done.\n", seqNum, reg_dep); + return true; + } + } + + // Return false if the dependency is not found + return false; +} + +bool +TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep) +{ + for (auto& own_rob_dep : robDep) { + if (own_rob_dep == rob_dep) { + // If the rob dependency is found, make it zero and return true + own_rob_dep = 0; + --numRobDep; + assert(numRobDep >= 0); + DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli " + "done.\n", seqNum, rob_dep); + return true; + } + } + return false; +} + +void +TraceCPU::ElasticDataGen::GraphNode::clearRegDep() { + for (auto& own_reg_dep : regDep) { + own_reg_dep = 0; + } + numRegDep = 0; +} + +void +TraceCPU::ElasticDataGen::GraphNode::clearRobDep() { + for (auto& own_rob_dep : robDep) { + own_rob_dep = 0; + } + numRobDep = 0; +} + +bool +TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num) +{ + // If it is an rob dependency then remove it + if (!removeRobDep(done_seq_num)) { + // If it is not an rob dependency then it must be a register dependency + // If the register dependency is not found, it violates an assumption + // and must be caught by assert. + bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num); + assert(regdep_found); + } + // Return true if the node is dependency free + return (numRobDep == 0 && numRegDep == 0); +} + +void +TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const +{ + DPRINTFR(TraceCPUData, "%lli", seqNum); + DPRINTFR(TraceCPUData, ",%s", (isLoad ? "True" : "False")); + DPRINTFR(TraceCPUData, ",%s", (isStore ? 
"True" : "False")); + if (isLoad || isStore) { + DPRINTFR(TraceCPUData, ",%i", addr); + DPRINTFR(TraceCPUData, ",%i", size); + DPRINTFR(TraceCPUData, ",%i", flags); + } + DPRINTFR(TraceCPUData, ",%lli", compDelay); + int i = 0; + DPRINTFR(TraceCPUData, "robDep:"); + while (robDep[i] != 0) { + DPRINTFR(TraceCPUData, ",%lli", robDep[i]); + i++; + } + i = 0; + DPRINTFR(TraceCPUData, "regDep:"); + while (regDep[i] != 0) { + DPRINTFR(TraceCPUData, ",%lli", regDep[i]); + i++; + } + auto child_itr = dependents.begin(); + DPRINTFR(TraceCPUData, "dependents:"); + while (child_itr != dependents.end()) { + DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum); + child_itr++; + } + + DPRINTFR(TraceCPUData, "\n"); +} + +TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename) + : trace(filename) +{ + // Create a protobuf message for the header and read it from the stream + ProtoMessage::PacketHeader header_msg; + if (!trace.read(header_msg)) { + panic("Failed to read packet header from %s\n", filename); + + if (header_msg.tick_freq() != SimClock::Frequency) { + panic("Trace %s was recorded with a different tick frequency %d\n", + header_msg.tick_freq()); + } + } +} + +void +TraceCPU::FixedRetryGen::InputStream::reset() +{ + trace.reset(); +} + +bool +TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element) +{ + ProtoMessage::Packet pkt_msg; + if (trace.read(pkt_msg)) { + element->cmd = pkt_msg.cmd(); + element->addr = pkt_msg.addr(); + element->blocksize = pkt_msg.size(); + element->tick = pkt_msg.tick(); + element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0; + element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0; + return true; + } + + // We have reached the end of the file + return false; +} |