diff options
Diffstat (limited to 'src/mem')
59 files changed, 1635 insertions, 3324 deletions
diff --git a/src/mem/PhysicalMemory.py b/src/mem/PhysicalMemory.py index 99bd27f2b..95cc73daa 100644 --- a/src/mem/PhysicalMemory.py +++ b/src/mem/PhysicalMemory.py @@ -1,4 +1,4 @@ -# Copyright (c) 2005-2007 The Regents of The University of Michigan +# Copyright (c) 2005-2008 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -35,8 +35,10 @@ class PhysicalMemory(MemObject): port = VectorPort("the access port") range = Param.AddrRange(AddrRange('128MB'), "Device Address") file = Param.String('', "memory mapped file") - latency = Param.Latency('1t', "latency of an access") + latency = Param.Latency('30ns', "latency of an access") + latency_var = Param.Latency('0ns', "access variablity") zero = Param.Bool(False, "zero initialize memory") + null = Param.Bool(False, "do not store data, always return zero") class DRAMMemory(PhysicalMemory): type = 'DRAMMemory' diff --git a/src/mem/SConscript b/src/mem/SConscript index b572f703c..0b0017f81 100644 --- a/src/mem/SConscript +++ b/src/mem/SConscript @@ -43,6 +43,7 @@ Source('packet.cc') Source('physical.cc') Source('port.cc') Source('tport.cc') +Source('mport.cc') if env['FULL_SYSTEM']: Source('vport.cc') diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 3d3966491..cc9b83d3e 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -47,7 +47,7 @@ Bridge::BridgePort::BridgePort(const std::string &_name, int _delay, int _nack_delay, int _req_limit, int _resp_limit, std::vector<Range<Addr> > filter_ranges) - : Port(_name), bridge(_bridge), otherPort(_otherPort), + : Port(_name, _bridge), bridge(_bridge), otherPort(_otherPort), delay(_delay), nackDelay(_nack_delay), filterRanges(filter_ranges), outstandingResponses(0), queuedRequests(0), inRetry(false), reqQueueLimit(_req_limit), respQueueLimit(_resp_limit), sendEvent(this) @@ -89,7 +89,7 @@ void Bridge::init() { // Make sure that both sides are connected to. 
- if (portA.getPeer() == NULL || portB.getPeer() == NULL) + if (!portA.isConnected() || !portB.isConnected()) fatal("Both ports of bus bridge are not connected to a bus.\n"); if (portA.peerBlockSize() != portB.peerBlockSize()) @@ -130,7 +130,7 @@ Bridge::BridgePort::recvTiming(PacketPtr pkt) return true; } - if (pkt->needsResponse()) + if (pkt->needsResponse()) { if (respQueueFull()) { DPRINTF(BusBridge, "Local queue full, no space for response, nacking\n"); DPRINTF(BusBridge, "queue size: %d outreq: %d outstanding resp: %d\n", @@ -141,6 +141,7 @@ Bridge::BridgePort::recvTiming(PacketPtr pkt) DPRINTF(BusBridge, "Request Needs response, reserving space\n"); ++outstandingResponses; } + } otherPort->queueForSendTiming(pkt); @@ -161,7 +162,7 @@ Bridge::BridgePort::nackRequest(PacketPtr pkt) // nothing on the list, add it and we're done if (sendQueue.empty()) { assert(!sendEvent.scheduled()); - sendEvent.schedule(readyTime); + schedule(sendEvent, readyTime); sendQueue.push_back(buf); return; } @@ -183,7 +184,7 @@ Bridge::BridgePort::nackRequest(PacketPtr pkt) while (i != end && !done) { if (readyTime < (*i)->ready) { if (i == begin) - sendEvent.reschedule(readyTime); + reschedule(sendEvent, readyTime); sendQueue.insert(i,buf); done = true; } @@ -226,7 +227,7 @@ Bridge::BridgePort::queueForSendTiming(PacketPtr pkt) // should already be an event scheduled for sending the head // packet. 
if (sendQueue.empty()) { - sendEvent.schedule(readyTime); + schedule(sendEvent, readyTime); } sendQueue.push_back(buf); } @@ -280,7 +281,7 @@ Bridge::BridgePort::trySend() if (!sendQueue.empty()) { buf = sendQueue.front(); DPRINTF(BusBridge, "Scheduling next send\n"); - sendEvent.schedule(std::max(buf->ready, curTick + 1)); + schedule(sendEvent, std::max(buf->ready, curTick + 1)); } } else { DPRINTF(BusBridge, " unsuccessful\n"); @@ -301,7 +302,7 @@ Bridge::BridgePort::recvRetry() if (nextReady <= curTick) trySend(); else - sendEvent.schedule(nextReady); + schedule(sendEvent, nextReady); } /** Function called by the port when the bus is receiving a Atomic diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh index 1331a45f9..40f033811 100644 --- a/src/mem/bridge.hh +++ b/src/mem/bridge.hh @@ -42,6 +42,7 @@ #include <inttypes.h> #include <queue> +#include "base/fast_alloc.hh" #include "mem/mem_object.hh" #include "mem/packet.hh" #include "mem/port.hh" @@ -73,7 +74,7 @@ class Bridge : public MemObject /** Pass ranges from one side of the bridge to the other? 
*/ std::vector<Range<Addr> > filterRanges; - class PacketBuffer : public Packet::SenderState { + class PacketBuffer : public Packet::SenderState, public FastAlloc { public: Tick ready; @@ -145,11 +146,8 @@ class Bridge : public MemObject BridgePort *port; public: - SendEvent(BridgePort *p) - : Event(&mainEventQueue), port(p) {} - + SendEvent(BridgePort *p) : port(p) {} virtual void process() { port->trySend(); } - virtual const char *description() const { return "bridge send"; } }; diff --git a/src/mem/bus.cc b/src/mem/bus.cc index ff4512aca..2eb823051 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -97,34 +97,39 @@ Bus::init() intIter->second->sendStatusChange(Port::RangeChange); } -Bus::BusFreeEvent::BusFreeEvent(Bus *_bus) : Event(&mainEventQueue), bus(_bus) +Bus::BusFreeEvent::BusFreeEvent(Bus *_bus) + : bus(_bus) {} -void Bus::BusFreeEvent::process() +void +Bus::BusFreeEvent::process() { bus->recvRetry(-1); } -const char * Bus::BusFreeEvent::description() const +const char * +Bus::BusFreeEvent::description() const { return "bus became available"; } -void Bus::preparePacket(PacketPtr pkt, Tick & headerTime) +Tick +Bus::calcPacketTiming(PacketPtr pkt) { - //Bring tickNextIdle up to the present tick - //There is some potential ambiguity where a cycle starts, which might make - //a difference when devices are acting right around a cycle boundary. Using - //a < allows things which happen exactly on a cycle boundary to take up - //only the following cycle. Anything that happens later will have to "wait" - //for the end of that cycle, and then start using the bus after that. + // Bring tickNextIdle up to the present tick. + // There is some potential ambiguity where a cycle starts, which + // might make a difference when devices are acting right around a + // cycle boundary. Using a < allows things which happen exactly on + // a cycle boundary to take up only the following cycle. 
Anything + // that happens later will have to "wait" for the end of that + // cycle, and then start using the bus after that. if (tickNextIdle < curTick) { tickNextIdle = curTick; if (tickNextIdle % clock != 0) tickNextIdle = curTick - (curTick % clock) + clock; } - headerTime = tickNextIdle + headerCycles * clock; + Tick headerTime = tickNextIdle + headerCycles * clock; // The packet will be sent. Figure out how long it occupies the bus, and // how much of that time is for the first "word", aka bus width. @@ -142,17 +147,20 @@ void Bus::preparePacket(PacketPtr pkt, Tick & headerTime) pkt->firstWordTime = headerTime + clock; pkt->finishTime = headerTime + numCycles * clock; + + return headerTime; } void Bus::occupyBus(Tick until) { + if (until == 0) { + // shortcut for express snoop packets + return; + } + tickNextIdle = until; + reschedule(busIdle, tickNextIdle, true); - if (!busIdle.scheduled()) { - busIdle.schedule(tickNextIdle); - } else { - busIdle.reschedule(tickNextIdle); - } DPRINTF(Bus, "The bus is now occupied from tick %d to %d\n", curTick, tickNextIdle); } @@ -190,11 +198,8 @@ Bus::recvTiming(PacketPtr pkt) DPRINTF(Bus, "recvTiming: src %d dst %d %s 0x%x\n", src, pkt->getDest(), pkt->cmdString(), pkt->getAddr()); - Tick headerTime = 0; - - if (!pkt->isExpressSnoop()) { - preparePacket(pkt, headerTime); - } + Tick headerFinishTime = pkt->isExpressSnoop() ? 0 : calcPacketTiming(pkt); + Tick packetFinishTime = pkt->isExpressSnoop() ? 0 : pkt->finishTime; short dest = pkt->getDest(); int dest_port_id; @@ -243,17 +248,16 @@ Bus::recvTiming(PacketPtr pkt) DPRINTF(Bus, "recvTiming: src %d dst %d %s 0x%x TGT RETRY\n", src, pkt->getDest(), pkt->cmdString(), pkt->getAddr()); addToRetryList(src_port); - if (!pkt->isExpressSnoop()) { - occupyBus(headerTime); - } + occupyBus(headerFinishTime); return false; } - // send OK, fall through + // send OK, fall through... pkt may have been deleted by + // target at this point, so it should *not* be referenced + // again. 
We'll set it to NULL here just to be safe. + pkt = NULL; } - if (!pkt->isExpressSnoop()) { - occupyBus(pkt->finishTime); - } + occupyBus(packetFinishTime); // Packet was successfully sent. // Also take care of retries @@ -289,7 +293,7 @@ Bus::recvRetry(int id) //Burn a cycle for the missed grant. tickNextIdle += clock; - busIdle.reschedule(tickNextIdle, true); + reschedule(busIdle, tickNextIdle, true); } } //If we weren't able to drain before, we might be able to now. @@ -327,10 +331,10 @@ Bus::findPort(Addr addr) if (responderSet) { panic("Unable to find destination for addr (user set default " - "responder): %#llx", addr); + "responder): %#llx\n", addr); } else { DPRINTF(Bus, "Unable to find destination for addr: %#llx, will use " - "default port", addr); + "default port\n", addr); return defaultId; } @@ -519,9 +523,12 @@ Bus::recvStatusChange(Port::Status status, int id) for (iter = ranges.begin(); iter != ranges.end(); iter++) { DPRINTF(BusAddrRanges, "Adding range %#llx - %#llx for id %d\n", iter->start, iter->end, id); - if (portMap.insert(*iter, id) == portMap.end()) - panic("Two devices with same range\n"); - + if (portMap.insert(*iter, id) == portMap.end()) { + int conflict_id = portMap.find(*iter)->second; + fatal("%s has two ports with same range:\n\t%s\n\t%s\n", + name(), interfaces[id]->getPeer()->name(), + interfaces[conflict_id]->getPeer()->name()); + } } } DPRINTF(MMU, "port list has %d entries\n", portMap.size()); diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 274c02de4..74901d626 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -245,10 +245,12 @@ class Bus : public MemObject */ void addressRanges(AddrRangeList &resp, bool &snoop, int id); - /** Prepare a packet to be sent on the bus. The header finishes at tick - * headerTime + /** Calculate the timing parameters for the packet. Updates the + * firstWordTime and finishTime fields of the packet object. 
+ * Returns the tick at which the packet header is completed (which + * will be all that is sent if the target rejects the packet). */ - void preparePacket(PacketPtr pkt, Tick & headerTime); + Tick calcPacketTiming(PacketPtr pkt); /** Occupy the bus until until */ void occupyBus(Tick until); diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index f6d42b1ef..bef1b45d2 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -38,8 +38,6 @@ class BaseCache(MemObject): block_size = Param.Int("block size in bytes") latency = Param.Latency("Latency") hash_delay = Param.Int(1, "time in cycles of hash access") - lifo = Param.Bool(False, - "whether this NIC partition should use LIFO repl. policy") max_miss_count = Param.Counter(0, "number of misses to handle before calling exit") mshrs = Param.Int("number of MSHRs (max outstanding requests)") @@ -47,9 +45,6 @@ class BaseCache(MemObject): "always service demand misses first") repl = Param.Repl(NULL, "replacement policy") size = Param.MemorySize("capacity in bytes") - split = Param.Bool(False, "whether or not this cache is split") - split_size = Param.Int(0, - "How many ways of the cache belong to CPU/LRU partition") subblock_size = Param.Int(0, "Size of subblock in IIC used for compression") tgts_per_mshr = Param.Int("max number of accesses per MSHR") @@ -57,12 +52,10 @@ class BaseCache(MemObject): two_queue = Param.Bool(False, "whether the lifo should have two queue replacement") write_buffers = Param.Int(8, "number of write buffers") - prefetch_miss = Param.Bool(False, - "wheter you are using the hardware prefetcher from Miss stream") - prefetch_access = Param.Bool(False, - "wheter you are using the hardware prefetcher from Access stream") + prefetch_on_access = Param.Bool(False, + "notify the hardware prefetcher on every access (not just misses)") prefetcher_size = Param.Int(100, - "Number of entries in the harware prefetch queue") + "Number of entries in the hardware prefetch queue") 
prefetch_past_page = Param.Bool(False, "Allow prefetches to cross virtual page boundaries") prefetch_serial_squash = Param.Bool(False, @@ -74,9 +67,9 @@ class BaseCache(MemObject): prefetch_policy = Param.Prefetch('none', "Type of prefetcher to use") prefetch_cache_check_push = Param.Bool(True, - "Check if in cash on push or pop of prefetch queue") + "Check if in cache on push or pop of prefetch queue") prefetch_use_cpu_id = Param.Bool(True, - "Use the CPU ID to seperate calculations of prefetches") + "Use the CPU ID to separate calculations of prefetches") prefetch_data_accesses_only = Param.Bool(False, "Only prefetch on data not on instruction accesses") cpu_side = Port("Port on side closer to CPU") diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index ac0d54bf6..956375530 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -122,7 +122,7 @@ BaseCache::CachePort::clearBlocked() mustSendRetry = false; SendRetryEvent *ev = new SendRetryEvent(this, true); // @TODO: need to find a better time (next bus cycle?) - ev->schedule(curTick + 1); + schedule(ev, curTick + 1); } } diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index d97021024..4319717e5 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -223,14 +223,14 @@ class BaseCache : public MemObject */ /** Number of hits per thread for each type of command. @sa Packet::Command */ - Stats::Vector<> hits[MemCmd::NUM_MEM_CMDS]; + Stats::Vector hits[MemCmd::NUM_MEM_CMDS]; /** Number of hits for demand accesses. */ Stats::Formula demandHits; /** Number of hit for all accesses. */ Stats::Formula overallHits; /** Number of misses per thread for each type of command. @sa Packet::Command */ - Stats::Vector<> misses[MemCmd::NUM_MEM_CMDS]; + Stats::Vector misses[MemCmd::NUM_MEM_CMDS]; /** Number of misses for demand accesses. */ Stats::Formula demandMisses; /** Number of misses for all accesses. 
*/ @@ -240,7 +240,7 @@ class BaseCache : public MemObject * Total number of cycles per thread/command spent waiting for a miss. * Used to calculate the average miss latency. */ - Stats::Vector<> missLatency[MemCmd::NUM_MEM_CMDS]; + Stats::Vector missLatency[MemCmd::NUM_MEM_CMDS]; /** Total number of cycles spent waiting for demand misses. */ Stats::Formula demandMissLatency; /** Total number of cycles spent waiting for all misses. */ @@ -268,50 +268,50 @@ class BaseCache : public MemObject Stats::Formula overallAvgMissLatency; /** The total number of cycles blocked for each blocked cause. */ - Stats::Vector<> blocked_cycles; + Stats::Vector blocked_cycles; /** The number of times this cache blocked for each blocked cause. */ - Stats::Vector<> blocked_causes; + Stats::Vector blocked_causes; /** The average number of cycles blocked for each blocked cause. */ Stats::Formula avg_blocked; /** The number of fast writes (WH64) performed. */ - Stats::Scalar<> fastWrites; + Stats::Scalar fastWrites; /** The number of cache copies performed. */ - Stats::Scalar<> cacheCopies; + Stats::Scalar cacheCopies; /** Number of blocks written back per thread. */ - Stats::Vector<> writebacks; + Stats::Vector writebacks; /** Number of misses that hit in the MSHRs per command and thread. */ - Stats::Vector<> mshr_hits[MemCmd::NUM_MEM_CMDS]; + Stats::Vector mshr_hits[MemCmd::NUM_MEM_CMDS]; /** Demand misses that hit in the MSHRs. */ Stats::Formula demandMshrHits; /** Total number of misses that hit in the MSHRs. */ Stats::Formula overallMshrHits; /** Number of misses that miss in the MSHRs, per command and thread. */ - Stats::Vector<> mshr_misses[MemCmd::NUM_MEM_CMDS]; + Stats::Vector mshr_misses[MemCmd::NUM_MEM_CMDS]; /** Demand misses that miss in the MSHRs. */ Stats::Formula demandMshrMisses; /** Total number of misses that miss in the MSHRs. */ Stats::Formula overallMshrMisses; /** Number of misses that miss in the MSHRs, per command and thread. 
*/ - Stats::Vector<> mshr_uncacheable[MemCmd::NUM_MEM_CMDS]; + Stats::Vector mshr_uncacheable[MemCmd::NUM_MEM_CMDS]; /** Total number of misses that miss in the MSHRs. */ Stats::Formula overallMshrUncacheable; /** Total cycle latency of each MSHR miss, per command and thread. */ - Stats::Vector<> mshr_miss_latency[MemCmd::NUM_MEM_CMDS]; + Stats::Vector mshr_miss_latency[MemCmd::NUM_MEM_CMDS]; /** Total cycle latency of demand MSHR misses. */ Stats::Formula demandMshrMissLatency; /** Total cycle latency of overall MSHR misses. */ Stats::Formula overallMshrMissLatency; /** Total cycle latency of each MSHR miss, per command and thread. */ - Stats::Vector<> mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS]; + Stats::Vector mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS]; /** Total cycle latency of overall MSHR misses. */ Stats::Formula overallMshrUncacheableLatency; @@ -342,11 +342,11 @@ class BaseCache : public MemObject Stats::Formula overallAvgMshrUncacheableLatency; /** The number of times a thread hit its MSHR cap. */ - Stats::Vector<> mshr_cap_events; + Stats::Vector mshr_cap_events; /** The number of times software prefetches caused the MSHR to block. */ - Stats::Vector<> soft_prefetch_mshr_full; + Stats::Vector soft_prefetch_mshr_full; - Stats::Scalar<> mshr_no_allocate_misses; + Stats::Scalar mshr_no_allocate_misses; /** * @} @@ -445,12 +445,6 @@ class BaseCache : public MemObject } } - Tick nextMSHRReadyTime() - { - return std::min(mshrQueue.nextMSHRReadyTime(), - writeBuffer.nextMSHRReadyTime()); - } - /** * Request the master bus for the given cause and time. * @param cause The reason for the request. @@ -467,10 +461,11 @@ class BaseCache : public MemObject */ void deassertMemSideBusRequest(RequestCause cause) { - // obsolete!! - assert(false); - // memSidePort->deassertBusRequest(cause); - // checkDrain(); + // Obsolete... we no longer signal bus requests explicitly so + // we can't deassert them. 
Leaving this in as a no-op since + // the prefetcher calls it to indicate that it no longer wants + // to request a prefetch, and someday that might be + // interesting again. } virtual unsigned int drain(Event *de); @@ -481,7 +476,7 @@ class BaseCache : public MemObject void incMissCount(PacketPtr pkt) { - misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; if (missCount) { --missCount; diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh index 127c547ac..fe65672d6 100644 --- a/src/mem/cache/blk.hh +++ b/src/mem/cache/blk.hh @@ -38,8 +38,8 @@ #include <list> #include "base/printable.hh" -#include "sim/core.hh" // for Tick -#include "arch/isa_traits.hh" // for Addr +#include "sim/core.hh" // for Tick +#include "arch/isa_traits.hh" // for Addr #include "mem/packet.hh" #include "mem/request.hh" @@ -48,17 +48,17 @@ */ enum CacheBlkStatusBits { /** valid, readable */ - BlkValid = 0x01, + BlkValid = 0x01, /** write permission */ - BlkWritable = 0x02, + BlkWritable = 0x02, /** read permission (yes, block can be valid but not readable) */ - BlkReadable = 0x04, + BlkReadable = 0x04, /** dirty (modified) */ - BlkDirty = 0x08, + BlkDirty = 0x08, /** block was referenced */ - BlkReferenced = 0x10, + BlkReferenced = 0x10, /** block was a hardware prefetch yet unaccessed*/ - BlkHWPrefetched = 0x20 + BlkHWPrefetched = 0x20 }; /** @@ -108,18 +108,16 @@ class CacheBlk */ class Lock { public: - int cpuNum; // locking CPU - int threadNum; // locking thread ID within CPU + int contextId; // locking context // check for matching execution context bool matchesContext(Request *req) { - return (cpuNum == req->getCpuNum() && - threadNum == req->getThreadNum()); + return (contextId == req->contextId()); } Lock(Request *req) - : cpuNum(req->getCpuNum()), threadNum(req->getThreadNum()) + : contextId(req->contextId()) { } }; @@ -207,7 +205,7 @@ class CacheBlk * be touched. 
* @return True if the block was a hardware prefetch, unaccesed. */ - bool isPrefetch() const + bool wasPrefetched() const { return (status & BlkHWPrefetched) != 0; } diff --git a/src/mem/cache/builder.cc b/src/mem/cache/builder.cc index db900c64c..599353b88 100644 --- a/src/mem/cache/builder.cc +++ b/src/mem/cache/builder.cc @@ -38,7 +38,6 @@ // Must be included first to determine which caches we want #include "enums/Prefetch.hh" #include "mem/config/cache.hh" -#include "mem/config/prefetch.hh" #include "mem/cache/base.hh" #include "mem/cache/cache.hh" #include "mem/bus.hh" @@ -57,149 +56,78 @@ #include "mem/cache/tags/iic.hh" #endif -#if defined(USE_CACHE_SPLIT) -#include "mem/cache/tags/split.hh" -#endif - -#if defined(USE_CACHE_SPLIT_LIFO) -#include "mem/cache/tags/split_lifo.hh" -#endif - //Prefetcher Headers -#if defined(USE_GHB) #include "mem/cache/prefetch/ghb.hh" -#endif -#if defined(USE_TAGGED) #include "mem/cache/prefetch/tagged.hh" -#endif -#if defined(USE_STRIDED) #include "mem/cache/prefetch/stride.hh" -#endif using namespace std; using namespace TheISA; -#define BUILD_CACHE(TAGS, tags) \ - do { \ - BasePrefetcher *pf; \ - if (prefetch_policy == Enums::tagged) { \ - BUILD_TAGGED_PREFETCHER(TAGS); \ - } \ - else if (prefetch_policy == Enums::stride) { \ - BUILD_STRIDED_PREFETCHER(TAGS); \ - } \ - else if (prefetch_policy == Enums::ghb) { \ - BUILD_GHB_PREFETCHER(TAGS); \ - } \ - else { \ - BUILD_NULL_PREFETCHER(TAGS); \ - } \ - Cache<TAGS> *retval = \ - new Cache<TAGS>(this, tags, pf); \ - return retval; \ +#define BUILD_CACHE(TAGS, tags) \ + do { \ + BasePrefetcher *pf; \ + if (prefetch_policy == Enums::tagged) { \ + pf = new TaggedPrefetcher(this); \ + } \ + else if (prefetch_policy == Enums::stride) { \ + pf = new StridePrefetcher(this); \ + } \ + else if (prefetch_policy == Enums::ghb) { \ + pf = new GHBPrefetcher(this); \ + } \ + else { \ + pf = NULL; \ + } \ + Cache<TAGS> *retval = \ + new Cache<TAGS>(this, tags, pf); \ + return retval; \ } while 
(0) -#define BUILD_CACHE_PANIC(x) do { \ - panic("%s not compiled into M5", x); \ +#define BUILD_CACHE_PANIC(x) do { \ + panic("%s not compiled into M5", x); \ } while (0) #if defined(USE_CACHE_FALRU) -#define BUILD_FALRU_CACHE do { \ +#define BUILD_FALRU_CACHE do { \ FALRU *tags = new FALRU(block_size, size, latency); \ - BUILD_CACHE(FALRU, tags); \ + BUILD_CACHE(FALRU, tags); \ } while (0) #else #define BUILD_FALRU_CACHE BUILD_CACHE_PANIC("falru cache") #endif #if defined(USE_CACHE_LRU) -#define BUILD_LRU_CACHE do { \ - LRU *tags = new LRU(numSets, block_size, assoc, latency); \ - BUILD_CACHE(LRU, tags); \ +#define BUILD_LRU_CACHE do { \ + LRU *tags = new LRU(numSets, block_size, assoc, latency); \ + BUILD_CACHE(LRU, tags); \ } while (0) #else #define BUILD_LRU_CACHE BUILD_CACHE_PANIC("lru cache") #endif -#if defined(USE_CACHE_SPLIT) -#define BUILD_SPLIT_CACHE do { \ - Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \ - two_queue, latency); \ - BUILD_CACHE(Split, tags); \ - } while (0) -#else -#define BUILD_SPLIT_CACHE BUILD_CACHE_PANIC("split cache") -#endif - -#if defined(USE_CACHE_SPLIT_LIFO) -#define BUILD_SPLIT_LIFO_CACHE do { \ - SplitLIFO *tags = new SplitLIFO(block_size, size, assoc, \ - latency, two_queue, -1); \ - BUILD_CACHE(SplitLIFO, tags); \ - } while (0) -#else -#define BUILD_SPLIT_LIFO_CACHE BUILD_CACHE_PANIC("lifo cache") -#endif - #if defined(USE_CACHE_IIC) -#define BUILD_IIC_CACHE do { \ - IIC *tags = new IIC(iic_params); \ - BUILD_CACHE(IIC, tags); \ +#define BUILD_IIC_CACHE do { \ + IIC *tags = new IIC(iic_params); \ + BUILD_CACHE(IIC, tags); \ } while (0) #else #define BUILD_IIC_CACHE BUILD_CACHE_PANIC("iic") #endif -#define BUILD_CACHES do { \ - if (repl == NULL) { \ - if (numSets == 1) { \ - BUILD_FALRU_CACHE; \ - } else { \ - if (split == true) { \ - BUILD_SPLIT_CACHE; \ - } else if (lifo == true) { \ - BUILD_SPLIT_LIFO_CACHE; \ - } else { \ - BUILD_LRU_CACHE; \ - } \ - } \ - } else { \ - BUILD_IIC_CACHE; \ - } \ - } 
while (0) - -#define BUILD_COHERENCE(b) do { \ +#define BUILD_CACHES do { \ + if (repl == NULL) { \ + if (numSets == 1) { \ + BUILD_FALRU_CACHE; \ + } else { \ + BUILD_LRU_CACHE; \ + } \ + } else { \ + BUILD_IIC_CACHE; \ + } \ } while (0) -#if defined(USE_TAGGED) -#define BUILD_TAGGED_PREFETCHER(t) \ - pf = new TaggedPrefetcher(this) -#else -#define BUILD_TAGGED_PREFETCHER(t) BUILD_CACHE_PANIC("Tagged Prefetcher") -#endif - -#if defined(USE_STRIDED) -#define BUILD_STRIDED_PREFETCHER(t) \ - pf = new StridePrefetcher(this) -#else -#define BUILD_STRIDED_PREFETCHER(t) BUILD_CACHE_PANIC("Stride Prefetcher") -#endif - -#if defined(USE_GHB) -#define BUILD_GHB_PREFETCHER(t) \ - pf = new GHBPrefetcher(this) -#else -#define BUILD_GHB_PREFETCHER(t) BUILD_CACHE_PANIC("GHB Prefetcher") -#endif - -#if defined(USE_TAGGED) -#define BUILD_NULL_PREFETCHER(t) \ - pf = new TaggedPrefetcher(this) -#else -#define BUILD_NULL_PREFETCHER(t) BUILD_CACHE_PANIC("NULL Prefetcher (uses Tagged)") -#endif - BaseCache * BaseCacheParams::create() { @@ -208,24 +136,6 @@ BaseCacheParams::create() subblock_size = block_size; } - //Warnings about prefetcher policy - if (prefetch_policy == Enums::none) { - if (prefetch_miss || prefetch_access) - panic("With no prefetcher, you shouldn't prefetch from" - " either miss or access stream\n"); - } - - if (prefetch_policy == Enums::tagged || prefetch_policy == Enums::stride || - prefetch_policy == Enums::ghb) { - - if (!prefetch_miss && !prefetch_access) - warn("With this prefetcher you should chose a prefetch" - " stream (miss or access)\nNo Prefetching will occur\n"); - - if (prefetch_miss && prefetch_access) - panic("Can't do prefetches from both miss and access stream"); - } - #if defined(USE_CACHE_IIC) // Build IIC params IIC::Params iic_params; diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index c640d4a60..d403535fc 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -50,14 +50,6 @@ #include "mem/cache/tags/iic.hh" #endif 
-#if defined(USE_CACHE_SPLIT) -#include "mem/cache/tags/split.hh" -#endif - -#if defined(USE_CACHE_SPLIT_LIFO) -#include "mem/cache/tags/split_lifo.hh" -#endif - #include "mem/cache/cache_impl.hh" // Template Instantiations @@ -76,12 +68,4 @@ template class Cache<IIC>; template class Cache<LRU>; #endif -#if defined(USE_CACHE_SPLIT) -template class Cache<Split>; -#endif - -#if defined(USE_CACHE_SPLIT_LIFO) -template class Cache<SplitLIFO>; -#endif - #endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index f5f65d4dd..4570b067b 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -64,8 +64,6 @@ class Cache : public BaseCache /** A typedef for a list of BlkType pointers. */ typedef typename TagStore::BlkList BlkList; - bool prefetchAccess; - protected: class CpuSidePort : public CachePort @@ -137,21 +135,14 @@ class Cache : public BaseCache BlkType *tempBlock; /** - * Can this cache should allocate a block on a line-sized write miss. + * This cache should allocate a block on a line-sized write miss. */ const bool doFastWrites; - const bool prefetchMiss; - /** - * Handle a replacement for the given request. - * @param blk A pointer to the block, usually NULL - * @param pkt The memory request to satisfy. - * @param new_state The new state of the block. - * @param writebacks A list to store any generated writebacks. + * Notify the prefetcher on every access, not just misses. */ - BlkType* doReplacement(BlkType *blk, PacketPtr pkt, - CacheBlk::State new_state, PacketList &writebacks); + const bool prefetchOnAccess; /** * Does all the processing necessary to perform the provided request. @@ -159,10 +150,10 @@ class Cache : public BaseCache * @param lat The latency of the access. * @param writebacks List for any writebacks that need to be performed. * @param update True if the replacement data should be updated. - * @return Pointer to the cache block touched by the request. NULL if it - * was a miss. 
+ * @return Boolean indicating whether the request was satisfied. */ - bool access(PacketPtr pkt, BlkType *&blk, int &lat); + bool access(PacketPtr pkt, BlkType *&blk, + int &lat, PacketList &writebacks); /** *Handle doing the Compare and Swap function for SPARC. @@ -181,7 +172,6 @@ class Cache : public BaseCache * Populates a cache block and handles all outstanding requests for the * satisfied fill request. This version takes two memory requests. One * contains the fill data, the other is an optional target to satisfy. - * Used for Cache::probe. * @param pkt The memory request with the fill data. * @param blk The cache block if it already exists. * @param writebacks List for any writebacks that need to be performed. @@ -331,6 +321,11 @@ class Cache : public BaseCache bool inMissQueue(Addr addr) { return (mshrQueue.findMatch(addr) != 0); } + + /** + * Find next request ready time from among possible sources. + */ + Tick nextMSHRReadyTime(); }; #endif // __CACHE_HH__ diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index e546e2a9a..a78fd3637 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -38,6 +38,7 @@ */ #include "sim/host.hh" +#include "base/fast_alloc.hh" #include "base/misc.hh" #include "base/range_ops.hh" @@ -52,11 +53,10 @@ template<class TagStore> Cache<TagStore>::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf) : BaseCache(p), - prefetchAccess(p->prefetch_access), tags(tags), prefetcher(pf), doFastWrites(true), - prefetchMiss(p->prefetch_miss) + prefetchOnAccess(p->prefetch_on_access) { tempBlock = new BlkType(); tempBlock->data = new uint8_t[blkSize]; @@ -71,7 +71,8 @@ Cache<TagStore>::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf) memSidePort->setOtherPort(cpuSidePort); tags->setCache(this); - prefetcher->setCache(this); + if (prefetcher) + prefetcher->setCache(this); } template<class TagStore> @@ -80,7 +81,8 @@ Cache<TagStore>::regStats() { BaseCache::regStats(); 
tags->regStats(name()); - prefetcher->regStats(name()); + if (prefetcher) + prefetcher->regStats(name()); } template<class TagStore> @@ -147,8 +149,10 @@ Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt) panic("Invalid size for conditional read/write\n"); } - if (overwrite_mem) + if (overwrite_mem) { std::memcpy(blk_data, &overwrite_val, pkt->getSize()); + blk->status |= BlkDirty; + } } @@ -259,7 +263,8 @@ Cache<TagStore>::squash(int threadNum) template<class TagStore> bool -Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat) +Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, + int &lat, PacketList &writebacks) { if (pkt->req->isUncacheable()) { blk = NULL; @@ -267,34 +272,16 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat) return false; } - blk = tags->findBlock(pkt->getAddr(), lat); - - if (prefetchAccess) { - //We are determining prefetches on access stream, call prefetcher - prefetcher->handleMiss(pkt, curTick); - } + blk = tags->accessBlock(pkt->getAddr(), lat); DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(), (blk) ? "hit" : "miss"); if (blk != NULL) { - if (blk->isPrefetch()) { - //Signal that this was a hit under prefetch (no need for - //use prefetch (only can get here if true) - DPRINTF(HWPrefetch, "Hit a block that was prefetched\n"); - blk->status &= ~BlkHWPrefetched; - if (prefetchMiss) { - //If we are using the miss stream, signal the - //prefetcher otherwise the access stream would have - //already signaled this hit - prefetcher->handleMiss(pkt, curTick); - } - } - if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) { // OK to satisfy access - hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; satisfyCpuSideRequest(pkt, blk); return true; } @@ -307,7 +294,6 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat) // into the cache without having a writeable copy (or any copy at // all). 
if (pkt->cmd == MemCmd::Writeback) { - PacketList writebacks; assert(blkSize == pkt->getSize()); if (blk == NULL) { // need to do a replacement @@ -318,19 +304,14 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat) incMissCount(pkt); return false; } + tags->insertBlock(pkt->getAddr(), blk); blk->status = BlkValid | BlkReadable; } std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize); blk->status |= BlkDirty; - // copy writebacks from replacement to write buffer - while (!writebacks.empty()) { - PacketPtr wbPkt = writebacks.front(); - allocateWriteBuffer(wbPkt, curTick + hitLatency, true); - writebacks.pop_front(); - } // nothing else to do; writeback doesn't expect response assert(!pkt->needsResponse()); - hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; return true; } @@ -346,7 +327,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat) } -class ForwardResponseRecord : public Packet::SenderState +class ForwardResponseRecord : public Packet::SenderState, public FastAlloc { Packet::SenderState *prevSenderState; int prevSrc; @@ -407,6 +388,9 @@ Cache<TagStore>::timingAccess(PacketPtr pkt) memSidePort->sendTiming(snoopPkt); // main memory will delete snoopPkt } + // since we're the official target but we aren't responding, + // delete the packet now. + delete pkt; return true; } @@ -423,13 +407,13 @@ Cache<TagStore>::timingAccess(PacketPtr pkt) int lat = hitLatency; BlkType *blk = NULL; - bool satisfied = access(pkt, blk, lat); + PacketList writebacks; + + bool satisfied = access(pkt, blk, lat, writebacks); #if 0 /** @todo make the fast write alloc (wh64) work with coherence. */ - PacketList writebacks; - // If this is a block size write/hint (WH64) allocate the block here // if the coherence protocol allows it. 
if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() && @@ -447,15 +431,11 @@ Cache<TagStore>::timingAccess(PacketPtr pkt) ++fastWrites; } } - - // copy writebacks to write buffer - while (!writebacks.empty()) { - PacketPtr wbPkt = writebacks.front(); - allocateWriteBuffer(wbPkt, time, true); - writebacks.pop_front(); - } #endif + // track time of availability of next prefetch, if any + Tick next_pf_time = 0; + bool needsResponse = pkt->needsResponse(); if (satisfied) { @@ -465,10 +445,14 @@ Cache<TagStore>::timingAccess(PacketPtr pkt) } else { delete pkt; } + + if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) { + if (blk) + blk->status &= ~BlkHWPrefetched; + next_pf_time = prefetcher->notify(pkt, time); + } } else { // miss - if (prefetchMiss) - prefetcher->handleMiss(pkt, time); Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); MSHR *mshr = mshrQueue.findMatch(blk_addr); @@ -476,8 +460,8 @@ Cache<TagStore>::timingAccess(PacketPtr pkt) if (mshr) { // MSHR hit //@todo remove hw_pf here - mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) { + mshr_hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + if (mshr->threadNum != 0/*pkt->req->threadId()*/) { mshr->threadNum = -1; } mshr->allocateTarget(pkt, time, order++); @@ -491,7 +475,7 @@ Cache<TagStore>::timingAccess(PacketPtr pkt) } } else { // no MSHR - mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; // always mark as cache fill for now... if we implement // no-write-allocate or bypass accesses this will have to // be changed. 
@@ -520,9 +504,23 @@ Cache<TagStore>::timingAccess(PacketPtr pkt) allocateMissBuffer(pkt, time, true); } + + if (prefetcher) { + next_pf_time = prefetcher->notify(pkt, time); + } } } + if (next_pf_time != 0) + requestMemSideBus(Request_PF, std::max(time, next_pf_time)); + + // copy writebacks to write buffer + while (!writebacks.empty()) { + PacketPtr wbPkt = writebacks.front(); + allocateWriteBuffer(wbPkt, time, true); + writebacks.pop_front(); + } + return true; } @@ -610,53 +608,79 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt) // access in timing mode BlkType *blk = NULL; + PacketList writebacks; - if (!access(pkt, blk, lat)) { + if (!access(pkt, blk, lat, writebacks)) { // MISS - PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive()); + PacketPtr bus_pkt = getBusPacket(pkt, blk, pkt->needsExclusive()); - bool isCacheFill = (busPkt != NULL); + bool is_forward = (bus_pkt == NULL); - if (busPkt == NULL) { + if (is_forward) { // just forwarding the same request to the next level // no local cache operation involved - busPkt = pkt; + bus_pkt = pkt; } DPRINTF(Cache, "Sending an atomic %s for %x\n", - busPkt->cmdString(), busPkt->getAddr()); + bus_pkt->cmdString(), bus_pkt->getAddr()); #if TRACING_ON CacheBlk::State old_state = blk ? 
blk->status : 0; #endif - lat += memSidePort->sendAtomic(busPkt); + lat += memSidePort->sendAtomic(bus_pkt); DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", - busPkt->cmdString(), busPkt->getAddr(), old_state); - - bool is_error = busPkt->isError(); - assert(!busPkt->wasNacked()); - - if (is_error && pkt->needsResponse()) { - pkt->makeAtomicResponse(); - pkt->copyError(busPkt); - } else if (isCacheFill && !is_error) { - PacketList writebacks; - blk = handleFill(busPkt, blk, writebacks); - satisfyCpuSideRequest(pkt, blk); - delete busPkt; - - // Handle writebacks if needed - while (!writebacks.empty()){ - PacketPtr wbPkt = writebacks.front(); - memSidePort->sendAtomic(wbPkt); - writebacks.pop_front(); - delete wbPkt; + bus_pkt->cmdString(), bus_pkt->getAddr(), old_state); + + assert(!bus_pkt->wasNacked()); + + // If packet was a forward, the response (if any) is already + // in place in the bus_pkt == pkt structure, so we don't need + // to do anything. Otherwise, use the separate bus_pkt to + // generate response to pkt and then delete it. + if (!is_forward) { + if (pkt->needsResponse()) { + assert(bus_pkt->isResponse()); + if (bus_pkt->isError()) { + pkt->makeAtomicResponse(); + pkt->copyError(bus_pkt); + } else if (bus_pkt->isRead() || + bus_pkt->cmd == MemCmd::UpgradeResp) { + // we're updating cache state to allow us to + // satisfy the upstream request from the cache + blk = handleFill(bus_pkt, blk, writebacks); + satisfyCpuSideRequest(pkt, blk); + } else { + // we're satisfying the upstream request without + // modifying cache state, e.g., a write-through + pkt->makeAtomicResponse(); + } } + delete bus_pkt; } } + // Note that we don't invoke the prefetcher at all in atomic mode. 
+ // It's not clear how to do it properly, particularly for + // prefetchers that aggressively generate prefetch candidates and + // rely on bandwidth contention to throttle them; these will tend + // to pollute the cache in atomic mode since there is no bandwidth + // contention. If we ever do want to enable prefetching in atomic + // mode, though, this is the place to do it... see timingAccess() + // for an example (though we'd want to issue the prefetch(es) + // immediately rather than calling requestMemSideBus() as we do + // there). + + // Handle writebacks if needed + while (!writebacks.empty()){ + PacketPtr wbPkt = writebacks.front(); + memSidePort->sendAtomic(wbPkt); + writebacks.pop_front(); + delete wbPkt; + } + // We now have the block one way or another (hit or completed miss) if (pkt->needsResponse()) { @@ -742,14 +766,17 @@ Cache<TagStore>::handleResponse(PacketPtr pkt) PacketList writebacks; if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->threadId()*/] += miss_latency; } else { - mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + mshr_miss_latency[stats_cmd_idx][0/*pkt->req->threadId()*/] += miss_latency; } - if (mshr->isCacheFill && !is_error) { + bool is_fill = !mshr->isForward && + (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp); + + if (is_fill && !is_error) { DPRINTF(Cache, "Block for addr %x being updated in Cache\n", pkt->getAddr()); @@ -770,9 +797,10 @@ Cache<TagStore>::handleResponse(PacketPtr pkt) while (mshr->hasTargets()) { MSHR::Target *target = mshr->getTarget(); - if (target->isCpuSide()) { + switch (target->source) { + case MSHR::Target::FromCPU: Tick completion_time; - if (blk != NULL) { + if (is_fill) { satisfyCpuSideRequest(target->pkt, blk); // How many bytes past the first request is this one int transfer_offset = @@ -786,7 +814,7 @@ Cache<TagStore>::handleResponse(PacketPtr pkt) (transfer_offset ? 
pkt->finishTime : pkt->firstWordTime); assert(!target->pkt->req->isUncacheable()); - missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += + missLatency[target->pkt->cmdToIndex()][0/*pkt->req->threadId()*/] += completion_time - target->recvTime; } else { // not a cache fill, just forwarding response @@ -808,13 +836,27 @@ Cache<TagStore>::handleResponse(PacketPtr pkt) target->pkt->cmd = MemCmd::ReadRespWithInvalidate; } cpuSidePort->respond(target->pkt, completion_time); - } else { + break; + + case MSHR::Target::FromPrefetcher: + assert(target->pkt->cmd == MemCmd::HardPFReq); + if (blk) + blk->status |= BlkHWPrefetched; + delete target->pkt->req; + delete target->pkt; + break; + + case MSHR::Target::FromSnoop: // I don't believe that a snoop can be in an error state assert(!is_error); // response to snoop request DPRINTF(Cache, "processing deferred snoop...\n"); handleSnoop(target->pkt, blk, true, true, mshr->pendingInvalidate || pkt->isInvalidate()); + break; + + default: + panic("Illegal target->source enum %d\n", target->source); } mshr->popTarget(); @@ -825,6 +867,9 @@ Cache<TagStore>::handleResponse(PacketPtr pkt) } if (mshr->promoteDeferredTargets()) { + // avoid later read getting stale data while write miss is + // outstanding.. 
see comment in timingAccess() + blk->status &= ~BlkReadable; MSHRQueue *mq = mshr->queue; mq->markPending(mshr); requestMemSideBus((RequestCause)mq->index, pkt->finishTime); @@ -861,7 +906,7 @@ Cache<TagStore>::writebackBlk(BlkType *blk) { assert(blk && blk->isValid() && blk->isDirty()); - writebacks[0/*pkt->req->getThreadNum()*/]++; + writebacks[0/*pkt->req->threadId()*/]++; Request *writebackReq = new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0); @@ -878,7 +923,7 @@ template<class TagStore> typename Cache<TagStore>::BlkType* Cache<TagStore>::allocateBlock(Addr addr, PacketList &writebacks) { - BlkType *blk = tags->findReplacement(addr, writebacks); + BlkType *blk = tags->findVictim(addr, writebacks); if (blk->isValid()) { Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set); @@ -889,6 +934,7 @@ Cache<TagStore>::allocateBlock(Addr addr, PacketList &writebacks) assert(!blk->isWritable()); assert(repl_mshr->needsExclusive()); // too hard to replace block with transient state + // allocation failed, block not inserted return NULL; } else { DPRINTF(Cache, "replacement: replacing %x with %x: %s\n", @@ -902,8 +948,6 @@ Cache<TagStore>::allocateBlock(Addr addr, PacketList &writebacks) } } - // Set tag for new block. Caller is responsible for setting status. - blk->tag = tags->extractTag(addr); return blk; } @@ -934,7 +978,10 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk, assert(!tempBlock->isValid()); blk = tempBlock; tempBlock->set = tags->extractSet(addr); + tempBlock->tag = tags->extractTag(addr); DPRINTF(Cache, "using temp block for %x\n", addr); + } else { + tags->insertBlock(addr, blk); } } else { // existing block... 
probably an upgrade @@ -1088,6 +1135,11 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk, pkt->makeAtomicResponse(); pkt->setDataFromBlock(blk->data, blkSize); } + } else if (is_timing && is_deferred) { + // if it's a deferred timing snoop then we've made a copy of + // the packet, and so if we're not using that copy to respond + // then we need to delete it here. + delete pkt; } // Do this last in case it deallocates block data or something @@ -1156,6 +1208,7 @@ Cache<TagStore>::snoopTiming(PacketPtr pkt) if (pkt->isInvalidate()) { // Invalidation trumps our writeback... discard here markInService(mshr); + delete wb_pkt; } // If this was a shared writeback, there may still be @@ -1253,7 +1306,7 @@ Cache<TagStore>::getNextMSHR() if (pkt) { // Update statistic on number of prefetches issued // (hwpf_mshr_misses) - mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; // Don't request bus, since we already have it return allocateMissBuffer(pkt, curTick, false); } @@ -1277,7 +1330,7 @@ Cache<TagStore>::getTimingPacket() PacketPtr tgt_pkt = mshr->getTarget()->pkt; PacketPtr pkt = NULL; - if (mshr->isSimpleForward()) { + if (mshr->isForwardNoResponse()) { // no response expected, just forward packet as it is assert(tags->findBlock(mshr->addr) == NULL); pkt = tgt_pkt; @@ -1285,11 +1338,10 @@ Cache<TagStore>::getTimingPacket() BlkType *blk = tags->findBlock(mshr->addr); pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive()); - mshr->isCacheFill = (pkt != NULL); + mshr->isForward = (pkt == NULL); - if (pkt == NULL) { + if (mshr->isForward) { // not a cache block request, but a response is expected - assert(!mshr->isSimpleForward()); // make copy of current packet to forward, keep current // copy for response handling pkt = new Packet(tgt_pkt); @@ -1306,6 +1358,22 @@ Cache<TagStore>::getTimingPacket() } +template<class TagStore> +Tick +Cache<TagStore>::nextMSHRReadyTime() +{ + Tick 
nextReady = std::min(mshrQueue.nextMSHRReadyTime(), + writeBuffer.nextMSHRReadyTime()); + + if (prefetcher) { + nextReady = std::min(nextReady, + prefetcher->nextPrefetchReadyTime()); + } + + return nextReady; +} + + /////////////// // // CpuSidePort @@ -1463,7 +1531,7 @@ Cache<TagStore>::MemSidePort::sendPacket() waitingOnRetry = !success; if (waitingOnRetry) { DPRINTF(CachePort, "now waiting on a retry\n"); - if (!mshr->isSimpleForward()) { + if (!mshr->isForwardNoResponse()) { delete pkt; } } else { @@ -1481,7 +1549,7 @@ Cache<TagStore>::MemSidePort::sendPacket() // @TODO: need to facotr in prefetch requests here somehow if (nextReady != MaxTick) { DPRINTF(CachePort, "more packets to send @ %d\n", nextReady); - sendEvent->schedule(std::max(nextReady, curTick + 1)); + schedule(sendEvent, std::max(nextReady, curTick + 1)); } else { // no more to send right now: if we're draining, we may be done if (drainEvent) { diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc index 6537f6343..9ec9c090c 100644 --- a/src/mem/cache/mshr.cc +++ b/src/mem/cache/mshr.cc @@ -64,9 +64,9 @@ MSHR::TargetList::TargetList() inline void MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, - Counter order, bool cpuSide, bool markPending) + Counter order, Target::Source source, bool markPending) { - if (cpuSide) { + if (source != Target::FromSnoop) { if (pkt->needsExclusive()) { needsExclusive = true; } @@ -84,7 +84,7 @@ MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, } } - push_back(Target(pkt, readyTime, order, cpuSide, markPending)); + push_back(Target(pkt, readyTime, order, source, markPending)); } @@ -141,7 +141,14 @@ print(std::ostream &os, int verbosity, const std::string &prefix) const { ConstIterator end_i = end(); for (ConstIterator i = begin(); i != end_i; ++i) { - ccprintf(os, "%s%s: ", prefix, i->isCpuSide() ? 
"cpu" : "mem"); + const char *s; + switch (i->source) { + case Target::FromCPU: s = "FromCPU"; + case Target::FromSnoop: s = "FromSnoop"; + case Target::FromPrefetcher: s = "FromPrefetcher"; + default: s = ""; + } + ccprintf(os, "%s%s: ", prefix, s); i->pkt->print(os, verbosity, ""); } } @@ -156,16 +163,18 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, readyTime = whenReady; order = _order; assert(target); - isCacheFill = false; + isForward = false; _isUncacheable = target->req->isUncacheable(); inService = false; downstreamPending = false; threadNum = 0; ntargets = 1; - // Don't know of a case where we would allocate a new MSHR for a - // snoop (mem-side request), so set cpuSide to true here. assert(targets->isReset()); - targets->add(target, whenReady, _order, true, true); + // Don't know of a case where we would allocate a new MSHR for a + // snoop (mem-side request), so set source according to request here + Target::Source source = (target->cmd == MemCmd::HardPFReq) ? + Target::FromPrefetcher : Target::FromCPU; + targets->add(target, whenReady, _order, source, true); assert(deferredTargets->isReset()); pendingInvalidate = false; pendingShared = false; @@ -187,7 +196,7 @@ bool MSHR::markInService() { assert(!inService); - if (isSimpleForward()) { + if (isForwardNoResponse()) { // we just forwarded the request packet & don't expect a // response, so get rid of it assert(getNumTargets() == 1); @@ -230,17 +239,22 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order) // comes back (but before this target is processed) // - the outstanding request is for a non-exclusive block and this // target requires an exclusive block + + // assume we'd never issue a prefetch when we've got an + // outstanding miss + assert(pkt->cmd != MemCmd::HardPFReq); + if (inService && (!deferredTargets->empty() || pendingInvalidate || (!targets->needsExclusive && pkt->needsExclusive()))) { // need to put on deferred list - deferredTargets->add(pkt, whenReady, 
_order, true, true); + deferredTargets->add(pkt, whenReady, _order, Target::FromCPU, true); } else { // No request outstanding, or still OK to append to // outstanding request: append to regular target list. Only // mark pending if current request hasn't been issued yet // (isn't in service). - targets->add(pkt, whenReady, _order, true, !inService); + targets->add(pkt, whenReady, _order, Target::FromCPU, !inService); } ++ntargets; @@ -291,7 +305,7 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order) // actual target device (typ. PhysicalMemory) will delete the // packet on reception, so we need to save a copy here PacketPtr cp_pkt = new Packet(pkt, true); - targets->add(cp_pkt, curTick, _order, false, + targets->add(cp_pkt, curTick, _order, Target::FromSnoop, downstreamPending && targets->needsExclusive); ++ntargets; @@ -403,7 +417,8 @@ MSHR::print(std::ostream &os, int verbosity, const std::string &prefix) const { ccprintf(os, "%s[%x:%x] %s %s %s state: %s %s %s %s\n", prefix, addr, addr+size-1, - isCacheFill ? "Fill" : "", + isForward ? "Forward" : "", + isForwardNoResponse() ? "ForwNoResp" : "", needsExclusive() ? "Excl" : "", _isUncacheable ? "Unc" : "", inService ? "InSvc" : "", diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh index fdb0485cb..bed7012b0 100644 --- a/src/mem/cache/mshr.hh +++ b/src/mem/cache/mshr.hh @@ -55,20 +55,25 @@ class MSHR : public Packet::SenderState, public Printable class Target { public: + + enum Source { + FromCPU, + FromSnoop, + FromPrefetcher + }; + Tick recvTime; //!< Time when request was received (for stats) Tick readyTime; //!< Time when request is ready to be serviced Counter order; //!< Global order (for memory consistency mgmt) PacketPtr pkt; //!< Pending request packet. - bool cpuSide; //!< Did request come from cpu side or mem side? + Source source; //!< Did request come from cpu, memory, or prefetcher? bool markedPending; //!< Did we mark upstream MSHR //!< as downstreamPending? 
- bool isCpuSide() const { return cpuSide; } - Target(PacketPtr _pkt, Tick _readyTime, Counter _order, - bool _cpuSide, bool _markedPending) + Source _source, bool _markedPending) : recvTime(curTick), readyTime(_readyTime), order(_order), - pkt(_pkt), cpuSide(_cpuSide), markedPending(_markedPending) + pkt(_pkt), source(_source), markedPending(_markedPending) {} }; @@ -85,7 +90,7 @@ class MSHR : public Packet::SenderState, public Printable void resetFlags() { needsExclusive = hasUpgrade = false; } bool isReset() { return !needsExclusive && !hasUpgrade; } void add(PacketPtr pkt, Tick readyTime, Counter order, - bool cpuSide, bool markPending); + Target::Source source, bool markPending); void replaceUpgrades(); void clearDownstreamPending(); bool checkFunctional(PacketPtr pkt); @@ -118,8 +123,8 @@ class MSHR : public Packet::SenderState, public Printable /** True if the request has been sent to the bus. */ bool inService; - /** True if we will be putting the returned block in the cache */ - bool isCacheFill; + /** True if the request is just a simple forward from an upper level */ + bool isForward; /** True if we need to get an exclusive copy of the block. */ bool needsExclusive() const { return targets->needsExclusive; } @@ -200,7 +205,7 @@ public: * Returns the current number of allocated targets. * @return The current number of allocated targets. */ - int getNumTargets() { return ntargets; } + int getNumTargets() const { return ntargets; } /** * Returns a pointer to the target list. @@ -212,13 +217,17 @@ public: * Returns true if there are targets left. * @return true if there are targets */ - bool hasTargets() { return !targets->empty(); } + bool hasTargets() const { return !targets->empty(); } /** * Returns a reference to the first target. * @return A pointer to the first target. 
*/ - Target *getTarget() { assert(hasTargets()); return &targets->front(); } + Target *getTarget() const + { + assert(hasTargets()); + return &targets->front(); + } /** * Pop first target. @@ -229,12 +238,12 @@ public: targets->pop_front(); } - bool isSimpleForward() + bool isForwardNoResponse() const { if (getNumTargets() != 1) return false; Target *tgt = getTarget(); - return tgt->isCpuSide() && !tgt->pkt->needsResponse(); + return tgt->source == Target::FromCPU && !tgt->pkt->needsResponse(); } bool promoteDeferredTargets(); diff --git a/src/mem/cache/mshr_queue.cc b/src/mem/cache/mshr_queue.cc index 45331c33d..b5c6cc7b8 100644 --- a/src/mem/cache/mshr_queue.cc +++ b/src/mem/cache/mshr_queue.cc @@ -230,7 +230,7 @@ MSHRQueue::squash(int threadNum) if (mshr->threadNum == threadNum) { while (mshr->hasTargets()) { mshr->popTarget(); - assert(0/*target->req->getThreadNum()*/ == threadNum); + assert(0/*target->req->threadId()*/ == threadNum); } assert(!mshr->hasTargets()); assert(mshr->ntargets==0); diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc index fcc02ff28..365ce6727 100644 --- a/src/mem/cache/prefetch/base.cc +++ b/src/mem/cache/prefetch/base.cc @@ -33,6 +33,7 @@ * Hardware Prefetcher Definition. 
*/ +#include "arch/isa_traits.hh" #include "base/trace.hh" #include "mem/cache/base.hh" #include "mem/cache/prefetch/base.hh" @@ -43,7 +44,7 @@ BasePrefetcher::BasePrefetcher(const BaseCacheParams *p) : size(p->prefetcher_size), pageStop(!p->prefetch_past_page), serialSquash(p->prefetch_serial_squash), cacheCheckPush(p->prefetch_cache_check_push), - only_data(p->prefetch_data_accesses_only) + onlyData(p->prefetch_data_accesses_only) { } @@ -52,6 +53,7 @@ BasePrefetcher::setCache(BaseCache *_cache) { cache = _cache; blkSize = cache->getBlockSize(); + _name = cache->name() + "-pf"; } void @@ -99,7 +101,8 @@ BasePrefetcher::regStats(const std::string &name) pfSquashed .name(name + ".prefetcher.num_hwpf_squashed_from_miss") - .desc("number of hwpf that got squashed due to a miss aborting calculation time") + .desc("number of hwpf that got squashed due to a miss " + "aborting calculation time") ; } @@ -126,60 +129,79 @@ BasePrefetcher::inMissQueue(Addr addr) PacketPtr BasePrefetcher::getPacket() { - DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name()); + DPRINTF(HWPrefetch, "Requesting a hw_pf to issue\n"); if (pf.empty()) { - DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name()); + DPRINTF(HWPrefetch, "No HW_PF found\n"); return NULL; } PacketPtr pkt; - bool keepTrying = false; + bool keep_trying = false; do { pkt = *pf.begin(); pf.pop_front(); if (!cacheCheckPush) { - keepTrying = cache->inCache(pkt->getAddr()); + keep_trying = cache->inCache(pkt->getAddr()); } + + if (keep_trying) { + DPRINTF(HWPrefetch, "addr 0x%x in cache, skipping\n", + pkt->getAddr()); + delete pkt->req; + delete pkt; + } + if (pf.empty()) { cache->deassertMemSideBusRequest(BaseCache::Request_PF); - if (keepTrying) return NULL; //None left, all were in cache + if (keep_trying) { + return NULL; // None left, all were in cache + } } - } while (keepTrying); + } while (keep_trying); pfIssued++; + assert(pkt != NULL); + DPRINTF(HWPrefetch, "returning 0x%x\n", pkt->getAddr()); 
return pkt; } -void -BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) + +Tick +BasePrefetcher::notify(PacketPtr &pkt, Tick time) { - if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data)) - { - //Calculate the blk address - Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); + if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && onlyData)) { + // Calculate the blk address + Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); - //Check if miss is in pfq, if so remove it - std::list<PacketPtr>::iterator iter = inPrefetch(blkAddr); + // Check if miss is in pfq, if so remove it + std::list<PacketPtr>::iterator iter = inPrefetch(blk_addr); if (iter != pf.end()) { - DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name()); + DPRINTF(HWPrefetch, "Saw a miss to a queued prefetch addr: " + "0x%x, removing it\n", blk_addr); pfRemovedMSHR++; + delete (*iter)->req; + delete (*iter); pf.erase(iter); if (pf.empty()) cache->deassertMemSideBusRequest(BaseCache::Request_PF); } - //Remove anything in queue with delay older than time - //since everything is inserted in time order, start from end - //and work until pf.empty() or time is earlier - //This is done to emulate Aborting the previous work on a new miss - //Needed for serial calculators like GHB + // Remove anything in queue with delay older than time + // since everything is inserted in time order, start from end + // and work until pf.empty() or time is earlier + // This is done to emulate Aborting the previous work on a new miss + // Needed for serial calculators like GHB if (serialSquash) { iter = pf.end(); iter--; while (!pf.empty() && ((*iter)->time >= time)) { pfSquashed++; - pf.pop_back(); + DPRINTF(HWPrefetch, "Squashing old prefetch addr: 0x%x\n", + (*iter)->getAddr()); + delete (*iter)->req; + delete (*iter); + pf.erase(iter); iter--; } if (pf.empty()) @@ -191,74 +213,70 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) std::list<Tick> delays; 
calculatePrefetch(pkt, addresses, delays); - std::list<Addr>::iterator addr = addresses.begin(); - std::list<Tick>::iterator delay = delays.begin(); - while (addr != addresses.end()) - { - DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name()); - //temp calc this here... + std::list<Addr>::iterator addrIter = addresses.begin(); + std::list<Tick>::iterator delayIter = delays.begin(); + for (; addrIter != addresses.end(); ++addrIter, ++delayIter) { + Addr addr = *addrIter; + pfIdentified++; - //create a prefetch memreq - Request * prefetchReq = new Request(*addr, blkSize, 0); - PacketPtr prefetch; - prefetch = new Packet(prefetchReq, MemCmd::HardPFReq, -1); - prefetch->allocate(); - prefetch->req->setThreadContext(pkt->req->getCpuNum(), - pkt->req->getThreadNum()); - - prefetch->time = time + (*delay); //@todo ADD LATENCY HERE - //... initialize - - //Check if it is already in the cache - if (cacheCheckPush) { - if (cache->inCache(prefetch->getAddr())) { - addr++; - delay++; - continue; - } + + DPRINTF(HWPrefetch, "Found a pf candidate addr: 0x%x, " + "inserting into prefetch queue with delay %d time %d\n", + addr, *delayIter, time); + + // Check if it is already in the cache + if (cacheCheckPush && cache->inCache(addr)) { + DPRINTF(HWPrefetch, "Prefetch addr already in cache\n"); + continue; } - //Check if it is already in the miss_queue - if (cache->inMissQueue(prefetch->getAddr())) { - addr++; - delay++; + // Check if it is already in the miss_queue + if (cache->inMissQueue(addr)) { + DPRINTF(HWPrefetch, "Prefetch addr already in miss queue\n"); continue; } - //Check if it is already in the pf buffer - if (inPrefetch(prefetch->getAddr()) != pf.end()) { + // Check if it is already in the pf buffer + if (inPrefetch(addr) != pf.end()) { pfBufferHit++; - addr++; - delay++; + DPRINTF(HWPrefetch, "Prefetch addr already in pf buffer\n"); continue; } - //We just remove the head if we are full - if (pf.size() == size) - { - 
DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name()); + // create a prefetch memreq + Request *prefetchReq = new Request(*addrIter, blkSize, 0); + PacketPtr prefetch = + new Packet(prefetchReq, MemCmd::HardPFReq, Packet::Broadcast); + prefetch->allocate(); + prefetch->req->setThreadContext(pkt->req->contextId(), + pkt->req->threadId()); + + prefetch->time = time + (*delayIter); // @todo ADD LATENCY HERE + + // We just remove the head if we are full + if (pf.size() == size) { pfRemovedFull++; + PacketPtr old_pkt = *pf.begin(); + DPRINTF(HWPrefetch, "Prefetch queue full, " + "removing oldest 0x%x\n", old_pkt->getAddr()); + delete old_pkt->req; + delete old_pkt; pf.pop_front(); } pf.push_back(prefetch); - - //Make sure to request the bus, with proper delay - cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time); - - //Increment through the list - addr++; - delay++; } } + + return pf.empty() ? 0 : pf.front()->time; } std::list<PacketPtr>::iterator BasePrefetcher::inPrefetch(Addr address) { - //Guaranteed to only be one match, we always check before inserting + // Guaranteed to only be one match, we always check before inserting std::list<PacketPtr>::iterator iter; - for (iter=pf.begin(); iter != pf.end(); iter++) { + for (iter = pf.begin(); iter != pf.end(); iter++) { if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) { return iter; } @@ -266,4 +284,8 @@ BasePrefetcher::inPrefetch(Addr address) return pf.end(); } - +bool +BasePrefetcher::samePage(Addr a, Addr b) +{ + return roundDown(a, TheISA::VMPageSize) == roundDown(b, TheISA::VMPageSize); +} diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh index 1515d8a93..fc027cb3b 100644 --- a/src/mem/cache/prefetch/base.hh +++ b/src/mem/cache/prefetch/base.hh @@ -73,30 +73,40 @@ class BasePrefetcher bool cacheCheckPush; /** Do we prefetch on only data reads, or on inst reads as well. 
*/ - bool only_data; + bool onlyData; + + std::string _name; public: - Stats::Scalar<> pfIdentified; - Stats::Scalar<> pfMSHRHit; - Stats::Scalar<> pfCacheHit; - Stats::Scalar<> pfBufferHit; - Stats::Scalar<> pfRemovedFull; - Stats::Scalar<> pfRemovedMSHR; - Stats::Scalar<> pfIssued; - Stats::Scalar<> pfSpanPage; - Stats::Scalar<> pfSquashed; + Stats::Scalar pfIdentified; + Stats::Scalar pfMSHRHit; + Stats::Scalar pfCacheHit; + Stats::Scalar pfBufferHit; + Stats::Scalar pfRemovedFull; + Stats::Scalar pfRemovedMSHR; + Stats::Scalar pfIssued; + Stats::Scalar pfSpanPage; + Stats::Scalar pfSquashed; void regStats(const std::string &name); public: + BasePrefetcher(const BaseCacheParams *p); virtual ~BasePrefetcher() {} + const std::string name() const { return _name; } + void setCache(BaseCache *_cache); - void handleMiss(PacketPtr &pkt, Tick time); + /** + * Notify prefetcher of cache access (may be any access or just + * misses, depending on cache parameters.) + * @retval Time of next prefetch availability, or 0 if none. + */ + Tick notify(PacketPtr &pkt, Tick time); bool inCache(Addr addr); @@ -109,11 +119,21 @@ class BasePrefetcher return !pf.empty(); } + Tick nextPrefetchReadyTime() + { + return pf.empty() ? MaxTick : pf.front()->time; + } + virtual void calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses, std::list<Tick> &delays) = 0; std::list<PacketPtr>::iterator inPrefetch(Addr address); + + /** + * Utility function: are addresses a and b on the same VM page? + */ + bool samePage(Addr a, Addr b); }; diff --git a/src/mem/cache/prefetch/ghb.cc b/src/mem/cache/prefetch/ghb.cc index f5b88e1a6..f8f7de1db 100644 --- a/src/mem/cache/prefetch/ghb.cc +++ b/src/mem/cache/prefetch/ghb.cc @@ -34,39 +34,37 @@ * GHB Prefetcher implementation. 
*/ +#include "base/trace.hh" #include "mem/cache/prefetch/ghb.hh" -#include "arch/isa_traits.hh" void GHBPrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses, std::list<Tick> &delays) { - Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); - int cpuID = pkt->req->getCpuNum(); - if (!useCPUId) cpuID = 0; + if (useContextId && !pkt->req->hasContextId()) { + DPRINTF(HWPrefetch, "ignoring request with no context ID\n"); + return; + } + Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); + int ctx_id = useContextId ? pkt->req->contextId() : 0; + assert(ctx_id < Max_Contexts); - int new_stride = blkAddr - last_miss_addr[cpuID]; - int old_stride = last_miss_addr[cpuID] - - second_last_miss_addr[cpuID]; + int new_stride = blk_addr - lastMissAddr[ctx_id]; + int old_stride = lastMissAddr[ctx_id] - secondLastMissAddr[ctx_id]; - second_last_miss_addr[cpuID] = last_miss_addr[cpuID]; - last_miss_addr[cpuID] = blkAddr; + secondLastMissAddr[ctx_id] = lastMissAddr[ctx_id]; + lastMissAddr[ctx_id] = blk_addr; if (new_stride == old_stride) { - for (int d=1; d <= degree; d++) { - Addr newAddr = blkAddr + d * new_stride; - if (this->pageStop && - (blkAddr & ~(TheISA::VMPageSize - 1)) != - (newAddr & ~(TheISA::VMPageSize - 1))) - { - //Spanned the page, so now stop - this->pfSpanPage += degree - d + 1; + for (int d = 1; d <= degree; d++) { + Addr new_addr = blk_addr + d * new_stride; + if (pageStop && !samePage(blk_addr, new_addr)) { + // Spanned the page, so now stop + pfSpanPage += degree - d + 1; return; - } - else - { - addresses.push_back(newAddr); + } else { + addresses.push_back(new_addr); delays.push_back(latency); } } } diff --git a/src/mem/cache/prefetch/ghb.hh b/src/mem/cache/prefetch/ghb.hh index 4fb692016..c85221a39 100644 --- a/src/mem/cache/prefetch/ghb.hh +++ b/src/mem/cache/prefetch/ghb.hh @@ -42,18 +42,20 @@ class GHBPrefetcher : public BasePrefetcher { protected: - Addr second_last_miss_addr[64/*MAX_CPUS*/]; - Addr last_miss_addr[64/*MAX_CPUS*/]; 
+ static const int Max_Contexts = 64; + + Addr secondLastMissAddr[Max_Contexts]; + Addr lastMissAddr[Max_Contexts]; Tick latency; int degree; - bool useCPUId; + bool useContextId; public: GHBPrefetcher(const BaseCacheParams *p) : BasePrefetcher(p), latency(p->prefetch_latency), - degree(p->prefetch_degree), useCPUId(p->prefetch_use_cpu_id) + degree(p->prefetch_degree), useContextId(p->prefetch_use_cpu_id) { } diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc index b116b66c7..8af4e615e 100644 --- a/src/mem/cache/prefetch/stride.cc +++ b/src/mem/cache/prefetch/stride.cc @@ -34,59 +34,97 @@ * Stride Prefetcher template instantiations. */ +#include "base/trace.hh" #include "mem/cache/prefetch/stride.hh" void StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses, std::list<Tick> &delays) { -// Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); - int cpuID = pkt->req->getCpuNum(); - if (!useCPUId) cpuID = 0; - - /* Scan Table for IAddr Match */ -/* std::list<strideEntry*>::iterator iter; - for (iter=table[cpuID].begin(); - iter !=table[cpuID].end(); - iter++) { - if ((*iter)->IAddr == pkt->pc) break; - } - - if (iter != table[cpuID].end()) { - //Hit in table - - int newStride = blkAddr - (*iter)->MAddr; - if (newStride == (*iter)->stride) { - (*iter)->confidence++; - } - else { - (*iter)->stride = newStride; - (*iter)->confidence--; - } - - (*iter)->MAddr = blkAddr; - - for (int d=1; d <= degree; d++) { - Addr newAddr = blkAddr + d * newStride; - if (this->pageStop && - (blkAddr & ~(TheISA::VMPageSize - 1)) != - (newAddr & ~(TheISA::VMPageSize - 1))) - { - //Spanned the page, so now stop - this->pfSpanPage += degree - d + 1; - return; - } - else - { - addresses.push_back(newAddr); - delays.push_back(latency); - } - } - } - else { - //Miss in table - //Find lowest confidence and replace - - } -*/ + if (!pkt->req->hasPC()) { + DPRINTF(HWPrefetch, "ignoring request with no PC"); + return; + } + + if (useContextId && 
!pkt->req->hasContextId()) { + DPRINTF(HWPrefetch, "ignoring request with no context ID"); + return; + } + + Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); + int ctx_id = useContextId ? pkt->req->contextId() : 0; + Addr pc = pkt->req->getPC(); + assert(ctx_id < Max_Contexts); + std::list<StrideEntry*> &tab = table[ctx_id]; + + /* Scan Table for instAddr Match */ + std::list<StrideEntry*>::iterator iter; + for (iter = tab.begin(); iter != tab.end(); iter++) { + if ((*iter)->instAddr == pc) + break; + } + + if (iter != tab.end()) { + // Hit in table + + int new_stride = blk_addr - (*iter)->missAddr; + bool stride_match = (new_stride == (*iter)->stride); + + if (stride_match && new_stride != 0) { + if ((*iter)->confidence < Max_Conf) + (*iter)->confidence++; + } else { + (*iter)->stride = new_stride; + if ((*iter)->confidence > Min_Conf) + (*iter)->confidence = 0; + } + + DPRINTF(HWPrefetch, "hit: PC %x blk_addr %x stride %d (%s), conf %d\n", + pc, blk_addr, new_stride, stride_match ? 
"match" : "change", + (*iter)->confidence); + + (*iter)->missAddr = blk_addr; + + if ((*iter)->confidence <= 0) + return; + + for (int d = 1; d <= degree; d++) { + Addr new_addr = blk_addr + d * new_stride; + if (pageStop && !samePage(blk_addr, new_addr)) { + // Spanned the page, so now stop + pfSpanPage += degree - d + 1; + return; + } else { + DPRINTF(HWPrefetch, " queuing prefetch to %x @ %d\n", + new_addr, latency); + addresses.push_back(new_addr); + delays.push_back(latency); + } + } + } else { + // Miss in table + // Find lowest confidence and replace + + DPRINTF(HWPrefetch, "miss: PC %x blk_addr %x\n", pc, blk_addr); + + if (tab.size() >= 256) { //set default table size is 256 + std::list<StrideEntry*>::iterator min_pos = tab.begin(); + int min_conf = (*min_pos)->confidence; + for (iter = min_pos, ++iter; iter != tab.end(); ++iter) { + if ((*iter)->confidence < min_conf){ + min_pos = iter; + min_conf = (*iter)->confidence; + } + } + DPRINTF(HWPrefetch, " replacing PC %x\n", (*min_pos)->instAddr); + tab.erase(min_pos); + } + + StrideEntry *new_entry = new StrideEntry; + new_entry->instAddr = pc; + new_entry->missAddr = blk_addr; + new_entry->stride = 0; + new_entry->confidence = 0; + tab.push_back(new_entry); + } } diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh index f6bdbc424..6ccd32b91 100644 --- a/src/mem/cache/prefetch/stride.hh +++ b/src/mem/cache/prefetch/stride.hh @@ -36,41 +36,41 @@ #ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ #define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ +#include <limits.h> #include "mem/cache/prefetch/base.hh" class StridePrefetcher : public BasePrefetcher { protected: - class strideEntry + static const int Max_Contexts = 64; + + // These constants need to be changed with the type of the + // 'confidence' field below. 
+ static const int Max_Conf = INT_MAX; + static const int Min_Conf = INT_MIN; + + class StrideEntry { public: - Addr IAddr; - Addr MAddr; + Addr instAddr; + Addr missAddr; int stride; - int64_t confidence; - -/* bool operator < (strideEntry a,strideEntry b) - { - if (a.confidence == b.confidence) { - return true; //?????? - } - else return a.confidence < b.confidence; - }*/ + int confidence; }; - Addr* lastMissAddr[64/*MAX_CPUS*/]; - std::list<strideEntry*> table[64/*MAX_CPUS*/]; + Addr *lastMissAddr[Max_Contexts]; + + std::list<StrideEntry*> table[Max_Contexts]; Tick latency; int degree; - bool useCPUId; - + bool useContextId; public: StridePrefetcher(const BaseCacheParams *p) : BasePrefetcher(p), latency(p->prefetch_latency), - degree(p->prefetch_degree), useCPUId(p->prefetch_use_cpu_id) + degree(p->prefetch_degree), useContextId(p->prefetch_use_cpu_id) { } diff --git a/src/mem/cache/prefetch/tagged.cc b/src/mem/cache/prefetch/tagged.cc index 6afe1c6c2..a6c2403ba 100644 --- a/src/mem/cache/prefetch/tagged.cc +++ b/src/mem/cache/prefetch/tagged.cc @@ -47,20 +47,15 @@ TaggedPrefetcher:: calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses, std::list<Tick> &delays) { - Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); + Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); - for (int d=1; d <= degree; d++) { - Addr newAddr = blkAddr + d*(this->blkSize); - if (this->pageStop && - (blkAddr & ~(TheISA::VMPageSize - 1)) != - (newAddr & ~(TheISA::VMPageSize - 1))) - { - //Spanned the page, so now stop - this->pfSpanPage += degree - d + 1; + for (int d = 1; d <= degree; d++) { + Addr newAddr = blkAddr + d*(blkSize); + if (pageStop && !samePage(blkAddr, newAddr)) { + // Spanned the page, so now stop + pfSpanPage += degree - d + 1; return; - } - else - { + } else { addresses.push_back(newAddr); delays.push_back(latency); } diff --git a/src/mem/cache/tags/SConscript b/src/mem/cache/tags/SConscript index 9153d97e7..7255e0b7e 100644 --- 
a/src/mem/cache/tags/SConscript +++ b/src/mem/cache/tags/SConscript @@ -34,13 +34,9 @@ Source('base.cc') Source('fa_lru.cc') Source('iic.cc') Source('lru.cc') -Source('split.cc') -Source('split_lifo.cc') -Source('split_lru.cc') SimObject('iic_repl/Repl.py') Source('iic_repl/gen.cc') TraceFlag('IIC') TraceFlag('IICMore') -TraceFlag('Split') diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh index b7b0c7ef0..46c7186b1 100644 --- a/src/mem/cache/tags/base.hh +++ b/src/mem/cache/tags/base.hh @@ -70,19 +70,19 @@ class BaseTags */ /** Number of replacements of valid blocks per thread. */ - Stats::Vector<> replacements; + Stats::Vector replacements; /** Per cycle average of the number of tags that hold valid data. */ - Stats::Average<> tagsInUse; + Stats::Average tagsInUse; /** The total number of references to a block before it is replaced. */ - Stats::Scalar<> totalRefs; + Stats::Scalar totalRefs; /** * The number of reference counts sampled. This is different from * replacements because we sample all the valid blocks when the simulator * exits. */ - Stats::Scalar<> sampledRefs; + Stats::Scalar sampledRefs; /** * Average number of references to a block before is was replaced. @@ -91,7 +91,7 @@ class BaseTags Stats::Formula avgRefs; /** The cycle that the warmup percentage was hit. 
*/ - Stats::Scalar<> warmupCycle; + Stats::Scalar warmupCycle; /** * @} */ diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 607e89a75..f92d4cb37 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -144,14 +144,6 @@ FALRU::hashLookup(Addr addr) const return NULL; } -bool -FALRU::probe(Addr addr) const -{ - Addr blkAddr = blkAlign(addr); - FALRUBlk* blk = hashLookup(blkAddr); - return blk && blk->tag == blkAddr && blk->isValid(); -} - void FALRU::invalidateBlk(FALRU::BlkType *blk) { @@ -163,7 +155,7 @@ FALRU::invalidateBlk(FALRU::BlkType *blk) } FALRUBlk* -FALRU::findBlock(Addr addr, int &lat, int *inCache) +FALRU::accessBlock(Addr addr, int &lat, int *inCache) { accesses++; int tmp_in_cache = 0; @@ -215,7 +207,7 @@ FALRU::findBlock(Addr addr) const } FALRUBlk* -FALRU::findReplacement(Addr addr, PacketList &writebacks) +FALRU::findVictim(Addr addr, PacketList &writebacks) { FALRUBlk * blk = tail; assert(blk->inCache == 0); @@ -237,6 +229,11 @@ FALRU::findReplacement(Addr addr, PacketList &writebacks) } void +FALRU::insertBlock(Addr addr, FALRU::BlkType *blk) +{ +} + +void FALRU::moveToHead(FALRUBlk *blk) { int updateMask = blk->inCache ^ cacheMask; diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh index cabcf18b4..4eab10c49 100644 --- a/src/mem/cache/tags/fa_lru.hh +++ b/src/mem/cache/tags/fa_lru.hh @@ -139,11 +139,11 @@ class FALRU : public BaseTags */ /** Hits in each cache size >= 128K. */ - Stats::Vector<> hits; + Stats::Vector hits; /** Misses in each cache size >= 128K. */ - Stats::Vector<> misses; + Stats::Vector misses; /** Total number of accesses. */ - Stats::Scalar<> accesses; + Stats::Scalar accesses; /** * @} @@ -165,29 +165,23 @@ public: void regStats(const std::string &name); /** - * Return true if the address is found in the cache. - * @param asid The address space ID. - * @param addr The address to look for. - * @return True if the address is in the cache. 
- */ - bool probe(Addr addr) const; - - /** * Invalidate a cache block. * @param blk The block to invalidate. */ void invalidateBlk(BlkType *blk); /** - * Find the block in the cache and update the replacement data. Returns - * the access latency and the in cache flags as a side effect + * Access block and update replacement data. May not succeed, in which case + * NULL pointer is returned. This has all the implications of a cache + * access and should only be used as such. + * Returns the access latency and inCache flags as a side effect. * @param addr The address to look for. * @param asid The address space ID. * @param lat The latency of the access. * @param inCache The FALRUBlk::inCache flags. * @return Pointer to the cache block. */ - FALRUBlk* findBlock(Addr addr, int &lat, int *inCache = 0); + FALRUBlk* accessBlock(Addr addr, int &lat, int *inCache = 0); /** * Find the block in the cache, do not update the replacement data. @@ -203,7 +197,9 @@ public: * @param writebacks List for any writebacks to be performed. * @return The block to place the replacement in. */ - FALRUBlk* findReplacement(Addr addr, PacketList & writebacks); + FALRUBlk* findVictim(Addr addr, PacketList & writebacks); + + void insertBlock(Addr addr, BlkType *blk); /** * Return the hit latency of this cache. @@ -283,31 +279,6 @@ public: { return (tag); } - - /** - * Read the data out of the internal storage of a cache block. FALRU - * currently doesn't support data storage. - * @param blk The cache block to read. - * @param data The buffer to read the data into. - * @return The data from the cache block. - */ - void readData(FALRUBlk *blk, uint8_t *data) - { - } - - /** - * Write data into the internal storage of a cache block. FALRU - * currently doesn't support data storage. - * @param blk The cache block to be written. - * @param data The data to write. - * @param size The number of bytes to write. - * @param writebacks A list for any writebacks to be performed. 
May be - * needed when writing to a compressed block. - */ - void writeData(FALRUBlk *blk, uint8_t *data, int size, - PacketList &writebacks) - { - } }; #endif diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index 2825599f6..7bc2543c5 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -219,15 +219,9 @@ IIC::regStats(const string &name) ; } -// probe cache for presence of given block. -bool -IIC::probe(Addr addr) const -{ - return (findBlock(addr) != NULL); -} IICTag* -IIC::findBlock(Addr addr, int &lat) +IIC::accessBlock(Addr addr, int &lat) { Addr tag = extractTag(addr); unsigned set = hash(addr); @@ -303,7 +297,7 @@ IIC::findBlock(Addr addr) const IICTag* -IIC::findReplacement(Addr addr, PacketList &writebacks) +IIC::findVictim(Addr addr, PacketList &writebacks) { DPRINTF(IIC, "Finding Replacement for %x\n", addr); unsigned set = hash(addr); @@ -346,6 +340,11 @@ IIC::findReplacement(Addr addr, PacketList &writebacks) } void +IIC::insertBlock(Addr addr, BlkType* blk) +{ +} + +void IIC::freeReplacementBlock(PacketList & writebacks) { IICTag *tag_ptr; @@ -365,7 +364,7 @@ IIC::freeReplacementBlock(PacketList & writebacks) tag_ptr->refCount = 0; if (tag_ptr->isDirty()) { -/* PacketPtr writeback = +/* PacketPtr writeback = buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0), tag_ptr->req->asid, tag_ptr->xc, blkSize, tag_ptr->data, @@ -635,66 +634,6 @@ IIC::invalidateBlk(IIC::BlkType *tag_ptr) } void -IIC::readData(IICTag *blk, uint8_t *data) -{ - assert(blk->size <= trivialSize || blk->numData > 0); - int data_size = blk->size; - if (data_size > trivialSize) { - for (int i = 0; i < blk->numData; ++i){ - memcpy(data+i*subSize, - &(dataBlks[blk->data_ptr[i]][0]), - (data_size>subSize)?subSize:data_size); - data_size -= subSize; - } - } else { - memcpy(data,blk->trivialData,data_size); - } -} - -void -IIC::writeData(IICTag *blk, uint8_t *write_data, int size, - PacketList & writebacks) -{ - DPRINTF(IIC, "Writing %d bytes to 
%x\n", size, - blk->tag<<tagShift); - // Find the number of subblocks needed, (round up) - int num_subs = (size + (subSize -1))/subSize; - if (size <= trivialSize) { - num_subs = 0; - } - assert(num_subs <= numSub); - if (num_subs > blk->numData) { - // need to allocate more data blocks - for (int i = blk->numData; i < num_subs; ++i){ - blk->data_ptr[i] = getFreeDataBlock(writebacks); - dataReferenceCount[blk->data_ptr[i]] += 1; - } - } else if (num_subs < blk->numData){ - // can free data blocks - for (int i=num_subs; i < blk->numData; ++i){ - // decrement reference count and compare to zero - if (--dataReferenceCount[blk->data_ptr[i]] == 0) { - freeDataBlock(blk->data_ptr[i]); - } - } - } - - blk->numData = num_subs; - blk->size = size; - assert(size <= trivialSize || blk->numData > 0); - if (size > trivialSize){ - for (int i = 0; i < blk->numData; ++i){ - memcpy(&dataBlks[blk->data_ptr[i]][0], write_data + i*subSize, - (size>subSize)?subSize:size); - size -= subSize; - } - } else { - memcpy(blk->trivialData,write_data,size); - } -} - - -void IIC::cleanupRefs() { for (int i = 0; i < numTags; ++i) { diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh index c9d080683..45c8ee801 100644 --- a/src/mem/cache/tags/iic.hh +++ b/src/mem/cache/tags/iic.hh @@ -248,20 +248,20 @@ class IIC : public BaseTags */ /** Hash hit depth of cache hits. */ - Stats::Distribution<> hitHashDepth; + Stats::Distribution hitHashDepth; /** Hash depth for cache misses. */ - Stats::Distribution<> missHashDepth; + Stats::Distribution missHashDepth; /** Count of accesses to each hash set. */ - Stats::Distribution<> setAccess; + Stats::Distribution setAccess; /** The total hash depth for every miss. */ - Stats::Scalar<> missDepthTotal; + Stats::Scalar missDepthTotal; /** The total hash depth for all hits. */ - Stats::Scalar<> hitDepthTotal; + Stats::Scalar hitDepthTotal; /** The number of hash misses. 
*/ - Stats::Scalar<> hashMiss; + Stats::Scalar hashMiss; /** The number of hash hits. */ - Stats::Scalar<> hashHit; + Stats::Scalar hashHit; /** @} */ public: @@ -385,14 +385,6 @@ class IIC : public BaseTags } /** - * Check for the address in the tagstore. - * @param asid The address space ID. - * @param addr The address to find. - * @return true if it is found. - */ - bool probe(Addr addr) const; - - /** * Swap the position of two tags. * @param index1 The first tag location. * @param index2 The second tag location. @@ -418,14 +410,16 @@ class IIC : public BaseTags void invalidateBlk(BlkType *blk); /** - * Find the block and update the replacement data. This call also returns - * the access latency as a side effect. + * Access block and update replacement data. May not succeed, in which case + * NULL pointer is returned. This has all the implications of a cache + * access and should only be used as such. + * Returns the access latency and inCache flags as a side effect. * @param addr The address to find. * @param asid The address space ID. * @param lat The access latency. * @return A pointer to the block found, if any. */ - IICTag* findBlock(Addr addr, int &lat); + IICTag* accessBlock(Addr addr, int &lat); /** * Find the block, do not update the replacement data. @@ -441,31 +435,15 @@ class IIC : public BaseTags * @param writebacks List for any writebacks to be performed. * @return The block to place the replacement in. */ - IICTag* findReplacement(Addr addr, PacketList &writebacks); + IICTag* findVictim(Addr addr, PacketList &writebacks); - /** - * Read the data from the internal storage of the given cache block. - * @param blk The block to read the data from. - * @param data The buffer to read the data into. - * @return The cache block's data. - */ - void readData(IICTag *blk, uint8_t *data); - - /** - * Write the data into the internal storage of the given cache block. - * @param blk The block to write to. - * @param data The data to write. 
- * @param size The number of bytes to write. - * @param writebacks A list for any writebacks to be performed. May be - * needed when writing to a compressed block. - */ - void writeData(IICTag *blk, uint8_t *data, int size, - PacketList & writebacks); + void insertBlock(Addr addr, BlkType *blk); /** * Called at end of simulation to complete average block reference stats. */ virtual void cleanupRefs(); + private: /** * Return the hash of the address. diff --git a/src/mem/cache/tags/iic_repl/gen.hh b/src/mem/cache/tags/iic_repl/gen.hh index 22436b384..fe105d95a 100644 --- a/src/mem/cache/tags/iic_repl/gen.hh +++ b/src/mem/cache/tags/iic_repl/gen.hh @@ -162,11 +162,11 @@ class GenRepl : public Repl * @{ */ /** The number of replacements from each pool. */ - Stats::Distribution<> repl_pool; + Stats::Distribution repl_pool; /** The number of advances out of each pool. */ - Stats::Distribution<> advance_pool; + Stats::Distribution advance_pool; /** The number of demotions from each pool. */ - Stats::Distribution<> demote_pool; + Stats::Distribution demote_pool; /** * @} */ diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 7f352e9c4..ff353ff6a 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -113,7 +113,7 @@ LRU::LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency) : // allocate data storage in one big chunk dataBlks = new uint8_t[numSets*assoc*blkSize]; - blkIndex = 0; // index into blks array + blkIndex = 0; // index into blks array for (i = 0; i < numSets; ++i) { sets[i].assoc = assoc; @@ -150,21 +150,8 @@ LRU::~LRU() delete [] sets; } -// probe cache for presence of given block. 
-bool -LRU::probe(Addr addr) const -{ - // return(findBlock(Read, addr, asid) != 0); - Addr tag = extractTag(addr); - unsigned myset = extractSet(addr); - - LRUBlk *blk = sets[myset].findBlk(tag); - - return (blk != NULL); // true if in cache -} - LRUBlk* -LRU::findBlock(Addr addr, int &lat) +LRU::accessBlock(Addr addr, int &lat) { Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -196,12 +183,11 @@ LRU::findBlock(Addr addr) const } LRUBlk* -LRU::findReplacement(Addr addr, PacketList &writebacks) +LRU::findVictim(Addr addr, PacketList &writebacks) { unsigned set = extractSet(addr); // grab a replacement candidate LRUBlk *blk = sets[set].blks[assoc-1]; - sets[set].moveToHead(blk); if (blk->isValid()) { replacements[0]++; totalRefs += blk->refCount; @@ -210,7 +196,14 @@ LRU::findReplacement(Addr addr, PacketList &writebacks) DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n", set, regenerateBlkAddr(blk->tag, set)); - } else if (!blk->isTouched) { + } + return blk; +} + +void +LRU::insertBlock(Addr addr, LRU::BlkType *blk) +{ + if (!blk->isTouched) { tagsInUse++; blk->isTouched = true; if (!warmedUp && tagsInUse.value() >= warmupBound) { @@ -219,7 +212,11 @@ LRU::findReplacement(Addr addr, PacketList &writebacks) } } - return blk; + // Set tag for new block. Caller is responsible for setting status. + blk->tag = extractTag(addr); + + unsigned set = extractSet(addr); + sets[set].moveToHead(blk); } void diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh index ea5606cde..7b6e95e84 100644 --- a/src/mem/cache/tags/lru.hh +++ b/src/mem/cache/tags/lru.hh @@ -127,7 +127,7 @@ public: * @param _assoc The associativity of the cache. * @param _hit_latency The latency in cycles for a hit. */ - LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency); + LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency); /** * Destructor @@ -154,31 +154,25 @@ public: } /** - * Search for the address in the cache. 
- * @param asid The address space ID. - * @param addr The address to find. - * @return True if the address is in the cache. - */ - bool probe(Addr addr) const; - - /** * Invalidate the given block. * @param blk The block to invalidate. */ void invalidateBlk(BlkType *blk); /** - * Finds the given address in the cache and update replacement data. - * Returns the access latency as a side effect. + * Access block and update replacement data. May not succeed, in which case + * NULL pointer is returned. This has all the implications of a cache + * access and should only be used as such. Returns the access latency as a side effect. * @param addr The address to find. * @param asid The address space ID. * @param lat The access latency. * @return Pointer to the cache block if found. */ - LRUBlk* findBlock(Addr addr, int &lat); + LRUBlk* accessBlock(Addr addr, int &lat); /** * Finds the given address in the cache, do not update replacement data. + * i.e. This is a no-side-effect find of a block. * @param addr The address to find. * @param asid The address space ID. * @return Pointer to the cache block if found. @@ -186,12 +180,20 @@ public: LRUBlk* findBlock(Addr addr) const; /** - * Find a replacement block for the address provided. - * @param pkt The request to a find a replacement candidate for. + * Find a block to evict for the address provided. + * @param addr The addr to a find a replacement candidate for. * @param writebacks List for any writebacks to be performed. - * @return The block to place the replacement in. + * @return The candidate block. */ - LRUBlk* findReplacement(Addr addr, PacketList &writebacks); + LRUBlk* findVictim(Addr addr, PacketList &writebacks); + + /** + * Insert the new block into the cache. For LRU this means inserting into + * the MRU position of the set. + * @param addr The address to update. + * @param blk The block to update. + */ + void insertBlock(Addr addr, BlkType *blk); /** * Generate the tag from the given address. 
@@ -254,33 +256,6 @@ public: } /** - * Read the data out of the internal storage of the given cache block. - * @param blk The cache block to read. - * @param data The buffer to read the data into. - * @return The cache block's data. - */ - void readData(LRUBlk *blk, uint8_t *data) - { - std::memcpy(data, blk->data, blk->size); - } - - /** - * Write data into the internal storage of the given cache block. Since in - * LRU does not store data differently this just needs to update the size. - * @param blk The cache block to write. - * @param data The data to write. - * @param size The number of bytes to write. - * @param writebacks A list for any writebacks to be performed. May be - * needed when writing to a compressed block. - */ - void writeData(LRUBlk *blk, uint8_t *data, int size, - PacketList & writebacks) - { - assert(size <= blkSize); - blk->size = size; - } - - /** * Called at end of simulation to complete average block reference stats. */ virtual void cleanupRefs(); diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc deleted file mode 100644 index 0df85cc92..000000000 --- a/src/mem/cache/tags/split.cc +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Lisa Hsu - */ - -/** - * @file - * Definitions of split cache tag store. - */ - -#include <string> -#include <iostream> -#include <fstream> - -#include "base/cprintf.hh" -#include "base/intmath.hh" -#include "base/output.hh" -#include "base/trace.hh" -#include "mem/cache/base.hh" -#include "mem/cache/tags/split.hh" -#include "mem/cache/tags/split_lifo.hh" -#include "mem/cache/tags/split_lru.hh" - - -using namespace std; -using namespace TheISA; - -// create and initialize a partitioned cache structure -Split::Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc, - bool _lifo, bool _two_queue, int _hit_latency) : - numSets(_numSets), blkSize(_blkSize), lifo(_lifo), hitLatency(_hit_latency) -{ - DPRINTF(Split, "new split cache!!\n"); - - DPRINTF(Split, "lru has %d numSets, %d blkSize, %d assoc, and %d hit_latency\n", - numSets, blkSize, LRU1_assoc, hitLatency); - - lru = new SplitLRU(_numSets, _blkSize, LRU1_assoc, _hit_latency, 1); - - if (total_ways - LRU1_assoc == 0) { - lifo_net = NULL; - lru_net = NULL; - } else { - if (lifo) { - DPRINTF(Split, "Other partition is a LIFO with size %d in bytes. 
it gets %d ways\n", - (total_ways - LRU1_assoc)*_numSets*_blkSize, (total_ways - LRU1_assoc)); - lifo_net = new SplitLIFO(_blkSize, (total_ways - LRU1_assoc)*_numSets*_blkSize, - (total_ways - LRU1_assoc), _hit_latency, _two_queue, 2); - lru_net = NULL; - } - else { - DPRINTF(Split, "other LRU gets %d ways\n", total_ways - LRU1_assoc); - lru_net = new SplitLRU(_numSets, _blkSize, total_ways - LRU1_assoc, _hit_latency, 2); - lifo_net = NULL; - } - } - - blkMask = blkSize - 1; - - if (!isPowerOf2(total_ways)) - warn("total cache ways/columns %d should be power of 2", - total_ways); - - warmedUp = false; - /** @todo Make warmup percentage a parameter. */ - warmupBound = numSets * total_ways; - -} - -Split::~Split() -{ - delete lru; - if (lifo) - delete lifo_net; - else - delete lru_net; -} - -void -Split::regStats(const string &name) -{ - using namespace Stats; - - BaseTags::regStats(name); - - usedEvictDist.init(0,3000,40); - unusedEvictDist.init(0,3000,40); - useByCPUCycleDist.init(0,35,1); - - nic_repl - .name(name + ".nic_repl") - .desc("number of replacements in the nic partition") - .precision(0) - ; - - cpu_repl - .name(name + ".cpu_repl") - .desc("number of replacements in the cpu partition") - .precision(0) - ; - - lru->regStats(name + ".lru"); - - if (lifo && lifo_net) { - lifo_net->regStats(name + ".lifo_net"); - } else if (lru_net) { - lru_net->regStats(name + ".lru_net"); - } - - nicUsedWhenEvicted - .name(name + ".nicUsedWhenEvicted") - .desc("number of NIC blks that were used before evicted") - ; - - nicUsedTotLatency - .name(name + ".nicUsedTotLatency") - .desc("total cycles before eviction of used NIC blks") - ; - - nicUsedTotEvicted - .name(name + ".nicUsedTotEvicted") - .desc("total number of used NIC blks evicted") - ; - - nicUsedAvgLatency - .name(name + ".nicUsedAvgLatency") - .desc("avg number of cycles a used NIC blk is in cache") - .precision(0) - ; - nicUsedAvgLatency = nicUsedTotLatency / nicUsedTotEvicted; - - usedEvictDist - .name(name + 
".usedEvictDist") - .desc("distribution of used NIC blk eviction times") - .flags(pdf | cdf) - ; - - nicUnusedWhenEvicted - .name(name + ".nicUnusedWhenEvicted") - .desc("number of NIC blks that were unused when evicted") - ; - - nicUnusedTotLatency - .name(name + ".nicUnusedTotLatency") - .desc("total cycles before eviction of unused NIC blks") - ; - - nicUnusedTotEvicted - .name(name + ".nicUnusedTotEvicted") - .desc("total number of unused NIC blks evicted") - ; - - nicUnusedAvgLatency - .name(name + ".nicUnusedAvgLatency") - .desc("avg number of cycles an unused NIC blk is in cache") - .precision(0) - ; - nicUnusedAvgLatency = nicUnusedTotLatency / nicUnusedTotEvicted; - - unusedEvictDist - .name(name + ".unusedEvictDist") - .desc("distribution of unused NIC blk eviction times") - .flags(pdf | cdf) - ; - - nicUseByCPUCycleTotal - .name(name + ".nicUseByCPUCycleTotal") - .desc("total latency of NIC blks til usage time") - ; - - nicBlksUsedByCPU - .name(name + ".nicBlksUsedByCPU") - .desc("total number of NIC blks used") - ; - - nicAvgUsageByCPULatency - .name(name + ".nicAvgUsageByCPULatency") - .desc("average number of cycles before a NIC blk that is used gets used") - .precision(0) - ; - nicAvgUsageByCPULatency = nicUseByCPUCycleTotal / nicBlksUsedByCPU; - - useByCPUCycleDist - .name(name + ".useByCPUCycleDist") - .desc("the distribution of cycle time in cache before NIC blk is used") - .flags(pdf | cdf) - ; - - cpuUsedBlks - .name(name + ".cpuUsedBlks") - .desc("number of cpu blks that were used before evicted") - ; - - cpuUnusedBlks - .name(name + ".cpuUnusedBlks") - .desc("number of cpu blks that were unused before evicted") - ; - - nicAvgLatency - .name(name + ".nicAvgLatency") - .desc("avg number of cycles a NIC blk is in cache before evicted") - .precision(0) - ; - nicAvgLatency = (nicUnusedTotLatency + nicUsedTotLatency) / - (nicUnusedTotEvicted + nicUsedTotEvicted); - - NR_CP_hits - .name(name + ".NR_CP_hits") - .desc("NIC requests hitting in CPU 
Partition") - ; - - NR_NP_hits - .name(name + ".NR_NP_hits") - .desc("NIC requests hitting in NIC Partition") - ; - - CR_CP_hits - .name(name + ".CR_CP_hits") - .desc("CPU requests hitting in CPU partition") - ; - - CR_NP_hits - .name(name + ".CR_NP_hits") - .desc("CPU requests hitting in NIC partition") - ; - -} - -// probe cache for presence of given block. -bool -Split::probe(Addr addr) const -{ - bool success = lru->probe(addr); - if (!success) { - if (lifo && lifo_net) - success = lifo_net->probe(addr); - else if (lru_net) - success = lru_net->probe(addr); - } - - return success; -} - - -SplitBlk* -Split::findBlock(Addr addr, int &lat) -{ - SplitBlk *blk = lru->findBlock(addr, lat); - if (!blk) { - if (lifo && lifo_net) { - blk = lifo_net->findBlock(addr, lat); - } else if (lru_net) { - blk = lru_net->findBlock(addr, lat); - } - } - - return blk; -} - -SplitBlk* -Split::findBlock(Addr addr) const -{ - SplitBlk *blk = lru->findBlock(addr); - if (!blk) { - if (lifo && lifo_net) { - blk = lifo_net->findBlock(addr); - } else if (lru_net) { - blk = lru_net->findBlock(addr); - } - } - - return blk; -} - -SplitBlk* -Split::findReplacement(Addr addr, PacketList &writebacks) -{ - SplitBlk *blk = NULL; - - assert(0); -#if 0 - if (pkt->nic_pkt()) { - DPRINTF(Split, "finding a replacement for nic_req\n"); - nic_repl++; - if (lifo && lifo_net) - blk = lifo_net->findReplacement(addr, writebacks); - else if (lru_net) - blk = lru_net->findReplacement(addr, writebacks); - // in this case, this is an LRU only cache, it's non partitioned - else - blk = lru->findReplacement(addr, writebacks); - } else { - DPRINTF(Split, "finding replacement for cpu_req\n"); - blk = lru->findReplacement(addr, writebacks); - cpu_repl++; - } - - Tick latency = curTick - blk->ts; - if (blk->isNIC) { - if (blk->isUsed) { - nicUsedWhenEvicted++; - usedEvictDist.sample(latency); - nicUsedTotLatency += latency; - nicUsedTotEvicted++; - } else { - nicUnusedWhenEvicted++; - unusedEvictDist.sample(latency); 
- nicUnusedTotLatency += latency; - nicUnusedTotEvicted++; - } - } else { - if (blk->isUsed) { - cpuUsedBlks++; - } else { - cpuUnusedBlks++; - } - } - - // blk attributes for the new blk coming IN - blk->ts = curTick; - blk->isNIC = (pkt->nic_pkt()) ? true : false; -#endif - - return blk; -} - -void -Split::invalidateBlk(Split::BlkType *blk) -{ - if (!blk) { - fatal("FIXME!\n"); -#if 0 - if (lifo && lifo_net) - blk = lifo_net->findBlock(addr); - else if (lru_net) - blk = lru_net->findBlock(addr); -#endif - - if (!blk) - return; - } - - blk->status = 0; - blk->isTouched = false; - tagsInUse--; -} - -void -Split::cleanupRefs() -{ - lru->cleanupRefs(); - if (lifo && lifo_net) - lifo_net->cleanupRefs(); - else if (lru_net) - lru_net->cleanupRefs(); - - ofstream memPrint(simout.resolve("memory_footprint.txt").c_str(), - ios::trunc); - - // this shouldn't be here but it happens at the end, which is what i want - memIter end = memHash.end(); - for (memIter iter = memHash.begin(); iter != end; ++iter) { - ccprintf(memPrint, "%8x\t%d\n", (*iter).first, (*iter).second); - } -} - -Addr -Split::regenerateBlkAddr(Addr tag, int set) const -{ - if (lifo_net) - return lifo_net->regenerateBlkAddr(tag, set); - else - return lru->regenerateBlkAddr(tag, set); -} - -Addr -Split::extractTag(Addr addr) const -{ - // need to fix this if we want to use it... old interface of - // passing in blk was too weird - assert(0); - return 0; -/* - if (blk->part == 2) { - if (lifo_net) - return lifo_net->extractTag(addr); - else if (lru_net) - return lru_net->extractTag(addr); - else - panic("this shouldn't happen"); - } else - return lru->extractTag(addr); -*/ -} - diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh deleted file mode 100644 index e8954f791..000000000 --- a/src/mem/cache/tags/split.hh +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Lisa Hsu - */ - -/** - * @file - * Declaration of a split/partitioned tag store. - */ - -#ifndef __SPLIT_HH__ -#define __SPLIT_HH__ - -#include <cstring> -#include <list> - -#include "mem/cache/blk.hh" // base class -#include "mem/cache/tags/split_blk.hh" -#include "mem/packet.hh" // for inlined functions -#include <assert.h> -#include "mem/cache/tags/base.hh" -#include "base/hashmap.hh" - -class BaseCache; -class SplitLRU; -class SplitLIFO; - -/** - * A cache tag store. 
- */ -class Split : public BaseTags -{ - public: - /** Typedef the block type used in this tag store. */ - typedef SplitBlk BlkType; - /** Typedef for a list of pointers to the local block class. */ - typedef std::list<SplitBlk*> BlkList; - protected: - /** The number of sets in the cache. */ - const int numSets; - /** The number of bytes in a block. */ - const int blkSize; - /** Whether the 2nd partition (for the nic) is LIFO or not */ - const bool lifo; - /** The hit latency. */ - const int hitLatency; - - Addr blkMask; - - /** Number of NIC requests that hit in the NIC partition */ - Stats::Scalar<> NR_NP_hits; - /** Number of NIC requests that hit in the CPU partition */ - Stats::Scalar<> NR_CP_hits; - /** Number of CPU requests that hit in the NIC partition */ - Stats::Scalar<> CR_NP_hits; - /** Number of CPU requests that hit in the CPU partition */ - Stats::Scalar<> CR_CP_hits; - /** The number of nic replacements (i.e. misses) */ - Stats::Scalar<> nic_repl; - /** The number of cpu replacements (i.e. 
misses) */ - Stats::Scalar<> cpu_repl; - - //For latency studies - /** the number of NIC blks that were used before evicted */ - Stats::Scalar<> nicUsedWhenEvicted; - /** the total latency of used NIC blocks in the cache */ - Stats::Scalar<> nicUsedTotLatency; - /** the total number of used NIC blocks evicted */ - Stats::Scalar<> nicUsedTotEvicted; - /** the average number of cycles a used NIC blk is in the cache */ - Stats::Formula nicUsedAvgLatency; - /** the Distribution of used NIC blk eviction times */ - Stats::Distribution<> usedEvictDist; - - /** the number of NIC blks that were unused before evicted */ - Stats::Scalar<> nicUnusedWhenEvicted; - /** the total latency of unused NIC blks in the cache */ - Stats::Scalar<> nicUnusedTotLatency; - /** the total number of unused NIC blocks evicted */ - Stats::Scalar<> nicUnusedTotEvicted; - /** the average number of cycles an unused NIC blk is in the cache */ - Stats::Formula nicUnusedAvgLatency; - /** the Distribution of unused NIC blk eviction times */ - Stats::Distribution<> unusedEvictDist; - - /** The total latency of NIC blocks to 1st usage time by CPU */ - Stats::Scalar<> nicUseByCPUCycleTotal; - /** The total number of NIC blocks used */ - Stats::Scalar<> nicBlksUsedByCPU; - /** the average number of cycles before a NIC blk that is used gets used by CPU */ - Stats::Formula nicAvgUsageByCPULatency; - /** the Distribution of cycles time before a NIC blk is used by CPU*/ - Stats::Distribution<> useByCPUCycleDist; - - /** the number of CPU blks that were used before evicted */ - Stats::Scalar<> cpuUsedBlks; - /** the number of CPU blks that were unused before evicted */ - Stats::Scalar<> cpuUnusedBlks; - - /** the avg number of cycles before a NIC blk is evicted */ - Stats::Formula nicAvgLatency; - - typedef m5::hash_map<Addr, int, m5::hash<Addr> > hash_t; - typedef hash_t::const_iterator memIter; - hash_t memHash; - - - private: - SplitLRU *lru; - SplitLRU *lru_net; - SplitLIFO *lifo_net; - - public: - /** - * 
Construct and initialize this tag store. - * @param _numSets The number of sets in the cache. - * @param _blkSize The number of bytes in a block. - * @param _assoc The associativity of the cache. - * @param _hit_latency The latency in cycles for a hit. - */ - Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc, - bool _lifo, bool _two_queue, int _hit_latency); - - /** - * Destructor - */ - virtual ~Split(); - - /** - * Register the stats for this object - * @param name The name to prepend to the stats name. - */ - void regStats(const std::string &name); - - /** - * Return the block size. - * @return the block size. - */ - int getBlockSize() - { - return blkSize; - } - - /** - * Return the subblock size. In the case of Split it is always the block - * size. - * @return The block size. - */ - int getSubBlockSize() - { - return blkSize; - } - - /** - * Search for the address in the cache. - * @param asid The address space ID. - * @param addr The address to find. - * @return True if the address is in the cache. - */ - bool probe(Addr addr) const; - - /** - * Invalidate the given block. - * @param blk The block to invalidate. - */ - void invalidateBlk(BlkType *blk); - - /** - * Finds the given address in the cache and update replacement data. - * Returns the access latency as a side effect. - * @param addr The address to find. - * @param asid The address space ID. - * @param lat The access latency. - * @return Pointer to the cache block if found. - */ - SplitBlk* findBlock(Addr addr, int &lat); - - /** - * Finds the given address in the cache, do not update replacement data. - * @param addr The address to find. - * @param asid The address space ID. - * @return Pointer to the cache block if found. - */ - SplitBlk* findBlock(Addr addr) const; - - /** - * Find a replacement block for the address provided. - * @param pkt The request to a find a replacement candidate for. - * @param writebacks List for any writebacks to be performed. 
- * @return The block to place the replacement in. - */ - SplitBlk* findReplacement(Addr addr, PacketList &writebacks); - - - /** - * Generate the tag from the given address. - * @param addr The address to get the tag from. - * @return The tag of the address. - */ - Addr extractTag(Addr addr) const; - - /** - * Calculate the set index from the address. - * @param addr The address to get the set from. - * @return The set index of the address. - */ - int extractSet(Addr addr) const - { - panic("should never call this!\n"); - M5_DUMMY_RETURN - } - - /** - * Get the block offset from an address. - * @param addr The address to get the offset of. - * @return The block offset. - */ - int extractBlkOffset(Addr addr) const - { - return (addr & blkMask); - } - - /** - * Align an address to the block size. - * @param addr the address to align. - * @return The block address. - */ - Addr blkAlign(Addr addr) const - { - return (addr & ~(Addr) (blkMask)); - } - - /** - * Regenerate the block address from the tag. - * @param tag The tag of the block. - * @param set The set of the block. - * @return The block address. - */ - Addr regenerateBlkAddr(Addr tag, int set) const; - - /** - * Return the hit latency. - * @return the hit latency. - */ - int getHitLatency() const - { - return hitLatency; - } - - /** - * Read the data out of the internal storage of the given cache block. - * @param blk The cache block to read. - * @param data The buffer to read the data into. - * @return The cache block's data. - */ - void readData(SplitBlk *blk, uint8_t *data) - { - std::memcpy(data, blk->data, blk->size); - } - - /** - * Write data into the internal storage of the given cache block. Since in - * Split does not store data differently this just needs to update the size. - * @param blk The cache block to write. - * @param data The data to write. - * @param size The number of bytes to write. - * @param writebacks A list for any writebacks to be performed. 
May be - * needed when writing to a compressed block. - */ - void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList & writebacks) - { - assert(size <= blkSize); - blk->size = size; - } - - /** - * Called at end of simulation to complete average block reference stats. - */ - virtual void cleanupRefs(); -}; - -#endif diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc deleted file mode 100644 index 3bdc7cae9..000000000 --- a/src/mem/cache/tags/split_lifo.cc +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Lisa Hsu - */ - -/** - * @file - * Definitions of LIFO tag store usable in a partitioned cache. - */ - -#include <string> - -#include "mem/cache/base.hh" -#include "base/intmath.hh" -#include "mem/cache/tags/split_lifo.hh" -#include "sim/core.hh" -#include "base/trace.hh" - -using namespace std; - -SplitBlk* -LIFOSet::findBlk(Addr tag) const -{ - for (SplitBlk *blk = firstIn; blk != NULL; blk = blk->next) { - if (blk->tag == tag && blk->isValid()) { - return blk; - } - } - return NULL; -} - -void -LIFOSet::moveToLastIn(SplitBlk *blk) -{ - if (blk == lastIn) - return; - - if (blk == firstIn) { - blk->next->prev = NULL; - } else { - blk->prev->next = blk->next; - blk->next->prev = blk->prev; - } - blk->next = NULL; - blk->prev = lastIn; - lastIn->next = blk; - - lastIn = blk; -} - -void -LIFOSet::moveToFirstIn(SplitBlk *blk) -{ - if (blk == firstIn) - return; - - if (blk == lastIn) { - blk->prev->next = NULL; - } else { - blk->next->prev = blk->prev; - blk->prev->next = blk->next; - } - - blk->prev = NULL; - blk->next = firstIn; - firstIn->prev = blk; - - firstIn = blk; -} - -// create and initialize a LIFO cache structure -SplitLIFO::SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool two_Queue, int _part) : - blkSize(_blkSize), size(_size), numBlks(_size/_blkSize), numSets((_size/_ways)/_blkSize), ways(_ways), - hitLatency(_hit_latency), twoQueue(two_Queue), part(_part) -{ - if (!isPowerOf2(blkSize)) 
- fatal("cache block size (in bytes) must be a power of 2"); - if (!(hitLatency > 0)) - fatal("access latency in cycles must be at least on cycle"); - if (_ways == 0) - fatal("if instantiating a splitLIFO, needs non-zero size!"); - - - SplitBlk *blk; - int i, j, blkIndex; - - setShift = floorLog2(blkSize); - blkMask = blkSize - 1; - setMask = numSets - 1; - tagShift = setShift + floorLog2(numSets); - - warmedUp = false; - /** @todo Make warmup percentage a parameter. */ - warmupBound = size/blkSize; - - // allocate data blocks - blks = new SplitBlk[numBlks]; - sets = new LIFOSet[numSets]; - dataBlks = new uint8_t[size]; - -/* - // these start off point to same blk - top = &(blks[0]); - head = top; -*/ - - blkIndex = 0; - for (i=0; i < numSets; ++i) { - sets[i].ways = ways; - sets[i].lastIn = &blks[blkIndex]; - sets[i].firstIn = &blks[blkIndex + ways - 1]; - - /* 3 cases: if there is 1 way, if there are 2 ways, or if there are 3+. - in the case of 1 way, last in and first out point to the same blocks, - and the next and prev pointers need to be assigned specially. 
and so on - */ - /* deal with the first way */ - blk = &blks[blkIndex]; - blk->prev = &blks[blkIndex + 1]; - blk->next = NULL; - blk->data = &dataBlks[blkSize*blkIndex]; - blk->size = blkSize; - blk->part = part; - blk->set = i; - ++blkIndex; - - /* if there are "middle" ways, do them here */ - if (ways > 2) { - for (j=1; j < ways-1; ++j) { - blk = &blks[blkIndex]; - blk->data = &dataBlks[blkSize*blkIndex]; - blk->prev = &blks[blkIndex+1]; - blk->next = &blks[blkIndex-1]; - blk->data = &(dataBlks[blkSize*blkIndex]); - blk->size = blkSize; - blk->part = part; - blk->set = i; - ++blkIndex; - } - } - - /* do the final way here, depending on whether the final way is the only - way or not - */ - if (ways > 1) { - blk = &blks[blkIndex]; - blk->prev = NULL; - blk->next = &blks[blkIndex - 1]; - blk->data = &dataBlks[blkSize*blkIndex]; - blk->size = blkSize; - blk->part = part; - blk->set = i; - ++blkIndex; - } else { - blk->prev = NULL; - } - } - assert(blkIndex == numBlks); -} - -SplitLIFO::~SplitLIFO() -{ - delete [] blks; - delete [] sets; - delete [] dataBlks; -} - -void -SplitLIFO::regStats(const std::string &name) -{ - BaseTags::regStats(name); - - hits - .name(name + ".hits") - .desc("number of hits on this partition") - .precision(0) - ; - - misses - .name(name + ".misses") - .desc("number of misses in this partition") - .precision(0) - ; - - invalidations - .name(name + ".invalidations") - .desc("number of invalidations in this partition") - .precision(0) - ; -} - -// probe cache for presence of given block. 
-bool -SplitLIFO::probe(Addr addr) const -{ - Addr tag = extractTag(addr); - unsigned myset = extractSet(addr); - - SplitBlk* blk = sets[myset].findBlk(tag); - return (blk != NULL); -} - -SplitBlk* -SplitLIFO::findBlock(Addr addr, int &lat) -{ - Addr tag = extractTag(addr); - unsigned set = extractSet(addr); - SplitBlk *blk = sets[set].findBlk(tag); - - lat = hitLatency; - - if (blk) { - DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n", - addr, set, tag); - hits++; - - if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency) - lat = blk->whenReady - curTick; - blk->refCount +=1; - - if (twoQueue) { - blk->isUsed = true; - sets[set].moveToFirstIn(blk); - } else { - sets[set].moveToLastIn(blk); - } - } - - return blk; -} - - -SplitBlk* -SplitLIFO::findBlock(Addr addr) const -{ - Addr tag = extractTag(addr); - unsigned set = extractSet(addr); - SplitBlk *blk = sets[set].findBlk(tag); - - return blk; -} - -SplitBlk* -SplitLIFO::findReplacement(Addr addr, PacketList &writebacks) -{ - unsigned set = extractSet(addr); - - SplitBlk *firstIn = sets[set].firstIn; - SplitBlk *lastIn = sets[set].lastIn; - - SplitBlk *blk; - if (twoQueue && firstIn->isUsed) { - blk = firstIn; - blk->isUsed = false; - sets[set].moveToLastIn(blk); - } else { - int withValue = sets[set].withValue; - if (withValue == ways) { - blk = lastIn; - } else { - blk = &(sets[set].firstIn[ways - ++withValue]); - } - } - - DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", - addr, regenerateBlkAddr(blk->tag, set), blk->status); - if (blk->isValid()) { - replacements[0]++; - totalRefs += blk->refCount; - ++sampledRefs; - blk->refCount = 0; - } else { - tagsInUse++; - blk->isTouched = true; - if (!warmedUp && tagsInUse.value() >= warmupBound) { - warmedUp = true; - warmupCycle = curTick; - } - } - - misses++; - - return blk; -} - -void -SplitLIFO::invalidateBlk(SplitLIFO::BlkType *blk) -{ - if (blk) { - blk->status = 0; - blk->isTouched = false; - 
tagsInUse--; - invalidations++; - } -} - -void -SplitLIFO::cleanupRefs() -{ - for (int i = 0; i < numBlks; ++i) { - if (blks[i].isValid()) { - totalRefs += blks[i].refCount; - ++sampledRefs; - } - } -} diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh deleted file mode 100644 index 0fd5f5c3c..000000000 --- a/src/mem/cache/tags/split_lifo.hh +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Lisa Hsu - */ - -/** - * @file - * Declaration of a LIFO tag store usable in a partitioned cache. - */ - -#ifndef __SPLIT_LIFO_HH__ -#define __SPLIT_LIFO_HH__ - -#include <cstring> -#include <list> - -#include "mem/cache/blk.hh" // base class -#include "mem/cache/tags/split_blk.hh" -#include "mem/packet.hh" // for inlined functions -#include "base/hashmap.hh" -#include <assert.h> -#include "mem/cache/tags/base.hh" - -class BaseCache; - -/** - * A LIFO set of cache blks - */ -class LIFOSet { - public: - /** the number of blocks in this set */ - int ways; - - /** Cache blocks in this set, maintained in LIFO order where - 0 = Last in (head) */ - SplitBlk *lastIn; - SplitBlk *firstIn; - - /** has the initial "filling" of this set finished? i.e., have you had - * 'ways' number of compulsory misses in this set yet? if withValue == ways, - * then yes. withValue is meant to be the number of blocks in the set that have - * gone through their first compulsory miss. - */ - int withValue; - - /** - * Find a block matching the tag in this set. - * @param asid The address space ID. - * @param tag the Tag you are looking for - * @return Pointer to the block, if found, NULL otherwise - */ - SplitBlk* findBlk(Addr tag) const; - - void moveToLastIn(SplitBlk *blk); - void moveToFirstIn(SplitBlk *blk); - - LIFOSet() - : ways(-1), lastIn(NULL), firstIn(NULL), withValue(0) - {} -}; - -/** - * A LIFO cache tag store. 
- */ -class SplitLIFO : public BaseTags -{ - public: - /** Typedef the block type used in this tag store. */ - typedef SplitBlk BlkType; - /** Typedef for a list of pointers to the local block class. */ - typedef std::list<SplitBlk*> BlkList; - protected: - /** The number of bytes in a block. */ - const int blkSize; - /** the size of the cache in bytes */ - const int size; - /** the number of blocks in the cache */ - const int numBlks; - /** the number of sets in the cache */ - const int numSets; - /** the number of ways in the cache */ - const int ways; - /** The hit latency. */ - const int hitLatency; - /** whether this is a "2 queue" replacement @sa moveToLastIn @sa moveToFirstIn */ - const bool twoQueue; - /** indicator for which partition this is */ - const int part; - - /** The cache blocks. */ - SplitBlk *blks; - /** The Cache sets */ - LIFOSet *sets; - /** The data blocks, 1 per cache block. */ - uint8_t *dataBlks; - - /** The amount to shift the address to get the set. */ - int setShift; - /** The amount to shift the address to get the tag. */ - int tagShift; - /** Mask out all bits that aren't part of the set index. */ - unsigned setMask; - /** Mask out all bits that aren't part of the block offset. */ - unsigned blkMask; - - - /** the number of hit in this partition */ - Stats::Scalar<> hits; - /** the number of blocks brought into this partition (i.e. misses) */ - Stats::Scalar<> misses; - /** the number of invalidations in this partition */ - Stats::Scalar<> invalidations; - -public: - /** - * Construct and initialize this tag store. - * @param _numSets The number of sets in the cache. - * @param _blkSize The number of bytes in a block. - * @param _assoc The associativity of the cache. - * @param _hit_latency The latency in cycles for a hit. 
- */ - SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool twoQueue, int _part); - - /** - * Destructor - */ - virtual ~SplitLIFO(); - - /** - * Register the statistics for this object - * @param name The name to precede the stat - */ - void regStats(const std::string &name); - - /** - * Return the block size. - * @return the block size. - */ - int getBlockSize() - { - return blkSize; - } - - /** - * Return the subblock size. In the case of LIFO it is always the block - * size. - * @return The block size. - */ - int getSubBlockSize() - { - return blkSize; - } - - /** - * Search for the address in the cache. - * @param asid The address space ID. - * @param addr The address to find. - * @return True if the address is in the cache. - */ - bool probe( Addr addr) const; - - /** - * Invalidate the given block. - * @param blk The block to invalidate. - */ - void invalidateBlk(BlkType *blk); - - /** - * Finds the given address in the cache and update replacement data. - * Returns the access latency as a side effect. - * @param addr The address to find. - * @param asid The address space ID. - * @param lat The access latency. - * @return Pointer to the cache block if found. - */ - SplitBlk* findBlock(Addr addr, int &lat); - - /** - * Finds the given address in the cache, do not update replacement data. - * @param addr The address to find. - * @param asid The address space ID. - * @return Pointer to the cache block if found. - */ - SplitBlk* findBlock(Addr addr) const; - - /** - * Find a replacement block for the address provided. - * @param pkt The request to a find a replacement candidate for. - * @param writebacks List for any writebacks to be performed. - * @return The block to place the replacement in. - */ - SplitBlk* findReplacement(Addr addr, PacketList &writebacks); - - /** - * Generate the tag from the given address. - * @param addr The address to get the tag from. - * @return The tag of the address. 
- */ - Addr extractTag(Addr addr) const - { - return (addr >> tagShift); - } - - /** - * Calculate the set index from the address. - * @param addr The address to get the set from. - * @return The set index of the address. - */ - int extractSet(Addr addr) const - { - return ((addr >> setShift) & setMask); - } - - /** - * Get the block offset from an address. - * @param addr The address to get the offset of. - * @return The block offset. - */ - int extractBlkOffset(Addr addr) const - { - return (addr & blkMask); - } - - /** - * Align an address to the block size. - * @param addr the address to align. - * @return The block address. - */ - Addr blkAlign(Addr addr) const - { - return (addr & ~(Addr)blkMask); - } - - /** - * Regenerate the block address from the tag. - * @param tag The tag of the block. - * @param set The set of the block. - * @return The block address. - */ - Addr regenerateBlkAddr(Addr tag, unsigned set) const - { - return ((tag << tagShift) | ((Addr)set << setShift)); - } - - /** - * Return the hit latency. - * @return the hit latency. - */ - int getHitLatency() const - { - return hitLatency; - } - - /** - * Read the data out of the internal storage of the given cache block. - * @param blk The cache block to read. - * @param data The buffer to read the data into. - * @return The cache block's data. - */ - void readData(SplitBlk *blk, uint8_t *data) - { - std::memcpy(data, blk->data, blk->size); - } - - /** - * Write data into the internal storage of the given cache block. Since in - * LIFO does not store data differently this just needs to update the size. - * @param blk The cache block to write. - * @param data The data to write. - * @param size The number of bytes to write. - * @param writebacks A list for any writebacks to be performed. May be - * needed when writing to a compressed block. 
- */ - void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList & writebacks) - { - assert(size <= blkSize); - blk->size = size; - } - - /** - * Called at end of simulation to complete average block reference stats. - */ - virtual void cleanupRefs(); -}; - -#endif diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc deleted file mode 100644 index bcccdcb30..000000000 --- a/src/mem/cache/tags/split_lru.cc +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Lisa Hsu - */ - -/** - * @file - * Definitions of LRU tag store for a partitioned cache. - */ - -#include <string> - -#include "mem/cache/base.hh" -#include "base/intmath.hh" -#include "mem/cache/tags/split_lru.hh" -#include "sim/core.hh" - -using namespace std; - -SplitBlk* -SplitCacheSet::findBlk(Addr tag) const -{ - for (int i = 0; i < assoc; ++i) { - if (blks[i]->tag == tag && blks[i]->isValid()) { - return blks[i]; - } - } - return 0; -} - - -void -SplitCacheSet::moveToHead(SplitBlk *blk) -{ - // nothing to do if blk is already head - if (blks[0] == blk) - return; - - // write 'next' block into blks[i], moving up from MRU toward LRU - // until we overwrite the block we moved to head. 
- - // start by setting up to write 'blk' into blks[0] - int i = 0; - SplitBlk *next = blk; - - do { - assert(i < assoc); - // swap blks[i] and next - SplitBlk *tmp = blks[i]; - blks[i] = next; - next = tmp; - ++i; - } while (next != blk); -} - - -// create and initialize a LRU/MRU cache structure -SplitLRU::SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part) : - numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency), part(_part) -{ - // Check parameters - if (blkSize < 4 || !isPowerOf2(blkSize)) { - fatal("Block size must be at least 4 and a power of 2"); - } - if (numSets <= 0 || !isPowerOf2(numSets)) { - fatal("# of sets must be non-zero and a power of 2"); - } - if (assoc <= 0) { - fatal("associativity must be greater than zero"); - } - if (hitLatency <= 0) { - fatal("access latency must be greater than zero"); - } - - SplitBlk *blk; - int i, j, blkIndex; - - blkMask = blkSize - 1; - setShift = floorLog2(blkSize); - setMask = numSets - 1; - tagShift = setShift + floorLog2(numSets); - warmedUp = false; - /** @todo Make warmup percentage a parameter. */ - warmupBound = numSets * assoc; - - sets = new SplitCacheSet[numSets]; - blks = new SplitBlk[numSets * assoc]; - // allocate data storage in one big chunk - dataBlks = new uint8_t[numSets*assoc*blkSize]; - - blkIndex = 0; // index into blks array - for (i = 0; i < numSets; ++i) { - sets[i].assoc = assoc; - - sets[i].blks = new SplitBlk*[assoc]; - - // link in the data blocks - for (j = 0; j < assoc; ++j) { - // locate next cache block - blk = &blks[blkIndex]; - blk->data = &dataBlks[blkSize*blkIndex]; - ++blkIndex; - - // invalidate new cache block - blk->status = 0; - - //EGH Fix Me : do we need to initialize blk? 
- - // Setting the tag to j is just to prevent long chains in the hash - // table; won't matter because the block is invalid - blk->tag = j; - blk->whenReady = 0; - blk->isTouched = false; - blk->size = blkSize; - sets[i].blks[j]=blk; - blk->set = i; - blk->part = part; - } - } -} - -SplitLRU::~SplitLRU() -{ - delete [] dataBlks; - delete [] blks; - delete [] sets; -} - -void -SplitLRU::regStats(const std::string &name) -{ - BaseTags::regStats(name); - - hits - .name(name + ".hits") - .desc("number of hits on this partition") - .precision(0) - ; - - misses - .name(name + ".misses") - .desc("number of misses in this partition") - .precision(0) - ; -} - -// probe cache for presence of given block. -bool -SplitLRU::probe(Addr addr) const -{ - // return(findBlock(Read, addr, asid) != 0); - Addr tag = extractTag(addr); - unsigned myset = extractSet(addr); - - SplitBlk *blk = sets[myset].findBlk(tag); - - return (blk != NULL); // true if in cache -} - -SplitBlk* -SplitLRU::findBlock(Addr addr, int &lat) -{ - Addr tag = extractTag(addr); - unsigned set = extractSet(addr); - SplitBlk *blk = sets[set].findBlk(tag); - lat = hitLatency; - if (blk != NULL) { - // move this block to head of the MRU list - sets[set].moveToHead(blk); - if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency){ - lat = blk->whenReady - curTick; - } - blk->refCount += 1; - hits++; - } - - return blk; -} - - -SplitBlk* -SplitLRU::findBlock(Addr addr) const -{ - Addr tag = extractTag(addr); - unsigned set = extractSet(addr); - SplitBlk *blk = sets[set].findBlk(tag); - return blk; -} - -SplitBlk* -SplitLRU::findReplacement(Addr addr, PacketList &writebacks) -{ - unsigned set = extractSet(addr); - // grab a replacement candidate - SplitBlk *blk = sets[set].blks[assoc-1]; - sets[set].moveToHead(blk); - if (blk->isValid()) { - replacements[0]++; - totalRefs += blk->refCount; - ++sampledRefs; - blk->refCount = 0; - } else if (!blk->isTouched) { - tagsInUse++; - blk->isTouched = true; - if 
(!warmedUp && tagsInUse.value() >= warmupBound) { - warmedUp = true; - warmupCycle = curTick; - } - } - - misses++; - - return blk; -} - -void -SplitLRU::invalidateBlk(SplitLRU::BlkType *blk) -{ - if (blk) { - blk->status = 0; - blk->isTouched = false; - tagsInUse--; - } -} - -void -SplitLRU::cleanupRefs() -{ - for (int i = 0; i < numSets*assoc; ++i) { - if (blks[i].isValid()) { - totalRefs += blks[i].refCount; - ++sampledRefs; - } - } -} diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh deleted file mode 100644 index d41b6efa7..000000000 --- a/src/mem/cache/tags/split_lru.hh +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Lisa Hsu - */ - -/** - * @file - * Declaration of a LRU tag store for a partitioned cache. - */ - -#ifndef __SPLIT_LRU_HH__ -#define __SPLIT_LRU_HH__ - -#include <cstring> -#include <list> - -#include "mem/cache/blk.hh" // base class -#include "mem/cache/tags/split_blk.hh" -#include "mem/packet.hh" // for inlined functions -#include <assert.h> -#include "mem/cache/tags/base.hh" - -class BaseCache; - -/** - * An associative set of cache blocks. - */ - -class SplitCacheSet -{ - public: - /** The associativity of this set. */ - int assoc; - - /** Cache blocks in this set, maintained in LRU order 0 = MRU. */ - SplitBlk **blks; - - /** - * Find a block matching the tag in this set. - * @param asid The address space ID. - * @param tag The Tag to find. - * @return Pointer to the block if found. - */ - SplitBlk* findBlk(Addr tag) const; - - /** - * Move the given block to the head of the list. - * @param blk The block to move. - */ - void moveToHead(SplitBlk *blk); -}; - -/** - * A LRU cache tag store. - */ -class SplitLRU : public BaseTags -{ - public: - /** Typedef the block type used in this tag store. */ - typedef SplitBlk BlkType; - /** Typedef for a list of pointers to the local block class. */ - typedef std::list<SplitBlk*> BlkList; - protected: - /** The number of sets in the cache. */ - const int numSets; - /** The number of bytes in a block. */ - const int blkSize; - /** The associativity of the cache. 
*/ - const int assoc; - /** The hit latency. */ - const int hitLatency; - /** indicator for which partition this is */ - const int part; - - /** The cache sets. */ - SplitCacheSet *sets; - - /** The cache blocks. */ - SplitBlk *blks; - /** The data blocks, 1 per cache block. */ - uint8_t *dataBlks; - - /** The amount to shift the address to get the set. */ - int setShift; - /** The amount to shift the address to get the tag. */ - int tagShift; - /** Mask out all bits that aren't part of the set index. */ - unsigned setMask; - /** Mask out all bits that aren't part of the block offset. */ - unsigned blkMask; - - /** number of hits in this partition */ - Stats::Scalar<> hits; - /** number of blocks brought into this partition (i.e. misses) */ - Stats::Scalar<> misses; - -public: - /** - * Construct and initialize this tag store. - * @param _numSets The number of sets in the cache. - * @param _blkSize The number of bytes in a block. - * @param _assoc The associativity of the cache. - * @param _hit_latency The latency in cycles for a hit. - */ - SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part); - - /** - * Destructor - */ - virtual ~SplitLRU(); - - /** - * Register the statistics for this object - * @param name The name to precede the stat - */ - void regStats(const std::string &name); - - /** - * Return the block size. - * @return the block size. - */ - int getBlockSize() - { - return blkSize; - } - - /** - * Return the subblock size. In the case of LRU it is always the block - * size. - * @return The block size. - */ - int getSubBlockSize() - { - return blkSize; - } - - /** - * Search for the address in the cache. - * @param asid The address space ID. - * @param addr The address to find. - * @return True if the address is in the cache. - */ - bool probe(Addr addr) const; - - /** - * Invalidate the given block. - * @param blk The block to invalidate. 
- */ - void invalidateBlk(BlkType *blk); - - /** - * Finds the given address in the cache and update replacement data. - * Returns the access latency as a side effect. - * @param addr The address to find. - * @param asid The address space ID. - * @param lat The access latency. - * @return Pointer to the cache block if found. - */ - SplitBlk* findBlock(Addr addr, int &lat); - - /** - * Finds the given address in the cache, do not update replacement data. - * @param addr The address to find. - * @param asid The address space ID. - * @return Pointer to the cache block if found. - */ - SplitBlk* findBlock(Addr addr) const; - - /** - * Find a replacement block for the address provided. - * @param pkt The request to a find a replacement candidate for. - * @param writebacks List for any writebacks to be performed. - * @return The block to place the replacement in. - */ - SplitBlk* findReplacement(Addr addr, PacketList &writebacks); - - /** - * Generate the tag from the given address. - * @param addr The address to get the tag from. - * @return The tag of the address. - */ - Addr extractTag(Addr addr) const - { - return (addr >> tagShift); - } - - /** - * Calculate the set index from the address. - * @param addr The address to get the set from. - * @return The set index of the address. - */ - int extractSet(Addr addr) const - { - return ((addr >> setShift) & setMask); - } - - /** - * Get the block offset from an address. - * @param addr The address to get the offset of. - * @return The block offset. - */ - int extractBlkOffset(Addr addr) const - { - return (addr & blkMask); - } - - /** - * Align an address to the block size. - * @param addr the address to align. - * @return The block address. - */ - Addr blkAlign(Addr addr) const - { - return (addr & ~(Addr)blkMask); - } - - /** - * Regenerate the block address from the tag. - * @param tag The tag of the block. - * @param set The set of the block. - * @return The block address. 
- */ - Addr regenerateBlkAddr(Addr tag, unsigned set) const - { - return ((tag << tagShift) | ((Addr)set << setShift)); - } - - /** - * Return the hit latency. - * @return the hit latency. - */ - int getHitLatency() const - { - return hitLatency; - } - - /** - * Read the data out of the internal storage of the given cache block. - * @param blk The cache block to read. - * @param data The buffer to read the data into. - * @return The cache block's data. - */ - void readData(SplitBlk *blk, uint8_t *data) - { - std::memcpy(data, blk->data, blk->size); - } - - /** - * Write data into the internal storage of the given cache block. Since in - * LRU does not store data differently this just needs to update the size. - * @param blk The cache block to write. - * @param data The data to write. - * @param size The number of bytes to write. - * @param writebacks A list for any writebacks to be performed. May be - * needed when writing to a compressed block. - */ - void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList & writebacks) - { - assert(size <= blkSize); - blk->size = size; - } - - /** - * Called at end of simulation to complete average block reference stats. 
- */ - virtual void cleanupRefs(); -}; - -#endif diff --git a/src/mem/config/cache.hh b/src/mem/config/cache.hh index 24da04021..946ed444f 100644 --- a/src/mem/config/cache.hh +++ b/src/mem/config/cache.hh @@ -36,7 +36,4 @@ */ #define USE_CACHE_LRU 1 #define USE_CACHE_FALRU 1 -// #define USE_CACHE_SPLIT 1 -// #define USE_CACHE_SPLIT_LIFO 1 #define USE_CACHE_IIC 1 - diff --git a/src/mem/dram.cc b/src/mem/dram.cc index 75146f9ed..ff01ab1dc 100644 --- a/src/mem/dram.cc +++ b/src/mem/dram.cc @@ -366,7 +366,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) int lat=0, temp=0, current_bank=0; int current_row=0, current_device=0; - int was_miss = 0; // determines if there was an active row miss this access + int was_miss = 0; // determines if there was an active row miss this access //md_addr_t physic_address; /* linear memory address to be accessed */ Addr physic_address; /* linear memory address to be accessed */ @@ -415,7 +415,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) int SD_BEST_T_WRITE_WRITE_OBANK = 0; /* WAW, row miss/hit, another bank */ Tick time_since_last_access = curTick-time_last_access; - Tick time_last_miss = 0; // used for keeping track of times between activations (page misses) + Tick time_last_miss = 0; // used for keeping track of times between activations (page misses) //int was_idle = (curTick > busy_until); bool srow_flag = false; int timing_correction = 0; @@ -433,7 +433,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) // SDRAM does not use the active_row array in closed_page mode // TODO: handle closed page operation - } else { // DRDRAM uses the active_row array + } else { // DRDRAM uses the active_row array for( int i = 0; i < bank_max; i++ ) { if( (active_row[current_bank] != row_max)) all_precharged = 0; } @@ -923,7 +923,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) } // cout <<"cpu id = " << _cpu_num << "current_bank = " << current_bank << endl; // if((_cpu_num < num_cpus) && (_cpu_num >= 0)) - // 
bank_access_profile[_cpu_num][current_bank]++; + // bank_access_profile[_cpu_num][current_bank]++; return lat; } @@ -2034,7 +2034,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) // if((_cpu_num < num_cpus) && (_cpu_num >= 0)) // cout <<"cpu id = " << _cpu_num << "current_bank = " << current_bank << endl; - // bank_access_profile[_cpu_num][current_bank]++; + // bank_access_profile[_cpu_num][current_bank]++; return lat; } @@ -2226,7 +2226,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) /*fprintf(stderr,"%10.0f %10.0f %4d %4d \n",(double)busy_until, (double)curTick, overlap, lat);debug*/ // if((_cpu_num < num_cpus) && (_cpu_num >= 0)) // cout <<"cpu id = " << _cpu_num << "current_bank = " << current_bank << endl; - // bank_access_profile[_cpu_num][current_bank]++; + // bank_access_profile[_cpu_num][current_bank]++; return lat; } @@ -2468,7 +2468,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) // if((_cpu_num < num_cpus) && (_cpu_num >= 0)) // cout <<"cpu id = " << _cpu_num << "current_bank = " << current_bank << endl; - // bank_access_profile[_cpu_num][current_bank]++; + // bank_access_profile[_cpu_num][current_bank]++; return lat; } @@ -2525,7 +2525,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) // if((_cpu_num < num_cpus) && (_cpu_num >= 0)) // cout <<"cpu id = " << _cpu_num << "current_bank = " << current_bank << endl; - // bank_access_profile[_cpu_num][current_bank]++; + // bank_access_profile[_cpu_num][current_bank]++; return lat; } @@ -2593,7 +2593,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) } // if((_cpu_num < num_cpus) && (_cpu_num >= 0)) // cout <<"cpu id = " << _cpu_num << "current_bank = " << current_bank << endl; - // bank_access_profile[_cpu_num][current_bank]++; + // bank_access_profile[_cpu_num][current_bank]++; return lat; } @@ -2608,7 +2608,7 @@ DRAMMemory::calculateLatency(PacketPtr pkt) assert(chunks >0); // if((_cpu_num < num_cpus) && (_cpu_num >= 0)) // cout <<"cpu id = " << _cpu_num << "current_bank = " << current_bank << endl; - // 
bank_access_profile[_cpu_num][current_bank]++; + // bank_access_profile[_cpu_num][current_bank]++; return(/* first chunk latency */act_lat + (/* remainder chunk latency */cas_lat * (chunks - 1))); } diff --git a/src/mem/dram.hh b/src/mem/dram.hh index 352ca96ae..1745fa52b 100644 --- a/src/mem/dram.hh +++ b/src/mem/dram.hh @@ -117,27 +117,27 @@ class DRAMMemory : public PhysicalMemory Tick time_last_access; - Stats::Vector<> accesses; - Stats::Vector<> bytesRequested; - Stats::Vector<> bytesSent; - Stats::Vector<> compressedAccesses; - - Stats::Vector<> cycles_nCKE; - Stats::Vector<> cycles_all_precharge_CKE; - Stats::Vector<> cycles_all_precharge_nCKE; - Stats::Vector<> cycles_bank_active_nCKE; - Stats::Vector<> cycles_avg_ACT; - Stats::Vector<> cycles_read_out; - Stats::Vector<> cycles_write_in; - Stats::Vector<> cycles_between_misses; - Stats::Vector<> other_bank_read_access_miss; - Stats::Vector<> other_bank_write_access_miss; - Stats::Scalar<> total_latency; - Stats::Scalar<> total_icache_req; - Stats::Scalar<> total_arb_latency; + Stats::Vector accesses; + Stats::Vector bytesRequested; + Stats::Vector bytesSent; + Stats::Vector compressedAccesses; + + Stats::Vector cycles_nCKE; + Stats::Vector cycles_all_precharge_CKE; + Stats::Vector cycles_all_precharge_nCKE; + Stats::Vector cycles_bank_active_nCKE; + Stats::Vector cycles_avg_ACT; + Stats::Vector cycles_read_out; + Stats::Vector cycles_write_in; + Stats::Vector cycles_between_misses; + Stats::Vector other_bank_read_access_miss; + Stats::Vector other_bank_write_access_miss; + Stats::Scalar total_latency; + Stats::Scalar total_icache_req; + Stats::Scalar total_arb_latency; Stats::Formula avg_latency; Stats::Formula avg_arb_latency; - Stats::Vector2d<> bank_access_profile; + Stats::Vector2d bank_access_profile; protected: diff --git a/src/mem/mem_object.cc b/src/mem/mem_object.cc index ce2a1107e..20a1b4cd8 100644 --- a/src/mem/mem_object.cc +++ b/src/mem/mem_object.cc @@ -35,14 +35,6 @@ 
MemObject::MemObject(const Params *params) { } -MemObjectParams * -MemObject::makeParams(const std::string &name) -{ - MemObjectParams *params = new MemObjectParams; - params->name = name; - return params; -} - void MemObject::deletePortRefs(Port *p) { diff --git a/src/mem/mem_object.hh b/src/mem/mem_object.hh index 33b56dfd4..b8bf4b939 100644 --- a/src/mem/mem_object.hh +++ b/src/mem/mem_object.hh @@ -48,17 +48,10 @@ class MemObject : public SimObject { public: typedef MemObjectParams Params; - MemObject(const Params *params); - - const Params * - params() const - { - return dynamic_cast<const Params *>(_params); - } + const Params *params() const + { return dynamic_cast<const Params *>(_params); } - protected: - // static: support for old-style constructors (call manually) - static Params *makeParams(const std::string &name); + MemObject(const Params *params); public: /** Additional function to return the Port of a memory object. */ diff --git a/src/mem/config/prefetch.hh b/src/mem/mport.cc index d24db79da..72bcfc7fd 100644 --- a/src/mem/config/prefetch.hh +++ b/src/mem/mport.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2008 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,17 +25,36 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Ron Dreslinski + * Authors: Gabe Black */ -/** - * @file - * Central location to configure which prefetch types we want to build - * into the simulator. In the future, this should probably be - * autogenerated by some sort of configuration script. 
- */ -#define USE_TAGGED 1 //Be sure not to turn this off, it is also used for no - //prefetching case unless you always want to use a - //different prefetcher -//#define USE_STRIDED 1 -//#define USE_GHB 1 +#include "mem/mport.hh" + +Tick +MessagePort::recvAtomic(PacketPtr pkt) +{ + if (pkt->cmd == MemCmd::MessageReq) { + // We received a message. + return recvMessage(pkt); + } else if (pkt->cmd == MemCmd::MessageResp) { + return recvResponse(pkt); + } else if (pkt->wasNacked()) { + return recvNack(pkt); + } else if (pkt->isError()) { + panic("Packet is error.\n"); + } else { + panic("Unexpected memory command %s.\n", pkt->cmd.toString()); + } +} + +void +MessagePort::sendMessageTiming(PacketPtr pkt, Tick latency) +{ + schedSendTiming(pkt, curTick + latency); +} + +Tick +MessagePort::sendMessageAtomic(PacketPtr pkt) +{ + return sendAtomic(pkt); +} diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/mport.hh index d2efe08df..5975f89f0 100644 --- a/src/mem/cache/tags/split_blk.hh +++ b/src/mem/mport.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2008 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,44 +25,56 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Lisa Hsu + * Authors: Gabe Black */ -/** - * @file - * Declaration of partitioned tag store cache block class. - */ - -#ifndef __SPLIT_BLK_HH__ -#define __SPLIT_BLK_HH__ +#ifndef __MEM_MPORT_HH__ +#define __MEM_MPORT_HH__ -#include "mem/cache/blk.hh" // base class +#include "mem/tport.hh" -/** - * Split cache block. +/* + * This file defines a port class which is used for sending and receiving + * messages. These messages are atomic units which don't interact and + * should be smaller than a cache block. 
This class is based on + * the underpinnings of SimpleTimingPort, but it tweaks some of the external + * functions. */ -class SplitBlk : public CacheBlk { + +class MessagePort : public SimpleTimingPort +{ public: - /** Has this block been touched? Used to aid calculation of warmup time. */ - bool isTouched; - /** Has this block been used after being brought in? (for LIFO partition) */ - bool isUsed; - /** is this blk a NIC block? (i.e. requested by the NIC) */ - bool isNIC; - /** timestamp of the arrival of this block into the cache */ - Tick ts; - /** the previous block in the LIFO partition (brought in before than me) */ - SplitBlk *prev; - /** the next block in the LIFO partition (brought in later than me) */ - SplitBlk *next; - /** which partition this block is in */ - int part; + MessagePort(std::string pname, MemObject *_owner = NULL) : + SimpleTimingPort(pname, _owner) + {} - SplitBlk() - : isTouched(false), isUsed(false), isNIC(false), ts(0), prev(NULL), next(NULL), - part(0) + virtual ~MessagePort() {} + + void + recvFunctional(PacketPtr pkt) + { + recvAtomic(pkt); + } + + Tick recvAtomic(PacketPtr pkt); + + virtual Tick recvMessage(PacketPtr pkt) = 0; + + // Accept and ignore responses. + virtual Tick recvResponse(PacketPtr pkt) + { + return 0; + } + + // Since by default we're assuming everything we send is accepted, panic. + virtual Tick recvNack(PacketPtr pkt) + { + panic("Unhandled message nack.\n"); + } + + void sendMessageTiming(PacketPtr pkt, Tick latency); + Tick sendMessageAtomic(PacketPtr pkt); }; #endif - diff --git a/src/mem/packet.cc b/src/mem/packet.cc index f3bd06f36..17e58859a 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -42,6 +42,8 @@ #include "base/trace.hh" #include "mem/packet.hh" +using namespace std; + // The one downside to bitsets is that static initializers can get ugly. 
#define SET1(a1) (1 << (a1)) #define SET2(a1, a2) (SET1(a1) | SET1(a2)) @@ -101,12 +103,10 @@ MemCmd::commandInfo[] = /* ReadExResp */ { SET4(IsRead, NeedsExclusive, IsResponse, HasData), InvalidCmd, "ReadExResp" }, - /* LoadLockedReq */ + /* LoadLockedReq: note that we use plain ReadResp as response, so that + * we can also use ReadRespWithInvalidate when needed */ { SET4(IsRead, IsLocked, IsRequest, NeedsResponse), - LoadLockedResp, "LoadLockedReq" }, - /* LoadLockedResp */ - { SET4(IsRead, IsLocked, IsResponse, HasData), - InvalidCmd, "LoadLockedResp" }, + ReadResp, "LoadLockedReq" }, /* StoreCondReq */ { SET6(IsWrite, NeedsExclusive, IsLocked, IsRequest, NeedsResponse, HasData), @@ -120,6 +120,11 @@ MemCmd::commandInfo[] = /* SwapResp -- for Swap ldstub type operations */ { SET5(IsRead, IsWrite, NeedsExclusive, IsResponse, HasData), InvalidCmd, "SwapResp" }, + /* IntReq -- for interrupts */ + { SET4(IsWrite, IsRequest, NeedsResponse, HasData), + MessageReq, "MessageReq" }, + /* IntResp -- for interrupts */ + { SET2(IsWrite, IsResponse), MessageResp, "MessageResp" }, /* NetworkNackError -- nacked at network layer (not by protocol) */ { SET2(IsResponse, IsError), InvalidCmd, "NetworkNackError" }, /* InvalidDestError -- packet dest field invalid */ @@ -130,35 +135,6 @@ MemCmd::commandInfo[] = { SET2(IsRequest, IsPrint), InvalidCmd, "PrintReq" } }; - -/** delete the data pointed to in the data pointer. Ok to call to matter how - * data was allocted. */ -void -Packet::deleteData() -{ - assert(staticData || dynamicData); - if (staticData) - return; - - if (arrayData) - delete [] data; - else - delete data; -} - -/** If there isn't data in the packet, allocate some. 
*/ -void -Packet::allocate() -{ - if (data) - return; - assert(!staticData); - dynamicData = true; - arrayData = true; - data = new uint8_t[getSize()]; -} - - bool Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data) { @@ -190,7 +166,7 @@ Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data) if (isRead()) { if (func_start >= val_start && func_end <= val_end) { allocate(); - std::memcpy(getPtr<uint8_t>(), data + offset, getSize()); + memcpy(getPtr<uint8_t>(), data + offset, getSize()); makeResponse(); return true; } else { @@ -205,11 +181,12 @@ Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data) } } else if (isWrite()) { if (offset >= 0) { - std::memcpy(data + offset, getPtr<uint8_t>(), - (std::min(func_end, val_end) - func_start) + 1); - } else { // val_start > func_start - std::memcpy(data, getPtr<uint8_t>() - offset, - (std::min(func_end, val_end) - val_start) + 1); + memcpy(data + offset, getPtr<uint8_t>(), + (min(func_end, val_end) - func_start) + 1); + } else { + // val_start > func_start + memcpy(data, getPtr<uint8_t>() - offset, + (min(func_end, val_end) - val_start) + 1); } } else { panic("Don't know how to handle command %s\n", cmdString()); @@ -219,23 +196,19 @@ Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data) return false; } - void -Packet::print(std::ostream &o, const int verbosity, - const std::string &prefix) const +Packet::print(ostream &o, const int verbosity, const string &prefix) const { ccprintf(o, "%s[%x:%x] %s\n", prefix, getAddr(), getAddr() + getSize() - 1, cmdString()); } - -Packet::PrintReqState::PrintReqState(std::ostream &_os, int _verbosity) - : curPrefixPtr(new std::string("")), os(_os), verbosity(_verbosity) +Packet::PrintReqState::PrintReqState(ostream &_os, int _verbosity) + : curPrefixPtr(new string("")), os(_os), verbosity(_verbosity) { labelStack.push_back(LabelStackEntry("", curPrefixPtr)); } - Packet::PrintReqState::~PrintReqState() { 
labelStack.pop_back(); @@ -243,21 +216,17 @@ Packet::PrintReqState::~PrintReqState() delete curPrefixPtr; } - Packet::PrintReqState:: -LabelStackEntry::LabelStackEntry(const std::string &_label, - std::string *_prefix) +LabelStackEntry::LabelStackEntry(const string &_label, string *_prefix) : label(_label), prefix(_prefix), labelPrinted(false) { } - void -Packet::PrintReqState::pushLabel(const std::string &lbl, - const std::string &prefix) +Packet::PrintReqState::pushLabel(const string &lbl, const string &prefix) { labelStack.push_back(LabelStackEntry(lbl, curPrefixPtr)); - curPrefixPtr = new std::string(*curPrefixPtr); + curPrefixPtr = new string(*curPrefixPtr); *curPrefixPtr += prefix; } diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 05442b369..41f599fa0 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -42,8 +42,10 @@ #include <list> #include <bitset> +#include "base/cast.hh" #include "base/compiler.hh" #include "base/fast_alloc.hh" +#include "base/flags.hh" #include "base/misc.hh" #include "base/printable.hh" #include "mem/request.hh" @@ -58,9 +60,12 @@ typedef std::list<PacketPtr> PacketList; class MemCmd { - public: + friend class Packet; - /** List of all commands associated with a packet. */ + public: + /** + * List of all commands associated with a packet. + */ enum Command { InvalidCmd, @@ -81,11 +86,12 @@ class MemCmd ReadExReq, ReadExResp, LoadLockedReq, - LoadLockedResp, StoreCondReq, StoreCondResp, SwapReq, SwapResp, + MessageReq, + MessageResp, // Error responses // @TODO these should be classified as responses rather than // requests; coding them as requests initially for backwards @@ -99,7 +105,9 @@ class MemCmd }; private: - /** List of command attributes. */ + /** + * List of command attributes. + */ enum Attribute { IsRead, //!< Data flows from responder to requester @@ -119,26 +127,31 @@ class MemCmd NUM_COMMAND_ATTRIBUTES }; - /** Structure that defines attributes and other data associated - * with a Command. 
*/ - struct CommandInfo { - /** Set of attribute flags. */ + /** + * Structure that defines attributes and other data associated + * with a Command. + */ + struct CommandInfo + { + /// Set of attribute flags. const std::bitset<NUM_COMMAND_ATTRIBUTES> attributes; - /** Corresponding response for requests; InvalidCmd if no - * response is applicable. */ + /// Corresponding response for requests; InvalidCmd if no + /// response is applicable. const Command response; - /** String representation (for printing) */ + /// String representation (for printing) const std::string str; }; - /** Array to map Command enum to associated info. */ + /// Array to map Command enum to associated info. static const CommandInfo commandInfo[]; private: Command cmd; - bool testCmdAttrib(MemCmd::Attribute attrib) const { + bool + testCmdAttrib(MemCmd::Attribute attrib) const + { return commandInfo[cmd].attributes[attrib] != 0; } @@ -157,33 +170,22 @@ class MemCmd bool isError() const { return testCmdAttrib(IsError); } bool isPrint() const { return testCmdAttrib(IsPrint); } - const Command responseCommand() const { + const Command + responseCommand() const + { return commandInfo[cmd].response; } - /** Return the string to a cmd given by idx. */ - const std::string &toString() const { - return commandInfo[cmd].str; - } - + /// Return the string to a cmd given by idx. 
+ const std::string &toString() const { return commandInfo[cmd].str; } int toInt() const { return (int)cmd; } - MemCmd(Command _cmd) - : cmd(_cmd) - { } - - MemCmd(int _cmd) - : cmd((Command)_cmd) - { } - - MemCmd() - : cmd(InvalidCmd) - { } - - bool operator==(MemCmd c2) { return (cmd == c2.cmd); } - bool operator!=(MemCmd c2) { return (cmd != c2.cmd); } + MemCmd(Command _cmd) : cmd(_cmd) { } + MemCmd(int _cmd) : cmd((Command)_cmd) { } + MemCmd() : cmd(InvalidCmd) { } - friend class Packet; + bool operator==(MemCmd c2) const { return (cmd == c2.cmd); } + bool operator!=(MemCmd c2) const { return (cmd != c2.cmd); } }; /** @@ -196,107 +198,118 @@ class MemCmd class Packet : public FastAlloc, public Printable { public: + typedef uint32_t FlagsType; + typedef ::Flags<FlagsType> Flags; + typedef short NodeID; + + private: + static const FlagsType PUBLIC_FLAGS = 0x00000000; + static const FlagsType PRIVATE_FLAGS = 0x00007F0F; + static const FlagsType COPY_FLAGS = 0x0000000F; + static const FlagsType SHARED = 0x00000001; + // Special control flags + /// Special timing-mode atomic snoop for multi-level coherence. + static const FlagsType EXPRESS_SNOOP = 0x00000002; + /// Does supplier have exclusive copy? + /// Useful for multi-level coherence. + static const FlagsType SUPPLY_EXCLUSIVE = 0x00000004; + // Snoop response flags + static const FlagsType MEM_INHIBIT = 0x00000008; + /// Are the 'addr' and 'size' fields valid? + static const FlagsType VALID_ADDR = 0x00000100; + static const FlagsType VALID_SIZE = 0x00000200; + /// Is the 'src' field valid? + static const FlagsType VALID_SRC = 0x00000400; + static const FlagsType VALID_DST = 0x00000800; + /// Is the data pointer set to a value that shouldn't be freed + /// when the packet is destroyed? + static const FlagsType STATIC_DATA = 0x00001000; + /// The data pointer points to a value that should be freed when + /// the packet is destroyed. 
+ static const FlagsType DYNAMIC_DATA = 0x00002000; + /// the data pointer points to an array (thus delete []) needs to + /// be called on it rather than simply delete. + static const FlagsType ARRAY_DATA = 0x00004000; + + Flags flags; + + public: typedef MemCmd::Command Command; - /** The command field of the packet. */ + /// The command field of the packet. MemCmd cmd; - /** A pointer to the original request. */ + /// A pointer to the original request. RequestPtr req; private: - /** A pointer to the data being transfered. It can be differnt - * sizes at each level of the heirarchy so it belongs in the - * packet, not request. This may or may not be populated when a - * responder recieves the packet. If not populated it memory - * should be allocated. + /** + * A pointer to the data being transfered. It can be differnt + * sizes at each level of the heirarchy so it belongs in the + * packet, not request. This may or may not be populated when a + * responder recieves the packet. If not populated it memory should + * be allocated. */ PacketDataPtr data; - /** Is the data pointer set to a value that shouldn't be freed - * when the packet is destroyed? */ - bool staticData; - /** The data pointer points to a value that should be freed when - * the packet is destroyed. */ - bool dynamicData; - /** the data pointer points to an array (thus delete [] ) needs to - * be called on it rather than simply delete.*/ - bool arrayData; - - /** The address of the request. This address could be virtual or - * physical, depending on the system configuration. */ + /// The address of the request. This address could be virtual or + /// physical, depending on the system configuration. Addr addr; - /** The size of the request or transfer. */ + /// The size of the request or transfer. int size; - /** Device address (e.g., bus ID) of the source of the - * transaction. 
The source is not responsible for setting this - * field; it is set implicitly by the interconnect when the - * packet is first sent. */ - short src; + /** + * Device address (e.g., bus ID) of the source of the + * transaction. The source is not responsible for setting this + * field; it is set implicitly by the interconnect when the packet + * is first sent. + */ + NodeID src; - /** Device address (e.g., bus ID) of the destination of the - * transaction. The special value Broadcast indicates that the - * packet should be routed based on its address. This field is - * initialized in the constructor and is thus always valid - * (unlike * addr, size, and src). */ - short dest; + /** + * Device address (e.g., bus ID) of the destination of the + * transaction. The special value Broadcast indicates that the + * packet should be routed based on its address. This field is + * initialized in the constructor and is thus always valid (unlike + * addr, size, and src). + */ + NodeID dest; - /** The original value of the command field. Only valid when the + /** + * The original value of the command field. Only valid when the * current command field is an error condition; in that case, the * previous contents of the command field are copied here. This * field is *not* set on non-error responses. */ MemCmd origCmd; - /** Are the 'addr' and 'size' fields valid? */ - bool addrSizeValid; - /** Is the 'src' field valid? */ - bool srcValid; - bool destValid; - - enum Flag { - // Snoop response flags - MemInhibit, - Shared, - // Special control flags - /// Special timing-mode atomic snoop for multi-level coherence. - ExpressSnoop, - /// Does supplier have exclusive copy? - /// Useful for multi-level coherence. - SupplyExclusive, - NUM_PACKET_FLAGS - }; - - /** Status flags */ - std::bitset<NUM_PACKET_FLAGS> flags; - public: - - /** Used to calculate latencies for each packet.*/ + /// Used to calculate latencies for each packet. 
Tick time; - /** The time at which the packet will be fully transmitted */ + /// The time at which the packet will be fully transmitted Tick finishTime; - /** The time at which the first chunk of the packet will be transmitted */ + /// The time at which the first chunk of the packet will be transmitted Tick firstWordTime; - /** The special destination address indicating that the packet - * should be routed based on its address. */ - static const short Broadcast = -1; - - /** A virtual base opaque structure used to hold state associated - * with the packet but specific to the sending device (e.g., an - * MSHR). A pointer to this state is returned in the packet's - * response so that the sender can quickly look up the state - * needed to process it. A specific subclass would be derived - * from this to carry state specific to a particular sending - * device. */ - class SenderState : public FastAlloc { - public: + /// The special destination address indicating that the packet + /// should be routed based on its address. + static const NodeID Broadcast = -1; + + /** + * A virtual base opaque structure used to hold state associated + * with the packet but specific to the sending device (e.g., an + * MSHR). A pointer to this state is returned in the packet's + * response so that the sender can quickly look up the state + * needed to process it. A specific subclass would be derived + * from this to carry state specific to a particular sending + * device. + */ + struct SenderState + { virtual ~SenderState() {} }; @@ -304,15 +317,18 @@ class Packet : public FastAlloc, public Printable * Object used to maintain state of a PrintReq. The senderState * field of a PrintReq should always be of this type. */ - class PrintReqState : public SenderState { - /** An entry in the label stack. */ - class LabelStackEntry { - public: + class PrintReqState : public SenderState, public FastAlloc + { + private: + /** + * An entry in the label stack. 
+ */ + struct LabelStackEntry + { const std::string label; std::string *prefix; bool labelPrinted; - LabelStackEntry(const std::string &_label, - std::string *_prefix); + LabelStackEntry(const std::string &_label, std::string *_prefix); }; typedef std::list<LabelStackEntry> LabelStack; @@ -327,35 +343,53 @@ class Packet : public FastAlloc, public Printable PrintReqState(std::ostream &os, int verbosity = 0); ~PrintReqState(); - /** Returns the current line prefix. */ + /** + * Returns the current line prefix. + */ const std::string &curPrefix() { return *curPrefixPtr; } - /** Push a label onto the label stack, and prepend the given + /** + * Push a label onto the label stack, and prepend the given * prefix string onto the current prefix. Labels will only be - * printed if an object within the label's scope is - * printed. */ + * printed if an object within the label's scope is printed. + */ void pushLabel(const std::string &lbl, const std::string &prefix = " "); - /** Pop a label off the label stack. */ + + /** + * Pop a label off the label stack. + */ void popLabel(); - /** Print all of the pending unprinted labels on the + + /** + * Print all of the pending unprinted labels on the * stack. Called by printObj(), so normally not called by - * users unless bypassing printObj(). */ + * users unless bypassing printObj(). + */ void printLabels(); - /** Print a Printable object to os, because it matched the - * address on a PrintReq. */ + + /** + * Print a Printable object to os, because it matched the + * address on a PrintReq. + */ void printObj(Printable *obj); }; - /** This packet's sender state. Devices should use dynamic_cast<> - * to cast to the state appropriate to the sender. */ + /** + * This packet's sender state. Devices should use dynamic_cast<> + * to cast to the state appropriate to the sender. The intent of + * this variable is to allow a device to attach extra information + * to a request. 
A response packet must return the sender state + * that was attached to the original request (even if a new packet + * is created). + */ SenderState *senderState; - /** Return the string name of the cmd field (for debugging and - * tracing). */ + /// Return the string name of the cmd field (for debugging and + /// tracing). const std::string &cmdString() const { return cmd.toString(); } - /** Return the index of this command. */ + /// Return the index of this command. inline int cmdToIndex() const { return cmd.toInt(); } bool isRead() const { return cmd.isRead(); } @@ -372,108 +406,134 @@ class Packet : public FastAlloc, public Printable bool isPrint() const { return cmd.isPrint(); } // Snoop flags - void assertMemInhibit() { flags[MemInhibit] = true; } - bool memInhibitAsserted() { return flags[MemInhibit]; } - void assertShared() { flags[Shared] = true; } - bool sharedAsserted() { return flags[Shared]; } + void assertMemInhibit() { flags.set(MEM_INHIBIT); } + bool memInhibitAsserted() { return flags.isSet(MEM_INHIBIT); } + void assertShared() { flags.set(SHARED); } + bool sharedAsserted() { return flags.isSet(SHARED); } // Special control flags - void setExpressSnoop() { flags[ExpressSnoop] = true; } - bool isExpressSnoop() { return flags[ExpressSnoop]; } - void setSupplyExclusive() { flags[SupplyExclusive] = true; } - bool isSupplyExclusive() { return flags[SupplyExclusive]; } + void setExpressSnoop() { flags.set(EXPRESS_SNOOP); } + bool isExpressSnoop() { return flags.isSet(EXPRESS_SNOOP); } + void setSupplyExclusive() { flags.set(SUPPLY_EXCLUSIVE); } + bool isSupplyExclusive() { return flags.isSet(SUPPLY_EXCLUSIVE); } // Network error conditions... encapsulate them as methods since // their encoding keeps changing (from result field to command // field, etc.) 
- void setNacked() { assert(isResponse()); cmd = MemCmd::NetworkNackError; } - void setBadAddress() { assert(isResponse()); cmd = MemCmd::BadAddressError; } - bool wasNacked() { return cmd == MemCmd::NetworkNackError; } - bool hadBadAddress() { return cmd == MemCmd::BadAddressError; } + void + setNacked() + { + assert(isResponse()); + cmd = MemCmd::NetworkNackError; + } + + void + setBadAddress() + { + assert(isResponse()); + cmd = MemCmd::BadAddressError; + } + + bool wasNacked() const { return cmd == MemCmd::NetworkNackError; } + bool hadBadAddress() const { return cmd == MemCmd::BadAddressError; } void copyError(Packet *pkt) { assert(pkt->isError()); cmd = pkt->cmd; } - bool nic_pkt() { panic("Unimplemented"); M5_DUMMY_RETURN } - - /** Accessor function that returns the source index of the packet. */ - short getSrc() const { assert(srcValid); return src; } - void setSrc(short _src) { src = _src; srcValid = true; } - /** Reset source field, e.g. to retransmit packet on different bus. */ - void clearSrc() { srcValid = false; } - - /** Accessor function that returns the destination index of - the packet. */ - short getDest() const { assert(destValid); return dest; } - void setDest(short _dest) { dest = _dest; destValid = true; } - - Addr getAddr() const { assert(addrSizeValid); return addr; } - int getSize() const { assert(addrSizeValid); return size; } - Addr getOffset(int blkSize) const { return addr & (Addr)(blkSize - 1); } - - /** Constructor. Note that a Request object must be constructed - * first, but the Requests's physical address and size fields - * need not be valid. The command and destination addresses - * must be supplied. 
*/ - Packet(Request *_req, MemCmd _cmd, short _dest) - : cmd(_cmd), req(_req), - data(NULL), staticData(false), dynamicData(false), arrayData(false), - addr(_req->paddr), size(_req->size), dest(_dest), - addrSizeValid(_req->validPaddr), srcValid(false), destValid(true), - flags(0), time(curTick), senderState(NULL) + /// Accessor function to get the source index of the packet. + NodeID getSrc() const { assert(flags.isSet(VALID_SRC)); return src; } + /// Accessor function to set the source index of the packet. + void setSrc(NodeID _src) { src = _src; flags.set(VALID_SRC); } + /// Reset source field, e.g. to retransmit packet on different bus. + void clearSrc() { flags.clear(VALID_SRC); } + + /// Accessor function for the destination index of the packet. + NodeID getDest() const { assert(flags.isSet(VALID_DST)); return dest; } + /// Accessor function to set the destination index of the packet. + void setDest(NodeID _dest) { dest = _dest; flags.set(VALID_DST); } + + Addr getAddr() const { assert(flags.isSet(VALID_ADDR)); return addr; } + int getSize() const { assert(flags.isSet(VALID_SIZE)); return size; } + Addr getOffset(int blkSize) const { return getAddr() & (Addr)(blkSize - 1); } + + /** + * Constructor. Note that a Request object must be constructed + * first, but the Requests's physical address and size fields need + * not be valid. The command and destination addresses must be + * supplied. + */ + Packet(Request *_req, MemCmd _cmd, NodeID _dest) + : flags(VALID_DST), cmd(_cmd), req(_req), data(NULL), + addr(_req->paddr), size(_req->size), dest(_dest), time(curTick), + senderState(NULL) { + if (req->flags.isSet(Request::VALID_PADDR)) + flags.set(VALID_ADDR|VALID_SIZE); } - /** Alternate constructor if you are trying to create a packet with - * a request that is for a whole block, not the address from the req. 
- * this allows for overriding the size/addr of the req.*/ - Packet(Request *_req, MemCmd _cmd, short _dest, int _blkSize) - : cmd(_cmd), req(_req), - data(NULL), staticData(false), dynamicData(false), arrayData(false), + /** + * Alternate constructor if you are trying to create a packet with + * a request that is for a whole block, not the address from the + * req. this allows for overriding the size/addr of the req. + */ + Packet(Request *_req, MemCmd _cmd, NodeID _dest, int _blkSize) + : flags(VALID_DST), cmd(_cmd), req(_req), data(NULL), addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), dest(_dest), - addrSizeValid(_req->validPaddr), srcValid(false), destValid(true), - flags(0), time(curTick), senderState(NULL) + time(curTick), senderState(NULL) { + if (req->flags.isSet(Request::VALID_PADDR)) + flags.set(VALID_ADDR|VALID_SIZE); } - /** Alternate constructor for copying a packet. Copy all fields + /** + * Alternate constructor for copying a packet. Copy all fields * *except* if the original packet's data was dynamic, don't copy * that, as we can't guarantee that the new packet's lifetime is * less than that of the original packet. In this case the new - * packet should allocate its own data. */ - Packet(Packet *origPkt, bool clearFlags = false) - : cmd(origPkt->cmd), req(origPkt->req), - data(origPkt->staticData ? origPkt->data : NULL), - staticData(origPkt->staticData), - dynamicData(false), arrayData(false), - addr(origPkt->addr), size(origPkt->size), - src(origPkt->src), dest(origPkt->dest), - addrSizeValid(origPkt->addrSizeValid), - srcValid(origPkt->srcValid), destValid(origPkt->destValid), - flags(clearFlags ? 0 : origPkt->flags), - time(curTick), senderState(origPkt->senderState) + * packet should allocate its own data. + */ + Packet(Packet *pkt, bool clearFlags = false) + : cmd(pkt->cmd), req(pkt->req), + data(pkt->flags.isSet(STATIC_DATA) ? 
pkt->data : NULL), + addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest), + time(curTick), senderState(pkt->senderState) { + if (!clearFlags) + flags.set(pkt->flags & COPY_FLAGS); + + flags.set(pkt->flags & (VALID_ADDR|VALID_SIZE|VALID_SRC|VALID_DST)); + flags.set(pkt->flags & STATIC_DATA); } - /** Destructor. */ + /** + * clean up packet variables + */ ~Packet() - { if (staticData || dynamicData) deleteData(); } - - /** Reinitialize packet address and size from the associated - * Request object, and reset other fields that may have been - * modified by a previous transaction. Typically called when a - * statically allocated Request/Packet pair is reused for - * multiple transactions. */ - void reinitFromRequest() { - assert(req->validPaddr); + { + // If this is a request packet for which there's no response, + // delete the request object here, since the requester will + // never get the chance. + if (req && isRequest() && !needsResponse()) + delete req; + deleteData(); + } + + /** + * Reinitialize packet address and size from the associated + * Request object, and reset other fields that may have been + * modified by a previous transaction. Typically called when a + * statically allocated Request/Packet pair is reused for multiple + * transactions. + */ + void + reinitFromRequest() + { + assert(req->flags.isSet(Request::VALID_PADDR)); flags = 0; addr = req->paddr; size = req->size; time = req->time; - addrSizeValid = true; - if (dynamicData) { - deleteData(); - dynamicData = false; - arrayData = false; - } + + flags.set(VALID_ADDR|VALID_SIZE); + deleteData(); } /** @@ -482,23 +542,27 @@ class Packet : public FastAlloc, public Printable * destination fields are *not* modified, as is appropriate for * atomic accesses. 
*/ - void makeResponse() + void + makeResponse() { assert(needsResponse()); assert(isRequest()); origCmd = cmd; cmd = cmd.responseCommand(); + dest = src; - destValid = srcValid; - srcValid = false; + flags.set(VALID_DST, flags.isSet(VALID_SRC)); + flags.clear(VALID_SRC); } - void makeAtomicResponse() + void + makeAtomicResponse() { makeResponse(); } - void makeTimingResponse() + void + makeTimingResponse() { makeResponse(); } @@ -526,10 +590,9 @@ class Packet : public FastAlloc, public Printable void dataStatic(T *p) { - if(dynamicData) - dynamicData = false; + assert(flags.noneSet(STATIC_DATA|DYNAMIC_DATA|ARRAY_DATA)); data = (PacketDataPtr)p; - staticData = true; + flags.set(STATIC_DATA); } /** @@ -540,10 +603,9 @@ class Packet : public FastAlloc, public Printable void dataDynamicArray(T *p) { - assert(!staticData && !dynamicData); + assert(flags.noneSet(STATIC_DATA|DYNAMIC_DATA|ARRAY_DATA)); data = (PacketDataPtr)p; - dynamicData = true; - arrayData = true; + flags.set(DYNAMIC_DATA|ARRAY_DATA); } /** @@ -554,33 +616,39 @@ class Packet : public FastAlloc, public Printable void dataDynamic(T *p) { - assert(!staticData && !dynamicData); + assert(flags.noneSet(STATIC_DATA|DYNAMIC_DATA|ARRAY_DATA)); data = (PacketDataPtr)p; - dynamicData = true; - arrayData = false; + flags.set(DYNAMIC_DATA); } - /** get a pointer to the data ptr. */ + /** + * get a pointer to the data ptr. + */ template <typename T> T* getPtr() { - assert(staticData || dynamicData); + assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA)); return (T*)data; } - /** return the value of what is pointed to in the packet. */ + /** + * return the value of what is pointed to in the packet. + */ template <typename T> T get(); - /** set the value in the data pointer to v. */ + /** + * set the value in the data pointer to v. + */ template <typename T> void set(T v); /** * Copy data into the packet from the provided pointer. 
*/ - void setData(uint8_t *p) + void + setData(uint8_t *p) { std::memcpy(getPtr<uint8_t>(), p, getSize()); } @@ -589,7 +657,8 @@ class Packet : public FastAlloc, public Printable * Copy data into the packet from the provided block pointer, * which is aligned to the given block size. */ - void setDataFromBlock(uint8_t *blk_data, int blkSize) + void + setDataFromBlock(uint8_t *blk_data, int blkSize) { setData(blk_data + getOffset(blkSize)); } @@ -598,7 +667,8 @@ class Packet : public FastAlloc, public Printable * Copy data from the packet to the provided block pointer, which * is aligned to the given block size. */ - void writeData(uint8_t *p) + void + writeData(uint8_t *p) { std::memcpy(p, getPtr<uint8_t>(), getSize()); } @@ -606,7 +676,8 @@ class Packet : public FastAlloc, public Printable /** * Copy data from the packet to the memory at the provided pointer. */ - void writeDataToBlock(uint8_t *blk_data, int blkSize) + void + writeDataToBlock(uint8_t *blk_data, int blkSize) { writeData(blk_data + getOffset(blkSize)); } @@ -615,10 +686,32 @@ class Packet : public FastAlloc, public Printable * delete the data pointed to in the data pointer. Ok to call to * matter how data was allocted. */ - void deleteData(); + void + deleteData() + { + if (flags.isSet(ARRAY_DATA)) + delete [] data; + else if (flags.isSet(DYNAMIC_DATA)) + delete data; + + flags.clear(STATIC_DATA|DYNAMIC_DATA|ARRAY_DATA); + data = NULL; + } /** If there isn't data in the packet, allocate some. */ - void allocate(); + void + allocate() + { + if (data) { + assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA)); + return; + } + + assert(flags.noneSet(STATIC_DATA|DYNAMIC_DATA)); + flags.set(DYNAMIC_DATA|ARRAY_DATA); + data = new uint8_t[getSize()]; + } + /** * Check a functional request against a memory value represented @@ -633,29 +726,32 @@ class Packet : public FastAlloc, public Printable * Check a functional request against a memory value stored in * another packet (i.e. an in-transit request or response). 
*/ - bool checkFunctional(PacketPtr otherPkt) { - return checkFunctional(otherPkt, - otherPkt->getAddr(), otherPkt->getSize(), - otherPkt->hasData() ? - otherPkt->getPtr<uint8_t>() : NULL); + bool + checkFunctional(PacketPtr other) + { + uint8_t *data = other->hasData() ? other->getPtr<uint8_t>() : NULL; + return checkFunctional(other, other->getAddr(), other->getSize(), + data); } /** * Push label for PrintReq (safe to call unconditionally). */ - void pushLabel(const std::string &lbl) { - if (isPrint()) { - dynamic_cast<PrintReqState*>(senderState)->pushLabel(lbl); - } + void + pushLabel(const std::string &lbl) + { + if (isPrint()) + safe_cast<PrintReqState*>(senderState)->pushLabel(lbl); } /** * Pop label for PrintReq (safe to call unconditionally). */ - void popLabel() { - if (isPrint()) { - dynamic_cast<PrintReqState*>(senderState)->popLabel(); - } + void + popLabel() + { + if (isPrint()) + safe_cast<PrintReqState*>(senderState)->popLabel(); } void print(std::ostream &o, int verbosity = 0, diff --git a/src/mem/packet_access.hh b/src/mem/packet_access.hh index d1edd00aa..f70d508b2 100644 --- a/src/mem/packet_access.hh +++ b/src/mem/packet_access.hh @@ -46,7 +46,7 @@ template <typename T> inline T Packet::get() { - assert(staticData || dynamicData); + assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA)); assert(sizeof(T) <= size); return TheISA::gtoh(*(T*)data); } @@ -56,6 +56,7 @@ template <typename T> inline void Packet::set(T v) { + assert(flags.isSet(STATIC_DATA|DYNAMIC_DATA)); assert(sizeof(T) <= size); *(T*)data = TheISA::htog(v); } diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc index 54165f293..bdcbbfec3 100644 --- a/src/mem/page_table.cc +++ b/src/mem/page_table.cc @@ -87,6 +87,44 @@ PageTable::allocate(Addr vaddr, int64_t size) } } +void +PageTable::remap(Addr vaddr, int64_t size, Addr new_vaddr) +{ + assert(pageOffset(vaddr) == 0); + assert(pageOffset(new_vaddr) == 0); + + DPRINTF(MMU, "moving pages from vaddr %08p to %08p, size = %d\n", vaddr, + 
new_vaddr, size); + + for (; size > 0; size -= pageSize, vaddr += pageSize, new_vaddr += pageSize) { + PTableItr iter = pTable.find(vaddr); + + assert(iter != pTable.end()); + + pTable[new_vaddr] = pTable[vaddr]; + pTable.erase(vaddr); + pTable[new_vaddr].updateVaddr(new_vaddr); + updateCache(new_vaddr, pTable[new_vaddr]); + } +} + +void +PageTable::deallocate(Addr vaddr, int64_t size) +{ + assert(pageOffset(vaddr) == 0); + + DPRINTF(MMU, "Deallocating page: %#x-%#x\n", vaddr, vaddr+ size); + + for (; size > 0; size -= pageSize, vaddr += pageSize) { + PTableItr iter = pTable.find(vaddr); + + assert(iter != pTable.end()); + + pTable.erase(vaddr); + } + +} + bool PageTable::lookup(Addr vaddr, TheISA::TlbEntry &entry) { diff --git a/src/mem/page_table.hh b/src/mem/page_table.hh index b8b52174c..d4101c6bf 100644 --- a/src/mem/page_table.hh +++ b/src/mem/page_table.hh @@ -80,6 +80,8 @@ class PageTable Addr pageOffset(Addr a) { return (a & offsetMask); } void allocate(Addr vaddr, int64_t size); + void remap(Addr vaddr, int64_t size, Addr new_vaddr); + void deallocate(Addr vaddr, int64_t size); /** * Lookup function @@ -91,11 +93,19 @@ class PageTable /** * Translate function * @param vaddr The virtual address. - * @return Physical address from translation. + * @param paddr Physical address from translation. + * @return True if translation exists */ bool translate(Addr vaddr, Addr &paddr); /** + * Simplified translate function (just check for translation) + * @param vaddr The virtual address. + * @return True if translation exists + */ + bool translate(Addr vaddr) { Addr dummy; return translate(vaddr, dummy); } + + /** * Perform a translation on the memory request, fills in paddr * field of req. * @param req The memory request. 
diff --git a/src/mem/physical.cc b/src/mem/physical.cc index 3560fc670..16ff3de6d 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -41,6 +41,7 @@ #include "arch/isa_traits.hh" #include "base/misc.hh" +#include "base/random.hh" #include "config/full_system.hh" #include "mem/packet_access.hh" #include "mem/physical.hh" @@ -51,11 +52,16 @@ using namespace std; using namespace TheISA; PhysicalMemory::PhysicalMemory(const Params *p) - : MemObject(p), pmemAddr(NULL), lat(p->latency) + : MemObject(p), pmemAddr(NULL), pagePtr(0), + lat(p->latency), lat_var(p->latency_var), + cachedSize(params()->range.size()), cachedStart(params()->range.start) { if (params()->range.size() % TheISA::PageBytes != 0) panic("Memory Size not divisible by page size\n"); + if (params()->null) + return; + int map_flags = MAP_ANON | MAP_PRIVATE; pmemAddr = (uint8_t *)mmap(NULL, params()->range.size(), PROT_READ | PROT_WRITE, map_flags, -1, 0); @@ -68,12 +74,6 @@ PhysicalMemory::PhysicalMemory(const Params *p) //If requested, initialize all the memory to 0 if (p->zero) memset(pmemAddr, 0, p->range.size()); - - pagePtr = 0; - - cachedSize = params()->range.size(); - cachedStart = params()->range.start; - } void @@ -116,7 +116,10 @@ PhysicalMemory::deviceBlockSize() Tick PhysicalMemory::calculateLatency(PacketPtr pkt) { - return lat; + Tick latency = lat; + if (lat_var != 0) + latency += random_mt.random<Tick>(0, lat_var); + return latency; } @@ -136,16 +139,16 @@ PhysicalMemory::trackLoadLocked(PacketPtr pkt) for (i = lockedAddrList.begin(); i != lockedAddrList.end(); ++i) { if (i->matchesContext(req)) { - DPRINTF(LLSC, "Modifying lock record: cpu %d thread %d addr %#x\n", - req->getCpuNum(), req->getThreadNum(), paddr); + DPRINTF(LLSC, "Modifying lock record: context %d addr %#x\n", + req->contextId(), paddr); i->addr = paddr; return; } } // no record for this xc: need to allocate a new one - DPRINTF(LLSC, "Adding lock record: cpu %d thread %d addr %#x\n", - req->getCpuNum(), 
req->getThreadNum(), paddr); + DPRINTF(LLSC, "Adding lock record: context %d addr %#x\n", + req->contextId(), paddr); lockedAddrList.push_front(LockedAddr(req)); } @@ -180,14 +183,14 @@ PhysicalMemory::checkLockedAddrList(PacketPtr pkt) // it's a store conditional, and as far as the memory // system can tell, the requesting context's lock is // still valid. - DPRINTF(LLSC, "StCond success: cpu %d thread %d addr %#x\n", - req->getCpuNum(), req->getThreadNum(), paddr); + DPRINTF(LLSC, "StCond success: context %d addr %#x\n", + req->contextId(), paddr); success = true; } // Get rid of our record of this lock and advance to next - DPRINTF(LLSC, "Erasing lock record: cpu %d thread %d addr %#x\n", - i->cpuNum, i->threadNum, paddr); + DPRINTF(LLSC, "Erasing lock record: context %d addr %#x\n", + i->contextId, paddr); i = lockedAddrList.erase(i); } else { @@ -252,6 +255,8 @@ PhysicalMemory::doAtomicAccess(PacketPtr pkt) uint64_t condition_val64; uint32_t condition_val32; + if (!pmemAddr) + panic("Swap only works if there is real memory (i.e. 
null=False)"); assert(sizeof(IntReg) >= pkt->getSize()); overwrite_mem = true; @@ -282,11 +287,13 @@ PhysicalMemory::doAtomicAccess(PacketPtr pkt) if (pkt->isLocked()) { trackLoadLocked(pkt); } - memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize()); + if (pmemAddr) + memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize()); TRACE_PACKET("Read"); } else if (pkt->isWrite()) { if (writeOK(pkt)) { - memcpy(hostAddr, pkt->getPtr<uint8_t>(), pkt->getSize()); + if (pmemAddr) + memcpy(hostAddr, pkt->getPtr<uint8_t>(), pkt->getSize()); TRACE_PACKET("Write"); } } else if (pkt->isInvalidate()) { @@ -315,11 +322,13 @@ PhysicalMemory::doFunctionalAccess(PacketPtr pkt) uint8_t *hostAddr = pmemAddr + pkt->getAddr() - start(); if (pkt->isRead()) { - memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize()); + if (pmemAddr) + memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize()); TRACE_PACKET("Read"); pkt->makeAtomicResponse(); } else if (pkt->isWrite()) { - memcpy(hostAddr, pkt->getPtr<uint8_t>(), pkt->getSize()); + if (pmemAddr) + memcpy(hostAddr, pkt->getPtr<uint8_t>(), pkt->getSize()); TRACE_PACKET("Write"); pkt->makeAtomicResponse(); } else if (pkt->isPrint()) { @@ -374,7 +383,7 @@ PhysicalMemory::recvStatusChange(Port::Status status) PhysicalMemory::MemoryPort::MemoryPort(const std::string &_name, PhysicalMemory *_memory) - : SimpleTimingPort(_name), memory(_memory) + : SimpleTimingPort(_name, _memory), memory(_memory) { } void @@ -443,6 +452,9 @@ PhysicalMemory::drain(Event *de) void PhysicalMemory::serialize(ostream &os) { + if (!pmemAddr) + return; + gzFile compressedMem; string filename = name() + ".physmem"; @@ -475,6 +487,9 @@ PhysicalMemory::serialize(ostream &os) void PhysicalMemory::unserialize(Checkpoint *cp, const string §ion) { + if (!pmemAddr) + return; + gzFile compressedMem; long *tempPage; long *pmem_current; @@ -482,7 +497,6 @@ PhysicalMemory::unserialize(Checkpoint *cp, const string §ion) uint32_t bytesRead; const int chunkSize = 16384; - string 
filename; UNSERIALIZE_SCALAR(filename); diff --git a/src/mem/physical.hh b/src/mem/physical.hh index c3749bd5b..d18138ecd 100644 --- a/src/mem/physical.hh +++ b/src/mem/physical.hh @@ -89,21 +89,18 @@ class PhysicalMemory : public MemObject static Addr mask(Addr paddr) { return (paddr & ~Addr_Mask); } - Addr addr; // locked address - int cpuNum; // locking CPU - int threadNum; // locking thread ID within CPU + Addr addr; // locked address + int contextId; // locking hw context // check for matching execution context bool matchesContext(Request *req) { - return (cpuNum == req->getCpuNum() && - threadNum == req->getThreadNum()); + return (contextId == req->contextId()); } LockedAddr(Request *req) : addr(mask(req->getPaddr())), - cpuNum(req->getCpuNum()), - threadNum(req->getThreadNum()) + contextId(req->contextId()) { } }; @@ -146,6 +143,7 @@ class PhysicalMemory : public MemObject uint8_t *pmemAddr; int pagePtr; Tick lat; + Tick lat_var; std::vector<MemoryPort*> ports; typedef std::vector<MemoryPort*>::iterator PortIterator; diff --git a/src/mem/port.cc b/src/mem/port.cc index ce3f6c74b..a666c968b 100644 --- a/src/mem/port.cc +++ b/src/mem/port.cc @@ -39,17 +39,18 @@ #include "mem/mem_object.hh" #include "mem/port.hh" -class defaultPeerPortClass: public Port +class DefaultPeerPort : public Port { protected: void blowUp() { - fatal("Unconnected port!"); + fatal("%s: Unconnected port!", peer->name()); } public: - defaultPeerPortClass() : Port("default_port") - {} + DefaultPeerPort() + : Port("default_port", NULL) + { } bool recvTiming(PacketPtr) { @@ -84,16 +85,18 @@ class defaultPeerPortClass: public Port blowUp(); } - bool isDefaultPort() { return true; } + bool isDefaultPort() const { return true; } +}; -} defaultPeerPort; +DefaultPeerPort defaultPeerPort; -Port::Port() : peer(&defaultPeerPort), owner(NULL) +Port::Port(const std::string &_name, MemObject *_owner) + : EventManager(_owner), portName(_name), peer(&defaultPeerPort), + owner(_owner) { } 
-Port::Port(const std::string &_name, MemObject *_owner) : - portName(_name), peer(&defaultPeerPort), owner(_owner) +Port::~Port() { } @@ -101,10 +104,18 @@ void Port::setPeer(Port *port) { DPRINTF(Config, "setting peer to %s\n", port->name()); + peer = port; } void +Port::setOwner(MemObject *_owner) +{ + eventq = _owner->queue(); + owner = _owner; +} + +void Port::removeConn() { if (peer->getOwner()) diff --git a/src/mem/port.hh b/src/mem/port.hh index f66b566ea..1d9135ae6 100644 --- a/src/mem/port.hh +++ b/src/mem/port.hh @@ -47,6 +47,7 @@ #include "base/range.hh" #include "mem/packet.hh" #include "mem/request.hh" +#include "sim/eventq.hh" /** This typedef is used to clean up the parameter list of * getDeviceAddressRanges() and getPeerAddressRanges(). It's declared @@ -58,6 +59,7 @@ typedef std::list<Range<Addr> > AddrRangeList; typedef std::list<Range<Addr> >::iterator AddrRangeIter; +class EventQueue; class MemObject; /** @@ -71,10 +73,9 @@ class MemObject; * Send accessor functions are being called from the device the port is * associated with, and it will call the peer recv. accessor function. */ -class Port +class Port : public EventManager { - private: - + protected: /** Descriptive name (for DPRINTF output) */ mutable std::string portName; @@ -87,9 +88,6 @@ class Port MemObject *owner; public: - - Port(); - /** * Constructor. * @@ -98,12 +96,12 @@ class Port * @param _owner Pointer to the MemObject that owns this port. * Will not necessarily be set. */ - Port(const std::string &_name, MemObject *_owner = NULL); + Port(const std::string &_name, MemObject *_owner); /** Return port name (for DPRINTF). */ const std::string &name() const { return portName; } - virtual ~Port() {}; + virtual ~Port(); // mey be better to use subclasses & RTTI? /** Holds the ports status. Currently just that a range recomputation needs @@ -122,7 +120,7 @@ class Port Port *getPeer() { return peer; } /** Function to set the owner of this port. 
*/ - void setOwner(MemObject *_owner) { owner = _owner; } + void setOwner(MemObject *_owner); /** Function to return the owner of this port. */ MemObject *getOwner() { return owner; } @@ -131,7 +129,9 @@ class Port * demise. */ void removeConn(); - virtual bool isDefaultPort() { return false; } + virtual bool isDefaultPort() const { return false; } + + bool isConnected() { return peer && !peer->isDefaultPort(); } protected: diff --git a/src/mem/request.hh b/src/mem/request.hh index cc9c6b8bf..ee62ce771 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -39,247 +39,436 @@ #ifndef __MEM_REQUEST_HH__ #define __MEM_REQUEST_HH__ +#include <cassert> + #include "base/fast_alloc.hh" +#include "base/flags.hh" +#include "base/misc.hh" #include "sim/host.hh" #include "sim/core.hh" -#include <cassert> - class Request; typedef Request* RequestPtr; +class Request : public FastAlloc +{ + friend class Packet; -/** ASI information for this request if it exsits. */ -const uint32_t ASI_BITS = 0x000FF; -/** The request is a Load locked/store conditional. */ -const uint32_t LOCKED = 0x00100; -/** The virtual address is also the physical address. */ -const uint32_t PHYSICAL = 0x00200; -/** The request is an ALPHA VPTE pal access (hw_ld). */ -const uint32_t VPTE = 0x00400; -/** Use the alternate mode bits in ALPHA. */ -const uint32_t ALTMODE = 0x00800; -/** The request is to an uncacheable address. */ -const uint32_t UNCACHEABLE = 0x01000; -/** The request should not cause a page fault. */ -const uint32_t NO_FAULT = 0x02000; -/** The request should be prefetched into the exclusive state. */ -const uint32_t PF_EXCLUSIVE = 0x10000; -/** The request should be marked as LRU. */ -const uint32_t EVICT_NEXT = 0x20000; -/** The request should ignore unaligned access faults */ -const uint32_t NO_ALIGN_FAULT = 0x40000; -/** The request was an instruction read. */ -const uint32_t INST_READ = 0x80000; -/** This request is for a memory swap. 
*/ -const uint32_t MEM_SWAP = 0x100000; -const uint32_t MEM_SWAP_COND = 0x200000; -/** The request should ignore unaligned access faults */ -const uint32_t NO_HALF_WORD_ALIGN_FAULT = 0x400000; + public: + typedef uint32_t FlagsType; + typedef ::Flags<FlagsType> Flags; + + /** ASI information for this request if it exists. */ + static const FlagsType ASI_BITS = 0x000000FF; + /** The request is a Load locked/store conditional. */ + static const FlagsType LOCKED = 0x00000100; + /** The virtual address is also the physical address. */ + static const FlagsType PHYSICAL = 0x00000200; + /** The request is an ALPHA VPTE pal access (hw_ld). */ + static const FlagsType VPTE = 0x00000400; + /** Use the alternate mode bits in ALPHA. */ + static const FlagsType ALTMODE = 0x00000800; + /** The request is to an uncacheable address. */ + static const FlagsType UNCACHEABLE = 0x00001000; + /** The request should not cause a page fault. */ + static const FlagsType NO_FAULT = 0x00002000; + /** The request should not cause a memory access. */ + static const FlagsType NO_ACCESS = 0x00004000; + /** The request should be prefetched into the exclusive state. */ + static const FlagsType PF_EXCLUSIVE = 0x00010000; + /** The request should be marked as LRU. */ + static const FlagsType EVICT_NEXT = 0x00020000; + /** The request should ignore unaligned access faults */ + static const FlagsType NO_ALIGN_FAULT = 0x00040000; + /** The request was an instruction read. */ + static const FlagsType INST_READ = 0x00080000; + /** This request is for a memory swap. */ + static const FlagsType MEM_SWAP = 0x00100000; + static const FlagsType MEM_SWAP_COND = 0x00200000; + /** The request should ignore unaligned access faults */ + static const FlagsType NO_HALF_WORD_ALIGN_FAULT = 0x00400000; + /** This request is to a memory mapped register. 
*/ + static const FlagsType MMAPED_IPR = 0x00800000; + private: + static const FlagsType PUBLIC_FLAGS = 0x00FF3FFF; + static const FlagsType PRIVATE_FLAGS = 0xFF000000; + + /** Whether or not the size is valid. */ + static const FlagsType VALID_SIZE = 0x01000000; + /** Whether or not paddr is valid (has been written yet). */ + static const FlagsType VALID_PADDR = 0x02000000; + /** Whether or not the vaddr & asid are valid. */ + static const FlagsType VALID_VADDR = 0x04000000; + /** Whether or not the pc is valid. */ + static const FlagsType VALID_PC = 0x10000000; + /** Whether or not the context ID is valid. */ + static const FlagsType VALID_CONTEXT_ID = 0x20000000; + static const FlagsType VALID_THREAD_ID = 0x40000000; + /** Whether or not the sc result is valid. */ + static const FlagsType VALID_EXTRA_DATA = 0x80000000; -class Request : public FastAlloc -{ private: /** * The physical address of the request. Valid only if validPaddr - * is set. */ + * is set. + */ Addr paddr; /** * The size of the request. This field must be set when vaddr or * paddr is written via setVirt() or setPhys(), so it is always - * valid as long as one of the address fields is valid. */ + * valid as long as one of the address fields is valid. + */ int size; /** Flag structure for the request. */ - uint32_t flags; + Flags flags; /** * The time this request was started. Used to calculate * latencies. This field is set to curTick any time paddr or vaddr - * is written. */ + * is written. + */ Tick time; /** The address space ID. */ int asid; - /** This request is to a memory mapped register. */ - bool mmapedIpr; - /** The virtual address of the request. */ Addr vaddr; - /** Extra data for the request, such as the return value of + /** + * Extra data for the request, such as the return value of * store conditional or the compare value for a CAS. */ uint64_t extraData; - /** The cpu number (for statistics, typically). 
*/ - int cpuNum; - /** The requesting thread id (for statistics, typically). */ - int threadNum; + /** The context ID (for statistics, typically). */ + int _contextId; + /** The thread ID (id within this CPU) */ + int _threadId; /** program counter of initiating access; for tracing/debugging */ Addr pc; - /** Whether or not paddr is valid (has been written yet). */ - bool validPaddr; - /** Whether or not the asid & vaddr are valid. */ - bool validAsidVaddr; - /** Whether or not the sc result is valid. */ - bool validExData; - /** Whether or not the cpu number & thread ID are valid. */ - bool validCpuAndThreadNums; - /** Whether or not the pc is valid. */ - bool validPC; - public: /** Minimal constructor. No fields are initialized. */ Request() - : validPaddr(false), validAsidVaddr(false), - validExData(false), validCpuAndThreadNums(false), validPC(false) {} /** * Constructor for physical (e.g. device) requests. Initializes * just physical address, size, flags, and timestamp (to curTick). - * These fields are adequate to perform a request. */ - Request(Addr _paddr, int _size, int _flags) - : validCpuAndThreadNums(false) - { setPhys(_paddr, _size, _flags); } + * These fields are adequate to perform a request. + */ + Request(Addr paddr, int size, Flags flags) + { + setPhys(paddr, size, flags); + } - Request(int _asid, Addr _vaddr, int _size, int _flags, Addr _pc, - int _cpuNum, int _threadNum) + Request(int asid, Addr vaddr, int size, Flags flags, Addr pc, + int cid, int tid) { - setThreadContext(_cpuNum, _threadNum); - setVirt(_asid, _vaddr, _size, _flags, _pc); + setThreadContext(cid, tid); + setVirt(asid, vaddr, size, flags, pc); } ~Request() {} // for FastAlloc /** - * Set up CPU and thread numbers. */ - void setThreadContext(int _cpuNum, int _threadNum) + * Set up CPU and thread numbers. 
+ */ + void + setThreadContext(int context_id, int thread_id) { - cpuNum = _cpuNum; - threadNum = _threadNum; - validCpuAndThreadNums = true; + _contextId = context_id; + _threadId = thread_id; + flags.set(VALID_CONTEXT_ID|VALID_THREAD_ID); } /** * Set up a physical (e.g. device) request in a previously - * allocated Request object. */ - void setPhys(Addr _paddr, int _size, int _flags) + * allocated Request object. + */ + void + setPhys(Addr _paddr, int _size, Flags _flags) { + assert(_size >= 0); paddr = _paddr; size = _size; - flags = _flags; time = curTick; - validPaddr = true; - validAsidVaddr = false; - validPC = false; - validExData = false; - mmapedIpr = false; + + flags.set(VALID_PADDR|VALID_SIZE); + flags.clear(VALID_VADDR|VALID_PC|VALID_EXTRA_DATA|MMAPED_IPR); + flags.update(_flags, PUBLIC_FLAGS); } /** * Set up a virtual (e.g., CPU) request in a previously - * allocated Request object. */ - void setVirt(int _asid, Addr _vaddr, int _size, int _flags, Addr _pc) + * allocated Request object. + */ + void + setVirt(int _asid, Addr _vaddr, int _size, Flags _flags, Addr _pc) { + assert(_size >= 0); asid = _asid; vaddr = _vaddr; size = _size; - flags = _flags; pc = _pc; time = curTick; - validPaddr = false; - validAsidVaddr = true; - validPC = true; - validExData = false; - mmapedIpr = false; + + flags.set(VALID_VADDR|VALID_SIZE|VALID_PC); + flags.clear(VALID_PADDR|VALID_EXTRA_DATA|MMAPED_IPR); + flags.update(_flags, PUBLIC_FLAGS); } - /** Set just the physical address. This should only be used to + /** + * Set just the physical address. This should only be used to * record the result of a translation, and thus the vaddr must be * valid before this method is called. Otherwise, use setPhys() * to guarantee that the size and flags are also set. */ - void setPaddr(Addr _paddr) + void + setPaddr(Addr _paddr) { - assert(validAsidVaddr); + assert(flags.isSet(VALID_VADDR)); paddr = _paddr; - validPaddr = true; + flags.set(VALID_PADDR); } - /** Accessor for paddr. 
*/ - Addr getPaddr() { assert(validPaddr); return paddr; } + /** + * Generate two requests as if this request had been split into two + * pieces. The original request can't have been translated already. + */ + void splitOnVaddr(Addr split_addr, RequestPtr &req1, RequestPtr &req2) + { + assert(flags.isSet(VALID_VADDR)); + assert(flags.noneSet(VALID_PADDR)); + assert(split_addr > vaddr && split_addr < vaddr + size); + req1 = new Request; + *req1 = *this; + req2 = new Request; + *req2 = *this; + req1->size = split_addr - vaddr; + req2->vaddr = split_addr; + req2->size = size - req1->size; + } + + /** + * Accessor for paddr. + */ + Addr + getPaddr() + { + assert(flags.isSet(VALID_PADDR)); + return paddr; + } + + /** + * Accessor for size. + */ + int + getSize() + { + assert(flags.isSet(VALID_SIZE)); + return size; + } - /** Accessor for size. */ - int getSize() { assert(validPaddr || validAsidVaddr); return size; } /** Accessor for time. */ - Tick getTime() { assert(validPaddr || validAsidVaddr); return time; } + Tick + getTime() + { + assert(flags.isSet(VALID_PADDR|VALID_VADDR)); + return time; + } + + void + setTime(Tick when) + { + assert(flags.isSet(VALID_PADDR|VALID_VADDR)); + time = when; + } + + /** Accessor for flags. */ + Flags + getFlags() + { + assert(flags.isSet(VALID_PADDR|VALID_VADDR)); + return flags & PUBLIC_FLAGS; + } + + Flags + anyFlags(Flags _flags) + { + assert(flags.isSet(VALID_PADDR|VALID_VADDR)); + assert(_flags.noneSet(~PUBLIC_FLAGS)); + return flags.isSet(_flags); + } + + Flags + allFlags(Flags _flags) + { + assert(flags.isSet(VALID_PADDR|VALID_VADDR)); + assert(_flags.noneSet(~PUBLIC_FLAGS)); + return flags.allSet(_flags); + } /** Accessor for flags. */ - uint32_t getFlags() { assert(validPaddr || validAsidVaddr); return flags; } - /** Accessor for paddr. 
*/ - void setFlags(uint32_t _flags) - { assert(validPaddr || validAsidVaddr); flags = _flags; } + void + setFlags(Flags _flags) + { + assert(flags.isSet(VALID_PADDR|VALID_VADDR)); + assert(_flags.noneSet(~PUBLIC_FLAGS)); + flags.set(_flags); + } + + void + clearFlags(Flags _flags) + { + assert(flags.isSet(VALID_PADDR|VALID_VADDR)); + assert(_flags.noneSet(~PUBLIC_FLAGS)); + flags.clear(_flags); + } + + void + clearFlags() + { + assert(flags.isSet(VALID_PADDR|VALID_VADDR)); + flags.clear(PUBLIC_FLAGS); + } /** Accessor function for vaddr.*/ - Addr getVaddr() { assert(validAsidVaddr); return vaddr; } + Addr + getVaddr() + { + assert(flags.isSet(VALID_VADDR)); + return vaddr; + } /** Accessor function for asid.*/ - int getAsid() { assert(validAsidVaddr); return asid; } + int + getAsid() + { + assert(flags.isSet(VALID_VADDR)); + return asid; + } /** Accessor function for asi.*/ - uint8_t getAsi() { assert(validAsidVaddr); return flags & ASI_BITS; } + uint8_t + getAsi() + { + assert(flags.isSet(VALID_VADDR)); + return flags & ASI_BITS; + } /** Accessor function for asi.*/ - void setAsi(uint8_t a) - { assert(validAsidVaddr); flags = (flags & ~ASI_BITS) | a; } + void + setAsi(uint8_t a) + { + assert(flags.isSet(VALID_VADDR)); + flags.update(a, ASI_BITS); + } /** Accessor function for asi.*/ - bool isMmapedIpr() { assert(validPaddr); return mmapedIpr; } + bool + isMmapedIpr() + { + assert(flags.isSet(VALID_PADDR)); + return flags.isSet(MMAPED_IPR); + } /** Accessor function for asi.*/ - void setMmapedIpr(bool r) { assert(validAsidVaddr); mmapedIpr = r; } + void + setMmapedIpr(bool r) + { + assert(VALID_VADDR); + flags.set(MMAPED_IPR); + } /** Accessor function to check if sc result is valid. 
*/ - bool extraDataValid() { return validExData; } + bool + extraDataValid() + { + return flags.isSet(VALID_EXTRA_DATA); + } + /** Accessor function for store conditional return value.*/ - uint64_t getExtraData() { assert(validExData); return extraData; } + uint64_t + getExtraData() const + { + assert(flags.isSet(VALID_EXTRA_DATA)); + return extraData; + } + /** Accessor function for store conditional return value.*/ - void setExtraData(uint64_t _extraData) - { extraData = _extraData; validExData = true; } + void + setExtraData(uint64_t _extraData) + { + extraData = _extraData; + flags.set(VALID_EXTRA_DATA); + } - /** Accessor function for cpu number.*/ - int getCpuNum() { assert(validCpuAndThreadNums); return cpuNum; } - /** Accessor function for thread number.*/ - int getThreadNum() { assert(validCpuAndThreadNums); return threadNum; } + bool + hasContextId() const + { + return flags.isSet(VALID_CONTEXT_ID); + } - /** Accessor function for pc.*/ - Addr getPC() { assert(validPC); return pc; } + /** Accessor function for context ID.*/ + int + contextId() const + { + assert(flags.isSet(VALID_CONTEXT_ID)); + return _contextId; + } - /** Accessor Function to Check Cacheability. */ - bool isUncacheable() { return (getFlags() & UNCACHEABLE) != 0; } + /** Accessor function for thread ID. */ + int + threadId() const + { + assert(flags.isSet(VALID_THREAD_ID)); + return _threadId; + } - bool isInstRead() { return (getFlags() & INST_READ) != 0; } + bool + hasPC() const + { + return flags.isSet(VALID_PC); + } - bool isLocked() { return (getFlags() & LOCKED) != 0; } + /** Accessor function for pc.*/ + Addr + getPC() const + { + assert(flags.isSet(VALID_PC)); + return pc; + } + + /** Accessor Function to Check Cacheability. 
*/ + bool isUncacheable() const { return flags.isSet(UNCACHEABLE); } + bool isInstRead() const { return flags.isSet(INST_READ); } + bool isLocked() const { return flags.isSet(LOCKED); } + bool isSwap() const { return flags.isSet(MEM_SWAP|MEM_SWAP_COND); } + bool isCondSwap() const { return flags.isSet(MEM_SWAP_COND); } + + bool + isMisaligned() const + { + if (flags.isSet(NO_ALIGN_FAULT)) + return false; - bool isSwap() { return (getFlags() & MEM_SWAP || - getFlags() & MEM_SWAP_COND); } + if ((vaddr & 0x1)) + return true; - bool isCondSwap() { return (getFlags() & MEM_SWAP_COND) != 0; } + if (flags.isSet(NO_HALF_WORD_ALIGN_FAULT)) + return false; - bool inline isMisaligned() {return (!(getFlags() & NO_ALIGN_FAULT) && - ((vaddr & 1) || - (!(getFlags() & NO_HALF_WORD_ALIGN_FAULT) - && (vaddr & 0x2))));} + if ((vaddr & 0x2)) + return true; - friend class Packet; + return false; + } }; #endif // __MEM_REQUEST_HH__ diff --git a/src/mem/tport.cc b/src/mem/tport.cc index 9fa27046b..f937eeb32 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -30,6 +30,21 @@ #include "mem/tport.hh" +using namespace std; + +SimpleTimingPort::SimpleTimingPort(string pname, MemObject *_owner) + : Port(pname, _owner), sendEvent(0), drainEvent(NULL), + waitingOnRetry(false) +{ + sendEvent = new EventWrapper<SimpleTimingPort, + &SimpleTimingPort::processSendEvent>(this); +} + +SimpleTimingPort::~SimpleTimingPort() +{ + delete sendEvent; +} + bool SimpleTimingPort::checkFunctional(PacketPtr pkt) { @@ -65,7 +80,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt) // code to hanldle nacks here, but I'm pretty sure it didn't work // correctly with the drain code, so that would need to be fixed // if we ever added it back. 
- assert(pkt->isRequest()); if (pkt->memInhibitAsserted()) { // snooper will supply based on copy of packet @@ -83,7 +97,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt) assert(pkt->isResponse()); schedSendTiming(pkt, curTick + latency); } else { - delete pkt->req; delete pkt; } @@ -104,11 +117,6 @@ SimpleTimingPort::schedSendTiming(PacketPtr pkt, Tick when) return; } - // list is non-empty and this is not the head, so event should - // already be scheduled - assert(waitingOnRetry || - (sendEvent->scheduled() && sendEvent->when() <= when)); - // list is non-empty & this belongs at the end if (when >= transmitList.back().tick) { transmitList.push_back(DeferredPacket(when, pkt)); @@ -144,7 +152,7 @@ SimpleTimingPort::sendDeferredPacket() if (success) { if (!transmitList.empty() && !sendEvent->scheduled()) { Tick time = transmitList.front().tick; - sendEvent->schedule(time <= curTick ? curTick+1 : time); + schedule(sendEvent, time <= curTick ? curTick+1 : time); } if (transmitList.empty() && drainEvent) { diff --git a/src/mem/tport.hh b/src/mem/tport.hh index d0f1be425..7dfe60b72 100644 --- a/src/mem/tport.hh +++ b/src/mem/tport.hh @@ -85,9 +85,6 @@ class SimpleTimingPort : public Port * When the event time expires it attempts to send the packet. * If it cannot, the packet sent when recvRetry() is called. **/ - typedef EventWrapper<SimpleTimingPort, &SimpleTimingPort::processSendEvent> - SendEvent; - Event *sendEvent; /** If we need to drain, keep the drain event around until we're done @@ -108,7 +105,8 @@ class SimpleTimingPort : public Port Tick deferredPacketReadyTime() { return transmitList.empty() ? 
MaxTick : transmitList.front().tick; } - void schedSendEvent(Tick when) + void + schedSendEvent(Tick when) { if (waitingOnRetry) { assert(!sendEvent->scheduled()); @@ -116,9 +114,9 @@ class SimpleTimingPort : public Port } if (!sendEvent->scheduled()) { - sendEvent->schedule(when); + schedule(sendEvent, when); } else if (sendEvent->when() > when) { - sendEvent->reschedule(when); + reschedule(sendEvent, when); } } @@ -154,15 +152,8 @@ class SimpleTimingPort : public Port public: - - SimpleTimingPort(std::string pname, MemObject *_owner = NULL) - : Port(pname, _owner), - sendEvent(new SendEvent(this)), - drainEvent(NULL), - waitingOnRetry(false) - {} - - ~SimpleTimingPort() { delete sendEvent; } + SimpleTimingPort(std::string pname, MemObject *_owner); + ~SimpleTimingPort(); /** Hook for draining timing accesses from the system. The * associated SimObject's drain() functions should be implemented diff --git a/src/mem/vport.cc b/src/mem/vport.cc index 6cc4d9ca9..15be45c2a 100644 --- a/src/mem/vport.cc +++ b/src/mem/vport.cc @@ -75,23 +75,18 @@ void CopyOut(ThreadContext *tc, void *dest, Addr src, size_t cplen) { uint8_t *dst = (uint8_t *)dest; - VirtualPort *vp = tc->getVirtPort(tc); + VirtualPort *vp = tc->getVirtPort(); vp->readBlob(src, dst, cplen); - - tc->delVirtPort(vp); - } void CopyIn(ThreadContext *tc, Addr dest, void *source, size_t cplen) { uint8_t *src = (uint8_t *)source; - VirtualPort *vp = tc->getVirtPort(tc); + VirtualPort *vp = tc->getVirtPort(); vp->writeBlob(dest, src, cplen); - - tc->delVirtPort(vp); } void @@ -99,25 +94,23 @@ CopyStringOut(ThreadContext *tc, char *dst, Addr vaddr, size_t maxlen) { int len = 0; char *start = dst; - VirtualPort *vp = tc->getVirtPort(tc); + VirtualPort *vp = tc->getVirtPort(); do { vp->readBlob(vaddr++, (uint8_t*)dst++, 1); } while (len < maxlen && start[len++] != 0 ); - tc->delVirtPort(vp); dst[len] = 0; } void CopyStringIn(ThreadContext *tc, char *src, Addr vaddr) { - VirtualPort *vp = tc->getVirtPort(tc); + 
VirtualPort *vp = tc->getVirtPort(); for (ChunkGenerator gen(vaddr, strlen(src), TheISA::PageBytes); !gen.done(); gen.next()) { vp->writeBlob(gen.addr(), (uint8_t*)src, gen.size()); src += gen.size(); } - tc->delVirtPort(vp); } |