author    | Tony Gutierrez <anthony.gutierrez@amd.com> | 2016-01-19 14:28:22 -0500
committer | Tony Gutierrez <anthony.gutierrez@amd.com> | 2016-01-19 14:28:22 -0500
commit    | 1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree      | 867510a147cd095f19499d26b7c02d27de4cae9d /src/gpu-compute/gpu_tlb.cc
parent    | 28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
download  | gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/gpu-compute/gpu_tlb.cc')
-rw-r--r-- | src/gpu-compute/gpu_tlb.cc | 1801
1 file changed, 1801 insertions(+), 0 deletions(-)
diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc new file mode 100644 index 000000000..de005fd04 --- /dev/null +++ b/src/gpu-compute/gpu_tlb.cc @@ -0,0 +1,1801 @@ +/* + * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Lisa Hsu + */ + +#include "gpu-compute/gpu_tlb.hh" + +#include <cmath> +#include <cstring> + +#include "arch/x86/faults.hh" +#include "arch/x86/insts/microldstop.hh" +#include "arch/x86/pagetable.hh" +#include "arch/x86/pagetable_walker.hh" +#include "arch/x86/regs/misc.hh" +#include "arch/x86/x86_traits.hh" +#include "base/bitfield.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "cpu/base.hh" +#include "cpu/thread_context.hh" +#include "debug/GPUPrefetch.hh" +#include "debug/GPUTLB.hh" +#include "mem/packet_access.hh" +#include "mem/page_table.hh" +#include "mem/request.hh" +#include "sim/process.hh" + +namespace X86ISA +{ + + GpuTLB::GpuTLB(const Params *p) + : MemObject(p), configAddress(0), size(p->size), + cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this) + { + assoc = p->assoc; + assert(assoc <= size); + numSets = size/assoc; + allocationPolicy = p->allocationPolicy; + hasMemSidePort = false; + accessDistance = p->accessDistance; + clock = p->clk_domain->clockPeriod(); + + tlb = new GpuTlbEntry[size]; + std::memset(tlb, 0, sizeof(GpuTlbEntry) * size); + + freeList.resize(numSets); + entryList.resize(numSets); + + for (int set = 0; set < numSets; ++set) { + for (int way = 0; way < assoc; ++way) { + int x = set*assoc + way; + freeList[set].push_back(&tlb[x]); + } + } + + FA = (size == assoc); + + /** + * @warning: the set-associative version assumes you have a + * fixed page size of 4KB. 
+ * If the page size is greather than 4KB (as defined in the + * TheISA::PageBytes), then there are various issues w/ the current + * implementation (you'd have the same 8KB page being replicated in + * different sets etc) + */ + setMask = numSets - 1; + + #if 0 + // GpuTLB doesn't yet support full system + walker = p->walker; + walker->setTLB(this); + #endif + + maxCoalescedReqs = p->maxOutstandingReqs; + + // Do not allow maxCoalescedReqs to be more than the TLB associativity + if (maxCoalescedReqs > assoc) { + maxCoalescedReqs = assoc; + cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc); + } + + outstandingReqs = 0; + hitLatency = p->hitLatency; + missLatency1 = p->missLatency1; + missLatency2 = p->missLatency2; + + // create the slave ports based on the number of connected ports + for (size_t i = 0; i < p->port_slave_connection_count; ++i) { + cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", + name(), i), this, i)); + } + + // create the master ports based on the number of connected ports + for (size_t i = 0; i < p->port_master_connection_count; ++i) { + memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", + name(), i), this, i)); + } + } + + // fixme: this is never called? + GpuTLB::~GpuTLB() + { + // make sure all the hash-maps are empty + assert(translationReturnEvent.empty()); + + // delete the TLB + delete[] tlb; + } + + BaseSlavePort& + GpuTLB::getSlavePort(const std::string &if_name, PortID idx) + { + if (if_name == "slave") { + if (idx >= static_cast<PortID>(cpuSidePort.size())) { + panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx); + } + + return *cpuSidePort[idx]; + } else { + panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name); + } + } + + BaseMasterPort& + GpuTLB::getMasterPort(const std::string &if_name, PortID idx) + { + if (if_name == "master") { + if (idx >= static_cast<PortID>(memSidePort.size())) { + panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx); + } + + hasMemSidePort = true; + + return *memSidePort[idx]; + } else { + panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name); + } + } + + GpuTlbEntry* + GpuTLB::insert(Addr vpn, GpuTlbEntry &entry) + { + GpuTlbEntry *newEntry = nullptr; + + /** + * vpn holds the virtual page address + * The least significant bits are simply masked + */ + int set = (vpn >> TheISA::PageShift) & setMask; + + if (!freeList[set].empty()) { + newEntry = freeList[set].front(); + freeList[set].pop_front(); + } else { + newEntry = entryList[set].back(); + entryList[set].pop_back(); + } + + *newEntry = entry; + newEntry->vaddr = vpn; + entryList[set].push_front(newEntry); + + return newEntry; + } + + GpuTLB::EntryList::iterator + GpuTLB::lookupIt(Addr va, bool update_lru) + { + int set = (va >> TheISA::PageShift) & setMask; + + if (FA) { + assert(!set); + } + + auto entry = entryList[set].begin(); + for (; entry != entryList[set].end(); ++entry) { + int page_size = (*entry)->size(); + + if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) { + DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x " + "with size %#x.\n", va, (*entry)->vaddr, page_size); + + if (update_lru) { + entryList[set].push_front(*entry); + entryList[set].erase(entry); + entry = entryList[set].begin(); + } + + break; + } + } + + return entry; + } + + GpuTlbEntry* + GpuTLB::lookup(Addr va, bool update_lru) + { + int set = (va >> TheISA::PageShift) & setMask; + + auto entry = lookupIt(va, update_lru); + + if (entry == entryList[set].end()) + return nullptr; + else + return *entry; + 
} + + void + GpuTLB::invalidateAll() + { + DPRINTF(GPUTLB, "Invalidating all entries.\n"); + + for (int i = 0; i < numSets; ++i) { + while (!entryList[i].empty()) { + GpuTlbEntry *entry = entryList[i].front(); + entryList[i].pop_front(); + freeList[i].push_back(entry); + } + } + } + + void + GpuTLB::setConfigAddress(uint32_t addr) + { + configAddress = addr; + } + + void + GpuTLB::invalidateNonGlobal() + { + DPRINTF(GPUTLB, "Invalidating all non global entries.\n"); + + for (int i = 0; i < numSets; ++i) { + for (auto entryIt = entryList[i].begin(); + entryIt != entryList[i].end();) { + if (!(*entryIt)->global) { + freeList[i].push_back(*entryIt); + entryList[i].erase(entryIt++); + } else { + ++entryIt; + } + } + } + } + + void + GpuTLB::demapPage(Addr va, uint64_t asn) + { + + int set = (va >> TheISA::PageShift) & setMask; + auto entry = lookupIt(va, false); + + if (entry != entryList[set].end()) { + freeList[set].push_back(*entry); + entryList[set].erase(entry); + } + } + + Fault + GpuTLB::translateInt(RequestPtr req, ThreadContext *tc) + { + DPRINTF(GPUTLB, "Addresses references internal memory.\n"); + Addr vaddr = req->getVaddr(); + Addr prefix = (vaddr >> 3) & IntAddrPrefixMask; + + if (prefix == IntAddrPrefixCPUID) { + panic("CPUID memory space not yet implemented!\n"); + } else if (prefix == IntAddrPrefixMSR) { + vaddr = vaddr >> 3; + req->setFlags(Request::MMAPPED_IPR); + Addr regNum = 0; + + switch (vaddr & ~IntAddrPrefixMask) { + case 0x10: + regNum = MISCREG_TSC; + break; + case 0x1B: + regNum = MISCREG_APIC_BASE; + break; + case 0xFE: + regNum = MISCREG_MTRRCAP; + break; + case 0x174: + regNum = MISCREG_SYSENTER_CS; + break; + case 0x175: + regNum = MISCREG_SYSENTER_ESP; + break; + case 0x176: + regNum = MISCREG_SYSENTER_EIP; + break; + case 0x179: + regNum = MISCREG_MCG_CAP; + break; + case 0x17A: + regNum = MISCREG_MCG_STATUS; + break; + case 0x17B: + regNum = MISCREG_MCG_CTL; + break; + case 0x1D9: + regNum = MISCREG_DEBUG_CTL_MSR; + break; + case 0x1DB: + regNum = MISCREG_LAST_BRANCH_FROM_IP; + break; + case 0x1DC: + regNum = MISCREG_LAST_BRANCH_TO_IP; + break; + case 0x1DD: + regNum = MISCREG_LAST_EXCEPTION_FROM_IP; + break; + case 0x1DE: + regNum = MISCREG_LAST_EXCEPTION_TO_IP; + break; + case 0x200: + regNum = MISCREG_MTRR_PHYS_BASE_0; + break; + case 0x201: + regNum = MISCREG_MTRR_PHYS_MASK_0; + break; + case 0x202: + regNum = MISCREG_MTRR_PHYS_BASE_1; + break; + case 0x203: + regNum = MISCREG_MTRR_PHYS_MASK_1; + break; + case 0x204: + regNum = MISCREG_MTRR_PHYS_BASE_2; + break; + case 0x205: + regNum = MISCREG_MTRR_PHYS_MASK_2; + break; + case 0x206: + regNum = MISCREG_MTRR_PHYS_BASE_3; + break; + case 0x207: + regNum = MISCREG_MTRR_PHYS_MASK_3; + break; + case 0x208: + regNum = MISCREG_MTRR_PHYS_BASE_4; + break; + case 0x209: + regNum = MISCREG_MTRR_PHYS_MASK_4; + break; + case 0x20A: + regNum = MISCREG_MTRR_PHYS_BASE_5; + break; + case 0x20B: + regNum = MISCREG_MTRR_PHYS_MASK_5; + break; + case 0x20C: + regNum = MISCREG_MTRR_PHYS_BASE_6; + break; + case 0x20D: + regNum = MISCREG_MTRR_PHYS_MASK_6; + break; + case 0x20E: + regNum = MISCREG_MTRR_PHYS_BASE_7; + break; + case 0x20F: + regNum = MISCREG_MTRR_PHYS_MASK_7; + break; + case 0x250: + regNum = MISCREG_MTRR_FIX_64K_00000; + break; + case 0x258: + regNum = MISCREG_MTRR_FIX_16K_80000; + break; + case 0x259: + regNum = MISCREG_MTRR_FIX_16K_A0000; + break; + case 0x268: + regNum = MISCREG_MTRR_FIX_4K_C0000; + break; + case 0x269: + regNum = MISCREG_MTRR_FIX_4K_C8000; + break; + case 0x26A: + regNum = 
MISCREG_MTRR_FIX_4K_D0000; + break; + case 0x26B: + regNum = MISCREG_MTRR_FIX_4K_D8000; + break; + case 0x26C: + regNum = MISCREG_MTRR_FIX_4K_E0000; + break; + case 0x26D: + regNum = MISCREG_MTRR_FIX_4K_E8000; + break; + case 0x26E: + regNum = MISCREG_MTRR_FIX_4K_F0000; + break; + case 0x26F: + regNum = MISCREG_MTRR_FIX_4K_F8000; + break; + case 0x277: + regNum = MISCREG_PAT; + break; + case 0x2FF: + regNum = MISCREG_DEF_TYPE; + break; + case 0x400: + regNum = MISCREG_MC0_CTL; + break; + case 0x404: + regNum = MISCREG_MC1_CTL; + break; + case 0x408: + regNum = MISCREG_MC2_CTL; + break; + case 0x40C: + regNum = MISCREG_MC3_CTL; + break; + case 0x410: + regNum = MISCREG_MC4_CTL; + break; + case 0x414: + regNum = MISCREG_MC5_CTL; + break; + case 0x418: + regNum = MISCREG_MC6_CTL; + break; + case 0x41C: + regNum = MISCREG_MC7_CTL; + break; + case 0x401: + regNum = MISCREG_MC0_STATUS; + break; + case 0x405: + regNum = MISCREG_MC1_STATUS; + break; + case 0x409: + regNum = MISCREG_MC2_STATUS; + break; + case 0x40D: + regNum = MISCREG_MC3_STATUS; + break; + case 0x411: + regNum = MISCREG_MC4_STATUS; + break; + case 0x415: + regNum = MISCREG_MC5_STATUS; + break; + case 0x419: + regNum = MISCREG_MC6_STATUS; + break; + case 0x41D: + regNum = MISCREG_MC7_STATUS; + break; + case 0x402: + regNum = MISCREG_MC0_ADDR; + break; + case 0x406: + regNum = MISCREG_MC1_ADDR; + break; + case 0x40A: + regNum = MISCREG_MC2_ADDR; + break; + case 0x40E: + regNum = MISCREG_MC3_ADDR; + break; + case 0x412: + regNum = MISCREG_MC4_ADDR; + break; + case 0x416: + regNum = MISCREG_MC5_ADDR; + break; + case 0x41A: + regNum = MISCREG_MC6_ADDR; + break; + case 0x41E: + regNum = MISCREG_MC7_ADDR; + break; + case 0x403: + regNum = MISCREG_MC0_MISC; + break; + case 0x407: + regNum = MISCREG_MC1_MISC; + break; + case 0x40B: + regNum = MISCREG_MC2_MISC; + break; + case 0x40F: + regNum = MISCREG_MC3_MISC; + break; + case 0x413: + regNum = MISCREG_MC4_MISC; + break; + case 0x417: + regNum = MISCREG_MC5_MISC; + break; + case 0x41B: + regNum = MISCREG_MC6_MISC; + break; + case 0x41F: + regNum = MISCREG_MC7_MISC; + break; + case 0xC0000080: + regNum = MISCREG_EFER; + break; + case 0xC0000081: + regNum = MISCREG_STAR; + break; + case 0xC0000082: + regNum = MISCREG_LSTAR; + break; + case 0xC0000083: + regNum = MISCREG_CSTAR; + break; + case 0xC0000084: + regNum = MISCREG_SF_MASK; + break; + case 0xC0000100: + regNum = MISCREG_FS_BASE; + break; + case 0xC0000101: + regNum = MISCREG_GS_BASE; + break; + case 0xC0000102: + regNum = MISCREG_KERNEL_GS_BASE; + break; + case 0xC0000103: + regNum = MISCREG_TSC_AUX; + break; + case 0xC0010000: + regNum = MISCREG_PERF_EVT_SEL0; + break; + case 0xC0010001: + regNum = MISCREG_PERF_EVT_SEL1; + break; + case 0xC0010002: + regNum = MISCREG_PERF_EVT_SEL2; + break; + case 0xC0010003: + regNum = MISCREG_PERF_EVT_SEL3; + break; + case 0xC0010004: + regNum = MISCREG_PERF_EVT_CTR0; + break; + case 0xC0010005: + regNum = MISCREG_PERF_EVT_CTR1; + break; + case 0xC0010006: + regNum = MISCREG_PERF_EVT_CTR2; + break; + case 0xC0010007: + regNum = MISCREG_PERF_EVT_CTR3; + break; + case 0xC0010010: + regNum = MISCREG_SYSCFG; + break; + case 0xC0010016: + regNum = MISCREG_IORR_BASE0; + break; + case 0xC0010017: + regNum = MISCREG_IORR_BASE1; + break; + case 0xC0010018: + regNum = MISCREG_IORR_MASK0; + break; + case 0xC0010019: + regNum = MISCREG_IORR_MASK1; + break; + case 0xC001001A: + regNum = MISCREG_TOP_MEM; + break; + case 0xC001001D: + regNum = MISCREG_TOP_MEM2; + break; + case 0xC0010114: + regNum = 
MISCREG_VM_CR; + break; + case 0xC0010115: + regNum = MISCREG_IGNNE; + break; + case 0xC0010116: + regNum = MISCREG_SMM_CTL; + break; + case 0xC0010117: + regNum = MISCREG_VM_HSAVE_PA; + break; + default: + return std::make_shared<GeneralProtection>(0); + } + //The index is multiplied by the size of a MiscReg so that + //any memory dependence calculations will not see these as + //overlapping. + req->setPaddr(regNum * sizeof(MiscReg)); + return NoFault; + } else if (prefix == IntAddrPrefixIO) { + // TODO If CPL > IOPL or in virtual mode, check the I/O permission + // bitmap in the TSS. + + Addr IOPort = vaddr & ~IntAddrPrefixMask; + // Make sure the address fits in the expected 16 bit IO address + // space. + assert(!(IOPort & ~0xFFFF)); + + if (IOPort == 0xCF8 && req->getSize() == 4) { + req->setFlags(Request::MMAPPED_IPR); + req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg)); + } else if ((IOPort & ~mask(2)) == 0xCFC) { + req->setFlags(Request::UNCACHEABLE); + + Addr configAddress = + tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS); + + if (bits(configAddress, 31, 31)) { + req->setPaddr(PhysAddrPrefixPciConfig | + mbits(configAddress, 30, 2) | + (IOPort & mask(2))); + } else { + req->setPaddr(PhysAddrPrefixIO | IOPort); + } + } else { + req->setFlags(Request::UNCACHEABLE); + req->setPaddr(PhysAddrPrefixIO | IOPort); + } + return NoFault; + } else { + panic("Access to unrecognized internal address space %#x.\n", + prefix); + } + } + + /** + * TLB_lookup will only perform a TLB lookup returning true on a TLB hit + * and false on a TLB miss. + * Many of the checks about different modes have been converted to + * assertions, since these parts of the code are not really used. + * On a hit it will update the LRU stack. + */ + bool + GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats) + { + bool tlb_hit = false; + #ifndef NDEBUG + uint32_t flags = req->getFlags(); + int seg = flags & SegmentFlagMask; + #endif + + assert(seg != SEGMENT_REG_MS); + Addr vaddr = req->getVaddr(); + DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr); + HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); + + if (m5Reg.prot) { + DPRINTF(GPUTLB, "In protected mode.\n"); + // make sure we are in 64-bit mode + assert(m5Reg.mode == LongMode); + + // If paging is enabled, do the translation. + if (m5Reg.paging) { + DPRINTF(GPUTLB, "Paging enabled.\n"); + //update LRU stack on a hit + GpuTlbEntry *entry = lookup(vaddr, true); + + if (entry) + tlb_hit = true; + + if (!update_stats) { + // functional tlb access for memory initialization + // i.e., memory seeding or instr. seeding -> don't update + // TLB and stats + return tlb_hit; + } + + localNumTLBAccesses++; + + if (!entry) { + localNumTLBMisses++; + } else { + localNumTLBHits++; + } + } + } + + return tlb_hit; + } + + Fault + GpuTLB::translate(RequestPtr req, ThreadContext *tc, + Translation *translation, Mode mode, + bool &delayedResponse, bool timing, int &latency) + { + uint32_t flags = req->getFlags(); + int seg = flags & SegmentFlagMask; + bool storeCheck = flags & (StoreCheck << FlagShift); + + // If this is true, we're dealing with a request + // to a non-memory address space. + if (seg == SEGMENT_REG_MS) { + return translateInt(req, tc); + } + + delayedResponse = false; + Addr vaddr = req->getVaddr(); + DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr); + + HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); + + // If protected mode has been enabled... 
+ if (m5Reg.prot) { + DPRINTF(GPUTLB, "In protected mode.\n"); + // If we're not in 64-bit mode, do protection/limit checks + if (m5Reg.mode != LongMode) { + DPRINTF(GPUTLB, "Not in long mode. Checking segment " + "protection.\n"); + + // Check for a null segment selector. + if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR || + seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS) + && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) { + return std::make_shared<GeneralProtection>(0); + } + + bool expandDown = false; + SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg)); + + if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) { + if (!attr.writable && (mode == BaseTLB::Write || + storeCheck)) + return std::make_shared<GeneralProtection>(0); + + if (!attr.readable && mode == BaseTLB::Read) + return std::make_shared<GeneralProtection>(0); + + expandDown = attr.expandDown; + + } + + Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg)); + Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg)); + // This assumes we're not in 64 bit mode. If we were, the + // default address size is 64 bits, overridable to 32. + int size = 32; + bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift)); + SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR); + + if ((csAttr.defaultSize && sizeOverride) || + (!csAttr.defaultSize && !sizeOverride)) { + size = 16; + } + + Addr offset = bits(vaddr - base, size - 1, 0); + Addr endOffset = offset + req->getSize() - 1; + + if (expandDown) { + DPRINTF(GPUTLB, "Checking an expand down segment.\n"); + warn_once("Expand down segments are untested.\n"); + + if (offset <= limit || endOffset <= limit) + return std::make_shared<GeneralProtection>(0); + } else { + if (offset > limit || endOffset > limit) + return std::make_shared<GeneralProtection>(0); + } + } + + // If paging is enabled, do the translation. + if (m5Reg.paging) { + DPRINTF(GPUTLB, "Paging enabled.\n"); + // The vaddr already has the segment base applied. + GpuTlbEntry *entry = lookup(vaddr); + localNumTLBAccesses++; + + if (!entry) { + localNumTLBMisses++; + if (timing) { + latency = missLatency1; + } + + if (FullSystem) { + fatal("GpuTLB doesn't support full-system mode\n"); + } else { + DPRINTF(GPUTLB, "Handling a TLB miss for address %#x " + "at pc %#x.\n", vaddr, tc->instAddr()); + + Process *p = tc->getProcessPtr(); + GpuTlbEntry newEntry; + bool success = p->pTable->lookup(vaddr, newEntry); + + if (!success && mode != BaseTLB::Execute) { + // penalize a "page fault" more + if (timing) { + latency += missLatency2; + } + + if (p->fixupStackFault(vaddr)) + success = p->pTable->lookup(vaddr, newEntry); + } + + if (!success) { + return std::make_shared<PageFault>(vaddr, true, + mode, true, + false); + } else { + newEntry.valid = success; + Addr alignedVaddr = p->pTable->pageAlign(vaddr); + + DPRINTF(GPUTLB, "Mapping %#x to %#x\n", + alignedVaddr, newEntry.pageStart()); + + entry = insert(alignedVaddr, newEntry); + } + + DPRINTF(GPUTLB, "Miss was serviced.\n"); + } + } else { + localNumTLBHits++; + + if (timing) { + latency = hitLatency; + } + } + + // Do paging protection checks. + bool inUser = (m5Reg.cpl == 3 && + !(flags & (CPL0FlagBit << FlagShift))); + + CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); + bool badWrite = (!entry->writable && (inUser || cr0.wp)); + + if ((inUser && !entry->user) || (mode == BaseTLB::Write && + badWrite)) { + // The page must have been present to get into the TLB in + // the first place. 
We'll assume the reserved bits are + // fine even though we're not checking them. + return std::make_shared<PageFault>(vaddr, true, mode, + inUser, false); + } + + if (storeCheck && badWrite) { + // This would fault if this were a write, so return a page + // fault that reflects that happening. + return std::make_shared<PageFault>(vaddr, true, + BaseTLB::Write, + inUser, false); + } + + + DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection " + "checks.\n", entry->paddr); + + int page_size = entry->size(); + Addr paddr = entry->paddr | (vaddr & (page_size - 1)); + DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); + req->setPaddr(paddr); + + if (entry->uncacheable) + req->setFlags(Request::UNCACHEABLE); + } else { + //Use the address which already has segmentation applied. + DPRINTF(GPUTLB, "Paging disabled.\n"); + DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr); + req->setPaddr(vaddr); + } + } else { + // Real mode + DPRINTF(GPUTLB, "In real mode.\n"); + DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr); + req->setPaddr(vaddr); + } + + // Check for an access to the local APIC + if (FullSystem) { + LocalApicBase localApicBase = + tc->readMiscRegNoEffect(MISCREG_APIC_BASE); + + Addr baseAddr = localApicBase.base * PageBytes; + Addr paddr = req->getPaddr(); + + if (baseAddr <= paddr && baseAddr + PageBytes > paddr) { + // Force the access to be uncacheable. + req->setFlags(Request::UNCACHEABLE); + req->setPaddr(x86LocalAPICAddress(tc->contextId(), + paddr - baseAddr)); + } + } + + return NoFault; + }; + + Fault + GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, + int &latency) + { + bool delayedResponse; + + return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false, + latency); + } + + void + GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc, + Translation *translation, Mode mode, int &latency) + { + bool delayedResponse; + assert(translation); + + Fault fault = GpuTLB::translate(req, tc, translation, mode, + delayedResponse, true, latency); + + if (!delayedResponse) + translation->finish(fault, req, tc, mode); + } + + Walker* + GpuTLB::getWalker() + { + return walker; + } + + + void + GpuTLB::serialize(CheckpointOut &cp) const + { + } + + void + GpuTLB::unserialize(CheckpointIn &cp) + { + } + + void + GpuTLB::regStats() + { + localNumTLBAccesses + .name(name() + ".local_TLB_accesses") + .desc("Number of TLB accesses") + ; + + localNumTLBHits + .name(name() + ".local_TLB_hits") + .desc("Number of TLB hits") + ; + + localNumTLBMisses + .name(name() + ".local_TLB_misses") + .desc("Number of TLB misses") + ; + + localTLBMissRate + .name(name() + ".local_TLB_miss_rate") + .desc("TLB miss rate") + ; + + accessCycles + .name(name() + ".access_cycles") + .desc("Cycles spent accessing this TLB level") + ; + + pageTableCycles + .name(name() + ".page_table_cycles") + .desc("Cycles spent accessing the page table") + ; + + localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses; + + numUniquePages + .name(name() + ".unique_pages") + .desc("Number of unique pages touched") + ; + + localCycles + .name(name() + ".local_cycles") + .desc("Number of cycles spent in queue for all incoming reqs") + ; + + localLatency + .name(name() + ".local_latency") + .desc("Avg. 
latency over incoming coalesced reqs") + ; + + localLatency = localCycles / localNumTLBAccesses; + + globalNumTLBAccesses + .name(name() + ".global_TLB_accesses") + .desc("Number of TLB accesses") + ; + + globalNumTLBHits + .name(name() + ".global_TLB_hits") + .desc("Number of TLB hits") + ; + + globalNumTLBMisses + .name(name() + ".global_TLB_misses") + .desc("Number of TLB misses") + ; + + globalTLBMissRate + .name(name() + ".global_TLB_miss_rate") + .desc("TLB miss rate") + ; + + globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses; + + avgReuseDistance + .name(name() + ".avg_reuse_distance") + .desc("avg. reuse distance over all pages (in ticks)") + ; + + } + + /** + * Do the TLB lookup for this coalesced request and schedule + * another event <TLB access latency> cycles later. + */ + + void + GpuTLB::issueTLBLookup(PacketPtr pkt) + { + assert(pkt); + assert(pkt->senderState); + + Addr virt_page_addr = roundDown(pkt->req->getVaddr(), + TheISA::PageBytes); + + TranslationState *sender_state = + safe_cast<TranslationState*>(pkt->senderState); + + bool update_stats = !sender_state->prefetch; + ThreadContext * tmp_tc = sender_state->tc; + + DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n", + virt_page_addr); + + int req_cnt = sender_state->reqCnt.back(); + + if (update_stats) { + accessCycles -= (curTick() * req_cnt); + localCycles -= curTick(); + updatePageFootprint(virt_page_addr); + globalNumTLBAccesses += req_cnt; + } + + tlbOutcome lookup_outcome = TLB_MISS; + RequestPtr tmp_req = pkt->req; + + // Access the TLB and figure out if it's a hit or a miss. + bool success = tlbLookup(tmp_req, tmp_tc, update_stats); + + if (success) { + lookup_outcome = TLB_HIT; + // Put the entry in SenderState + GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false); + assert(entry); + + sender_state->tlbEntry = + new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid); + + if (update_stats) { + // the reqCnt has an entry per level, so its size tells us + // which level we are in + sender_state->hitLevel = sender_state->reqCnt.size(); + globalNumTLBHits += req_cnt; + } + } else { + if (update_stats) + globalNumTLBMisses += req_cnt; + } + + /* + * We now know the TLB lookup outcome (if it's a hit or a miss), as well + * as the TLB access latency. + * + * We create and schedule a new TLBEvent which will help us take the + * appropriate actions (e.g., update TLB on a hit, send request to lower + * level TLB on a miss, or start a page walk if this was the last-level + * TLB) + */ + TLBEvent *tlb_event = + new TLBEvent(this, virt_page_addr, lookup_outcome, pkt); + + if (translationReturnEvent.count(virt_page_addr)) { + panic("Virtual Page Address %#x already has a return event\n", + virt_page_addr); + } + + translationReturnEvent[virt_page_addr] = tlb_event; + assert(tlb_event); + + DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n", + curTick() + this->ticks(hitLatency)); + + schedule(tlb_event, curTick() + this->ticks(hitLatency)); + } + + GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome, + PacketPtr _pkt) + : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr), + outcome(tlb_outcome), pkt(_pkt) + { + } + + /** + * Do Paging protection checks. If we encounter a page fault, then + * an assertion is fired. 
+ */ + void + GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, + GpuTlbEntry * tlb_entry, Mode mode) + { + HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); + uint32_t flags = pkt->req->getFlags(); + bool storeCheck = flags & (StoreCheck << FlagShift); + + // Do paging protection checks. + bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift))); + CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); + + bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp)); + + if ((inUser && !tlb_entry->user) || + (mode == BaseTLB::Write && badWrite)) { + // The page must have been present to get into the TLB in + // the first place. We'll assume the reserved bits are + // fine even though we're not checking them. + assert(false); + } + + if (storeCheck && badWrite) { + // This would fault if this were a write, so return a page + // fault that reflects that happening. + assert(false); + } + } + + /** + * handleTranslationReturn is called on a TLB hit, + * when a TLB miss returns or when a page fault returns. + * The latter calls handelHit with TLB miss as tlbOutcome. + */ + void + GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome, + PacketPtr pkt) + { + + assert(pkt); + Addr vaddr = pkt->req->getVaddr(); + + TranslationState *sender_state = + safe_cast<TranslationState*>(pkt->senderState); + + ThreadContext *tc = sender_state->tc; + Mode mode = sender_state->tlbMode; + + GpuTlbEntry *local_entry, *new_entry; + + if (tlb_outcome == TLB_HIT) { + DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr); + local_entry = sender_state->tlbEntry; + } else { + DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n", + vaddr); + + // We are returning either from a page walk or from a hit at a lower + // TLB level. The senderState should be "carrying" a pointer to the + // correct TLBEntry. + new_entry = sender_state->tlbEntry; + assert(new_entry); + local_entry = new_entry; + + if (allocationPolicy) { + DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n", + virt_page_addr); + + local_entry = insert(virt_page_addr, *new_entry); + } + + assert(local_entry); + } + + /** + * At this point the packet carries an up-to-date tlbEntry pointer + * in its senderState. + * Next step is to do the paging protection checks. + */ + DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks " + "while paddr was %#x.\n", local_entry->vaddr, + local_entry->paddr); + + pagingProtectionChecks(tc, pkt, local_entry, mode); + int page_size = local_entry->size(); + Addr paddr = local_entry->paddr | (vaddr & (page_size - 1)); + DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); + + // Since this packet will be sent through the cpu side slave port, + // it must be converted to a response pkt if it is not one already + if (pkt->isRequest()) { + pkt->makeTimingResponse(); + } + + pkt->req->setPaddr(paddr); + + if (local_entry->uncacheable) { + pkt->req->setFlags(Request::UNCACHEABLE); + } + + //send packet back to coalescer + cpuSidePort[0]->sendTimingResp(pkt); + //schedule cleanup event + cleanupQueue.push(virt_page_addr); + + // schedule this only once per cycle. 
+ // The check is required because we might have multiple translations + // returning the same cycle + // this is a maximum priority event and must be on the same cycle + // as the cleanup event in TLBCoalescer to avoid a race with + // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry + if (!cleanupEvent.scheduled()) + schedule(cleanupEvent, curTick()); + } + + /** + * Here we take the appropriate actions based on the result of the + * TLB lookup. + */ + void + GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome, + PacketPtr pkt) + { + DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr); + + assert(translationReturnEvent[virtPageAddr]); + assert(pkt); + + TranslationState *tmp_sender_state = + safe_cast<TranslationState*>(pkt->senderState); + + int req_cnt = tmp_sender_state->reqCnt.back(); + bool update_stats = !tmp_sender_state->prefetch; + + + if (outcome == TLB_HIT) { + handleTranslationReturn(virtPageAddr, TLB_HIT, pkt); + + if (update_stats) { + accessCycles += (req_cnt * curTick()); + localCycles += curTick(); + } + + } else if (outcome == TLB_MISS) { + + DPRINTF(GPUTLB, "This is a TLB miss\n"); + if (update_stats) { + accessCycles += (req_cnt*curTick()); + localCycles += curTick(); + } + + if (hasMemSidePort) { + // the one cyle added here represent the delay from when we get + // the reply back till when we propagate it to the coalescer + // above. + if (update_stats) { + accessCycles += (req_cnt * 1); + localCycles += 1; + } + + /** + * There is a TLB below. Send the coalesced request. + * We actually send the very first packet of all the + * pending packets for this virtual page address. + */ + if (!memSidePort[0]->sendTimingReq(pkt)) { + DPRINTF(GPUTLB, "Failed sending translation request to " + "lower level TLB for addr %#x\n", virtPageAddr); + + memSidePort[0]->retries.push_back(pkt); + } else { + DPRINTF(GPUTLB, "Sent translation request to lower level " + "TLB for addr %#x\n", virtPageAddr); + } + } else { + //this is the last level TLB. Start a page walk + DPRINTF(GPUTLB, "Last level TLB - start a page walk for " + "addr %#x\n", virtPageAddr); + + if (update_stats) + pageTableCycles -= (req_cnt*curTick()); + + TLBEvent *tlb_event = translationReturnEvent[virtPageAddr]; + assert(tlb_event); + tlb_event->updateOutcome(PAGE_WALK); + schedule(tlb_event, curTick() + ticks(missLatency2)); + } + } else if (outcome == PAGE_WALK) { + if (update_stats) + pageTableCycles += (req_cnt*curTick()); + + // Need to access the page table and update the TLB + DPRINTF(GPUTLB, "Doing a page walk for address %#x\n", + virtPageAddr); + + TranslationState *sender_state = + safe_cast<TranslationState*>(pkt->senderState); + + Process *p = sender_state->tc->getProcessPtr(); + TlbEntry newEntry; + Addr vaddr = pkt->req->getVaddr(); + #ifndef NDEBUG + Addr alignedVaddr = p->pTable->pageAlign(vaddr); + assert(alignedVaddr == virtPageAddr); + #endif + bool success; + success = p->pTable->lookup(vaddr, newEntry); + if (!success && sender_state->tlbMode != BaseTLB::Execute) { + if (p->fixupStackFault(vaddr)) { + success = p->pTable->lookup(vaddr, newEntry); + } + } + + DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, + newEntry.pageStart()); + + sender_state->tlbEntry = + new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success); + + handleTranslationReturn(virtPageAddr, TLB_MISS, pkt); + } else if (outcome == MISS_RETURN) { + /** we add an extra cycle in the return path of the translation + * requests in between the various TLB levels. 
+ */ + handleTranslationReturn(virtPageAddr, TLB_MISS, pkt); + } else { + assert(false); + } + } + + void + GpuTLB::TLBEvent::process() + { + tlb->translationReturn(virtPageAddr, outcome, pkt); + } + + const char* + GpuTLB::TLBEvent::description() const + { + return "trigger translationDoneEvent"; + } + + void + GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome) + { + outcome = _outcome; + } + + Addr + GpuTLB::TLBEvent::getTLBEventVaddr() + { + return virtPageAddr; + } + + /* + * recvTiming receives a coalesced timing request from a TLBCoalescer + * and it calls issueTLBLookup() + * It only rejects the packet if we have exceeded the max + * outstanding number of requests for the TLB + */ + bool + GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt) + { + if (tlb->outstandingReqs < tlb->maxCoalescedReqs) { + tlb->issueTLBLookup(pkt); + // update number of outstanding translation requests + tlb->outstandingReqs++; + return true; + } else { + DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n", + tlb->outstandingReqs); + return false; + } + } + + /** + * handleFuncTranslationReturn is called on a TLB hit, + * when a TLB miss returns or when a page fault returns. + * It updates LRU, inserts the TLB entry on a miss + * depending on the allocation policy and does the required + * protection checks. It does NOT create a new packet to + * update the packet's addr; this is done in hsail-gpu code. + */ + void + GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome) + { + TranslationState *sender_state = + safe_cast<TranslationState*>(pkt->senderState); + + ThreadContext *tc = sender_state->tc; + Mode mode = sender_state->tlbMode; + Addr vaddr = pkt->req->getVaddr(); + + GpuTlbEntry *local_entry, *new_entry; + + if (tlb_outcome == TLB_HIT) { + DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr " + "%#x\n", vaddr); + + local_entry = sender_state->tlbEntry; + } else { + DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr " + "%#x\n", vaddr); + + // We are returning either from a page walk or from a hit at a lower + // TLB level. The senderState should be "carrying" a pointer to the + // correct TLBEntry. + new_entry = sender_state->tlbEntry; + assert(new_entry); + local_entry = new_entry; + + if (allocationPolicy) { + Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes); + + DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n", + virt_page_addr); + + local_entry = insert(virt_page_addr, *new_entry); + } + + assert(local_entry); + } + + DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks " + "while paddr was %#x.\n", local_entry->vaddr, + local_entry->paddr); + + // Do paging checks if it's a normal functional access. If it's for a + // prefetch, then sometimes you can try to prefetch something that won't + // pass protection. We don't actually want to fault becuase there is no + // demand access to deem this a violation. Just put it in the TLB and + // it will fault if indeed a future demand access touches it in + // violation. + if (!sender_state->prefetch && sender_state->tlbEntry->valid) + pagingProtectionChecks(tc, pkt, local_entry, mode); + + int page_size = local_entry->size(); + Addr paddr = local_entry->paddr | (vaddr & (page_size - 1)); + DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); + + pkt->req->setPaddr(paddr); + + if (local_entry->uncacheable) + pkt->req->setFlags(Request::UNCACHEABLE); + } + + // This is used for atomic translations. Need to + // make it all happen during the same cycle. 
+ void + GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt) + { + TranslationState *sender_state = + safe_cast<TranslationState*>(pkt->senderState); + + ThreadContext *tc = sender_state->tc; + bool update_stats = !sender_state->prefetch; + + Addr virt_page_addr = roundDown(pkt->req->getVaddr(), + TheISA::PageBytes); + + if (update_stats) + tlb->updatePageFootprint(virt_page_addr); + + // do the TLB lookup without updating the stats + bool success = tlb->tlbLookup(pkt->req, tc, update_stats); + tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS; + + // functional mode means no coalescing + // global metrics are the same as the local metrics + if (update_stats) { + tlb->globalNumTLBAccesses++; + + if (success) { + sender_state->hitLevel = sender_state->reqCnt.size(); + tlb->globalNumTLBHits++; + } + } + + if (!success) { + if (update_stats) + tlb->globalNumTLBMisses++; + if (tlb->hasMemSidePort) { + // there is a TLB below -> propagate down the TLB hierarchy + tlb->memSidePort[0]->sendFunctional(pkt); + // If no valid translation from a prefetch, then just return + if (sender_state->prefetch && !pkt->req->hasPaddr()) + return; + } else { + // Need to access the page table and update the TLB + DPRINTF(GPUTLB, "Doing a page walk for address %#x\n", + virt_page_addr); + + Process *p = tc->getProcessPtr(); + TlbEntry newEntry; + + Addr vaddr = pkt->req->getVaddr(); + #ifndef NDEBUG + Addr alignedVaddr = p->pTable->pageAlign(vaddr); + assert(alignedVaddr == virt_page_addr); + #endif + + bool success = p->pTable->lookup(vaddr, newEntry); + if (!success && sender_state->tlbMode != BaseTLB::Execute) { + if (p->fixupStackFault(vaddr)) + success = p->pTable->lookup(vaddr, newEntry); + } + + if (!sender_state->prefetch) { + // no PageFaults are permitted after + // the second page table lookup + assert(success); + + DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, + newEntry.pageStart()); + + sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr, + newEntry.paddr, + success); + } else { + // If this was a prefetch, then do the normal thing if it + // was a successful translation. Otherwise, send an empty + // TLB entry back so that it can be figured out as empty and + // handled accordingly. + if (success) { + DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, + newEntry.pageStart()); + + sender_state->tlbEntry = new GpuTlbEntry(0, + newEntry.vaddr, + newEntry.paddr, + success); + } else { + DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", + alignedVaddr); + + sender_state->tlbEntry = new GpuTlbEntry(); + + return; + } + } + } + } else { + DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n", + tlb->lookup(pkt->req->getVaddr())); + + GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(), + update_stats); + + assert(entry); + + sender_state->tlbEntry = + new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid); + } + // This is the function that would populate pkt->req with the paddr of + // the translation. But if no translation happens (i.e Prefetch fails) + // then the early returns in the above code wiill keep this function + // from executing. + tlb->handleFuncTranslationReturn(pkt, tlb_outcome); + } + + void + GpuTLB::CpuSidePort::recvReqRetry() + { + // The CPUSidePort never sends anything but replies. No retries + // expected. + assert(false); + } + + AddrRangeList + GpuTLB::CpuSidePort::getAddrRanges() const + { + // currently not checked by the master + AddrRangeList ranges; + + return ranges; + } + + /** + * MemSidePort receives the packet back. 
+ * We need to call the handleTranslationReturn + * and propagate up the hierarchy. + */ + bool + GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt) + { + Addr virt_page_addr = roundDown(pkt->req->getVaddr(), + TheISA::PageBytes); + + DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n", + virt_page_addr); + + TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr]; + assert(tlb_event); + assert(virt_page_addr == tlb_event->getTLBEventVaddr()); + + tlb_event->updateOutcome(MISS_RETURN); + tlb->schedule(tlb_event, curTick()+tlb->ticks(1)); + + return true; + } + + void + GpuTLB::MemSidePort::recvReqRetry() + { + // No retries should reach the TLB. The retries + // should only reach the TLBCoalescer. + assert(false); + } + + void + GpuTLB::cleanup() + { + while (!cleanupQueue.empty()) { + Addr cleanup_addr = cleanupQueue.front(); + cleanupQueue.pop(); + + // delete TLBEvent + TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr]; + delete old_tlb_event; + translationReturnEvent.erase(cleanup_addr); + + // update number of outstanding requests + outstandingReqs--; + } + + /** the higher level coalescer should retry if it has + * any pending requests. + */ + for (int i = 0; i < cpuSidePort.size(); ++i) { + cpuSidePort[i]->sendRetryReq(); + } + } + + void + GpuTLB::updatePageFootprint(Addr virt_page_addr) + { + + std::pair<AccessPatternTable::iterator, bool> ret; + + AccessInfo tmp_access_info; + tmp_access_info.lastTimeAccessed = 0; + tmp_access_info.accessesPerPage = 0; + tmp_access_info.totalReuseDistance = 0; + tmp_access_info.sumDistance = 0; + tmp_access_info.meanDistance = 0; + + ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr, + tmp_access_info)); + + bool first_page_access = ret.second; + + if (first_page_access) { + numUniquePages++; + } else { + int accessed_before; + accessed_before = curTick() - ret.first->second.lastTimeAccessed; + ret.first->second.totalReuseDistance += accessed_before; + } + + ret.first->second.accessesPerPage++; + ret.first->second.lastTimeAccessed = curTick(); + + if (accessDistance) { + ret.first->second.localTLBAccesses + .push_back(localNumTLBAccesses.value()); + } + } + + void + GpuTLB::exitCallback() + { + std::ostream *page_stat_file = nullptr; + + if (accessDistance) { + + // print per page statistics to a separate file (.csv format) + // simout is the gem5 output directory (default is m5out or the one + // specified with -d + page_stat_file = simout.create(name().c_str()); + + // print header + *page_stat_file << "page,max_access_distance,mean_access_distance, " + << "stddev_distance" << std::endl; + } + + // update avg. 
reuse distance footprint + AccessPatternTable::iterator iter, iter_begin, iter_end; + unsigned int sum_avg_reuse_distance_per_page = 0; + + // iterate through all pages seen by this TLB + for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) { + sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance / + iter->second.accessesPerPage; + + if (accessDistance) { + unsigned int tmp = iter->second.localTLBAccesses[0]; + unsigned int prev = tmp; + + for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) { + if (i) { + tmp = prev + 1; + } + + prev = iter->second.localTLBAccesses[i]; + // update the localTLBAccesses value + // with the actual differece + iter->second.localTLBAccesses[i] -= tmp; + // compute the sum of AccessDistance per page + // used later for mean + iter->second.sumDistance += + iter->second.localTLBAccesses[i]; + } + + iter->second.meanDistance = + iter->second.sumDistance / iter->second.accessesPerPage; + + // compute std_dev and max (we need a second round because we + // need to know the mean value + unsigned int max_distance = 0; + unsigned int stddev_distance = 0; + + for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) { + unsigned int tmp_access_distance = + iter->second.localTLBAccesses[i]; + + if (tmp_access_distance > max_distance) { + max_distance = tmp_access_distance; + } + + unsigned int diff = + tmp_access_distance - iter->second.meanDistance; + stddev_distance += pow(diff, 2); + + } + + stddev_distance = + sqrt(stddev_distance/iter->second.accessesPerPage); + + if (page_stat_file) { + *page_stat_file << std::hex << iter->first << ","; + *page_stat_file << std::dec << max_distance << ","; + *page_stat_file << std::dec << iter->second.meanDistance + << ","; + *page_stat_file << std::dec << stddev_distance; + *page_stat_file << std::endl; + } + + // erase the localTLBAccesses array + iter->second.localTLBAccesses.clear(); + } + } + + if (!TLBFootprint.empty()) { + avgReuseDistance = + sum_avg_reuse_distance_per_page / TLBFootprint.size(); + } + + //clear the TLBFootprint map + TLBFootprint.clear(); + } +} // namespace X86ISA + +X86ISA::GpuTLB* +X86GPUTLBParams::create() +{ + return new X86ISA::GpuTLB(this); +} + |