path: root/src/gpu-compute/gpu_tlb.cc
author    Tony Gutierrez <anthony.gutierrez@amd.com>  2016-01-19 14:28:22 -0500
committer Tony Gutierrez <anthony.gutierrez@amd.com>  2016-01-19 14:28:22 -0500
commit    1a7d3f9fcb76a68540dd948f91413533a383bfde (patch)
tree      867510a147cd095f19499d26b7c02d27de4cae9d /src/gpu-compute/gpu_tlb.cc
parent    28e353e0403ea379d244a418e8dc8ee0b48187cf (diff)
download  gem5-1a7d3f9fcb76a68540dd948f91413533a383bfde.tar.xz
gpu-compute: AMD's baseline GPU model
Diffstat (limited to 'src/gpu-compute/gpu_tlb.cc')
-rw-r--r--  src/gpu-compute/gpu_tlb.cc | 1801
1 file changed, 1801 insertions, 0 deletions
diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc
new file mode 100644
index 000000000..de005fd04
--- /dev/null
+++ b/src/gpu-compute/gpu_tlb.cc
@@ -0,0 +1,1801 @@
+/*
+ * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Lisa Hsu
+ */
+
+#include "gpu-compute/gpu_tlb.hh"
+
+#include <cmath>
+#include <cstring>
+
+#include "arch/x86/faults.hh"
+#include "arch/x86/insts/microldstop.hh"
+#include "arch/x86/pagetable.hh"
+#include "arch/x86/pagetable_walker.hh"
+#include "arch/x86/regs/misc.hh"
+#include "arch/x86/x86_traits.hh"
+#include "base/bitfield.hh"
+#include "base/output.hh"
+#include "base/trace.hh"
+#include "cpu/base.hh"
+#include "cpu/thread_context.hh"
+#include "debug/GPUPrefetch.hh"
+#include "debug/GPUTLB.hh"
+#include "mem/packet_access.hh"
+#include "mem/page_table.hh"
+#include "mem/request.hh"
+#include "sim/process.hh"
+
+namespace X86ISA
+{
+
+ GpuTLB::GpuTLB(const Params *p)
+ : MemObject(p), configAddress(0), size(p->size),
+ cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
+ {
+ assoc = p->assoc;
+ assert(assoc <= size);
+ numSets = size/assoc;
+ allocationPolicy = p->allocationPolicy;
+ hasMemSidePort = false;
+ accessDistance = p->accessDistance;
+ clock = p->clk_domain->clockPeriod();
+
+ tlb = new GpuTlbEntry[size];
+ std::memset(tlb, 0, sizeof(GpuTlbEntry) * size);
+
+ freeList.resize(numSets);
+ entryList.resize(numSets);
+
+ for (int set = 0; set < numSets; ++set) {
+ for (int way = 0; way < assoc; ++way) {
+ int x = set*assoc + way;
+ freeList[set].push_back(&tlb[x]);
+ }
+ }
+
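+ // a single set (size == assoc) means the TLB is fully associative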
+ FA = (size == assoc);
+
+ /**
+ * @warning: the set-associative version assumes you have a
+ * fixed page size of 4KB.
+ * If the page size is greater than 4KB (as defined in the
+ * TheISA::PageBytes), then there are various issues w/ the current
+ * implementation (you'd have the same 8KB page being replicated in
+ * different sets etc)
+ */
+ setMask = numSets - 1;
+
+ #if 0
+ // GpuTLB doesn't yet support full system
+ walker = p->walker;
+ walker->setTLB(this);
+ #endif
+
+ maxCoalescedReqs = p->maxOutstandingReqs;
+
+ // Do not allow maxCoalescedReqs to be more than the TLB associativity
+ if (maxCoalescedReqs > assoc) {
+ maxCoalescedReqs = assoc;
+ cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
+ }
+
+ outstandingReqs = 0;
+ hitLatency = p->hitLatency;
+ missLatency1 = p->missLatency1;
+ missLatency2 = p->missLatency2;
+
+ // create the slave ports based on the number of connected ports
+ for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
+ cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
+ name(), i), this, i));
+ }
+
+ // create the master ports based on the number of connected ports
+ for (size_t i = 0; i < p->port_master_connection_count; ++i) {
+ memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
+ name(), i), this, i));
+ }
+ }
+
+ // fixme: this is never called?
+ GpuTLB::~GpuTLB()
+ {
+ // make sure all the hash-maps are empty
+ assert(translationReturnEvent.empty());
+
+ // delete the TLB
+ delete[] tlb;
+ }
+
+ BaseSlavePort&
+ GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
+ {
+ if (if_name == "slave") {
+ if (idx >= static_cast<PortID>(cpuSidePort.size())) {
+ panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
+ }
+
+ return *cpuSidePort[idx];
+ } else {
+ panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
+ }
+ }
+
+ BaseMasterPort&
+ GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
+ {
+ if (if_name == "master") {
+ if (idx >= static_cast<PortID>(memSidePort.size())) {
+ panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
+ }
+
+ hasMemSidePort = true;
+
+ return *memSidePort[idx];
+ } else {
+ panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
+ }
+ }
+
+ GpuTlbEntry*
+ GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
+ {
+ GpuTlbEntry *newEntry = nullptr;
+
+ /**
+ * vpn holds the virtual page address
+ * The least significant bits are simply masked
+ */
+ int set = (vpn >> TheISA::PageShift) & setMask;
+
+ if (!freeList[set].empty()) {
+ newEntry = freeList[set].front();
+ freeList[set].pop_front();
+ } else {
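+ // no free entry in this set: evict the LRU entry, i.e., the back of entryList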
+ newEntry = entryList[set].back();
+ entryList[set].pop_back();
+ }
+
+ *newEntry = entry;
+ newEntry->vaddr = vpn;
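+ // place the entry at the front of entryList, i.e., the MRU position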
+ entryList[set].push_front(newEntry);
+
+ return newEntry;
+ }
+
+ GpuTLB::EntryList::iterator
+ GpuTLB::lookupIt(Addr va, bool update_lru)
+ {
+ int set = (va >> TheISA::PageShift) & setMask;
+
+ if (FA) {
+ assert(!set);
+ }
+
+ auto entry = entryList[set].begin();
+ for (; entry != entryList[set].end(); ++entry) {
+ int page_size = (*entry)->size();
+
+ if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
+ DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
+ "with size %#x.\n", va, (*entry)->vaddr, page_size);
+
+ if (update_lru) {
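+ // move the matched entry to the front of entryList (MRU position)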
+ entryList[set].push_front(*entry);
+ entryList[set].erase(entry);
+ entry = entryList[set].begin();
+ }
+
+ break;
+ }
+ }
+
+ return entry;
+ }
+
+ GpuTlbEntry*
+ GpuTLB::lookup(Addr va, bool update_lru)
+ {
+ int set = (va >> TheISA::PageShift) & setMask;
+
+ auto entry = lookupIt(va, update_lru);
+
+ if (entry == entryList[set].end())
+ return nullptr;
+ else
+ return *entry;
+ }
+
+ void
+ GpuTLB::invalidateAll()
+ {
+ DPRINTF(GPUTLB, "Invalidating all entries.\n");
+
+ for (int i = 0; i < numSets; ++i) {
+ while (!entryList[i].empty()) {
+ GpuTlbEntry *entry = entryList[i].front();
+ entryList[i].pop_front();
+ freeList[i].push_back(entry);
+ }
+ }
+ }
+
+ void
+ GpuTLB::setConfigAddress(uint32_t addr)
+ {
+ configAddress = addr;
+ }
+
+ void
+ GpuTLB::invalidateNonGlobal()
+ {
+ DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
+
+ for (int i = 0; i < numSets; ++i) {
+ for (auto entryIt = entryList[i].begin();
+ entryIt != entryList[i].end();) {
+ if (!(*entryIt)->global) {
+ freeList[i].push_back(*entryIt);
+ entryList[i].erase(entryIt++);
+ } else {
+ ++entryIt;
+ }
+ }
+ }
+ }
+
+ void
+ GpuTLB::demapPage(Addr va, uint64_t asn)
+ {
+
+ int set = (va >> TheISA::PageShift) & setMask;
+ auto entry = lookupIt(va, false);
+
+ if (entry != entryList[set].end()) {
+ freeList[set].push_back(*entry);
+ entryList[set].erase(entry);
+ }
+ }
+
+ Fault
+ GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
+ {
+ DPRINTF(GPUTLB, "Addresses references internal memory.\n");
+ Addr vaddr = req->getVaddr();
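+ // the address prefix selects the internal address space (CPUID, MSR, or I/O)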
+ Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
+
+ if (prefix == IntAddrPrefixCPUID) {
+ panic("CPUID memory space not yet implemented!\n");
+ } else if (prefix == IntAddrPrefixMSR) {
+ vaddr = vaddr >> 3;
+ req->setFlags(Request::MMAPPED_IPR);
+ Addr regNum = 0;
+
+ switch (vaddr & ~IntAddrPrefixMask) {
+ case 0x10:
+ regNum = MISCREG_TSC;
+ break;
+ case 0x1B:
+ regNum = MISCREG_APIC_BASE;
+ break;
+ case 0xFE:
+ regNum = MISCREG_MTRRCAP;
+ break;
+ case 0x174:
+ regNum = MISCREG_SYSENTER_CS;
+ break;
+ case 0x175:
+ regNum = MISCREG_SYSENTER_ESP;
+ break;
+ case 0x176:
+ regNum = MISCREG_SYSENTER_EIP;
+ break;
+ case 0x179:
+ regNum = MISCREG_MCG_CAP;
+ break;
+ case 0x17A:
+ regNum = MISCREG_MCG_STATUS;
+ break;
+ case 0x17B:
+ regNum = MISCREG_MCG_CTL;
+ break;
+ case 0x1D9:
+ regNum = MISCREG_DEBUG_CTL_MSR;
+ break;
+ case 0x1DB:
+ regNum = MISCREG_LAST_BRANCH_FROM_IP;
+ break;
+ case 0x1DC:
+ regNum = MISCREG_LAST_BRANCH_TO_IP;
+ break;
+ case 0x1DD:
+ regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
+ break;
+ case 0x1DE:
+ regNum = MISCREG_LAST_EXCEPTION_TO_IP;
+ break;
+ case 0x200:
+ regNum = MISCREG_MTRR_PHYS_BASE_0;
+ break;
+ case 0x201:
+ regNum = MISCREG_MTRR_PHYS_MASK_0;
+ break;
+ case 0x202:
+ regNum = MISCREG_MTRR_PHYS_BASE_1;
+ break;
+ case 0x203:
+ regNum = MISCREG_MTRR_PHYS_MASK_1;
+ break;
+ case 0x204:
+ regNum = MISCREG_MTRR_PHYS_BASE_2;
+ break;
+ case 0x205:
+ regNum = MISCREG_MTRR_PHYS_MASK_2;
+ break;
+ case 0x206:
+ regNum = MISCREG_MTRR_PHYS_BASE_3;
+ break;
+ case 0x207:
+ regNum = MISCREG_MTRR_PHYS_MASK_3;
+ break;
+ case 0x208:
+ regNum = MISCREG_MTRR_PHYS_BASE_4;
+ break;
+ case 0x209:
+ regNum = MISCREG_MTRR_PHYS_MASK_4;
+ break;
+ case 0x20A:
+ regNum = MISCREG_MTRR_PHYS_BASE_5;
+ break;
+ case 0x20B:
+ regNum = MISCREG_MTRR_PHYS_MASK_5;
+ break;
+ case 0x20C:
+ regNum = MISCREG_MTRR_PHYS_BASE_6;
+ break;
+ case 0x20D:
+ regNum = MISCREG_MTRR_PHYS_MASK_6;
+ break;
+ case 0x20E:
+ regNum = MISCREG_MTRR_PHYS_BASE_7;
+ break;
+ case 0x20F:
+ regNum = MISCREG_MTRR_PHYS_MASK_7;
+ break;
+ case 0x250:
+ regNum = MISCREG_MTRR_FIX_64K_00000;
+ break;
+ case 0x258:
+ regNum = MISCREG_MTRR_FIX_16K_80000;
+ break;
+ case 0x259:
+ regNum = MISCREG_MTRR_FIX_16K_A0000;
+ break;
+ case 0x268:
+ regNum = MISCREG_MTRR_FIX_4K_C0000;
+ break;
+ case 0x269:
+ regNum = MISCREG_MTRR_FIX_4K_C8000;
+ break;
+ case 0x26A:
+ regNum = MISCREG_MTRR_FIX_4K_D0000;
+ break;
+ case 0x26B:
+ regNum = MISCREG_MTRR_FIX_4K_D8000;
+ break;
+ case 0x26C:
+ regNum = MISCREG_MTRR_FIX_4K_E0000;
+ break;
+ case 0x26D:
+ regNum = MISCREG_MTRR_FIX_4K_E8000;
+ break;
+ case 0x26E:
+ regNum = MISCREG_MTRR_FIX_4K_F0000;
+ break;
+ case 0x26F:
+ regNum = MISCREG_MTRR_FIX_4K_F8000;
+ break;
+ case 0x277:
+ regNum = MISCREG_PAT;
+ break;
+ case 0x2FF:
+ regNum = MISCREG_DEF_TYPE;
+ break;
+ case 0x400:
+ regNum = MISCREG_MC0_CTL;
+ break;
+ case 0x404:
+ regNum = MISCREG_MC1_CTL;
+ break;
+ case 0x408:
+ regNum = MISCREG_MC2_CTL;
+ break;
+ case 0x40C:
+ regNum = MISCREG_MC3_CTL;
+ break;
+ case 0x410:
+ regNum = MISCREG_MC4_CTL;
+ break;
+ case 0x414:
+ regNum = MISCREG_MC5_CTL;
+ break;
+ case 0x418:
+ regNum = MISCREG_MC6_CTL;
+ break;
+ case 0x41C:
+ regNum = MISCREG_MC7_CTL;
+ break;
+ case 0x401:
+ regNum = MISCREG_MC0_STATUS;
+ break;
+ case 0x405:
+ regNum = MISCREG_MC1_STATUS;
+ break;
+ case 0x409:
+ regNum = MISCREG_MC2_STATUS;
+ break;
+ case 0x40D:
+ regNum = MISCREG_MC3_STATUS;
+ break;
+ case 0x411:
+ regNum = MISCREG_MC4_STATUS;
+ break;
+ case 0x415:
+ regNum = MISCREG_MC5_STATUS;
+ break;
+ case 0x419:
+ regNum = MISCREG_MC6_STATUS;
+ break;
+ case 0x41D:
+ regNum = MISCREG_MC7_STATUS;
+ break;
+ case 0x402:
+ regNum = MISCREG_MC0_ADDR;
+ break;
+ case 0x406:
+ regNum = MISCREG_MC1_ADDR;
+ break;
+ case 0x40A:
+ regNum = MISCREG_MC2_ADDR;
+ break;
+ case 0x40E:
+ regNum = MISCREG_MC3_ADDR;
+ break;
+ case 0x412:
+ regNum = MISCREG_MC4_ADDR;
+ break;
+ case 0x416:
+ regNum = MISCREG_MC5_ADDR;
+ break;
+ case 0x41A:
+ regNum = MISCREG_MC6_ADDR;
+ break;
+ case 0x41E:
+ regNum = MISCREG_MC7_ADDR;
+ break;
+ case 0x403:
+ regNum = MISCREG_MC0_MISC;
+ break;
+ case 0x407:
+ regNum = MISCREG_MC1_MISC;
+ break;
+ case 0x40B:
+ regNum = MISCREG_MC2_MISC;
+ break;
+ case 0x40F:
+ regNum = MISCREG_MC3_MISC;
+ break;
+ case 0x413:
+ regNum = MISCREG_MC4_MISC;
+ break;
+ case 0x417:
+ regNum = MISCREG_MC5_MISC;
+ break;
+ case 0x41B:
+ regNum = MISCREG_MC6_MISC;
+ break;
+ case 0x41F:
+ regNum = MISCREG_MC7_MISC;
+ break;
+ case 0xC0000080:
+ regNum = MISCREG_EFER;
+ break;
+ case 0xC0000081:
+ regNum = MISCREG_STAR;
+ break;
+ case 0xC0000082:
+ regNum = MISCREG_LSTAR;
+ break;
+ case 0xC0000083:
+ regNum = MISCREG_CSTAR;
+ break;
+ case 0xC0000084:
+ regNum = MISCREG_SF_MASK;
+ break;
+ case 0xC0000100:
+ regNum = MISCREG_FS_BASE;
+ break;
+ case 0xC0000101:
+ regNum = MISCREG_GS_BASE;
+ break;
+ case 0xC0000102:
+ regNum = MISCREG_KERNEL_GS_BASE;
+ break;
+ case 0xC0000103:
+ regNum = MISCREG_TSC_AUX;
+ break;
+ case 0xC0010000:
+ regNum = MISCREG_PERF_EVT_SEL0;
+ break;
+ case 0xC0010001:
+ regNum = MISCREG_PERF_EVT_SEL1;
+ break;
+ case 0xC0010002:
+ regNum = MISCREG_PERF_EVT_SEL2;
+ break;
+ case 0xC0010003:
+ regNum = MISCREG_PERF_EVT_SEL3;
+ break;
+ case 0xC0010004:
+ regNum = MISCREG_PERF_EVT_CTR0;
+ break;
+ case 0xC0010005:
+ regNum = MISCREG_PERF_EVT_CTR1;
+ break;
+ case 0xC0010006:
+ regNum = MISCREG_PERF_EVT_CTR2;
+ break;
+ case 0xC0010007:
+ regNum = MISCREG_PERF_EVT_CTR3;
+ break;
+ case 0xC0010010:
+ regNum = MISCREG_SYSCFG;
+ break;
+ case 0xC0010016:
+ regNum = MISCREG_IORR_BASE0;
+ break;
+ case 0xC0010017:
+ regNum = MISCREG_IORR_BASE1;
+ break;
+ case 0xC0010018:
+ regNum = MISCREG_IORR_MASK0;
+ break;
+ case 0xC0010019:
+ regNum = MISCREG_IORR_MASK1;
+ break;
+ case 0xC001001A:
+ regNum = MISCREG_TOP_MEM;
+ break;
+ case 0xC001001D:
+ regNum = MISCREG_TOP_MEM2;
+ break;
+ case 0xC0010114:
+ regNum = MISCREG_VM_CR;
+ break;
+ case 0xC0010115:
+ regNum = MISCREG_IGNNE;
+ break;
+ case 0xC0010116:
+ regNum = MISCREG_SMM_CTL;
+ break;
+ case 0xC0010117:
+ regNum = MISCREG_VM_HSAVE_PA;
+ break;
+ default:
+ return std::make_shared<GeneralProtection>(0);
+ }
+ //The index is multiplied by the size of a MiscReg so that
+ //any memory dependence calculations will not see these as
+ //overlapping.
+ req->setPaddr(regNum * sizeof(MiscReg));
+ return NoFault;
+ } else if (prefix == IntAddrPrefixIO) {
+ // TODO If CPL > IOPL or in virtual mode, check the I/O permission
+ // bitmap in the TSS.
+
+ Addr IOPort = vaddr & ~IntAddrPrefixMask;
+ // Make sure the address fits in the expected 16 bit IO address
+ // space.
+ assert(!(IOPort & ~0xFFFF));
+
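+ // 0xCF8 is the PCI config-address port; 0xCFC-0xCFF are the config-data ports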
+ if (IOPort == 0xCF8 && req->getSize() == 4) {
+ req->setFlags(Request::MMAPPED_IPR);
+ req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
+ } else if ((IOPort & ~mask(2)) == 0xCFC) {
+ req->setFlags(Request::UNCACHEABLE);
+
+ Addr configAddress =
+ tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
+
+ if (bits(configAddress, 31, 31)) {
+ req->setPaddr(PhysAddrPrefixPciConfig |
+ mbits(configAddress, 30, 2) |
+ (IOPort & mask(2)));
+ } else {
+ req->setPaddr(PhysAddrPrefixIO | IOPort);
+ }
+ } else {
+ req->setFlags(Request::UNCACHEABLE);
+ req->setPaddr(PhysAddrPrefixIO | IOPort);
+ }
+ return NoFault;
+ } else {
+ panic("Access to unrecognized internal address space %#x.\n",
+ prefix);
+ }
+ }
+
+ /**
+ * tlbLookup only performs a TLB lookup, returning true on a TLB hit
+ * and false on a TLB miss.
+ * Many of the checks about different modes have been converted to
+ * assertions, since these parts of the code are not really used.
+ * On a hit it will update the LRU stack.
+ */
+ bool
+ GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
+ {
+ bool tlb_hit = false;
+ #ifndef NDEBUG
+ uint32_t flags = req->getFlags();
+ int seg = flags & SegmentFlagMask;
+ #endif
+
+ assert(seg != SEGMENT_REG_MS);
+ Addr vaddr = req->getVaddr();
+ DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
+ HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
+
+ if (m5Reg.prot) {
+ DPRINTF(GPUTLB, "In protected mode.\n");
+ // make sure we are in 64-bit mode
+ assert(m5Reg.mode == LongMode);
+
+ // If paging is enabled, do the translation.
+ if (m5Reg.paging) {
+ DPRINTF(GPUTLB, "Paging enabled.\n");
+ //update LRU stack on a hit
+ GpuTlbEntry *entry = lookup(vaddr, true);
+
+ if (entry)
+ tlb_hit = true;
+
+ if (!update_stats) {
+ // functional tlb access for memory initialization
+ // i.e., memory seeding or instr. seeding -> don't update
+ // TLB and stats
+ return tlb_hit;
+ }
+
+ localNumTLBAccesses++;
+
+ if (!entry) {
+ localNumTLBMisses++;
+ } else {
+ localNumTLBHits++;
+ }
+ }
+ }
+
+ return tlb_hit;
+ }
+
+ Fault
+ GpuTLB::translate(RequestPtr req, ThreadContext *tc,
+ Translation *translation, Mode mode,
+ bool &delayedResponse, bool timing, int &latency)
+ {
+ uint32_t flags = req->getFlags();
+ int seg = flags & SegmentFlagMask;
+ bool storeCheck = flags & (StoreCheck << FlagShift);
+
+ // If this is true, we're dealing with a request
+ // to a non-memory address space.
+ if (seg == SEGMENT_REG_MS) {
+ return translateInt(req, tc);
+ }
+
+ delayedResponse = false;
+ Addr vaddr = req->getVaddr();
+ DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
+
+ HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
+
+ // If protected mode has been enabled...
+ if (m5Reg.prot) {
+ DPRINTF(GPUTLB, "In protected mode.\n");
+ // If we're not in 64-bit mode, do protection/limit checks
+ if (m5Reg.mode != LongMode) {
+ DPRINTF(GPUTLB, "Not in long mode. Checking segment "
+ "protection.\n");
+
+ // Check for a null segment selector.
+ if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
+ seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
+ && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
+ return std::make_shared<GeneralProtection>(0);
+ }
+
+ bool expandDown = false;
+ SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
+
+ if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
+ if (!attr.writable && (mode == BaseTLB::Write ||
+ storeCheck))
+ return std::make_shared<GeneralProtection>(0);
+
+ if (!attr.readable && mode == BaseTLB::Read)
+ return std::make_shared<GeneralProtection>(0);
+
+ expandDown = attr.expandDown;
+
+ }
+
+ Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
+ Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
+ // This assumes we're not in 64 bit mode. If we were, the
+ // default address size is 64 bits, overridable to 32.
+ int size = 32;
+ bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
+ SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
+
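+ // the effective address size drops to 16 bits when the size-override
+ // prefix is applied to a 32-bit default code segment, or when a 16-bit
+ // default code segment is not overridden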
+ if ((csAttr.defaultSize && sizeOverride) ||
+ (!csAttr.defaultSize && !sizeOverride)) {
+ size = 16;
+ }
+
+ Addr offset = bits(vaddr - base, size - 1, 0);
+ Addr endOffset = offset + req->getSize() - 1;
+
+ if (expandDown) {
+ DPRINTF(GPUTLB, "Checking an expand down segment.\n");
+ warn_once("Expand down segments are untested.\n");
+
+ if (offset <= limit || endOffset <= limit)
+ return std::make_shared<GeneralProtection>(0);
+ } else {
+ if (offset > limit || endOffset > limit)
+ return std::make_shared<GeneralProtection>(0);
+ }
+ }
+
+ // If paging is enabled, do the translation.
+ if (m5Reg.paging) {
+ DPRINTF(GPUTLB, "Paging enabled.\n");
+ // The vaddr already has the segment base applied.
+ GpuTlbEntry *entry = lookup(vaddr);
+ localNumTLBAccesses++;
+
+ if (!entry) {
+ localNumTLBMisses++;
+ if (timing) {
+ latency = missLatency1;
+ }
+
+ if (FullSystem) {
+ fatal("GpuTLB doesn't support full-system mode\n");
+ } else {
+ DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
+ "at pc %#x.\n", vaddr, tc->instAddr());
+
+ Process *p = tc->getProcessPtr();
+ GpuTlbEntry newEntry;
+ bool success = p->pTable->lookup(vaddr, newEntry);
+
+ if (!success && mode != BaseTLB::Execute) {
+ // penalize a "page fault" more
+ if (timing) {
+ latency += missLatency2;
+ }
+
+ if (p->fixupStackFault(vaddr))
+ success = p->pTable->lookup(vaddr, newEntry);
+ }
+
+ if (!success) {
+ return std::make_shared<PageFault>(vaddr, true,
+ mode, true,
+ false);
+ } else {
+ newEntry.valid = success;
+ Addr alignedVaddr = p->pTable->pageAlign(vaddr);
+
+ DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
+ alignedVaddr, newEntry.pageStart());
+
+ entry = insert(alignedVaddr, newEntry);
+ }
+
+ DPRINTF(GPUTLB, "Miss was serviced.\n");
+ }
+ } else {
+ localNumTLBHits++;
+
+ if (timing) {
+ latency = hitLatency;
+ }
+ }
+
+ // Do paging protection checks.
+ bool inUser = (m5Reg.cpl == 3 &&
+ !(flags & (CPL0FlagBit << FlagShift)));
+
+ CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
+ bool badWrite = (!entry->writable && (inUser || cr0.wp));
+
+ if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
+ badWrite)) {
+ // The page must have been present to get into the TLB in
+ // the first place. We'll assume the reserved bits are
+ // fine even though we're not checking them.
+ return std::make_shared<PageFault>(vaddr, true, mode,
+ inUser, false);
+ }
+
+ if (storeCheck && badWrite) {
+ // This would fault if this were a write, so return a page
+ // fault that reflects that happening.
+ return std::make_shared<PageFault>(vaddr, true,
+ BaseTLB::Write,
+ inUser, false);
+ }
+
+
+ DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
+ "checks.\n", entry->paddr);
+
+ int page_size = entry->size();
+ Addr paddr = entry->paddr | (vaddr & (page_size - 1));
+ DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
+ req->setPaddr(paddr);
+
+ if (entry->uncacheable)
+ req->setFlags(Request::UNCACHEABLE);
+ } else {
+ //Use the address which already has segmentation applied.
+ DPRINTF(GPUTLB, "Paging disabled.\n");
+ DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
+ req->setPaddr(vaddr);
+ }
+ } else {
+ // Real mode
+ DPRINTF(GPUTLB, "In real mode.\n");
+ DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
+ req->setPaddr(vaddr);
+ }
+
+ // Check for an access to the local APIC
+ if (FullSystem) {
+ LocalApicBase localApicBase =
+ tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
+
+ Addr baseAddr = localApicBase.base * PageBytes;
+ Addr paddr = req->getPaddr();
+
+ if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
+ // Force the access to be uncacheable.
+ req->setFlags(Request::UNCACHEABLE);
+ req->setPaddr(x86LocalAPICAddress(tc->contextId(),
+ paddr - baseAddr));
+ }
+ }
+
+ return NoFault;
+ }
+
+ Fault
+ GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
+ int &latency)
+ {
+ bool delayedResponse;
+
+ return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
+ latency);
+ }
+
+ void
+ GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
+ Translation *translation, Mode mode, int &latency)
+ {
+ bool delayedResponse;
+ assert(translation);
+
+ Fault fault = GpuTLB::translate(req, tc, translation, mode,
+ delayedResponse, true, latency);
+
+ if (!delayedResponse)
+ translation->finish(fault, req, tc, mode);
+ }
+
+ Walker*
+ GpuTLB::getWalker()
+ {
+ return walker;
+ }
+
+
+ void
+ GpuTLB::serialize(CheckpointOut &cp) const
+ {
+ }
+
+ void
+ GpuTLB::unserialize(CheckpointIn &cp)
+ {
+ }
+
+ void
+ GpuTLB::regStats()
+ {
+ localNumTLBAccesses
+ .name(name() + ".local_TLB_accesses")
+ .desc("Number of TLB accesses")
+ ;
+
+ localNumTLBHits
+ .name(name() + ".local_TLB_hits")
+ .desc("Number of TLB hits")
+ ;
+
+ localNumTLBMisses
+ .name(name() + ".local_TLB_misses")
+ .desc("Number of TLB misses")
+ ;
+
+ localTLBMissRate
+ .name(name() + ".local_TLB_miss_rate")
+ .desc("TLB miss rate")
+ ;
+
+ accessCycles
+ .name(name() + ".access_cycles")
+ .desc("Cycles spent accessing this TLB level")
+ ;
+
+ pageTableCycles
+ .name(name() + ".page_table_cycles")
+ .desc("Cycles spent accessing the page table")
+ ;
+
+ localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
+
+ numUniquePages
+ .name(name() + ".unique_pages")
+ .desc("Number of unique pages touched")
+ ;
+
+ localCycles
+ .name(name() + ".local_cycles")
+ .desc("Number of cycles spent in queue for all incoming reqs")
+ ;
+
+ localLatency
+ .name(name() + ".local_latency")
+ .desc("Avg. latency over incoming coalesced reqs")
+ ;
+
+ localLatency = localCycles / localNumTLBAccesses;
+
+ globalNumTLBAccesses
+ .name(name() + ".global_TLB_accesses")
+ .desc("Number of TLB accesses")
+ ;
+
+ globalNumTLBHits
+ .name(name() + ".global_TLB_hits")
+ .desc("Number of TLB hits")
+ ;
+
+ globalNumTLBMisses
+ .name(name() + ".global_TLB_misses")
+ .desc("Number of TLB misses")
+ ;
+
+ globalTLBMissRate
+ .name(name() + ".global_TLB_miss_rate")
+ .desc("TLB miss rate")
+ ;
+
+ globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
+
+ avgReuseDistance
+ .name(name() + ".avg_reuse_distance")
+ .desc("avg. reuse distance over all pages (in ticks)")
+ ;
+
+ }
+
+ /**
+ * Do the TLB lookup for this coalesced request and schedule
+ * another event <TLB access latency> cycles later.
+ */
+
+ void
+ GpuTLB::issueTLBLookup(PacketPtr pkt)
+ {
+ assert(pkt);
+ assert(pkt->senderState);
+
+ Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
+ TheISA::PageBytes);
+
+ TranslationState *sender_state =
+ safe_cast<TranslationState*>(pkt->senderState);
+
+ bool update_stats = !sender_state->prefetch;
+ ThreadContext * tmp_tc = sender_state->tc;
+
+ DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
+ virt_page_addr);
+
+ int req_cnt = sender_state->reqCnt.back();
+
+ if (update_stats) {
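+ // cycle accounting: subtract the current tick now; the matching
+ // addition happens when the translation return event fires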
+ accessCycles -= (curTick() * req_cnt);
+ localCycles -= curTick();
+ updatePageFootprint(virt_page_addr);
+ globalNumTLBAccesses += req_cnt;
+ }
+
+ tlbOutcome lookup_outcome = TLB_MISS;
+ RequestPtr tmp_req = pkt->req;
+
+ // Access the TLB and figure out if it's a hit or a miss.
+ bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
+
+ if (success) {
+ lookup_outcome = TLB_HIT;
+ // Put the entry in SenderState
+ GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
+ assert(entry);
+
+ sender_state->tlbEntry =
+ new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
+
+ if (update_stats) {
+ // the reqCnt has an entry per level, so its size tells us
+ // which level we are in
+ sender_state->hitLevel = sender_state->reqCnt.size();
+ globalNumTLBHits += req_cnt;
+ }
+ } else {
+ if (update_stats)
+ globalNumTLBMisses += req_cnt;
+ }
+
+ /*
+ * We now know the TLB lookup outcome (if it's a hit or a miss), as well
+ * as the TLB access latency.
+ *
+ * We create and schedule a new TLBEvent which will help us take the
+ * appropriate actions (e.g., update TLB on a hit, send request to lower
+ * level TLB on a miss, or start a page walk if this was the last-level
+ * TLB)
+ */
+ TLBEvent *tlb_event =
+ new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
+
+ if (translationReturnEvent.count(virt_page_addr)) {
+ panic("Virtual Page Address %#x already has a return event\n",
+ virt_page_addr);
+ }
+
+ translationReturnEvent[virt_page_addr] = tlb_event;
+ assert(tlb_event);
+
+ DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
+ curTick() + this->ticks(hitLatency));
+
+ schedule(tlb_event, curTick() + this->ticks(hitLatency));
+ }
+
+ GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
+ PacketPtr _pkt)
+ : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
+ outcome(tlb_outcome), pkt(_pkt)
+ {
+ }
+
+ /**
+ * Do paging protection checks. If we encounter a page fault,
+ * an assertion fires.
+ */
+ void
+ GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
+ GpuTlbEntry * tlb_entry, Mode mode)
+ {
+ HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
+ uint32_t flags = pkt->req->getFlags();
+ bool storeCheck = flags & (StoreCheck << FlagShift);
+
+ // Do paging protection checks.
+ bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
+ CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
+
+ bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
+
+ if ((inUser && !tlb_entry->user) ||
+ (mode == BaseTLB::Write && badWrite)) {
+ // The page must have been present to get into the TLB in
+ // the first place. We'll assume the reserved bits are
+ // fine even though we're not checking them.
+ assert(false);
+ }
+
+ if (storeCheck && badWrite) {
+ // This would fault if this were a write, so return a page
+ // fault that reflects that happening.
+ assert(false);
+ }
+ }
+
+ /**
+ * handleTranslationReturn is called on a TLB hit,
+ * when a TLB miss returns or when a page fault returns.
+ * In the latter cases it is invoked with TLB_MISS as the tlbOutcome.
+ */
+ void
+ GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
+ PacketPtr pkt)
+ {
+
+ assert(pkt);
+ Addr vaddr = pkt->req->getVaddr();
+
+ TranslationState *sender_state =
+ safe_cast<TranslationState*>(pkt->senderState);
+
+ ThreadContext *tc = sender_state->tc;
+ Mode mode = sender_state->tlbMode;
+
+ GpuTlbEntry *local_entry, *new_entry;
+
+ if (tlb_outcome == TLB_HIT) {
+ DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
+ local_entry = sender_state->tlbEntry;
+ } else {
+ DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
+ vaddr);
+
+ // We are returning either from a page walk or from a hit at a lower
+ // TLB level. The senderState should be "carrying" a pointer to the
+ // correct TLBEntry.
+ new_entry = sender_state->tlbEntry;
+ assert(new_entry);
+ local_entry = new_entry;
+
+ if (allocationPolicy) {
+ DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
+ virt_page_addr);
+
+ local_entry = insert(virt_page_addr, *new_entry);
+ }
+
+ assert(local_entry);
+ }
+
+ /**
+ * At this point the packet carries an up-to-date tlbEntry pointer
+ * in its senderState.
+ * Next step is to do the paging protection checks.
+ */
+ DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
+ "while paddr was %#x.\n", local_entry->vaddr,
+ local_entry->paddr);
+
+ pagingProtectionChecks(tc, pkt, local_entry, mode);
+ int page_size = local_entry->size();
+ Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
+ DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
+
+ // Since this packet will be sent through the cpu side slave port,
+ // it must be converted to a response pkt if it is not one already
+ if (pkt->isRequest()) {
+ pkt->makeTimingResponse();
+ }
+
+ pkt->req->setPaddr(paddr);
+
+ if (local_entry->uncacheable) {
+ pkt->req->setFlags(Request::UNCACHEABLE);
+ }
+
+ //send packet back to coalescer
+ cpuSidePort[0]->sendTimingResp(pkt);
+ //schedule cleanup event
+ cleanupQueue.push(virt_page_addr);
+
+ // Schedule the cleanup event only once per cycle. The check is
+ // required because multiple translations may return in the same
+ // cycle. This is a maximum-priority event and must be on the same
+ // cycle as the cleanup event in TLBCoalescer to avoid a race with
+ // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry.
+ if (!cleanupEvent.scheduled())
+ schedule(cleanupEvent, curTick());
+ }
+
+ /**
+ * Here we take the appropriate actions based on the result of the
+ * TLB lookup.
+ */
+ void
+ GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
+ PacketPtr pkt)
+ {
+ DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
+
+ assert(translationReturnEvent[virtPageAddr]);
+ assert(pkt);
+
+ TranslationState *tmp_sender_state =
+ safe_cast<TranslationState*>(pkt->senderState);
+
+ int req_cnt = tmp_sender_state->reqCnt.back();
+ bool update_stats = !tmp_sender_state->prefetch;
+
+
+ if (outcome == TLB_HIT) {
+ handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
+
+ if (update_stats) {
+ accessCycles += (req_cnt * curTick());
+ localCycles += curTick();
+ }
+
+ } else if (outcome == TLB_MISS) {
+
+ DPRINTF(GPUTLB, "This is a TLB miss\n");
+ if (update_stats) {
+ accessCycles += (req_cnt*curTick());
+ localCycles += curTick();
+ }
+
+ if (hasMemSidePort) {
+ // the one cycle added here represents the delay from when we
+ // get the reply back until we propagate it to the coalescer
+ // above.
+ if (update_stats) {
+ accessCycles += (req_cnt * 1);
+ localCycles += 1;
+ }
+
+ /**
+ * There is a TLB below. Send the coalesced request.
+ * We actually send the very first packet of all the
+ * pending packets for this virtual page address.
+ */
+ if (!memSidePort[0]->sendTimingReq(pkt)) {
+ DPRINTF(GPUTLB, "Failed sending translation request to "
+ "lower level TLB for addr %#x\n", virtPageAddr);
+
+ memSidePort[0]->retries.push_back(pkt);
+ } else {
+ DPRINTF(GPUTLB, "Sent translation request to lower level "
+ "TLB for addr %#x\n", virtPageAddr);
+ }
+ } else {
+ //this is the last level TLB. Start a page walk
+ DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
+ "addr %#x\n", virtPageAddr);
+
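+ // start page-walk cycle accounting: the matching addition happens
+ // when the PAGE_WALK event is processed below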
+ if (update_stats)
+ pageTableCycles -= (req_cnt*curTick());
+
+ TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
+ assert(tlb_event);
+ tlb_event->updateOutcome(PAGE_WALK);
+ schedule(tlb_event, curTick() + ticks(missLatency2));
+ }
+ } else if (outcome == PAGE_WALK) {
+ if (update_stats)
+ pageTableCycles += (req_cnt*curTick());
+
+ // Need to access the page table and update the TLB
+ DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
+ virtPageAddr);
+
+ TranslationState *sender_state =
+ safe_cast<TranslationState*>(pkt->senderState);
+
+ Process *p = sender_state->tc->getProcessPtr();
+ TlbEntry newEntry;
+ Addr vaddr = pkt->req->getVaddr();
+ #ifndef NDEBUG
+ Addr alignedVaddr = p->pTable->pageAlign(vaddr);
+ assert(alignedVaddr == virtPageAddr);
+ #endif
+ bool success;
+ success = p->pTable->lookup(vaddr, newEntry);
+ if (!success && sender_state->tlbMode != BaseTLB::Execute) {
+ if (p->fixupStackFault(vaddr)) {
+ success = p->pTable->lookup(vaddr, newEntry);
+ }
+ }
+
+ DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
+ newEntry.pageStart());
+
+ sender_state->tlbEntry =
+ new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);
+
+ handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
+ } else if (outcome == MISS_RETURN) {
+ /** we add an extra cycle in the return path of the translation
+ * requests in between the various TLB levels.
+ */
+ handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
+ } else {
+ assert(false);
+ }
+ }
+
+ void
+ GpuTLB::TLBEvent::process()
+ {
+ tlb->translationReturn(virtPageAddr, outcome, pkt);
+ }
+
+ const char*
+ GpuTLB::TLBEvent::description() const
+ {
+ return "trigger translationDoneEvent";
+ }
+
+ void
+ GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
+ {
+ outcome = _outcome;
+ }
+
+ Addr
+ GpuTLB::TLBEvent::getTLBEventVaddr()
+ {
+ return virtPageAddr;
+ }
+
+ /*
+ * recvTimingReq receives a coalesced timing request from a TLBCoalescer
+ * and calls issueTLBLookup().
+ * It only rejects the packet if we have exceeded the maximum number
+ * of outstanding requests for the TLB.
+ */
+ bool
+ GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
+ {
+ if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
+ tlb->issueTLBLookup(pkt);
+ // update number of outstanding translation requests
+ tlb->outstandingReqs++;
+ return true;
+ } else {
+ DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
+ tlb->outstandingReqs);
+ return false;
+ }
+ }
+
+ /**
+ * handleFuncTranslationReturn is called on a TLB hit,
+ * when a TLB miss returns or when a page fault returns.
+ * It updates LRU, inserts the TLB entry on a miss
+ * depending on the allocation policy and does the required
+ * protection checks. It does NOT create a new packet to
+ * update the packet's addr; this is done in hsail-gpu code.
+ */
+ void
+ GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
+ {
+ TranslationState *sender_state =
+ safe_cast<TranslationState*>(pkt->senderState);
+
+ ThreadContext *tc = sender_state->tc;
+ Mode mode = sender_state->tlbMode;
+ Addr vaddr = pkt->req->getVaddr();
+
+ GpuTlbEntry *local_entry, *new_entry;
+
+ if (tlb_outcome == TLB_HIT) {
+ DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
+ "%#x\n", vaddr);
+
+ local_entry = sender_state->tlbEntry;
+ } else {
+ DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
+ "%#x\n", vaddr);
+
+ // We are returning either from a page walk or from a hit at a lower
+ // TLB level. The senderState should be "carrying" a pointer to the
+ // correct TLBEntry.
+ new_entry = sender_state->tlbEntry;
+ assert(new_entry);
+ local_entry = new_entry;
+
+ if (allocationPolicy) {
+ Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
+
+ DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
+ virt_page_addr);
+
+ local_entry = insert(virt_page_addr, *new_entry);
+ }
+
+ assert(local_entry);
+ }
+
+ DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
+ "while paddr was %#x.\n", local_entry->vaddr,
+ local_entry->paddr);
+
+ // Do paging checks if it's a normal functional access. If it's for a
+ // prefetch, then sometimes you can try to prefetch something that won't
+ // pass protection. We don't actually want to fault because there is no
+ // demand access to deem this a violation. Just put it in the TLB and
+ // it will fault if indeed a future demand access touches it in
+ // violation.
+ if (!sender_state->prefetch && sender_state->tlbEntry->valid)
+ pagingProtectionChecks(tc, pkt, local_entry, mode);
+
+ int page_size = local_entry->size();
+ Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
+ DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
+
+ pkt->req->setPaddr(paddr);
+
+ if (local_entry->uncacheable)
+ pkt->req->setFlags(Request::UNCACHEABLE);
+ }
+
+ // This is used for atomic translations. Need to
+ // make it all happen during the same cycle.
+ void
+ GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
+ {
+ TranslationState *sender_state =
+ safe_cast<TranslationState*>(pkt->senderState);
+
+ ThreadContext *tc = sender_state->tc;
+ bool update_stats = !sender_state->prefetch;
+
+ Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
+ TheISA::PageBytes);
+
+ if (update_stats)
+ tlb->updatePageFootprint(virt_page_addr);
+
+ // do the TLB lookup; skip the stats update for prefetches
+ bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
+ tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
+
+ // functional mode means no coalescing
+ // global metrics are the same as the local metrics
+ if (update_stats) {
+ tlb->globalNumTLBAccesses++;
+
+ if (success) {
+ sender_state->hitLevel = sender_state->reqCnt.size();
+ tlb->globalNumTLBHits++;
+ }
+ }
+
+ if (!success) {
+ if (update_stats)
+ tlb->globalNumTLBMisses++;
+ if (tlb->hasMemSidePort) {
+ // there is a TLB below -> propagate down the TLB hierarchy
+ tlb->memSidePort[0]->sendFunctional(pkt);
+ // If no valid translation from a prefetch, then just return
+ if (sender_state->prefetch && !pkt->req->hasPaddr())
+ return;
+ } else {
+ // Need to access the page table and update the TLB
+ DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
+ virt_page_addr);
+
+ Process *p = tc->getProcessPtr();
+ TlbEntry newEntry;
+
+ Addr vaddr = pkt->req->getVaddr();
+ #ifndef NDEBUG
+ Addr alignedVaddr = p->pTable->pageAlign(vaddr);
+ assert(alignedVaddr == virt_page_addr);
+ #endif
+
+ bool success = p->pTable->lookup(vaddr, newEntry);
+ if (!success && sender_state->tlbMode != BaseTLB::Execute) {
+ if (p->fixupStackFault(vaddr))
+ success = p->pTable->lookup(vaddr, newEntry);
+ }
+
+ if (!sender_state->prefetch) {
+ // no PageFaults are permitted after
+ // the second page table lookup
+ assert(success);
+
+ DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
+ newEntry.pageStart());
+
+ sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr,
+ newEntry.paddr,
+ success);
+ } else {
+ // If this was a prefetch, then do the normal thing if it
+ // was a successful translation. Otherwise, send an empty
+ // TLB entry back so that it can be figured out as empty and
+ // handled accordingly.
+ if (success) {
+ DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
+ newEntry.pageStart());
+
+ sender_state->tlbEntry = new GpuTlbEntry(0,
+ newEntry.vaddr,
+ newEntry.paddr,
+ success);
+ } else {
+ DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
+ alignedVaddr);
+
+ sender_state->tlbEntry = new GpuTlbEntry();
+
+ return;
+ }
+ }
+ }
+ } else {
+ DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
+ tlb->lookup(pkt->req->getVaddr()));
+
+ GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
+ update_stats);
+
+ assert(entry);
+
+ sender_state->tlbEntry =
+ new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
+ }
+ // This is the function that would populate pkt->req with the paddr of
+ // the translation. But if no translation happens (i.e., the prefetch
+ // fails) then the early returns in the above code will keep this
+ // function from executing.
+ tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
+ }
+
+ void
+ GpuTLB::CpuSidePort::recvReqRetry()
+ {
+ // The CPUSidePort never sends anything but replies. No retries
+ // expected.
+ assert(false);
+ }
+
+ AddrRangeList
+ GpuTLB::CpuSidePort::getAddrRanges() const
+ {
+ // currently not checked by the master
+ AddrRangeList ranges;
+
+ return ranges;
+ }
+
+ /**
+ * MemSidePort receives the packet back.
+ * We need to call the handleTranslationReturn
+ * and propagate up the hierarchy.
+ */
+ bool
+ GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
+ {
+ Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
+ TheISA::PageBytes);
+
+ DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
+ virt_page_addr);
+
+ TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
+ assert(tlb_event);
+ assert(virt_page_addr == tlb_event->getTLBEventVaddr());
+
+ tlb_event->updateOutcome(MISS_RETURN);
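+ // add one cycle to model the return latency between TLB levels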
+ tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
+
+ return true;
+ }
+
+ void
+ GpuTLB::MemSidePort::recvReqRetry()
+ {
+ // No retries should reach the TLB. The retries
+ // should only reach the TLBCoalescer.
+ assert(false);
+ }
+
+ void
+ GpuTLB::cleanup()
+ {
+ while (!cleanupQueue.empty()) {
+ Addr cleanup_addr = cleanupQueue.front();
+ cleanupQueue.pop();
+
+ // delete TLBEvent
+ TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
+ delete old_tlb_event;
+ translationReturnEvent.erase(cleanup_addr);
+
+ // update number of outstanding requests
+ outstandingReqs--;
+ }
+
+ /** the higher level coalescer should retry if it has
+ * any pending requests.
+ */
+ for (int i = 0; i < cpuSidePort.size(); ++i) {
+ cpuSidePort[i]->sendRetryReq();
+ }
+ }
+
+ void
+ GpuTLB::updatePageFootprint(Addr virt_page_addr)
+ {
+
+ std::pair<AccessPatternTable::iterator, bool> ret;
+
+ AccessInfo tmp_access_info;
+ tmp_access_info.lastTimeAccessed = 0;
+ tmp_access_info.accessesPerPage = 0;
+ tmp_access_info.totalReuseDistance = 0;
+ tmp_access_info.sumDistance = 0;
+ tmp_access_info.meanDistance = 0;
+
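+ // insert a fresh record for this page; if the page has been seen
+ // before, insert() fails and ret.first points to the existing record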
+ ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
+ tmp_access_info));
+
+ bool first_page_access = ret.second;
+
+ if (first_page_access) {
+ numUniquePages++;
+ } else {
+ int accessed_before;
+ accessed_before = curTick() - ret.first->second.lastTimeAccessed;
+ ret.first->second.totalReuseDistance += accessed_before;
+ }
+
+ ret.first->second.accessesPerPage++;
+ ret.first->second.lastTimeAccessed = curTick();
+
+ if (accessDistance) {
+ ret.first->second.localTLBAccesses
+ .push_back(localNumTLBAccesses.value());
+ }
+ }
+
+ void
+ GpuTLB::exitCallback()
+ {
+ std::ostream *page_stat_file = nullptr;
+
+ if (accessDistance) {
+
+ // print per page statistics to a separate file (.csv format)
+ // simout is the gem5 output directory (default is m5out or the one
+ // specified with -d)
+ page_stat_file = simout.create(name().c_str());
+
+ // print header
+ *page_stat_file << "page,max_access_distance,mean_access_distance, "
+ << "stddev_distance" << std::endl;
+ }
+
+ // update avg. reuse distance footprint
+ AccessPatternTable::iterator iter, iter_begin, iter_end;
+ unsigned int sum_avg_reuse_distance_per_page = 0;
+
+ // iterate through all pages seen by this TLB
+ for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
+ sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
+ iter->second.accessesPerPage;
+
+ if (accessDistance) {
+ unsigned int tmp = iter->second.localTLBAccesses[0];
+ unsigned int prev = tmp;
+
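+ // convert the recorded absolute access counters into access
+ // distances, i.e., the number of other TLB accesses between two
+ // consecutive accesses to this page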
+ for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
+ if (i) {
+ tmp = prev + 1;
+ }
+
+ prev = iter->second.localTLBAccesses[i];
+ // update the localTLBAccesses value
+ // with the actual difference
+ iter->second.localTLBAccesses[i] -= tmp;
+ // compute the sum of AccessDistance per page
+ // used later for mean
+ iter->second.sumDistance +=
+ iter->second.localTLBAccesses[i];
+ }
+
+ iter->second.meanDistance =
+ iter->second.sumDistance / iter->second.accessesPerPage;
+
+ // compute std_dev and max (we need a second round because we
+ // need to know the mean value)
+ unsigned int max_distance = 0;
+ unsigned int stddev_distance = 0;
+
+ for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
+ unsigned int tmp_access_distance =
+ iter->second.localTLBAccesses[i];
+
+ if (tmp_access_distance > max_distance) {
+ max_distance = tmp_access_distance;
+ }
+
+ unsigned int diff =
+ tmp_access_distance - iter->second.meanDistance;
+ stddev_distance += pow(diff, 2);
+
+ }
+
+ stddev_distance =
+ sqrt(stddev_distance/iter->second.accessesPerPage);
+
+ if (page_stat_file) {
+ *page_stat_file << std::hex << iter->first << ",";
+ *page_stat_file << std::dec << max_distance << ",";
+ *page_stat_file << std::dec << iter->second.meanDistance
+ << ",";
+ *page_stat_file << std::dec << stddev_distance;
+ *page_stat_file << std::endl;
+ }
+
+ // erase the localTLBAccesses array
+ iter->second.localTLBAccesses.clear();
+ }
+ }
+
+ if (!TLBFootprint.empty()) {
+ avgReuseDistance =
+ sum_avg_reuse_distance_per_page / TLBFootprint.size();
+ }
+
+ //clear the TLBFootprint map
+ TLBFootprint.clear();
+ }
+} // namespace X86ISA
+
+X86ISA::GpuTLB*
+X86GPUTLBParams::create()
+{
+ return new X86ISA::GpuTLB(this);
+}
+