From 23c961a0fd97251ee7c760bc2ff2a011a417ad9b Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Thu, 10 Dec 2015 10:35:18 +0000 Subject: dev: Move network devices to src/dev/net/ --HG-- rename : src/dev/Ethernet.py => src/dev/net/Ethernet.py rename : src/dev/etherbus.cc => src/dev/net/etherbus.cc rename : src/dev/etherbus.hh => src/dev/net/etherbus.hh rename : src/dev/etherdevice.cc => src/dev/net/etherdevice.cc rename : src/dev/etherdevice.hh => src/dev/net/etherdevice.hh rename : src/dev/etherdump.cc => src/dev/net/etherdump.cc rename : src/dev/etherdump.hh => src/dev/net/etherdump.hh rename : src/dev/etherint.cc => src/dev/net/etherint.cc rename : src/dev/etherint.hh => src/dev/net/etherint.hh rename : src/dev/etherlink.cc => src/dev/net/etherlink.cc rename : src/dev/etherlink.hh => src/dev/net/etherlink.hh rename : src/dev/etherobject.hh => src/dev/net/etherobject.hh rename : src/dev/etherpkt.cc => src/dev/net/etherpkt.cc rename : src/dev/etherpkt.hh => src/dev/net/etherpkt.hh rename : src/dev/ethertap.cc => src/dev/net/ethertap.cc rename : src/dev/ethertap.hh => src/dev/net/ethertap.hh rename : src/dev/i8254xGBe.cc => src/dev/net/i8254xGBe.cc rename : src/dev/i8254xGBe.hh => src/dev/net/i8254xGBe.hh rename : src/dev/i8254xGBe_defs.hh => src/dev/net/i8254xGBe_defs.hh rename : src/dev/multi_etherlink.cc => src/dev/net/multi_etherlink.cc rename : src/dev/multi_etherlink.hh => src/dev/net/multi_etherlink.hh rename : src/dev/multi_iface.cc => src/dev/net/multi_iface.cc rename : src/dev/multi_iface.hh => src/dev/net/multi_iface.hh rename : src/dev/multi_packet.cc => src/dev/net/multi_packet.cc rename : src/dev/multi_packet.hh => src/dev/net/multi_packet.hh rename : src/dev/ns_gige.cc => src/dev/net/ns_gige.cc rename : src/dev/ns_gige.hh => src/dev/net/ns_gige.hh rename : src/dev/ns_gige_reg.h => src/dev/net/ns_gige_reg.h rename : src/dev/pktfifo.cc => src/dev/net/pktfifo.cc rename : src/dev/pktfifo.hh => src/dev/net/pktfifo.hh rename : src/dev/sinic.cc => 
src/dev/net/sinic.cc rename : src/dev/sinic.hh => src/dev/net/sinic.hh rename : src/dev/sinicreg.hh => src/dev/net/sinicreg.hh rename : src/dev/tcp_iface.cc => src/dev/net/tcp_iface.cc rename : src/dev/tcp_iface.hh => src/dev/net/tcp_iface.hh --- src/dev/net/Ethernet.py | 245 ++++ src/dev/net/SConscript | 94 ++ src/dev/net/etherbus.cc | 113 ++ src/dev/net/etherbus.hh | 90 ++ src/dev/net/etherdevice.cc | 369 ++++++ src/dev/net/etherdevice.hh | 152 +++ src/dev/net/etherdump.cc | 110 ++ src/dev/net/etherdump.hh | 62 + src/dev/net/etherint.cc | 44 + src/dev/net/etherint.hh | 76 ++ src/dev/net/etherlink.cc | 280 +++++ src/dev/net/etherlink.hh | 166 +++ src/dev/net/etherobject.hh | 67 ++ src/dev/net/etherpkt.cc | 71 ++ src/dev/net/etherpkt.hh | 95 ++ src/dev/net/ethertap.cc | 348 ++++++ src/dev/net/ethertap.hh | 136 +++ src/dev/net/i8254xGBe.cc | 2562 ++++++++++++++++++++++++++++++++++++++++ src/dev/net/i8254xGBe.hh | 560 +++++++++ src/dev/net/i8254xGBe_defs.hh | 854 ++++++++++++++ src/dev/net/multi_etherlink.cc | 266 +++++ src/dev/net/multi_etherlink.hh | 235 ++++ src/dev/net/multi_iface.cc | 622 ++++++++++ src/dev/net/multi_iface.hh | 492 ++++++++ src/dev/net/multi_packet.cc | 100 ++ src/dev/net/multi_packet.hh | 130 ++ src/dev/net/ns_gige.cc | 2483 ++++++++++++++++++++++++++++++++++++++ src/dev/net/ns_gige.hh | 392 ++++++ src/dev/net/ns_gige_reg.h | 401 +++++++ src/dev/net/pktfifo.cc | 116 ++ src/dev/net/pktfifo.hh | 212 ++++ src/dev/net/sinic.cc | 1563 ++++++++++++++++++++++++ src/dev/net/sinic.hh | 327 +++++ src/dev/net/sinicreg.hh | 239 ++++ src/dev/net/tcp_iface.cc | 158 +++ src/dev/net/tcp_iface.hh | 133 +++ 36 files changed, 14363 insertions(+) create mode 100644 src/dev/net/Ethernet.py create mode 100644 src/dev/net/SConscript create mode 100644 src/dev/net/etherbus.cc create mode 100644 src/dev/net/etherbus.hh create mode 100644 src/dev/net/etherdevice.cc create mode 100644 src/dev/net/etherdevice.hh create mode 100644 src/dev/net/etherdump.cc create mode 
100644 src/dev/net/etherdump.hh create mode 100644 src/dev/net/etherint.cc create mode 100644 src/dev/net/etherint.hh create mode 100644 src/dev/net/etherlink.cc create mode 100644 src/dev/net/etherlink.hh create mode 100644 src/dev/net/etherobject.hh create mode 100644 src/dev/net/etherpkt.cc create mode 100644 src/dev/net/etherpkt.hh create mode 100644 src/dev/net/ethertap.cc create mode 100644 src/dev/net/ethertap.hh create mode 100644 src/dev/net/i8254xGBe.cc create mode 100644 src/dev/net/i8254xGBe.hh create mode 100644 src/dev/net/i8254xGBe_defs.hh create mode 100644 src/dev/net/multi_etherlink.cc create mode 100644 src/dev/net/multi_etherlink.hh create mode 100644 src/dev/net/multi_iface.cc create mode 100644 src/dev/net/multi_iface.hh create mode 100644 src/dev/net/multi_packet.cc create mode 100644 src/dev/net/multi_packet.hh create mode 100644 src/dev/net/ns_gige.cc create mode 100644 src/dev/net/ns_gige.hh create mode 100644 src/dev/net/ns_gige_reg.h create mode 100644 src/dev/net/pktfifo.cc create mode 100644 src/dev/net/pktfifo.hh create mode 100644 src/dev/net/sinic.cc create mode 100644 src/dev/net/sinic.hh create mode 100644 src/dev/net/sinicreg.hh create mode 100644 src/dev/net/tcp_iface.cc create mode 100644 src/dev/net/tcp_iface.hh (limited to 'src/dev/net') diff --git a/src/dev/net/Ethernet.py b/src/dev/net/Ethernet.py new file mode 100644 index 000000000..9859857a0 --- /dev/null +++ b/src/dev/net/Ethernet.py @@ -0,0 +1,245 @@ +# Copyright (c) 2015 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2005-2007 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Nathan Binkert + +from m5.SimObject import SimObject +from m5.params import * +from m5.proxy import * +from PciDevice import PciDevice + +class EtherObject(SimObject): + type = 'EtherObject' + abstract = True + cxx_header = "dev/net/etherobject.hh" + +class EtherLink(EtherObject): + type = 'EtherLink' + cxx_header = "dev/net/etherlink.hh" + int0 = SlavePort("interface 0") + int1 = SlavePort("interface 1") + delay = Param.Latency('0us', "packet transmit delay") + delay_var = Param.Latency('0ns', "packet transmit delay variability") + speed = Param.NetworkBandwidth('1Gbps', "link speed") + dump = Param.EtherDump(NULL, "dump object") + +class MultiEtherLink(EtherObject): + type = 'MultiEtherLink' + cxx_header = "dev/net/multi_etherlink.hh" + int0 = SlavePort("interface 0") + delay = Param.Latency('0us', "packet transmit delay") + delay_var = Param.Latency('0ns', "packet transmit delay variability") + speed = Param.NetworkBandwidth('1Gbps', "link speed") + dump = Param.EtherDump(NULL, "dump object") + multi_rank = Param.UInt32('0', "Rank of the this gem5 process (multi run)") + sync_start = Param.Latency('5200000000000t', "first multi sync barrier") + sync_repeat = Param.Latency('10us', "multi sync barrier repeat") + server_name = Param.String('localhost', "Message server name") + server_port = Param.UInt32('2200', "Message server port") + +class EtherBus(EtherObject): + type = 'EtherBus' + cxx_header = "dev/net/etherbus.hh" + loopback = Param.Bool(True, "send packet back to the sending interface") + dump = Param.EtherDump(NULL, "dump object") + speed = Param.NetworkBandwidth('100Mbps', "bus speed in bits per second") + +class EtherTap(EtherObject): + type = 'EtherTap' + cxx_header = "dev/net/ethertap.hh" + bufsz = Param.Int(10000, "tap buffer size") + dump = Param.EtherDump(NULL, "dump object") + port = Param.UInt16(3500, "tap port") + +class EtherDump(SimObject): + type = 'EtherDump' + cxx_header = "dev/net/etherdump.hh" + file = Param.String("dump 
file") + maxlen = Param.Int(96, "max portion of packet data to dump") + +class EtherDevice(PciDevice): + type = 'EtherDevice' + abstract = True + cxx_header = "dev/net/etherdevice.hh" + interface = MasterPort("Ethernet Interface") + +class IGbE(EtherDevice): + # Base class for two IGbE adapters listed above + type = 'IGbE' + cxx_header = "dev/net/i8254xGBe.hh" + hardware_address = Param.EthernetAddr(NextEthernetAddr, + "Ethernet Hardware Address") + rx_fifo_size = Param.MemorySize('384kB', "Size of the rx FIFO") + tx_fifo_size = Param.MemorySize('384kB', "Size of the tx FIFO") + rx_desc_cache_size = Param.Int(64, + "Number of enteries in the rx descriptor cache") + tx_desc_cache_size = Param.Int(64, + "Number of enteries in the rx descriptor cache") + VendorID = 0x8086 + SubsystemID = 0x1008 + SubsystemVendorID = 0x8086 + Status = 0x0000 + SubClassCode = 0x00 + ClassCode = 0x02 + ProgIF = 0x00 + BAR0 = 0x00000000 + BAR1 = 0x00000000 + BAR2 = 0x00000000 + BAR3 = 0x00000000 + BAR4 = 0x00000000 + BAR5 = 0x00000000 + MaximumLatency = 0x00 + MinimumGrant = 0xff + InterruptLine = 0x1e + InterruptPin = 0x01 + BAR0Size = '128kB' + wb_delay = Param.Latency('10ns', "delay before desc writeback occurs") + fetch_delay = Param.Latency('10ns', "delay before desc fetch occurs") + fetch_comp_delay = Param.Latency('10ns', "delay after desc fetch occurs") + wb_comp_delay = Param.Latency('10ns', "delay after desc wb occurs") + tx_read_delay = Param.Latency('0ns', "delay after tx dma read") + rx_write_delay = Param.Latency('0ns', "delay after rx dma read") + phy_pid = Param.UInt16("Phy PID that corresponds to device ID") + phy_epid = Param.UInt16("Phy EPID that corresponds to device ID") + +class IGbE_e1000(IGbE): + # Older Intel 8254x based gigabit ethernet adapter + # Uses Intel e1000 driver + DeviceID = 0x1075 + phy_pid = 0x02A8 + phy_epid = 0x0380 + +class IGbE_igb(IGbE): + # Newer Intel 8257x based gigabit ethernet adapter + # Uses Intel igb driver and in theory supports packet 
splitting and LRO + DeviceID = 0x10C9 + phy_pid = 0x0141 + phy_epid = 0x0CC0 + +class EtherDevBase(EtherDevice): + type = 'EtherDevBase' + abstract = True + cxx_header = "dev/net/etherdevice.hh" + + hardware_address = Param.EthernetAddr(NextEthernetAddr, + "Ethernet Hardware Address") + + dma_read_delay = Param.Latency('0us', "fixed delay for dma reads") + dma_read_factor = Param.Latency('0us', "multiplier for dma reads") + dma_write_delay = Param.Latency('0us', "fixed delay for dma writes") + dma_write_factor = Param.Latency('0us', "multiplier for dma writes") + + rx_delay = Param.Latency('1us', "Receive Delay") + tx_delay = Param.Latency('1us', "Transmit Delay") + rx_fifo_size = Param.MemorySize('512kB', "max size of rx fifo") + tx_fifo_size = Param.MemorySize('512kB', "max size of tx fifo") + + rx_filter = Param.Bool(True, "Enable Receive Filter") + intr_delay = Param.Latency('10us', "Interrupt propagation delay") + rx_thread = Param.Bool(False, "dedicated kernel thread for transmit") + tx_thread = Param.Bool(False, "dedicated kernel threads for receive") + rss = Param.Bool(False, "Receive Side Scaling") + +class NSGigE(EtherDevBase): + type = 'NSGigE' + cxx_header = "dev/net/ns_gige.hh" + + dma_data_free = Param.Bool(False, "DMA of Data is free") + dma_desc_free = Param.Bool(False, "DMA of Descriptors is free") + dma_no_allocate = Param.Bool(True, "Should we allocate cache on read") + + VendorID = 0x100B + DeviceID = 0x0022 + Status = 0x0290 + SubClassCode = 0x00 + ClassCode = 0x02 + ProgIF = 0x00 + BAR0 = 0x00000001 + BAR1 = 0x00000000 + BAR2 = 0x00000000 + BAR3 = 0x00000000 + BAR4 = 0x00000000 + BAR5 = 0x00000000 + MaximumLatency = 0x34 + MinimumGrant = 0xb0 + InterruptLine = 0x1e + InterruptPin = 0x01 + BAR0Size = '256B' + BAR1Size = '4kB' + + + +class Sinic(EtherDevBase): + type = 'Sinic' + cxx_class = 'Sinic::Device' + cxx_header = "dev/net/sinic.hh" + + rx_max_copy = Param.MemorySize('1514B', "rx max copy") + tx_max_copy = Param.MemorySize('16kB', "tx max 
copy") + rx_max_intr = Param.UInt32(10, "max rx packets per interrupt") + rx_fifo_threshold = Param.MemorySize('384kB', "rx fifo high threshold") + rx_fifo_low_mark = Param.MemorySize('128kB', "rx fifo low threshold") + tx_fifo_high_mark = Param.MemorySize('384kB', "tx fifo high threshold") + tx_fifo_threshold = Param.MemorySize('128kB', "tx fifo low threshold") + virtual_count = Param.UInt32(1, "Virtualized SINIC") + zero_copy_size = Param.UInt32(64, "Bytes to copy if below threshold") + zero_copy_threshold = Param.UInt32(256, + "Only zero copy above this threshold") + zero_copy = Param.Bool(False, "Zero copy receive") + delay_copy = Param.Bool(False, "Delayed copy transmit") + virtual_addr = Param.Bool(False, "Virtual addressing") + + VendorID = 0x1291 + DeviceID = 0x1293 + Status = 0x0290 + SubClassCode = 0x00 + ClassCode = 0x02 + ProgIF = 0x00 + BAR0 = 0x00000000 + BAR1 = 0x00000000 + BAR2 = 0x00000000 + BAR3 = 0x00000000 + BAR4 = 0x00000000 + BAR5 = 0x00000000 + MaximumLatency = 0x34 + MinimumGrant = 0xb0 + InterruptLine = 0x1e + InterruptPin = 0x01 + BAR0Size = '64kB' + + diff --git a/src/dev/net/SConscript b/src/dev/net/SConscript new file mode 100644 index 000000000..f529a1b2a --- /dev/null +++ b/src/dev/net/SConscript @@ -0,0 +1,94 @@ +# -*- mode:python -*- + +# Copyright (c) 2015 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Steve Reinhardt +# Gabe Black +# Andreas Sandberg + +Import('*') + +if env['TARGET_ISA'] == 'null': + Return() + +SimObject('Ethernet.py') + +# Basic Ethernet infrastructure +Source('etherbus.cc') +Source('etherdevice.cc') +Source('etherdump.cc') +Source('etherint.cc') +Source('etherlink.cc') +Source('etherpkt.cc') +Source('ethertap.cc') + +Source('pktfifo.cc') + +DebugFlag('Ethernet') +DebugFlag('EthernetCksum') +DebugFlag('EthernetDMA') +DebugFlag('EthernetData') +DebugFlag('EthernetDesc') +DebugFlag('EthernetEEPROM') +DebugFlag('EthernetIntr') +DebugFlag('EthernetPIO') +DebugFlag('EthernetSM') + +# Multi gem5 +Source('multi_packet.cc') +Source('multi_iface.cc') +Source('multi_etherlink.cc') +Source('tcp_iface.cc') + +DebugFlag('MultiEthernet') +DebugFlag('MultiEthernetPkt') + +# Ethernet controllers +Source('i8254xGBe.cc') +Source('ns_gige.cc') +Source('sinic.cc') + + + +CompoundFlag('EthernetAll', [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', + 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', + 'EthernetCksum', 'EthernetEEPROM' ]) + +CompoundFlag('EthernetNoData', [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', + 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ]) diff --git a/src/dev/net/etherbus.cc b/src/dev/net/etherbus.cc new file mode 100644 index 000000000..ba5beab01 --- /dev/null +++ b/src/dev/net/etherbus.cc @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +/* @file + * Device module for modelling an ethernet hub + */ +#include "dev/net/etherbus.hh" + +#include +#include +#include +#include + +#include "base/trace.hh" +#include "debug/Ethernet.hh" +#include "debug/EthernetData.hh" +#include "dev/net/etherdump.hh" +#include "dev/net/etherint.hh" +#include "dev/net/etherpkt.hh" +#include "params/EtherBus.hh" +#include "sim/core.hh" + +using namespace std; + +EtherBus::EtherBus(const Params *p) + : EtherObject(p), ticksPerByte(p->speed), loopback(p->loopback), + event(this), sender(0), dump(p->dump) +{ +} + +void +EtherBus::txDone() +{ + devlist_t::iterator i = devlist.begin(); + devlist_t::iterator end = devlist.end(); + + DPRINTF(Ethernet, "ethernet packet received: length=%d\n", packet->length); + DDUMP(EthernetData, packet->data, packet->length); + + while (i != end) { + if (loopback || *i != sender) + (*i)->sendPacket(packet); + ++i; + } + + sender->sendDone(); + + if (dump) + dump->dump(packet); + + sender = 0; + packet = 0; +} + +EtherInt* +EtherBus::getEthPort(const std::string &if_name, int idx) +{ + panic("Etherbus doesn't work\n"); +} + +bool +EtherBus::send(EtherInt *sndr, EthPacketPtr &pkt) +{ + if (busy()) { + DPRINTF(Ethernet, "ethernet packet not sent, bus busy\n", curTick()); + return false; + } + + DPRINTF(Ethernet, "ethernet packet sent: length=%d\n", pkt->length); + DDUMP(EthernetData, pkt->data, pkt->length); + + packet = pkt; + sender = sndr; + int delay = (int)ceil(((double)pkt->length * ticksPerByte) + 1.0); + DPRINTF(Ethernet, "scheduling packet: delay=%d, (rate=%f)\n", + delay, ticksPerByte); + schedule(event, curTick() + delay); + + return true; +} + +EtherBus * +EtherBusParams::create() +{ + return new EtherBus(this); +} diff --git a/src/dev/net/etherbus.hh b/src/dev/net/etherbus.hh new file mode 100644 index 000000000..7395c28d8 --- /dev/null +++ b/src/dev/net/etherbus.hh @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of 
Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +/* @file + * Device module for modelling an ethernet hub + */ + +#ifndef __DEV_NET_ETHERBUS_HH__ +#define __DEV_NET_ETHERBUS_HH__ + +#include "dev/net/etherobject.hh" +#include "dev/net/etherpkt.hh" +#include "params/EtherBus.hh" +#include "sim/eventq.hh" +#include "sim/sim_object.hh" + +class EtherDump; +class EtherInt; +class EtherBus : public EtherObject +{ + protected: + typedef std::list devlist_t; + devlist_t devlist; + double ticksPerByte; + bool loopback; + + protected: + class DoneEvent : public Event + { + protected: + EtherBus *bus; + + public: + DoneEvent(EtherBus *b) : bus(b) {} + virtual void process() { bus->txDone(); } + virtual const char *description() const + { return "ethernet bus completion"; } + }; + + DoneEvent event; + EthPacketPtr packet; + EtherInt *sender; + EtherDump *dump; + + public: + typedef EtherBusParams Params; + EtherBus(const Params *p); + virtual ~EtherBus() {} + + const Params * + params() const + { + return dynamic_cast(_params); + } + + void txDone(); + void reg(EtherInt *dev); + bool busy() const { return (bool)packet; } + bool send(EtherInt *sender, EthPacketPtr &packet); + virtual EtherInt *getEthPort(const std::string &if_name, int idx); +}; + +#endif // __DEV_NET_ETHERBUS_HH__ diff --git a/src/dev/net/etherdevice.cc b/src/dev/net/etherdevice.cc new file mode 100644 index 000000000..59849502c --- /dev/null +++ b/src/dev/net/etherdevice.cc @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + * Lisa Hsu + */ + +#include "dev/net/etherdevice.hh" + +#include "sim/stats.hh" + +void +EtherDevice::regStats() +{ + txBytes + .name(name() + ".txBytes") + .desc("Bytes Transmitted") + .prereq(txBytes) + ; + + rxBytes + .name(name() + ".rxBytes") + .desc("Bytes Received") + .prereq(rxBytes) + ; + + txPackets + .name(name() + ".txPackets") + .desc("Number of Packets Transmitted") + .prereq(txBytes) + ; + + rxPackets + .name(name() + ".rxPackets") + .desc("Number of Packets Received") + .prereq(rxBytes) + ; + + txIpChecksums + .name(name() + ".txIpChecksums") + .desc("Number of tx IP Checksums done by device") + .precision(0) + .prereq(txBytes) + ; + + rxIpChecksums + .name(name() + ".rxIpChecksums") + .desc("Number of rx IP Checksums done by device") + .precision(0) + .prereq(rxBytes) + ; + + txTcpChecksums + .name(name() + ".txTcpChecksums") + .desc("Number of tx TCP Checksums done by device") + .precision(0) + .prereq(txBytes) + ; + + rxTcpChecksums + .name(name() + ".rxTcpChecksums") + .desc("Number of rx TCP Checksums done by device") + .precision(0) + .prereq(rxBytes) + ; + + txUdpChecksums + .name(name() + ".txUdpChecksums") + .desc("Number of tx UDP Checksums done by device") + .precision(0) + .prereq(txBytes) + ; + + rxUdpChecksums + .name(name() + ".rxUdpChecksums") + .desc("Number of rx UDP Checksums done by device") + .precision(0) + .prereq(rxBytes) + ; + + descDmaReads + .name(name() + ".descDMAReads") + .desc("Number of descriptors the device read w/ DMA") + .precision(0) + ; + + descDmaWrites + .name(name() + ".descDMAWrites") + .desc("Number of descriptors the device wrote w/ DMA") + .precision(0) + ; + + descDmaRdBytes + .name(name() + ".descDmaReadBytes") + .desc("number of descriptor bytes read w/ DMA") + .precision(0) + ; + + descDmaWrBytes + .name(name() + ".descDmaWriteBytes") + .desc("number of descriptor bytes write w/ DMA") + .precision(0) + ; + + txBandwidth + .name(name() + ".txBandwidth") + 
.desc("Transmit Bandwidth (bits/s)") + .precision(0) + .prereq(txBytes) + ; + + rxBandwidth + .name(name() + ".rxBandwidth") + .desc("Receive Bandwidth (bits/s)") + .precision(0) + .prereq(rxBytes) + ; + + totBandwidth + .name(name() + ".totBandwidth") + .desc("Total Bandwidth (bits/s)") + .precision(0) + .prereq(totBytes) + ; + + totPackets + .name(name() + ".totPackets") + .desc("Total Packets") + .precision(0) + .prereq(totBytes) + ; + + totBytes + .name(name() + ".totBytes") + .desc("Total Bytes") + .precision(0) + .prereq(totBytes) + ; + + totPacketRate + .name(name() + ".totPPS") + .desc("Total Tranmission Rate (packets/s)") + .precision(0) + .prereq(totBytes) + ; + + txPacketRate + .name(name() + ".txPPS") + .desc("Packet Tranmission Rate (packets/s)") + .precision(0) + .prereq(txBytes) + ; + + rxPacketRate + .name(name() + ".rxPPS") + .desc("Packet Reception Rate (packets/s)") + .precision(0) + .prereq(rxBytes) + ; + + postedSwi + .name(name() + ".postedSwi") + .desc("number of software interrupts posted to CPU") + .precision(0) + ; + + totalSwi + .name(name() + ".totalSwi") + .desc("total number of Swi written to ISR") + .precision(0) + ; + + coalescedSwi + .name(name() + ".coalescedSwi") + .desc("average number of Swi's coalesced into each post") + .precision(0) + ; + + postedRxIdle + .name(name() + ".postedRxIdle") + .desc("number of rxIdle interrupts posted to CPU") + .precision(0) + ; + + totalRxIdle + .name(name() + ".totalRxIdle") + .desc("total number of RxIdle written to ISR") + .precision(0) + ; + + coalescedRxIdle + .name(name() + ".coalescedRxIdle") + .desc("average number of RxIdle's coalesced into each post") + .precision(0) + ; + + postedRxOk + .name(name() + ".postedRxOk") + .desc("number of RxOk interrupts posted to CPU") + .precision(0) + ; + + totalRxOk + .name(name() + ".totalRxOk") + .desc("total number of RxOk written to ISR") + .precision(0) + ; + + coalescedRxOk + .name(name() + ".coalescedRxOk") + .desc("average number of RxOk's 
coalesced into each post") + .precision(0) + ; + + postedRxDesc + .name(name() + ".postedRxDesc") + .desc("number of RxDesc interrupts posted to CPU") + .precision(0) + ; + + totalRxDesc + .name(name() + ".totalRxDesc") + .desc("total number of RxDesc written to ISR") + .precision(0) + ; + + coalescedRxDesc + .name(name() + ".coalescedRxDesc") + .desc("average number of RxDesc's coalesced into each post") + .precision(0) + ; + + postedTxOk + .name(name() + ".postedTxOk") + .desc("number of TxOk interrupts posted to CPU") + .precision(0) + ; + + totalTxOk + .name(name() + ".totalTxOk") + .desc("total number of TxOk written to ISR") + .precision(0) + ; + + coalescedTxOk + .name(name() + ".coalescedTxOk") + .desc("average number of TxOk's coalesced into each post") + .precision(0) + ; + + postedTxIdle + .name(name() + ".postedTxIdle") + .desc("number of TxIdle interrupts posted to CPU") + .precision(0) + ; + + totalTxIdle + .name(name() + ".totalTxIdle") + .desc("total number of TxIdle written to ISR") + .precision(0) + ; + + coalescedTxIdle + .name(name() + ".coalescedTxIdle") + .desc("average number of TxIdle's coalesced into each post") + .precision(0) + ; + + postedTxDesc + .name(name() + ".postedTxDesc") + .desc("number of TxDesc interrupts posted to CPU") + .precision(0) + ; + + totalTxDesc + .name(name() + ".totalTxDesc") + .desc("total number of TxDesc written to ISR") + .precision(0) + ; + + coalescedTxDesc + .name(name() + ".coalescedTxDesc") + .desc("average number of TxDesc's coalesced into each post") + .precision(0) + ; + + postedRxOrn + .name(name() + ".postedRxOrn") + .desc("number of RxOrn posted to CPU") + .precision(0) + ; + + totalRxOrn + .name(name() + ".totalRxOrn") + .desc("total number of RxOrn written to ISR") + .precision(0) + ; + + coalescedRxOrn + .name(name() + ".coalescedRxOrn") + .desc("average number of RxOrn's coalesced into each post") + .precision(0) + ; + + coalescedTotal + .name(name() + ".coalescedTotal") + .desc("average number 
of interrupts coalesced into each post") + .precision(0) + ; + + postedInterrupts + .name(name() + ".postedInterrupts") + .desc("number of posts to CPU") + .precision(0) + ; + + droppedPackets + .name(name() + ".droppedPackets") + .desc("number of packets dropped") + .precision(0) + ; + + coalescedSwi = totalSwi / postedInterrupts; + coalescedRxIdle = totalRxIdle / postedInterrupts; + coalescedRxOk = totalRxOk / postedInterrupts; + coalescedRxDesc = totalRxDesc / postedInterrupts; + coalescedTxOk = totalTxOk / postedInterrupts; + coalescedTxIdle = totalTxIdle / postedInterrupts; + coalescedTxDesc = totalTxDesc / postedInterrupts; + coalescedRxOrn = totalRxOrn / postedInterrupts; + + coalescedTotal = (totalSwi + totalRxIdle + totalRxOk + totalRxDesc + + totalTxOk + totalTxIdle + totalTxDesc + + totalRxOrn) / postedInterrupts; + + txBandwidth = txBytes * Stats::constant(8) / simSeconds; + rxBandwidth = rxBytes * Stats::constant(8) / simSeconds; + totBandwidth = txBandwidth + rxBandwidth; + totBytes = txBytes + rxBytes; + totPackets = txPackets + rxPackets; + + txPacketRate = txPackets / simSeconds; + rxPacketRate = rxPackets / simSeconds; + totPacketRate = totPackets / simSeconds; +} diff --git a/src/dev/net/etherdevice.hh b/src/dev/net/etherdevice.hh new file mode 100644 index 000000000..d13a1c3c3 --- /dev/null +++ b/src/dev/net/etherdevice.hh @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2007 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + */ + +/** + * @file + * Base Ethernet Device declaration. + */ + +#ifndef __DEV_NET_ETHERDEVICE_HH__ +#define __DEV_NET_ETHERDEVICE_HH__ + +#include "base/statistics.hh" +#include "dev/pci/device.hh" +#include "params/EtherDevBase.hh" +#include "params/EtherDevice.hh" +#include "sim/sim_object.hh" + +class EtherInt; + +/** + * The base EtherObject class, allows for an accesor function to a + * simobj that returns the Port. 
+ */ +class EtherDevice : public PciDevice +{ + public: + typedef EtherDeviceParams Params; + EtherDevice(const Params *params) + : PciDevice(params) + {} + + const Params * + params() const + { + return dynamic_cast(_params); + } + + public: + /** Additional function to return the Port of a memory object. */ + virtual EtherInt *getEthPort(const std::string &if_name, int idx = -1) = 0; + + public: + void regStats(); + + protected: + Stats::Scalar txBytes; + Stats::Scalar rxBytes; + Stats::Scalar txPackets; + Stats::Scalar rxPackets; + Stats::Scalar txIpChecksums; + Stats::Scalar rxIpChecksums; + Stats::Scalar txTcpChecksums; + Stats::Scalar rxTcpChecksums; + Stats::Scalar txUdpChecksums; + Stats::Scalar rxUdpChecksums; + Stats::Scalar descDmaReads; + Stats::Scalar descDmaWrites; + Stats::Scalar descDmaRdBytes; + Stats::Scalar descDmaWrBytes; + Stats::Formula totBandwidth; + Stats::Formula totPackets; + Stats::Formula totBytes; + Stats::Formula totPacketRate; + Stats::Formula txBandwidth; + Stats::Formula rxBandwidth; + Stats::Formula txPacketRate; + Stats::Formula rxPacketRate; + Stats::Scalar postedSwi; + Stats::Formula coalescedSwi; + Stats::Scalar totalSwi; + Stats::Scalar postedRxIdle; + Stats::Formula coalescedRxIdle; + Stats::Scalar totalRxIdle; + Stats::Scalar postedRxOk; + Stats::Formula coalescedRxOk; + Stats::Scalar totalRxOk; + Stats::Scalar postedRxDesc; + Stats::Formula coalescedRxDesc; + Stats::Scalar totalRxDesc; + Stats::Scalar postedTxOk; + Stats::Formula coalescedTxOk; + Stats::Scalar totalTxOk; + Stats::Scalar postedTxIdle; + Stats::Formula coalescedTxIdle; + Stats::Scalar totalTxIdle; + Stats::Scalar postedTxDesc; + Stats::Formula coalescedTxDesc; + Stats::Scalar totalTxDesc; + Stats::Scalar postedRxOrn; + Stats::Formula coalescedRxOrn; + Stats::Scalar totalRxOrn; + Stats::Formula coalescedTotal; + Stats::Scalar postedInterrupts; + Stats::Scalar droppedPackets; +}; + +/** + * Dummy class to keep the Python class hierarchy in sync with the C++ + 
* object hierarchy. + * + * The Python object hierarchy includes the EtherDevBase class which + * is used by some ethernet devices as a way to share common + * configuration information in the generated param structs. Since the + * Python hierarchy is used to generate a SWIG interface for all C++ + * SimObjects, we need to reflect this in the C++ object hierarchy. If + * we don't, SWIG might end up doing 'bad things' when it down casts + * ethernet objects to their base class(es). + */ +class EtherDevBase : public EtherDevice +{ + public: + EtherDevBase(const EtherDevBaseParams *params) + : EtherDevice(params) + {} + + const EtherDevBaseParams * + params() const + { + return dynamic_cast(_params); + } + +}; + +#endif // __DEV_NET_ETHERDEVICE_HH__ + diff --git a/src/dev/net/etherdump.cc b/src/dev/net/etherdump.cc new file mode 100644 index 000000000..c537fbf57 --- /dev/null +++ b/src/dev/net/etherdump.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Nathan Binkert + */ + +/* @file + * Simple object for creating a simple pcap style packet trace + */ +#include "dev/net/etherdump.hh" + +#include + +#include +#include + +#include "base/misc.hh" +#include "base/output.hh" +#include "sim/core.hh" + +using std::string; + +EtherDump::EtherDump(const Params *p) + : SimObject(p), stream(simout.create(p->file, true)), + maxlen(p->maxlen) +{ +} + +#define DLT_EN10MB 1 // Ethernet (10Mb) +#define TCPDUMP_MAGIC 0xa1b2c3d4 +#define PCAP_VERSION_MAJOR 2 +#define PCAP_VERSION_MINOR 4 + +struct pcap_file_header { + uint32_t magic; + uint16_t version_major; + uint16_t version_minor; + int32_t thiszone; // gmt to local correction + uint32_t sigfigs; // accuracy of timestamps + uint32_t snaplen; // max length saved portion of each pkt + uint32_t linktype; // data link type (DLT_*) +}; + +struct pcap_pkthdr { + uint32_t seconds; + uint32_t microseconds; + uint32_t caplen; // length of portion present + uint32_t len; // length this packet (off wire) +}; + +void +EtherDump::init() +{ + struct pcap_file_header hdr; + hdr.magic = TCPDUMP_MAGIC; + hdr.version_major = PCAP_VERSION_MAJOR; + hdr.version_minor = PCAP_VERSION_MINOR; + + hdr.thiszone = 0; + hdr.snaplen = 1500; + hdr.sigfigs = 0; + hdr.linktype = DLT_EN10MB; + + stream->write(reinterpret_cast(&hdr), sizeof(hdr)); + + stream->flush(); +} + +void +EtherDump::dumpPacket(EthPacketPtr &packet) +{ + pcap_pkthdr pkthdr; + 
pkthdr.seconds = curTick() / SimClock::Int::s; + pkthdr.microseconds = (curTick() / SimClock::Int::us) % ULL(1000000); + pkthdr.caplen = std::min(packet->length, maxlen); + pkthdr.len = packet->length; + stream->write(reinterpret_cast(&pkthdr), sizeof(pkthdr)); + stream->write(reinterpret_cast(packet->data), pkthdr.caplen); + stream->flush(); +} + +EtherDump * +EtherDumpParams::create() +{ + return new EtherDump(this); +} diff --git a/src/dev/net/etherdump.hh b/src/dev/net/etherdump.hh new file mode 100644 index 000000000..8e651baf3 --- /dev/null +++ b/src/dev/net/etherdump.hh @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Nathan Binkert + */ + +/* @file + * Simple object for creating a simple pcap style packet trace + */ + +#ifndef __DEV_NET_ETHERDUMP_HH__ +#define __DEV_NET_ETHERDUMP_HH__ + +#include + +#include "dev/net/etherpkt.hh" +#include "params/EtherDump.hh" +#include "sim/sim_object.hh" + +/* + * Simple object for creating a simple pcap style packet trace + */ +class EtherDump : public SimObject +{ + private: + std::ostream *stream; + const unsigned maxlen; + void dumpPacket(EthPacketPtr &packet); + void init(); + + public: + typedef EtherDumpParams Params; + EtherDump(const Params *p); + + inline void dump(EthPacketPtr &pkt) { dumpPacket(pkt); } +}; + +#endif // __DEV_NET_ETHERDUMP_HH__ diff --git a/src/dev/net/etherint.cc b/src/dev/net/etherint.cc new file mode 100644 index 000000000..b5990c7a0 --- /dev/null +++ b/src/dev/net/etherint.cc @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +#include "dev/net/etherint.hh" + +#include "base/misc.hh" +#include "sim/sim_object.hh" + +void +EtherInt::setPeer(EtherInt *p) +{ + if (peer && peer != p) + panic("You cannot change the peer once it is set.\n" + "Current peer=%s Desired peer=%s", peer->name(), p->name()); + + peer = p; +} diff --git a/src/dev/net/etherint.hh b/src/dev/net/etherint.hh new file mode 100644 index 000000000..a11d02681 --- /dev/null +++ b/src/dev/net/etherint.hh @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Nathan Binkert + */ + +/* @file + * Class representing the actual interface between two ethernet + * components. + */ + +#ifndef __DEV_NET_ETHERINT_HH__ +#define __DEV_NET_ETHERINT_HH__ + +#include + +#include "dev/net/etherpkt.hh" + +/* + * Class representing the actual interface between two ethernet + * components. These components are intended to attach to another + * ethernet interface on one side and whatever device on the other. + */ +class EtherInt +{ + protected: + mutable std::string portName; + EtherInt *peer; + + public: + EtherInt(const std::string &name) + : portName(name), peer(NULL) {} + virtual ~EtherInt() {} + + /** Return port name (for DPRINTF). */ + const std::string &name() const { return portName; } + + void setPeer(EtherInt *p); + EtherInt* getPeer() { return peer; } + + void recvDone() { peer->sendDone(); } + virtual void sendDone() = 0; + + bool sendPacket(EthPacketPtr packet) + { return peer ? 
peer->recvPacket(packet) : true; } + virtual bool recvPacket(EthPacketPtr packet) = 0; + + bool askBusy() {return peer->isBusy(); } + virtual bool isBusy() { return false; } +}; + +#endif // __DEV_NET_ETHERINT_HH__ diff --git a/src/dev/net/etherlink.cc b/src/dev/net/etherlink.cc new file mode 100644 index 000000000..c327a0168 --- /dev/null +++ b/src/dev/net/etherlink.cc @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Nathan Binkert + * Ron Dreslinski + */ + +/* @file + * Device module for modelling a fixed bandwidth full duplex ethernet link + */ + +#include "dev/net/etherlink.hh" + +#include +#include +#include +#include + +#include "base/random.hh" +#include "base/trace.hh" +#include "debug/Ethernet.hh" +#include "debug/EthernetData.hh" +#include "dev/net/etherdump.hh" +#include "dev/net/etherint.hh" +#include "dev/net/etherpkt.hh" +#include "params/EtherLink.hh" +#include "sim/core.hh" +#include "sim/serialize.hh" +#include "sim/system.hh" + +using namespace std; + +EtherLink::EtherLink(const Params *p) + : EtherObject(p) +{ + link[0] = new Link(name() + ".link0", this, 0, p->speed, + p->delay, p->delay_var, p->dump); + link[1] = new Link(name() + ".link1", this, 1, p->speed, + p->delay, p->delay_var, p->dump); + + interface[0] = new Interface(name() + ".int0", link[0], link[1]); + interface[1] = new Interface(name() + ".int1", link[1], link[0]); +} + + +EtherLink::~EtherLink() +{ + delete link[0]; + delete link[1]; + + delete interface[0]; + delete interface[1]; +} + +EtherInt* +EtherLink::getEthPort(const std::string &if_name, int idx) +{ + Interface *i; + if (if_name 
== "int0") + i = interface[0]; + else if (if_name == "int1") + i = interface[1]; + else + return NULL; + if (i->getPeer()) + panic("interface already connected to\n"); + + return i; +} + + +EtherLink::Interface::Interface(const string &name, Link *tx, Link *rx) + : EtherInt(name), txlink(tx) +{ + tx->setTxInt(this); + rx->setRxInt(this); +} + +EtherLink::Link::Link(const string &name, EtherLink *p, int num, + double rate, Tick delay, Tick delay_var, EtherDump *d) + : objName(name), parent(p), number(num), txint(NULL), rxint(NULL), + ticksPerByte(rate), linkDelay(delay), delayVar(delay_var), dump(d), + doneEvent(this), txQueueEvent(this) +{ } + +void +EtherLink::serialize(CheckpointOut &cp) const +{ + link[0]->serialize("link0", cp); + link[1]->serialize("link1", cp); +} + +void +EtherLink::unserialize(CheckpointIn &cp) +{ + link[0]->unserialize("link0", cp); + link[1]->unserialize("link1", cp); +} + +void +EtherLink::Link::txComplete(EthPacketPtr packet) +{ + DPRINTF(Ethernet, "packet received: len=%d\n", packet->length); + DDUMP(EthernetData, packet->data, packet->length); + rxint->sendPacket(packet); +} + +void +EtherLink::Link::txDone() +{ + if (dump) + dump->dump(packet); + + if (linkDelay > 0) { + DPRINTF(Ethernet, "packet delayed: delay=%d\n", linkDelay); + txQueue.emplace_back(std::make_pair(curTick() + linkDelay, packet)); + if (!txQueueEvent.scheduled()) + parent->schedule(txQueueEvent, txQueue.front().first); + } else { + assert(txQueue.empty()); + txComplete(packet); + } + + packet = 0; + assert(!busy()); + + txint->sendDone(); +} + +void +EtherLink::Link::processTxQueue() +{ + auto cur(txQueue.front()); + txQueue.pop_front(); + + // Schedule a new event to process the next packet in the queue. 
+ if (!txQueue.empty()) { + auto next(txQueue.front()); + assert(next.first > curTick()); + parent->schedule(txQueueEvent, next.first); + } + + assert(cur.first == curTick()); + txComplete(cur.second); +} + +bool +EtherLink::Link::transmit(EthPacketPtr pkt) +{ + if (busy()) { + DPRINTF(Ethernet, "packet not sent, link busy\n"); + return false; + } + + DPRINTF(Ethernet, "packet sent: len=%d\n", pkt->length); + DDUMP(EthernetData, pkt->data, pkt->length); + + packet = pkt; + Tick delay = (Tick)ceil(((double)pkt->length * ticksPerByte) + 1.0); + if (delayVar != 0) + delay += random_mt.random(0, delayVar); + + DPRINTF(Ethernet, "scheduling packet: delay=%d, (rate=%f)\n", + delay, ticksPerByte); + parent->schedule(doneEvent, curTick() + delay); + + return true; +} + +void +EtherLink::Link::serialize(const string &base, CheckpointOut &cp) const +{ + bool packet_exists = packet != nullptr; + paramOut(cp, base + ".packet_exists", packet_exists); + if (packet_exists) + packet->serialize(base + ".packet", cp); + + bool event_scheduled = doneEvent.scheduled(); + paramOut(cp, base + ".event_scheduled", event_scheduled); + if (event_scheduled) { + Tick event_time = doneEvent.when(); + paramOut(cp, base + ".event_time", event_time); + } + + const size_t tx_queue_size(txQueue.size()); + paramOut(cp, base + ".tx_queue_size", tx_queue_size); + unsigned idx(0); + for (const auto &pe : txQueue) { + paramOut(cp, csprintf("%s.txQueue[%i].tick", base, idx), pe.first); + pe.second->serialize(csprintf("%s.txQueue[%i].packet", base, idx), cp); + + ++idx; + } +} + +void +EtherLink::Link::unserialize(const string &base, CheckpointIn &cp) +{ + bool packet_exists; + paramIn(cp, base + ".packet_exists", packet_exists); + if (packet_exists) { + packet = make_shared(16384); + packet->unserialize(base + ".packet", cp); + } + + bool event_scheduled; + paramIn(cp, base + ".event_scheduled", event_scheduled); + if (event_scheduled) { + Tick event_time; + paramIn(cp, base + ".event_time", event_time); 
+ parent->schedule(doneEvent, event_time); + } + + size_t tx_queue_size; + if (optParamIn(cp, base + ".tx_queue_size", tx_queue_size)) { + for (size_t idx = 0; idx < tx_queue_size; ++idx) { + Tick tick; + EthPacketPtr delayed_packet = make_shared(16384); + + paramIn(cp, csprintf("%s.txQueue[%i].tick", base, idx), tick); + delayed_packet->unserialize( + csprintf("%s.txQueue[%i].packet", base, idx), cp); + + fatal_if(!txQueue.empty() && txQueue.back().first > tick, + "Invalid txQueue packet order in EtherLink!\n"); + txQueue.emplace_back(std::make_pair(tick, delayed_packet)); + } + + if (!txQueue.empty()) + parent->schedule(txQueueEvent, txQueue.front().first); + } else { + // We can't reliably convert in-flight packets from old + // checkpoints. In fact, gem5 hasn't been able to load these + // packets for at least two years before the format change. + warn("Old-style EtherLink serialization format detected, " + "in-flight packets may have been dropped.\n"); + } +} + +EtherLink * +EtherLinkParams::create() +{ + return new EtherLink(this); +} diff --git a/src/dev/net/etherlink.hh b/src/dev/net/etherlink.hh new file mode 100644 index 000000000..1d02dec3b --- /dev/null +++ b/src/dev/net/etherlink.hh @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +/* @file + * Device module for modelling a fixed bandwidth full duplex ethernet link + */ + +#ifndef __DEV_NET_ETHERLINK_HH__ +#define __DEV_NET_ETHERLINK_HH__ + +#include + +#include "base/types.hh" +#include "dev/net/etherint.hh" +#include "dev/net/etherobject.hh" +#include "dev/net/etherpkt.hh" +#include "params/EtherLink.hh" +#include "sim/eventq.hh" +#include "sim/sim_object.hh" + +class EtherDump; +class Checkpoint; +/* + * Model for a fixed bandwidth full duplex ethernet link + */ +class EtherLink : public EtherObject +{ + protected: + class Interface; + + /* + * Model for a single uni-directional link + */ + class Link + { + protected: + const std::string objName; + + EtherLink *const parent; + const int number; + + Interface *txint; + Interface *rxint; + + const double ticksPerByte; + const Tick linkDelay; + const Tick delayVar; + EtherDump *const dump; + + protected: + /* + * Transfer is complete + */ + EthPacketPtr packet; + void txDone(); + typedef EventWrapper DoneEvent; + friend void DoneEvent::process(); + DoneEvent doneEvent; + + /** + * Maintain a queue of in-flight packets. Assume that the + * delay is non-zero and constant (i.e., at most one packet + * per tick). 
+ */ + std::deque> txQueue; + + void processTxQueue(); + typedef EventWrapper TxQueueEvent; + friend void TxQueueEvent::process(); + TxQueueEvent txQueueEvent; + + void txComplete(EthPacketPtr packet); + + public: + Link(const std::string &name, EtherLink *p, int num, + double rate, Tick delay, Tick delay_var, EtherDump *dump); + ~Link() {} + + const std::string name() const { return objName; } + + bool busy() const { return (bool)packet; } + bool transmit(EthPacketPtr packet); + + void setTxInt(Interface *i) { assert(!txint); txint = i; } + void setRxInt(Interface *i) { assert(!rxint); rxint = i; } + + void serialize(const std::string &base, CheckpointOut &cp) const; + void unserialize(const std::string &base, CheckpointIn &cp); + }; + + /* + * Interface at each end of the link + */ + class Interface : public EtherInt + { + private: + Link *txlink; + + public: + Interface(const std::string &name, Link *txlink, Link *rxlink); + bool recvPacket(EthPacketPtr packet) { return txlink->transmit(packet); } + void sendDone() { peer->sendDone(); } + bool isBusy() { return txlink->busy(); } + }; + + Link *link[2]; + Interface *interface[2]; + + public: + typedef EtherLinkParams Params; + EtherLink(const Params *p); + virtual ~EtherLink(); + + const Params * + params() const + { + return dynamic_cast(_params); + } + + EtherInt *getEthPort(const std::string &if_name, int idx) override; + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + +}; + +#endif // __DEV_NET_ETHERLINK_HH__ diff --git a/src/dev/net/etherobject.hh b/src/dev/net/etherobject.hh new file mode 100644 index 000000000..55cfa97e6 --- /dev/null +++ b/src/dev/net/etherobject.hh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2007 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + */ + +/** + * @file + * Base Ethernet Object declaration. + */ + +#ifndef __DEV_NET_ETHEROBJECT_HH__ +#define __DEV_NET_ETHEROBJECT_HH__ + +#include "params/EtherObject.hh" +#include "sim/sim_object.hh" + +class EtherInt; + +/** + * The base EtherObject class, allows for an accesor function to a + * simobj that returns the Port. 
+ */ +class EtherObject : public SimObject +{ + public: + typedef EtherObjectParams Params; + EtherObject(const Params *params) + : SimObject(params) {} + + const Params * + params() const + { + return dynamic_cast(_params); + } + + public: + /** Additional function to return the Port of a memory object. */ + virtual EtherInt *getEthPort(const std::string &if_name, int idx = -1) = 0; + +}; + +#endif // __DEV_NET_ETHEROBJECT_HH__ diff --git a/src/dev/net/etherpkt.cc b/src/dev/net/etherpkt.cc new file mode 100644 index 000000000..a16f572c5 --- /dev/null +++ b/src/dev/net/etherpkt.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Nathan Binkert + */ + +#include "dev/net/etherpkt.hh" + +#include + +#include "base/inet.hh" +#include "base/misc.hh" +#include "sim/serialize.hh" + +using namespace std; + +void +EthPacketData::serialize(const string &base, CheckpointOut &cp) const +{ + paramOut(cp, base + ".length", length); + arrayParamOut(cp, base + ".data", data, length); +} + +void +EthPacketData::unserialize(const string &base, CheckpointIn &cp) +{ + paramIn(cp, base + ".length", length); + if (length) + arrayParamIn(cp, base + ".data", data, length); +} + +void +EthPacketData::packAddress(uint8_t *src_addr, + uint8_t *dst_addr, + unsigned &nbytes) +{ + Net::EthHdr *hdr = (Net::EthHdr *)data; + assert(hdr->src().size() == hdr->dst().size()); + if (nbytes < hdr->src().size()) + panic("EthPacketData::packAddress() Buffer overflow"); + + memcpy(dst_addr, hdr->dst().bytes(), hdr->dst().size()); + memcpy(src_addr, hdr->src().bytes(), hdr->src().size()); + + nbytes = hdr->src().size(); +} + diff --git a/src/dev/net/etherpkt.hh b/src/dev/net/etherpkt.hh new file mode 100644 index 000000000..4119578c3 --- /dev/null +++ b/src/dev/net/etherpkt.hh @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + * Lisa Hsu + */ + +/* @file + * Reference counted class containing ethernet packet data + */ + +#ifndef __DEV_NET_ETHERPKT_HH__ +#define __DEV_NET_ETHERPKT_HH__ + +#include +#include +#include + +#include "base/types.hh" +#include "sim/serialize.hh" + +/* + * Reference counted class containing ethernet packet data + */ +class EthPacketData +{ + public: + /* + * Pointer to packet data will be deleted + */ + uint8_t *data; + + /* + * Length of the current packet + */ + unsigned length; + + public: + EthPacketData() + : data(NULL), length(0) + { } + + explicit EthPacketData(unsigned size) + : data(new uint8_t[size]), length(0) + { } + + ~EthPacketData() { if (data) delete [] data; } + + public: + /** + * This function pulls out the MAC source and destination addresses from + * the packet data and stores them in the caller specified buffers. + * + * @param src_addr The buffer to store the source MAC address. + * @param dst_addr The buffer to store the destination MAC address. + * @param length This is an inout parameter. The caller stores in this + * the size of the address buffers. On return, this will contain the + * actual address size stored in the buffers. (We assume that source + * address size is equal to that of the destination address.) + */ + void packAddress(uint8_t *src_addr, uint8_t *dst_addr, unsigned &length); + + void serialize(const std::string &base, CheckpointOut &cp) const; + void unserialize(const std::string &base, CheckpointIn &cp); + + unsigned size() const { return length; } +}; + +typedef std::shared_ptr EthPacketPtr; + +#endif // __DEV_NET_ETHERPKT_HH__ diff --git a/src/dev/net/ethertap.cc b/src/dev/net/ethertap.cc new file mode 100644 index 000000000..e8d6a363c --- /dev/null +++ b/src/dev/net/ethertap.cc @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +/* @file + * Interface to connect a simulated ethernet device to the real world + */ + +#include "dev/net/ethertap.hh" + +#if defined(__OpenBSD__) || defined(__APPLE__) +#include + +#endif +#include +#include + +#include +#include + +#include "base/misc.hh" +#include "base/pollevent.hh" +#include "base/socket.hh" +#include "base/trace.hh" +#include "debug/Ethernet.hh" +#include "debug/EthernetData.hh" +#include "dev/net/etherdump.hh" +#include "dev/net/etherint.hh" +#include "dev/net/etherpkt.hh" + +using namespace std; + +/** + */ +class TapListener +{ + protected: + /** + */ + class Event : public PollEvent + { + protected: + TapListener *listener; + + public: + Event(TapListener *l, int fd, int e) + : PollEvent(fd, e), listener(l) {} + + virtual void process(int revent) { listener->accept(); } + }; + + friend class Event; + Event *event; + + protected: + ListenSocket listener; + EtherTap *tap; + int port; + + public: + TapListener(EtherTap *t, int p) + : event(NULL), tap(t), port(p) {} + ~TapListener() { if (event) delete event; } + + void accept(); + void listen(); +}; + +void +TapListener::listen() +{ + while (!listener.listen(port, true)) { + DPRINTF(Ethernet, "TapListener(listen): Can't bind port %d\n", port); + port++; + } + + ccprintf(cerr, "Listening for tap connection on port %d\n", port); + event = new Event(this, listener.getfd(), POLLIN|POLLERR); + pollQueue.schedule(event); +} + +void +TapListener::accept() +{ + // As a consequence of being called from the PollQueue, we might + // have been called from a different thread. Migrate to "our" + // thread. 
+ EventQueue::ScopedMigration migrate(tap->eventQueue()); + + if (!listener.islistening()) + panic("TapListener(accept): cannot accept if we're not listening!"); + + int sfd = listener.accept(true); + if (sfd != -1) + tap->attach(sfd); +} + +/** + */ +class TapEvent : public PollEvent +{ + protected: + EtherTap *tap; + + public: + TapEvent(EtherTap *_tap, int fd, int e) + : PollEvent(fd, e), tap(_tap) {} + virtual void process(int revent) { tap->process(revent); } +}; + +EtherTap::EtherTap(const Params *p) + : EtherObject(p), event(NULL), socket(-1), buflen(p->bufsz), dump(p->dump), + interface(NULL), txEvent(this) +{ + if (ListenSocket::allDisabled()) + fatal("All listeners are disabled! EtherTap can't work!"); + + buffer = new char[buflen]; + listener = new TapListener(this, p->port); + listener->listen(); + interface = new EtherTapInt(name() + ".interface", this); +} + +EtherTap::~EtherTap() +{ + if (event) + delete event; + if (buffer) + delete [] buffer; + + delete interface; + delete listener; +} + +void +EtherTap::attach(int fd) +{ + if (socket != -1) + close(fd); + + buffer_offset = 0; + data_len = 0; + socket = fd; + DPRINTF(Ethernet, "EtherTap attached\n"); + event = new TapEvent(this, socket, POLLIN|POLLERR); + pollQueue.schedule(event); +} + +void +EtherTap::detach() +{ + DPRINTF(Ethernet, "EtherTap detached\n"); + delete event; + event = 0; + close(socket); + socket = -1; +} + +bool +EtherTap::recvPacket(EthPacketPtr packet) +{ + if (dump) + dump->dump(packet); + + DPRINTF(Ethernet, "EtherTap output len=%d\n", packet->length); + DDUMP(EthernetData, packet->data, packet->length); + uint32_t len = htonl(packet->length); + ssize_t ret = write(socket, &len, sizeof(len)); + if (ret != sizeof(len)) + return false; + ret = write(socket, packet->data, packet->length); + if (ret != packet->length) + return false; + + interface->recvDone(); + + return true; +} + +void +EtherTap::sendDone() +{} + +void +EtherTap::process(int revent) +{ + if (revent & POLLERR) { + 
detach(); + return; + } + + char *data = buffer + sizeof(uint32_t); + if (!(revent & POLLIN)) + return; + + if (buffer_offset < data_len + sizeof(uint32_t)) { + int len = read(socket, buffer + buffer_offset, buflen - buffer_offset); + if (len == 0) { + detach(); + return; + } + + buffer_offset += len; + + if (data_len == 0) + data_len = ntohl(*(uint32_t *)buffer); + + DPRINTF(Ethernet, "Received data from peer: len=%d buffer_offset=%d " + "data_len=%d\n", len, buffer_offset, data_len); + } + + while (data_len != 0 && buffer_offset >= data_len + sizeof(uint32_t)) { + EthPacketPtr packet; + packet = make_shared(data_len); + packet->length = data_len; + memcpy(packet->data, data, data_len); + + buffer_offset -= data_len + sizeof(uint32_t); + assert(buffer_offset >= 0); + if (buffer_offset > 0) { + memmove(buffer, data + data_len, buffer_offset); + data_len = ntohl(*(uint32_t *)buffer); + } else + data_len = 0; + + DPRINTF(Ethernet, "EtherTap input len=%d\n", packet->length); + DDUMP(EthernetData, packet->data, packet->length); + if (!interface->sendPacket(packet)) { + DPRINTF(Ethernet, "bus busy...buffer for retransmission\n"); + packetBuffer.push(packet); + if (!txEvent.scheduled()) + schedule(txEvent, curTick() + retryTime); + } else if (dump) { + dump->dump(packet); + } + } +} + +void +EtherTap::retransmit() +{ + if (packetBuffer.empty()) + return; + + EthPacketPtr packet = packetBuffer.front(); + if (interface->sendPacket(packet)) { + if (dump) + dump->dump(packet); + DPRINTF(Ethernet, "EtherTap retransmit\n"); + packetBuffer.front() = NULL; + packetBuffer.pop(); + } + + if (!packetBuffer.empty() && !txEvent.scheduled()) + schedule(txEvent, curTick() + retryTime); +} + +EtherInt* +EtherTap::getEthPort(const std::string &if_name, int idx) +{ + if (if_name == "tap") { + if (interface->getPeer()) + panic("Interface already connected to\n"); + return interface; + } + return NULL; +} + + +//===================================================================== + +void 
+EtherTap::serialize(CheckpointOut &cp) const +{ + SERIALIZE_SCALAR(socket); + SERIALIZE_SCALAR(buflen); + uint8_t *buffer = (uint8_t *)this->buffer; + SERIALIZE_ARRAY(buffer, buflen); + SERIALIZE_SCALAR(buffer_offset); + SERIALIZE_SCALAR(data_len); + + bool tapevent_present = false; + if (event) { + tapevent_present = true; + SERIALIZE_SCALAR(tapevent_present); + event->serialize(cp); + } + else { + SERIALIZE_SCALAR(tapevent_present); + } +} + +void +EtherTap::unserialize(CheckpointIn &cp) +{ + UNSERIALIZE_SCALAR(socket); + UNSERIALIZE_SCALAR(buflen); + uint8_t *buffer = (uint8_t *)this->buffer; + UNSERIALIZE_ARRAY(buffer, buflen); + UNSERIALIZE_SCALAR(buffer_offset); + UNSERIALIZE_SCALAR(data_len); + + bool tapevent_present; + UNSERIALIZE_SCALAR(tapevent_present); + if (tapevent_present) { + event = new TapEvent(this, socket, POLLIN|POLLERR); + + event->unserialize(cp); + + if (event->queued()) { + pollQueue.schedule(event); + } + } +} + +//===================================================================== + +EtherTap * +EtherTapParams::create() +{ + return new EtherTap(this); +} diff --git a/src/dev/net/ethertap.hh b/src/dev/net/ethertap.hh new file mode 100644 index 000000000..b27b80f84 --- /dev/null +++ b/src/dev/net/ethertap.hh @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +/* @file + * Interface to connect a simulated ethernet device to the real world + */ + +#ifndef __DEV_NET_ETHERTAP_HH__ +#define __DEV_NET_ETHERTAP_HH__ + +#include +#include + +#include "base/pollevent.hh" +#include "dev/net/etherint.hh" +#include "dev/net/etherobject.hh" +#include "dev/net/etherpkt.hh" +#include "params/EtherTap.hh" +#include "sim/eventq.hh" +#include "sim/sim_object.hh" + +class TapEvent; +class TapListener; +class EtherTapInt; + +/* + * Interface to connect a simulated ethernet device to the real world + */ +class EtherTap : public EtherObject +{ + protected: + friend class TapEvent; + TapEvent *event; + + protected: + friend class TapListener; + TapListener *listener; + int socket; + char *buffer; + int buflen; + uint32_t buffer_offset; + uint32_t data_len; + + EtherDump *dump; + + void attach(int fd); + void detach(); + + protected: + std::string device; + std::queue packetBuffer; + EtherTapInt *interface; + + void process(int revent); + void enqueue(EthPacketData *packet); + void retransmit(); + + /* + */ + class TxEvent : public Event + { + protected: + EtherTap *tap; + + public: + TxEvent(EtherTap *_tap) : tap(_tap) {} + void process() { tap->retransmit(); } + virtual const char *description() const + { return "EtherTap retransmit"; } + }; + + friend class TxEvent; + TxEvent txEvent; + + public: + typedef EtherTapParams Params; + EtherTap(const Params *p); + virtual ~EtherTap(); + + const Params * + params() const + { + return dynamic_cast(_params); + } + + EtherInt *getEthPort(const std::string &if_name, int idx) override; + + virtual bool recvPacket(EthPacketPtr packet); + virtual void sendDone(); + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; +}; + +class EtherTapInt : public EtherInt +{ + private: + EtherTap *tap; + public: + EtherTapInt(const std::string &name, EtherTap *t) + : EtherInt(name), tap(t) + { } + + virtual bool 
recvPacket(EthPacketPtr pkt) { return tap->recvPacket(pkt); } + virtual void sendDone() { tap->sendDone(); } +}; + + +#endif // __DEV_NET_ETHERTAP_HH__ diff --git a/src/dev/net/i8254xGBe.cc b/src/dev/net/i8254xGBe.cc new file mode 100644 index 000000000..cba773f39 --- /dev/null +++ b/src/dev/net/i8254xGBe.cc @@ -0,0 +1,2562 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Ali Saidi + */ + +/* @file + * Device model for Intel's 8254x line of gigabit ethernet controllers. + * In particular an 82547 revision 2 (82547GI) MAC because it seems to have the + * fewest workarounds in the driver. It will probably work with most of the + * other MACs with slight modifications. + */ + +#include "dev/net/i8254xGBe.hh" + +/* + * @todo really there are multiple dma engines.. we should implement them. + */ + +#include +#include + +#include "base/inet.hh" +#include "base/trace.hh" +#include "debug/Drain.hh" +#include "debug/EthernetAll.hh" +#include "mem/packet.hh" +#include "mem/packet_access.hh" +#include "params/IGbE.hh" +#include "sim/stats.hh" +#include "sim/system.hh" + +using namespace iGbReg; +using namespace Net; + +IGbE::IGbE(const Params *p) + : EtherDevice(p), etherInt(NULL), cpa(NULL), + rxFifo(p->rx_fifo_size), txFifo(p->tx_fifo_size), rxTick(false), + txTick(false), txFifoTick(false), rxDmaPacket(false), pktOffset(0), + fetchDelay(p->fetch_delay), wbDelay(p->wb_delay), + fetchCompDelay(p->fetch_comp_delay), wbCompDelay(p->wb_comp_delay), + rxWriteDelay(p->rx_write_delay), txReadDelay(p->tx_read_delay), + rdtrEvent(this), radvEvent(this), + tadvEvent(this), tidvEvent(this), tickEvent(this), interEvent(this), + rxDescCache(this, name()+".RxDesc", p->rx_desc_cache_size), + txDescCache(this, name()+".TxDesc", p->tx_desc_cache_size), + lastInterrupt(0) +{ + etherInt = new IGbEInt(name() + ".int", this); + + // Initialized internal registers per Intel documentation + // All registers intialized to 0 by per register constructor + regs.ctrl.fd(1); + regs.ctrl.lrst(1); + regs.ctrl.speed(2); + regs.ctrl.frcspd(1); + regs.sts.speed(3); // Say we're 1000Mbps + regs.sts.fd(1); // full duplex + regs.sts.lu(1); // link up + regs.eecd.fwe(1); + regs.eecd.ee_type(1); + regs.imr = 0; + regs.iam = 0; + regs.rxdctl.gran(1); + regs.rxdctl.wthresh(1); + regs.fcrth(1); + regs.tdwba = 0; + regs.rlpml = 0; + regs.sw_fw_sync = 0; + + 
regs.pba.rxa(0x30); + regs.pba.txa(0x10); + + eeOpBits = 0; + eeAddrBits = 0; + eeDataBits = 0; + eeOpcode = 0; + + // clear all 64 16 bit words of the eeprom + memset(&flash, 0, EEPROM_SIZE*2); + + // Set the MAC address + memcpy(flash, p->hardware_address.bytes(), ETH_ADDR_LEN); + for (int x = 0; x < ETH_ADDR_LEN/2; x++) + flash[x] = htobe(flash[x]); + + uint16_t csum = 0; + for (int x = 0; x < EEPROM_SIZE; x++) + csum += htobe(flash[x]); + + + // Magic happy checksum value + flash[EEPROM_SIZE-1] = htobe((uint16_t)(EEPROM_CSUM - csum)); + + // Store the MAC address as queue ID + macAddr = p->hardware_address; + + rxFifo.clear(); + txFifo.clear(); +} + +IGbE::~IGbE() +{ + delete etherInt; +} + +void +IGbE::init() +{ + cpa = CPA::cpa(); + PciDevice::init(); +} + +EtherInt* +IGbE::getEthPort(const std::string &if_name, int idx) +{ + + if (if_name == "interface") { + if (etherInt->getPeer()) + panic("Port already connected to\n"); + return etherInt; + } + return NULL; +} + +Tick +IGbE::writeConfig(PacketPtr pkt) +{ + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; + if (offset < PCI_DEVICE_SPECIFIC) + PciDevice::writeConfig(pkt); + else + panic("Device specific PCI config space not implemented.\n"); + + // + // Some work may need to be done here based for the pci COMMAND bits. 
+ // + + return configDelay; +} + +// Handy macro for range-testing register access addresses +#define IN_RANGE(val, base, len) (val >= base && val < (base + len)) + +Tick +IGbE::read(PacketPtr pkt) +{ + int bar; + Addr daddr; + + if (!getBAR(pkt->getAddr(), bar, daddr)) + panic("Invalid PCI memory access to unmapped memory.\n"); + + // Only Memory register BAR is allowed + assert(bar == 0); + + // Only 32bit accesses allowed + assert(pkt->getSize() == 4); + + DPRINTF(Ethernet, "Read device register %#X\n", daddr); + + // + // Handle read of register here + // + + + switch (daddr) { + case REG_CTRL: + pkt->set(regs.ctrl()); + break; + case REG_STATUS: + pkt->set(regs.sts()); + break; + case REG_EECD: + pkt->set(regs.eecd()); + break; + case REG_EERD: + pkt->set(regs.eerd()); + break; + case REG_CTRL_EXT: + pkt->set(regs.ctrl_ext()); + break; + case REG_MDIC: + pkt->set(regs.mdic()); + break; + case REG_ICR: + DPRINTF(Ethernet, "Reading ICR. ICR=%#x IMR=%#x IAM=%#x IAME=%d\n", + regs.icr(), regs.imr, regs.iam, regs.ctrl_ext.iame()); + pkt->set(regs.icr()); + if (regs.icr.int_assert() || regs.imr == 0) { + regs.icr = regs.icr() & ~mask(30); + DPRINTF(Ethernet, "Cleared ICR. 
ICR=%#x\n", regs.icr()); + } + if (regs.ctrl_ext.iame() && regs.icr.int_assert()) + regs.imr &= ~regs.iam; + chkInterrupt(); + break; + case REG_EICR: + // This is only useful for MSI, but the driver reads it every time + // Just don't do anything + pkt->set(0); + break; + case REG_ITR: + pkt->set(regs.itr()); + break; + case REG_RCTL: + pkt->set(regs.rctl()); + break; + case REG_FCTTV: + pkt->set(regs.fcttv()); + break; + case REG_TCTL: + pkt->set(regs.tctl()); + break; + case REG_PBA: + pkt->set(regs.pba()); + break; + case REG_WUC: + case REG_LEDCTL: + pkt->set(0); // We don't care, so just return 0 + break; + case REG_FCRTL: + pkt->set(regs.fcrtl()); + break; + case REG_FCRTH: + pkt->set(regs.fcrth()); + break; + case REG_RDBAL: + pkt->set(regs.rdba.rdbal()); + break; + case REG_RDBAH: + pkt->set(regs.rdba.rdbah()); + break; + case REG_RDLEN: + pkt->set(regs.rdlen()); + break; + case REG_SRRCTL: + pkt->set(regs.srrctl()); + break; + case REG_RDH: + pkt->set(regs.rdh()); + break; + case REG_RDT: + pkt->set(regs.rdt()); + break; + case REG_RDTR: + pkt->set(regs.rdtr()); + if (regs.rdtr.fpd()) { + rxDescCache.writeback(0); + DPRINTF(EthernetIntr, + "Posting interrupt because of RDTR.FPD write\n"); + postInterrupt(IT_RXT); + regs.rdtr.fpd(0); + } + break; + case REG_RXDCTL: + pkt->set(regs.rxdctl()); + break; + case REG_RADV: + pkt->set(regs.radv()); + break; + case REG_TDBAL: + pkt->set(regs.tdba.tdbal()); + break; + case REG_TDBAH: + pkt->set(regs.tdba.tdbah()); + break; + case REG_TDLEN: + pkt->set(regs.tdlen()); + break; + case REG_TDH: + pkt->set(regs.tdh()); + break; + case REG_TXDCA_CTL: + pkt->set(regs.txdca_ctl()); + break; + case REG_TDT: + pkt->set(regs.tdt()); + break; + case REG_TIDV: + pkt->set(regs.tidv()); + break; + case REG_TXDCTL: + pkt->set(regs.txdctl()); + break; + case REG_TADV: + pkt->set(regs.tadv()); + break; + case REG_TDWBAL: + pkt->set(regs.tdwba & mask(32)); + break; + case REG_TDWBAH: + pkt->set(regs.tdwba >> 32); + break; + case 
REG_RXCSUM: + pkt->set(regs.rxcsum()); + break; + case REG_RLPML: + pkt->set(regs.rlpml); + break; + case REG_RFCTL: + pkt->set(regs.rfctl()); + break; + case REG_MANC: + pkt->set(regs.manc()); + break; + case REG_SWSM: + pkt->set(regs.swsm()); + regs.swsm.smbi(1); + break; + case REG_FWSM: + pkt->set(regs.fwsm()); + break; + case REG_SWFWSYNC: + pkt->set(regs.sw_fw_sync); + break; + default: + if (!IN_RANGE(daddr, REG_VFTA, VLAN_FILTER_TABLE_SIZE*4) && + !IN_RANGE(daddr, REG_RAL, RCV_ADDRESS_TABLE_SIZE*8) && + !IN_RANGE(daddr, REG_MTA, MULTICAST_TABLE_SIZE*4) && + !IN_RANGE(daddr, REG_CRCERRS, STATS_REGS_SIZE)) + panic("Read request to unknown register number: %#x\n", daddr); + else + pkt->set(0); + }; + + pkt->makeAtomicResponse(); + return pioDelay; +} + +Tick +IGbE::write(PacketPtr pkt) +{ + int bar; + Addr daddr; + + + if (!getBAR(pkt->getAddr(), bar, daddr)) + panic("Invalid PCI memory access to unmapped memory.\n"); + + // Only Memory register BAR is allowed + assert(bar == 0); + + // Only 32bit accesses allowed + assert(pkt->getSize() == sizeof(uint32_t)); + + DPRINTF(Ethernet, "Wrote device register %#X value %#X\n", + daddr, pkt->get()); + + // + // Handle write of register here + // + uint32_t val = pkt->get(); + + Regs::RCTL oldrctl; + Regs::TCTL oldtctl; + + switch (daddr) { + case REG_CTRL: + regs.ctrl = val; + if (regs.ctrl.tfce()) + warn("TX Flow control enabled, should implement\n"); + if (regs.ctrl.rfce()) + warn("RX Flow control enabled, should implement\n"); + break; + case REG_CTRL_EXT: + regs.ctrl_ext = val; + break; + case REG_STATUS: + regs.sts = val; + break; + case REG_EECD: + int oldClk; + oldClk = regs.eecd.sk(); + regs.eecd = val; + // See if this is a eeprom access and emulate accordingly + if (!oldClk && regs.eecd.sk()) { + if (eeOpBits < 8) { + eeOpcode = eeOpcode << 1 | regs.eecd.din(); + eeOpBits++; + } else if (eeAddrBits < 8 && eeOpcode == EEPROM_READ_OPCODE_SPI) { + eeAddr = eeAddr << 1 | regs.eecd.din(); + eeAddrBits++; + } 
else if (eeDataBits < 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) { + assert(eeAddr>>1 < EEPROM_SIZE); + DPRINTF(EthernetEEPROM, "EEPROM bit read: %d word: %#X\n", + flash[eeAddr>>1] >> eeDataBits & 0x1, + flash[eeAddr>>1]); + regs.eecd.dout((flash[eeAddr>>1] >> (15-eeDataBits)) & 0x1); + eeDataBits++; + } else if (eeDataBits < 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI) { + regs.eecd.dout(0); + eeDataBits++; + } else + panic("What's going on with eeprom interface? opcode:" + " %#x:%d addr: %#x:%d, data: %d\n", (uint32_t)eeOpcode, + (uint32_t)eeOpBits, (uint32_t)eeAddr, + (uint32_t)eeAddrBits, (uint32_t)eeDataBits); + + // Reset everything for the next command + if ((eeDataBits == 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) || + (eeDataBits == 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI)) { + eeOpBits = 0; + eeAddrBits = 0; + eeDataBits = 0; + eeOpcode = 0; + eeAddr = 0; + } + + DPRINTF(EthernetEEPROM, "EEPROM: opcode: %#X:%d addr: %#X:%d\n", + (uint32_t)eeOpcode, (uint32_t) eeOpBits, + (uint32_t)eeAddr>>1, (uint32_t)eeAddrBits); + if (eeOpBits == 8 && !(eeOpcode == EEPROM_READ_OPCODE_SPI || + eeOpcode == EEPROM_RDSR_OPCODE_SPI )) + panic("Unknown eeprom opcode: %#X:%d\n", (uint32_t)eeOpcode, + (uint32_t)eeOpBits); + + + } + // If driver requests eeprom access, immediately give it to it + regs.eecd.ee_gnt(regs.eecd.ee_req()); + break; + case REG_EERD: + regs.eerd = val; + if (regs.eerd.start()) { + regs.eerd.done(1); + assert(regs.eerd.addr() < EEPROM_SIZE); + regs.eerd.data(flash[regs.eerd.addr()]); + regs.eerd.start(0); + DPRINTF(EthernetEEPROM, "EEPROM: read addr: %#X data %#x\n", + regs.eerd.addr(), regs.eerd.data()); + } + break; + case REG_MDIC: + regs.mdic = val; + if (regs.mdic.i()) + panic("No support for interrupt on mdic complete\n"); + if (regs.mdic.phyadd() != 1) + panic("No support for reading anything but phy\n"); + DPRINTF(Ethernet, "%s phy address %x\n", + regs.mdic.op() == 1 ? 
"Writing" : "Reading", + regs.mdic.regadd()); + switch (regs.mdic.regadd()) { + case PHY_PSTATUS: + regs.mdic.data(0x796D); // link up + break; + case PHY_PID: + regs.mdic.data(params()->phy_pid); + break; + case PHY_EPID: + regs.mdic.data(params()->phy_epid); + break; + case PHY_GSTATUS: + regs.mdic.data(0x7C00); + break; + case PHY_EPSTATUS: + regs.mdic.data(0x3000); + break; + case PHY_AGC: + regs.mdic.data(0x180); // some random length + break; + default: + regs.mdic.data(0); + } + regs.mdic.r(1); + break; + case REG_ICR: + DPRINTF(Ethernet, "Writing ICR. ICR=%#x IMR=%#x IAM=%#x IAME=%d\n", + regs.icr(), regs.imr, regs.iam, regs.ctrl_ext.iame()); + if (regs.ctrl_ext.iame()) + regs.imr &= ~regs.iam; + regs.icr = ~bits(val,30,0) & regs.icr(); + chkInterrupt(); + break; + case REG_ITR: + regs.itr = val; + break; + case REG_ICS: + DPRINTF(EthernetIntr, "Posting interrupt because of ICS write\n"); + postInterrupt((IntTypes)val); + break; + case REG_IMS: + regs.imr |= val; + chkInterrupt(); + break; + case REG_IMC: + regs.imr &= ~val; + chkInterrupt(); + break; + case REG_IAM: + regs.iam = val; + break; + case REG_RCTL: + oldrctl = regs.rctl; + regs.rctl = val; + if (regs.rctl.rst()) { + rxDescCache.reset(); + DPRINTF(EthernetSM, "RXS: Got RESET!\n"); + rxFifo.clear(); + regs.rctl.rst(0); + } + if (regs.rctl.en()) + rxTick = true; + restartClock(); + break; + case REG_FCTTV: + regs.fcttv = val; + break; + case REG_TCTL: + regs.tctl = val; + oldtctl = regs.tctl; + regs.tctl = val; + if (regs.tctl.en()) + txTick = true; + restartClock(); + if (regs.tctl.en() && !oldtctl.en()) { + txDescCache.reset(); + } + break; + case REG_PBA: + regs.pba.rxa(val); + regs.pba.txa(64 - regs.pba.rxa()); + break; + case REG_WUC: + case REG_LEDCTL: + case REG_FCAL: + case REG_FCAH: + case REG_FCT: + case REG_VET: + case REG_AIFS: + case REG_TIPG: + ; // We don't care, so don't store anything + break; + case REG_IVAR0: + warn("Writing to IVAR0, ignoring...\n"); + break; + case REG_FCRTL: + 
regs.fcrtl = val; + break; + case REG_FCRTH: + regs.fcrth = val; + break; + case REG_RDBAL: + regs.rdba.rdbal( val & ~mask(4)); + rxDescCache.areaChanged(); + break; + case REG_RDBAH: + regs.rdba.rdbah(val); + rxDescCache.areaChanged(); + break; + case REG_RDLEN: + regs.rdlen = val & ~mask(7); + rxDescCache.areaChanged(); + break; + case REG_SRRCTL: + regs.srrctl = val; + break; + case REG_RDH: + regs.rdh = val; + rxDescCache.areaChanged(); + break; + case REG_RDT: + regs.rdt = val; + DPRINTF(EthernetSM, "RXS: RDT Updated.\n"); + if (drainState() == DrainState::Running) { + DPRINTF(EthernetSM, "RXS: RDT Fetching Descriptors!\n"); + rxDescCache.fetchDescriptors(); + } else { + DPRINTF(EthernetSM, "RXS: RDT NOT Fetching Desc b/c draining!\n"); + } + break; + case REG_RDTR: + regs.rdtr = val; + break; + case REG_RADV: + regs.radv = val; + break; + case REG_RXDCTL: + regs.rxdctl = val; + break; + case REG_TDBAL: + regs.tdba.tdbal( val & ~mask(4)); + txDescCache.areaChanged(); + break; + case REG_TDBAH: + regs.tdba.tdbah(val); + txDescCache.areaChanged(); + break; + case REG_TDLEN: + regs.tdlen = val & ~mask(7); + txDescCache.areaChanged(); + break; + case REG_TDH: + regs.tdh = val; + txDescCache.areaChanged(); + break; + case REG_TXDCA_CTL: + regs.txdca_ctl = val; + if (regs.txdca_ctl.enabled()) + panic("No support for DCA\n"); + break; + case REG_TDT: + regs.tdt = val; + DPRINTF(EthernetSM, "TXS: TX Tail pointer updated\n"); + if (drainState() == DrainState::Running) { + DPRINTF(EthernetSM, "TXS: TDT Fetching Descriptors!\n"); + txDescCache.fetchDescriptors(); + } else { + DPRINTF(EthernetSM, "TXS: TDT NOT Fetching Desc b/c draining!\n"); + } + break; + case REG_TIDV: + regs.tidv = val; + break; + case REG_TXDCTL: + regs.txdctl = val; + break; + case REG_TADV: + regs.tadv = val; + break; + case REG_TDWBAL: + regs.tdwba &= ~mask(32); + regs.tdwba |= val; + txDescCache.completionWriteback(regs.tdwba & ~mask(1), + regs.tdwba & mask(1)); + break; + case REG_TDWBAH: + 
regs.tdwba &= mask(32); + regs.tdwba |= (uint64_t)val << 32; + txDescCache.completionWriteback(regs.tdwba & ~mask(1), + regs.tdwba & mask(1)); + break; + case REG_RXCSUM: + regs.rxcsum = val; + break; + case REG_RLPML: + regs.rlpml = val; + break; + case REG_RFCTL: + regs.rfctl = val; + if (regs.rfctl.exsten()) + panic("Extended RX descriptors not implemented\n"); + break; + case REG_MANC: + regs.manc = val; + break; + case REG_SWSM: + regs.swsm = val; + if (regs.fwsm.eep_fw_semaphore()) + regs.swsm.swesmbi(0); + break; + case REG_SWFWSYNC: + regs.sw_fw_sync = val; + break; + default: + if (!IN_RANGE(daddr, REG_VFTA, VLAN_FILTER_TABLE_SIZE*4) && + !IN_RANGE(daddr, REG_RAL, RCV_ADDRESS_TABLE_SIZE*8) && + !IN_RANGE(daddr, REG_MTA, MULTICAST_TABLE_SIZE*4)) + panic("Write request to unknown register number: %#x\n", daddr); + }; + + pkt->makeAtomicResponse(); + return pioDelay; +} + +void +IGbE::postInterrupt(IntTypes t, bool now) +{ + assert(t); + + // Interrupt is already pending + if (t & regs.icr() && !now) + return; + + regs.icr = regs.icr() | t; + + Tick itr_interval = SimClock::Int::ns * 256 * regs.itr.interval(); + DPRINTF(EthernetIntr, + "EINT: postInterrupt() curTick(): %d itr: %d interval: %d\n", + curTick(), regs.itr.interval(), itr_interval); + + if (regs.itr.interval() == 0 || now || + lastInterrupt + itr_interval <= curTick()) { + if (interEvent.scheduled()) { + deschedule(interEvent); + } + cpuPostInt(); + } else { + Tick int_time = lastInterrupt + itr_interval; + assert(int_time > 0); + DPRINTF(EthernetIntr, "EINT: Scheduling timer interrupt for tick %d\n", + int_time); + if (!interEvent.scheduled()) { + schedule(interEvent, int_time); + } + } +} + +void +IGbE::delayIntEvent() +{ + cpuPostInt(); +} + + +void +IGbE::cpuPostInt() +{ + + postedInterrupts++; + + if (!(regs.icr() & regs.imr)) { + DPRINTF(Ethernet, "Interrupt Masked. 
Not Posting\n"); + return; + } + + DPRINTF(Ethernet, "Posting Interrupt\n"); + + + if (interEvent.scheduled()) { + deschedule(interEvent); + } + + if (rdtrEvent.scheduled()) { + regs.icr.rxt0(1); + deschedule(rdtrEvent); + } + if (radvEvent.scheduled()) { + regs.icr.rxt0(1); + deschedule(radvEvent); + } + if (tadvEvent.scheduled()) { + regs.icr.txdw(1); + deschedule(tadvEvent); + } + if (tidvEvent.scheduled()) { + regs.icr.txdw(1); + deschedule(tidvEvent); + } + + regs.icr.int_assert(1); + DPRINTF(EthernetIntr, "EINT: Posting interrupt to CPU now. Vector %#x\n", + regs.icr()); + + intrPost(); + + lastInterrupt = curTick(); +} + +void +IGbE::cpuClearInt() +{ + if (regs.icr.int_assert()) { + regs.icr.int_assert(0); + DPRINTF(EthernetIntr, + "EINT: Clearing interrupt to CPU now. Vector %#x\n", + regs.icr()); + intrClear(); + } +} + +void +IGbE::chkInterrupt() +{ + DPRINTF(Ethernet, "Checking interrupts icr: %#x imr: %#x\n", regs.icr(), + regs.imr); + // Check if we need to clear the cpu interrupt + if (!(regs.icr() & regs.imr)) { + DPRINTF(Ethernet, "Mask cleaned all interrupts\n"); + if (interEvent.scheduled()) + deschedule(interEvent); + if (regs.icr.int_assert()) + cpuClearInt(); + } + DPRINTF(Ethernet, "ITR = %#X itr.interval = %#X\n", + regs.itr(), regs.itr.interval()); + + if (regs.icr() & regs.imr) { + if (regs.itr.interval() == 0) { + cpuPostInt(); + } else { + DPRINTF(Ethernet, + "Possibly scheduling interrupt because of imr write\n"); + if (!interEvent.scheduled()) { + Tick t = curTick() + SimClock::Int::ns * 256 * regs.itr.interval(); + DPRINTF(Ethernet, "Scheduling for %d\n", t); + schedule(interEvent, t); + } + } + } +} + + +///////////////////////////// IGbE::DescCache ////////////////////////////// + +template +IGbE::DescCache::DescCache(IGbE *i, const std::string n, int s) + : igbe(i), _name(n), cachePnt(0), size(s), curFetching(0), + wbOut(0), moreToWb(false), wbAlignment(0), pktPtr(NULL), + wbDelayEvent(this), fetchDelayEvent(this), fetchEvent(this), 
+ wbEvent(this) +{ + fetchBuf = new T[size]; + wbBuf = new T[size]; +} + +template +IGbE::DescCache::~DescCache() +{ + reset(); + delete[] fetchBuf; + delete[] wbBuf; +} + +template +void +IGbE::DescCache::areaChanged() +{ + if (usedCache.size() > 0 || curFetching || wbOut) + panic("Descriptor Address, Length or Head changed. Bad\n"); + reset(); + +} + +template +void +IGbE::DescCache::writeback(Addr aMask) +{ + int curHead = descHead(); + int max_to_wb = usedCache.size(); + + // Check if this writeback is less restrictive that the previous + // and if so setup another one immediately following it + if (wbOut) { + if (aMask < wbAlignment) { + moreToWb = true; + wbAlignment = aMask; + } + DPRINTF(EthernetDesc, + "Writing back already in process, returning\n"); + return; + } + + moreToWb = false; + wbAlignment = aMask; + + + DPRINTF(EthernetDesc, "Writing back descriptors head: %d tail: " + "%d len: %d cachePnt: %d max_to_wb: %d descleft: %d\n", + curHead, descTail(), descLen(), cachePnt, max_to_wb, + descLeft()); + + if (max_to_wb + curHead >= descLen()) { + max_to_wb = descLen() - curHead; + moreToWb = true; + // this is by definition aligned correctly + } else if (wbAlignment != 0) { + // align the wb point to the mask + max_to_wb = max_to_wb & ~wbAlignment; + } + + DPRINTF(EthernetDesc, "Writing back %d descriptors\n", max_to_wb); + + if (max_to_wb <= 0) { + if (usedCache.size()) + igbe->anBegin(annSmWb, "Wait Alignment", CPA::FL_WAIT); + else + igbe->anWe(annSmWb, annUsedCacheQ); + return; + } + + wbOut = max_to_wb; + + assert(!wbDelayEvent.scheduled()); + igbe->schedule(wbDelayEvent, curTick() + igbe->wbDelay); + igbe->anBegin(annSmWb, "Prepare Writeback Desc"); +} + +template +void +IGbE::DescCache::writeback1() +{ + // If we're draining delay issuing this DMA + if (igbe->drainState() != DrainState::Running) { + igbe->schedule(wbDelayEvent, curTick() + igbe->wbDelay); + return; + } + + DPRINTF(EthernetDesc, "Begining DMA of %d descriptors\n", wbOut); + + for 
(int x = 0; x < wbOut; x++) { + assert(usedCache.size()); + memcpy(&wbBuf[x], usedCache[x], sizeof(T)); + igbe->anPq(annSmWb, annUsedCacheQ); + igbe->anPq(annSmWb, annDescQ); + igbe->anQ(annSmWb, annUsedDescQ); + } + + + igbe->anBegin(annSmWb, "Writeback Desc DMA"); + + assert(wbOut); + igbe->dmaWrite(pciToDma(descBase() + descHead() * sizeof(T)), + wbOut * sizeof(T), &wbEvent, (uint8_t*)wbBuf, + igbe->wbCompDelay); +} + +template +void +IGbE::DescCache::fetchDescriptors() +{ + size_t max_to_fetch; + + if (curFetching) { + DPRINTF(EthernetDesc, + "Currently fetching %d descriptors, returning\n", + curFetching); + return; + } + + if (descTail() >= cachePnt) + max_to_fetch = descTail() - cachePnt; + else + max_to_fetch = descLen() - cachePnt; + + size_t free_cache = size - usedCache.size() - unusedCache.size(); + + if (!max_to_fetch) + igbe->anWe(annSmFetch, annUnusedDescQ); + else + igbe->anPq(annSmFetch, annUnusedDescQ, max_to_fetch); + + if (max_to_fetch) { + if (!free_cache) + igbe->anWf(annSmFetch, annDescQ); + else + igbe->anRq(annSmFetch, annDescQ, free_cache); + } + + max_to_fetch = std::min(max_to_fetch, free_cache); + + + DPRINTF(EthernetDesc, "Fetching descriptors head: %d tail: " + "%d len: %d cachePnt: %d max_to_fetch: %d descleft: %d\n", + descHead(), descTail(), descLen(), cachePnt, + max_to_fetch, descLeft()); + + // Nothing to do + if (max_to_fetch == 0) + return; + + // So we don't have two descriptor fetches going on at once + curFetching = max_to_fetch; + + assert(!fetchDelayEvent.scheduled()); + igbe->schedule(fetchDelayEvent, curTick() + igbe->fetchDelay); + igbe->anBegin(annSmFetch, "Prepare Fetch Desc"); +} + +template +void +IGbE::DescCache::fetchDescriptors1() +{ + // If we're draining delay issuing this DMA + if (igbe->drainState() != DrainState::Running) { + igbe->schedule(fetchDelayEvent, curTick() + igbe->fetchDelay); + return; + } + + igbe->anBegin(annSmFetch, "Fetch Desc"); + + DPRINTF(EthernetDesc, "Fetching descriptors at %#x (%#x), 
size: %#x\n", + descBase() + cachePnt * sizeof(T), + pciToDma(descBase() + cachePnt * sizeof(T)), + curFetching * sizeof(T)); + assert(curFetching); + igbe->dmaRead(pciToDma(descBase() + cachePnt * sizeof(T)), + curFetching * sizeof(T), &fetchEvent, (uint8_t*)fetchBuf, + igbe->fetchCompDelay); +} + +template +void +IGbE::DescCache::fetchComplete() +{ + T *newDesc; + igbe->anBegin(annSmFetch, "Fetch Complete"); + for (int x = 0; x < curFetching; x++) { + newDesc = new T; + memcpy(newDesc, &fetchBuf[x], sizeof(T)); + unusedCache.push_back(newDesc); + igbe->anDq(annSmFetch, annUnusedDescQ); + igbe->anQ(annSmFetch, annUnusedCacheQ); + igbe->anQ(annSmFetch, annDescQ); + } + + +#ifndef NDEBUG + int oldCp = cachePnt; +#endif + + cachePnt += curFetching; + assert(cachePnt <= descLen()); + if (cachePnt == descLen()) + cachePnt = 0; + + curFetching = 0; + + DPRINTF(EthernetDesc, "Fetching complete cachePnt %d -> %d\n", + oldCp, cachePnt); + + if ((descTail() >= cachePnt ? (descTail() - cachePnt) : (descLen() - + cachePnt)) == 0) + { + igbe->anWe(annSmFetch, annUnusedDescQ); + } else if (!(size - usedCache.size() - unusedCache.size())) { + igbe->anWf(annSmFetch, annDescQ); + } else { + igbe->anBegin(annSmFetch, "Wait", CPA::FL_WAIT); + } + + enableSm(); + igbe->checkDrain(); +} + +template +void +IGbE::DescCache::wbComplete() +{ + + igbe->anBegin(annSmWb, "Finish Writeback"); + + long curHead = descHead(); +#ifndef NDEBUG + long oldHead = curHead; +#endif + + for (int x = 0; x < wbOut; x++) { + assert(usedCache.size()); + delete usedCache[0]; + usedCache.pop_front(); + + igbe->anDq(annSmWb, annUsedCacheQ); + igbe->anDq(annSmWb, annDescQ); + } + + curHead += wbOut; + wbOut = 0; + + if (curHead >= descLen()) + curHead -= descLen(); + + // Update the head + updateHead(curHead); + + DPRINTF(EthernetDesc, "Writeback complete curHead %d -> %d\n", + oldHead, curHead); + + // If we still have more to wb, call wb now + actionAfterWb(); + if (moreToWb) { + moreToWb = false; + 
DPRINTF(EthernetDesc, "Writeback has more todo\n"); + writeback(wbAlignment); + } + + if (!wbOut) { + igbe->checkDrain(); + if (usedCache.size()) + igbe->anBegin(annSmWb, "Wait", CPA::FL_WAIT); + else + igbe->anWe(annSmWb, annUsedCacheQ); + } + fetchAfterWb(); +} + +template +void +IGbE::DescCache::reset() +{ + DPRINTF(EthernetDesc, "Reseting descriptor cache\n"); + for (typename CacheType::size_type x = 0; x < usedCache.size(); x++) + delete usedCache[x]; + for (typename CacheType::size_type x = 0; x < unusedCache.size(); x++) + delete unusedCache[x]; + + usedCache.clear(); + unusedCache.clear(); + + cachePnt = 0; + +} + +template +void +IGbE::DescCache::serialize(CheckpointOut &cp) const +{ + SERIALIZE_SCALAR(cachePnt); + SERIALIZE_SCALAR(curFetching); + SERIALIZE_SCALAR(wbOut); + SERIALIZE_SCALAR(moreToWb); + SERIALIZE_SCALAR(wbAlignment); + + typename CacheType::size_type usedCacheSize = usedCache.size(); + SERIALIZE_SCALAR(usedCacheSize); + for (typename CacheType::size_type x = 0; x < usedCacheSize; x++) { + arrayParamOut(cp, csprintf("usedCache_%d", x), + (uint8_t*)usedCache[x],sizeof(T)); + } + + typename CacheType::size_type unusedCacheSize = unusedCache.size(); + SERIALIZE_SCALAR(unusedCacheSize); + for (typename CacheType::size_type x = 0; x < unusedCacheSize; x++) { + arrayParamOut(cp, csprintf("unusedCache_%d", x), + (uint8_t*)unusedCache[x],sizeof(T)); + } + + Tick fetch_delay = 0, wb_delay = 0; + if (fetchDelayEvent.scheduled()) + fetch_delay = fetchDelayEvent.when(); + SERIALIZE_SCALAR(fetch_delay); + if (wbDelayEvent.scheduled()) + wb_delay = wbDelayEvent.when(); + SERIALIZE_SCALAR(wb_delay); + + +} + +template +void +IGbE::DescCache::unserialize(CheckpointIn &cp) +{ + UNSERIALIZE_SCALAR(cachePnt); + UNSERIALIZE_SCALAR(curFetching); + UNSERIALIZE_SCALAR(wbOut); + UNSERIALIZE_SCALAR(moreToWb); + UNSERIALIZE_SCALAR(wbAlignment); + + typename CacheType::size_type usedCacheSize; + UNSERIALIZE_SCALAR(usedCacheSize); + T *temp; + for (typename 
CacheType::size_type x = 0; x < usedCacheSize; x++) { + temp = new T; + arrayParamIn(cp, csprintf("usedCache_%d", x), + (uint8_t*)temp,sizeof(T)); + usedCache.push_back(temp); + } + + typename CacheType::size_type unusedCacheSize; + UNSERIALIZE_SCALAR(unusedCacheSize); + for (typename CacheType::size_type x = 0; x < unusedCacheSize; x++) { + temp = new T; + arrayParamIn(cp, csprintf("unusedCache_%d", x), + (uint8_t*)temp,sizeof(T)); + unusedCache.push_back(temp); + } + Tick fetch_delay = 0, wb_delay = 0; + UNSERIALIZE_SCALAR(fetch_delay); + UNSERIALIZE_SCALAR(wb_delay); + if (fetch_delay) + igbe->schedule(fetchDelayEvent, fetch_delay); + if (wb_delay) + igbe->schedule(wbDelayEvent, wb_delay); + + +} + +///////////////////////////// IGbE::RxDescCache ////////////////////////////// + +IGbE::RxDescCache::RxDescCache(IGbE *i, const std::string n, int s) + : DescCache(i, n, s), pktDone(false), splitCount(0), + pktEvent(this), pktHdrEvent(this), pktDataEvent(this) + +{ + annSmFetch = "RX Desc Fetch"; + annSmWb = "RX Desc Writeback"; + annUnusedDescQ = "RX Unused Descriptors"; + annUnusedCacheQ = "RX Unused Descriptor Cache"; + annUsedCacheQ = "RX Used Descriptor Cache"; + annUsedDescQ = "RX Used Descriptors"; + annDescQ = "RX Descriptors"; +} + +void +IGbE::RxDescCache::pktSplitDone() +{ + splitCount++; + DPRINTF(EthernetDesc, + "Part of split packet done: splitcount now %d\n", splitCount); + assert(splitCount <= 2); + if (splitCount != 2) + return; + splitCount = 0; + DPRINTF(EthernetDesc, + "Part of split packet done: calling pktComplete()\n"); + pktComplete(); +} + +int +IGbE::RxDescCache::writePacket(EthPacketPtr packet, int pkt_offset) +{ + assert(unusedCache.size()); + //if (!unusedCache.size()) + // return false; + + pktPtr = packet; + pktDone = false; + unsigned buf_len, hdr_len; + + RxDesc *desc = unusedCache.front(); + switch (igbe->regs.srrctl.desctype()) { + case RXDT_LEGACY: + assert(pkt_offset == 0); + bytesCopied = packet->length; + DPRINTF(EthernetDesc, 
"Packet Length: %d Desc Size: %d\n", + packet->length, igbe->regs.rctl.descSize()); + assert(packet->length < igbe->regs.rctl.descSize()); + igbe->dmaWrite(pciToDma(desc->legacy.buf), + packet->length, &pktEvent, packet->data, + igbe->rxWriteDelay); + break; + case RXDT_ADV_ONEBUF: + assert(pkt_offset == 0); + bytesCopied = packet->length; + buf_len = igbe->regs.rctl.lpe() ? igbe->regs.srrctl.bufLen() : + igbe->regs.rctl.descSize(); + DPRINTF(EthernetDesc, "Packet Length: %d srrctl: %#x Desc Size: %d\n", + packet->length, igbe->regs.srrctl(), buf_len); + assert(packet->length < buf_len); + igbe->dmaWrite(pciToDma(desc->adv_read.pkt), + packet->length, &pktEvent, packet->data, + igbe->rxWriteDelay); + desc->adv_wb.header_len = htole(0); + desc->adv_wb.sph = htole(0); + desc->adv_wb.pkt_len = htole((uint16_t)(pktPtr->length)); + break; + case RXDT_ADV_SPLIT_A: + int split_point; + + buf_len = igbe->regs.rctl.lpe() ? igbe->regs.srrctl.bufLen() : + igbe->regs.rctl.descSize(); + hdr_len = igbe->regs.rctl.lpe() ? 
igbe->regs.srrctl.hdrLen() : 0; + DPRINTF(EthernetDesc, + "lpe: %d Packet Length: %d offset: %d srrctl: %#x " + "hdr addr: %#x Hdr Size: %d desc addr: %#x Desc Size: %d\n", + igbe->regs.rctl.lpe(), packet->length, pkt_offset, + igbe->regs.srrctl(), desc->adv_read.hdr, hdr_len, + desc->adv_read.pkt, buf_len); + + split_point = hsplit(pktPtr); + + if (packet->length <= hdr_len) { + bytesCopied = packet->length; + assert(pkt_offset == 0); + DPRINTF(EthernetDesc, "Hdr split: Entire packet in header\n"); + igbe->dmaWrite(pciToDma(desc->adv_read.hdr), + packet->length, &pktEvent, packet->data, + igbe->rxWriteDelay); + desc->adv_wb.header_len = htole((uint16_t)packet->length); + desc->adv_wb.sph = htole(0); + desc->adv_wb.pkt_len = htole(0); + } else if (split_point) { + if (pkt_offset) { + // we are only copying some data, header/data has already been + // copied + int max_to_copy = + std::min(packet->length - pkt_offset, buf_len); + bytesCopied += max_to_copy; + DPRINTF(EthernetDesc, + "Hdr split: Continuing data buffer copy\n"); + igbe->dmaWrite(pciToDma(desc->adv_read.pkt), + max_to_copy, &pktEvent, + packet->data + pkt_offset, igbe->rxWriteDelay); + desc->adv_wb.header_len = htole(0); + desc->adv_wb.pkt_len = htole((uint16_t)max_to_copy); + desc->adv_wb.sph = htole(0); + } else { + int max_to_copy = + std::min(packet->length - split_point, buf_len); + bytesCopied += max_to_copy + split_point; + + DPRINTF(EthernetDesc, "Hdr split: splitting at %d\n", + split_point); + igbe->dmaWrite(pciToDma(desc->adv_read.hdr), + split_point, &pktHdrEvent, + packet->data, igbe->rxWriteDelay); + igbe->dmaWrite(pciToDma(desc->adv_read.pkt), + max_to_copy, &pktDataEvent, + packet->data + split_point, igbe->rxWriteDelay); + desc->adv_wb.header_len = htole(split_point); + desc->adv_wb.sph = 1; + desc->adv_wb.pkt_len = htole((uint16_t)(max_to_copy)); + } + } else { + panic("Header split not fitting within header buffer or " + "undecodable packet not fitting in header unsupported\n"); + } + 
break; + default: + panic("Unimplemnted RX receive buffer type: %d\n", + igbe->regs.srrctl.desctype()); + } + return bytesCopied; + +} + +void +IGbE::RxDescCache::pktComplete() +{ + assert(unusedCache.size()); + RxDesc *desc; + desc = unusedCache.front(); + + igbe->anBegin("RXS", "Update Desc"); + + uint16_t crcfixup = igbe->regs.rctl.secrc() ? 0 : 4 ; + DPRINTF(EthernetDesc, "pktPtr->length: %d bytesCopied: %d " + "stripcrc offset: %d value written: %d %d\n", + pktPtr->length, bytesCopied, crcfixup, + htole((uint16_t)(pktPtr->length + crcfixup)), + (uint16_t)(pktPtr->length + crcfixup)); + + // no support for anything but starting at 0 + assert(igbe->regs.rxcsum.pcss() == 0); + + DPRINTF(EthernetDesc, "Packet written to memory updating Descriptor\n"); + + uint16_t status = RXDS_DD; + uint8_t err = 0; + uint16_t ext_err = 0; + uint16_t csum = 0; + uint16_t ptype = 0; + uint16_t ip_id = 0; + + assert(bytesCopied <= pktPtr->length); + if (bytesCopied == pktPtr->length) + status |= RXDS_EOP; + + IpPtr ip(pktPtr); + + if (ip) { + DPRINTF(EthernetDesc, "Proccesing Ip packet with Id=%d\n", ip->id()); + ptype |= RXDP_IPV4; + ip_id = ip->id(); + + if (igbe->regs.rxcsum.ipofld()) { + DPRINTF(EthernetDesc, "Checking IP checksum\n"); + status |= RXDS_IPCS; + csum = htole(cksum(ip)); + igbe->rxIpChecksums++; + if (cksum(ip) != 0) { + err |= RXDE_IPE; + ext_err |= RXDEE_IPE; + DPRINTF(EthernetDesc, "Checksum is bad!!\n"); + } + } + TcpPtr tcp(ip); + if (tcp && igbe->regs.rxcsum.tuofld()) { + DPRINTF(EthernetDesc, "Checking TCP checksum\n"); + status |= RXDS_TCPCS; + ptype |= RXDP_TCP; + csum = htole(cksum(tcp)); + igbe->rxTcpChecksums++; + if (cksum(tcp) != 0) { + DPRINTF(EthernetDesc, "Checksum is bad!!\n"); + err |= RXDE_TCPE; + ext_err |= RXDEE_TCPE; + } + } + + UdpPtr udp(ip); + if (udp && igbe->regs.rxcsum.tuofld()) { + DPRINTF(EthernetDesc, "Checking UDP checksum\n"); + status |= RXDS_UDPCS; + ptype |= RXDP_UDP; + csum = htole(cksum(udp)); + igbe->rxUdpChecksums++; + if 
(cksum(udp) != 0) { + DPRINTF(EthernetDesc, "Checksum is bad!!\n"); + ext_err |= RXDEE_TCPE; + err |= RXDE_TCPE; + } + } + } else { // if ip + DPRINTF(EthernetSM, "Proccesing Non-Ip packet\n"); + } + + switch (igbe->regs.srrctl.desctype()) { + case RXDT_LEGACY: + desc->legacy.len = htole((uint16_t)(pktPtr->length + crcfixup)); + desc->legacy.status = htole(status); + desc->legacy.errors = htole(err); + // No vlan support at this point... just set it to 0 + desc->legacy.vlan = 0; + break; + case RXDT_ADV_SPLIT_A: + case RXDT_ADV_ONEBUF: + desc->adv_wb.rss_type = htole(0); + desc->adv_wb.pkt_type = htole(ptype); + if (igbe->regs.rxcsum.pcsd()) { + // no rss support right now + desc->adv_wb.rss_hash = htole(0); + } else { + desc->adv_wb.id = htole(ip_id); + desc->adv_wb.csum = htole(csum); + } + desc->adv_wb.status = htole(status); + desc->adv_wb.errors = htole(ext_err); + // no vlan support + desc->adv_wb.vlan_tag = htole(0); + break; + default: + panic("Unimplemnted RX receive buffer type %d\n", + igbe->regs.srrctl.desctype()); + } + + DPRINTF(EthernetDesc, "Descriptor complete w0: %#x w1: %#x\n", + desc->adv_read.pkt, desc->adv_read.hdr); + + if (bytesCopied == pktPtr->length) { + DPRINTF(EthernetDesc, + "Packet completely written to descriptor buffers\n"); + // Deal with the rx timer interrupts + if (igbe->regs.rdtr.delay()) { + Tick delay = igbe->regs.rdtr.delay() * igbe->intClock(); + DPRINTF(EthernetSM, "RXS: Scheduling DTR for %d\n", delay); + igbe->reschedule(igbe->rdtrEvent, curTick() + delay); + } + + if (igbe->regs.radv.idv()) { + Tick delay = igbe->regs.radv.idv() * igbe->intClock(); + DPRINTF(EthernetSM, "RXS: Scheduling ADV for %d\n", delay); + if (!igbe->radvEvent.scheduled()) { + igbe->schedule(igbe->radvEvent, curTick() + delay); + } + } + + // if neither radv or rdtr, maybe itr is set... 
+ if (!igbe->regs.rdtr.delay() && !igbe->regs.radv.idv()) { + DPRINTF(EthernetSM, + "RXS: Receive interrupt delay disabled, posting IT_RXT\n"); + igbe->postInterrupt(IT_RXT); + } + + // If the packet is small enough, interrupt appropriately + // I wonder if this is delayed or not?! + if (pktPtr->length <= igbe->regs.rsrpd.idv()) { + DPRINTF(EthernetSM, + "RXS: Posting IT_SRPD beacuse small packet received\n"); + igbe->postInterrupt(IT_SRPD); + } + bytesCopied = 0; + } + + pktPtr = NULL; + igbe->checkDrain(); + enableSm(); + pktDone = true; + + igbe->anBegin("RXS", "Done Updating Desc"); + DPRINTF(EthernetDesc, "Processing of this descriptor complete\n"); + igbe->anDq("RXS", annUnusedCacheQ); + unusedCache.pop_front(); + igbe->anQ("RXS", annUsedCacheQ); + usedCache.push_back(desc); +} + +void +IGbE::RxDescCache::enableSm() +{ + if (igbe->drainState() != DrainState::Draining) { + igbe->rxTick = true; + igbe->restartClock(); + } +} + +bool +IGbE::RxDescCache::packetDone() +{ + if (pktDone) { + pktDone = false; + return true; + } + return false; +} + +bool +IGbE::RxDescCache::hasOutstandingEvents() +{ + return pktEvent.scheduled() || wbEvent.scheduled() || + fetchEvent.scheduled() || pktHdrEvent.scheduled() || + pktDataEvent.scheduled(); + +} + +void +IGbE::RxDescCache::serialize(CheckpointOut &cp) const +{ + DescCache::serialize(cp); + SERIALIZE_SCALAR(pktDone); + SERIALIZE_SCALAR(splitCount); + SERIALIZE_SCALAR(bytesCopied); +} + +void +IGbE::RxDescCache::unserialize(CheckpointIn &cp) +{ + DescCache::unserialize(cp); + UNSERIALIZE_SCALAR(pktDone); + UNSERIALIZE_SCALAR(splitCount); + UNSERIALIZE_SCALAR(bytesCopied); +} + + +///////////////////////////// IGbE::TxDescCache ////////////////////////////// + +IGbE::TxDescCache::TxDescCache(IGbE *i, const std::string n, int s) + : DescCache(i,n, s), pktDone(false), isTcp(false), + pktWaiting(false), pktMultiDesc(false), + completionAddress(0), completionEnabled(false), + useTso(false), tsoHeaderLen(0), tsoMss(0), 
tsoTotalLen(0), tsoUsedLen(0), + tsoPrevSeq(0), tsoPktPayloadBytes(0), tsoLoadedHeader(false), + tsoPktHasHeader(false), tsoDescBytesUsed(0), tsoCopyBytes(0), tsoPkts(0), + pktEvent(this), headerEvent(this), nullEvent(this) +{ + annSmFetch = "TX Desc Fetch"; + annSmWb = "TX Desc Writeback"; + annUnusedDescQ = "TX Unused Descriptors"; + annUnusedCacheQ = "TX Unused Descriptor Cache"; + annUsedCacheQ = "TX Used Descriptor Cache"; + annUsedDescQ = "TX Used Descriptors"; + annDescQ = "TX Descriptors"; +} + +void +IGbE::TxDescCache::processContextDesc() +{ + assert(unusedCache.size()); + TxDesc *desc; + + DPRINTF(EthernetDesc, "Checking and processing context descriptors\n"); + + while (!useTso && unusedCache.size() && + TxdOp::isContext(unusedCache.front())) { + DPRINTF(EthernetDesc, "Got context descriptor type...\n"); + + desc = unusedCache.front(); + DPRINTF(EthernetDesc, "Descriptor upper: %#x lower: %#X\n", + desc->d1, desc->d2); + + + // is this going to be a tcp or udp packet? + isTcp = TxdOp::tcp(desc) ? 
true : false; + + // setup all the TSO variables, they'll be ignored if we don't use + // tso for this connection + tsoHeaderLen = TxdOp::hdrlen(desc); + tsoMss = TxdOp::mss(desc); + + if (TxdOp::isType(desc, TxdOp::TXD_CNXT) && TxdOp::tse(desc)) { + DPRINTF(EthernetDesc, "TCP offload enabled for packet hdrlen: " + "%d mss: %d paylen %d\n", TxdOp::hdrlen(desc), + TxdOp::mss(desc), TxdOp::getLen(desc)); + useTso = true; + tsoTotalLen = TxdOp::getLen(desc); + tsoLoadedHeader = false; + tsoDescBytesUsed = 0; + tsoUsedLen = 0; + tsoPrevSeq = 0; + tsoPktHasHeader = false; + tsoPkts = 0; + tsoCopyBytes = 0; + } + + TxdOp::setDd(desc); + unusedCache.pop_front(); + igbe->anDq("TXS", annUnusedCacheQ); + usedCache.push_back(desc); + igbe->anQ("TXS", annUsedCacheQ); + } + + if (!unusedCache.size()) + return; + + desc = unusedCache.front(); + if (!useTso && TxdOp::isType(desc, TxdOp::TXD_ADVDATA) && + TxdOp::tse(desc)) { + DPRINTF(EthernetDesc, "TCP offload(adv) enabled for packet " + "hdrlen: %d mss: %d paylen %d\n", + tsoHeaderLen, tsoMss, TxdOp::getTsoLen(desc)); + useTso = true; + tsoTotalLen = TxdOp::getTsoLen(desc); + tsoLoadedHeader = false; + tsoDescBytesUsed = 0; + tsoUsedLen = 0; + tsoPrevSeq = 0; + tsoPktHasHeader = false; + tsoPkts = 0; + } + + if (useTso && !tsoLoadedHeader) { + // we need to fetch a header + DPRINTF(EthernetDesc, "Starting DMA of TSO header\n"); + assert(TxdOp::isData(desc) && TxdOp::getLen(desc) >= tsoHeaderLen); + pktWaiting = true; + assert(tsoHeaderLen <= 256); + igbe->dmaRead(pciToDma(TxdOp::getBuf(desc)), + tsoHeaderLen, &headerEvent, tsoHeader, 0); + } +} + +void +IGbE::TxDescCache::headerComplete() +{ + DPRINTF(EthernetDesc, "TSO: Fetching TSO header complete\n"); + pktWaiting = false; + + assert(unusedCache.size()); + TxDesc *desc = unusedCache.front(); + DPRINTF(EthernetDesc, "TSO: len: %d tsoHeaderLen: %d\n", + TxdOp::getLen(desc), tsoHeaderLen); + + if (TxdOp::getLen(desc) == tsoHeaderLen) { + tsoDescBytesUsed = 0; + tsoLoadedHeader = 
true; + unusedCache.pop_front(); + usedCache.push_back(desc); + } else { + DPRINTF(EthernetDesc, "TSO: header part of larger payload\n"); + tsoDescBytesUsed = tsoHeaderLen; + tsoLoadedHeader = true; + } + enableSm(); + igbe->checkDrain(); +} + +unsigned +IGbE::TxDescCache::getPacketSize(EthPacketPtr p) +{ + if (!unusedCache.size()) + return 0; + + DPRINTF(EthernetDesc, "Starting processing of descriptor\n"); + + assert(!useTso || tsoLoadedHeader); + TxDesc *desc = unusedCache.front(); + + if (useTso) { + DPRINTF(EthernetDesc, "getPacket(): TxDescriptor data " + "d1: %#llx d2: %#llx\n", desc->d1, desc->d2); + DPRINTF(EthernetDesc, "TSO: use: %d hdrlen: %d mss: %d total: %d " + "used: %d loaded hdr: %d\n", useTso, tsoHeaderLen, tsoMss, + tsoTotalLen, tsoUsedLen, tsoLoadedHeader); + + if (tsoPktHasHeader) + tsoCopyBytes = std::min((tsoMss + tsoHeaderLen) - p->length, + TxdOp::getLen(desc) - tsoDescBytesUsed); + else + tsoCopyBytes = std::min(tsoMss, + TxdOp::getLen(desc) - tsoDescBytesUsed); + unsigned pkt_size = + tsoCopyBytes + (tsoPktHasHeader ? 
0 : tsoHeaderLen); + + DPRINTF(EthernetDesc, "TSO: descBytesUsed: %d copyBytes: %d " + "this descLen: %d\n", + tsoDescBytesUsed, tsoCopyBytes, TxdOp::getLen(desc)); + DPRINTF(EthernetDesc, "TSO: pktHasHeader: %d\n", tsoPktHasHeader); + DPRINTF(EthernetDesc, "TSO: Next packet is %d bytes\n", pkt_size); + return pkt_size; + } + + DPRINTF(EthernetDesc, "Next TX packet is %d bytes\n", + TxdOp::getLen(unusedCache.front())); + return TxdOp::getLen(desc); +} + +void +IGbE::TxDescCache::getPacketData(EthPacketPtr p) +{ + assert(unusedCache.size()); + + TxDesc *desc; + desc = unusedCache.front(); + + DPRINTF(EthernetDesc, "getPacketData(): TxDescriptor data " + "d1: %#llx d2: %#llx\n", desc->d1, desc->d2); + assert((TxdOp::isLegacy(desc) || TxdOp::isData(desc)) && + TxdOp::getLen(desc)); + + pktPtr = p; + + pktWaiting = true; + + DPRINTF(EthernetDesc, "Starting DMA of packet at offset %d\n", p->length); + + if (useTso) { + assert(tsoLoadedHeader); + if (!tsoPktHasHeader) { + DPRINTF(EthernetDesc, + "Loading TSO header (%d bytes) into start of packet\n", + tsoHeaderLen); + memcpy(p->data, &tsoHeader,tsoHeaderLen); + p->length +=tsoHeaderLen; + tsoPktHasHeader = true; + } + } + + if (useTso) { + DPRINTF(EthernetDesc, + "Starting DMA of packet at offset %d length: %d\n", + p->length, tsoCopyBytes); + igbe->dmaRead(pciToDma(TxdOp::getBuf(desc)) + + tsoDescBytesUsed, + tsoCopyBytes, &pktEvent, p->data + p->length, + igbe->txReadDelay); + tsoDescBytesUsed += tsoCopyBytes; + assert(tsoDescBytesUsed <= TxdOp::getLen(desc)); + } else { + igbe->dmaRead(pciToDma(TxdOp::getBuf(desc)), + TxdOp::getLen(desc), &pktEvent, p->data + p->length, + igbe->txReadDelay); + } +} + +void +IGbE::TxDescCache::pktComplete() +{ + + TxDesc *desc; + assert(unusedCache.size()); + assert(pktPtr); + + igbe->anBegin("TXS", "Update Desc"); + + DPRINTF(EthernetDesc, "DMA of packet complete\n"); + + + desc = unusedCache.front(); + assert((TxdOp::isLegacy(desc) || TxdOp::isData(desc)) && + TxdOp::getLen(desc)); 
+ + DPRINTF(EthernetDesc, "TxDescriptor data d1: %#llx d2: %#llx\n", + desc->d1, desc->d2); + + // Set the length of the data in the EtherPacket + if (useTso) { + DPRINTF(EthernetDesc, "TSO: use: %d hdrlen: %d mss: %d total: %d " + "used: %d loaded hdr: %d\n", useTso, tsoHeaderLen, tsoMss, + tsoTotalLen, tsoUsedLen, tsoLoadedHeader); + pktPtr->length += tsoCopyBytes; + tsoUsedLen += tsoCopyBytes; + DPRINTF(EthernetDesc, "TSO: descBytesUsed: %d copyBytes: %d\n", + tsoDescBytesUsed, tsoCopyBytes); + } else + pktPtr->length += TxdOp::getLen(desc); + + + + if ((!TxdOp::eop(desc) && !useTso) || + (pktPtr->length < ( tsoMss + tsoHeaderLen) && + tsoTotalLen != tsoUsedLen && useTso)) { + assert(!useTso || (tsoDescBytesUsed == TxdOp::getLen(desc))); + igbe->anDq("TXS", annUnusedCacheQ); + unusedCache.pop_front(); + igbe->anQ("TXS", annUsedCacheQ); + usedCache.push_back(desc); + + tsoDescBytesUsed = 0; + pktDone = true; + pktWaiting = false; + pktMultiDesc = true; + + DPRINTF(EthernetDesc, "Partial Packet Descriptor of %d bytes Done\n", + pktPtr->length); + pktPtr = NULL; + + enableSm(); + igbe->checkDrain(); + return; + } + + + pktMultiDesc = false; + // no support for vlans + assert(!TxdOp::vle(desc)); + + // we only support single packet descriptors at this point + if (!useTso) + assert(TxdOp::eop(desc)); + + // set that this packet is done + if (TxdOp::rs(desc)) + TxdOp::setDd(desc); + + DPRINTF(EthernetDesc, "TxDescriptor data d1: %#llx d2: %#llx\n", + desc->d1, desc->d2); + + if (useTso) { + IpPtr ip(pktPtr); + if (ip) { + DPRINTF(EthernetDesc, "TSO: Modifying IP header. Id + %d\n", + tsoPkts); + ip->id(ip->id() + tsoPkts++); + ip->len(pktPtr->length - EthPtr(pktPtr)->size()); + + TcpPtr tcp(ip); + if (tcp) { + DPRINTF(EthernetDesc, + "TSO: Modifying TCP header. 
old seq %d + %d\n", + tcp->seq(), tsoPrevSeq); + tcp->seq(tcp->seq() + tsoPrevSeq); + if (tsoUsedLen != tsoTotalLen) + tcp->flags(tcp->flags() & ~9); // clear fin & psh + } + UdpPtr udp(ip); + if (udp) { + DPRINTF(EthernetDesc, "TSO: Modifying UDP header.\n"); + udp->len(pktPtr->length - EthPtr(pktPtr)->size()); + } + } + tsoPrevSeq = tsoUsedLen; + } + + if (DTRACE(EthernetDesc)) { + IpPtr ip(pktPtr); + if (ip) + DPRINTF(EthernetDesc, "Proccesing Ip packet with Id=%d\n", + ip->id()); + else + DPRINTF(EthernetSM, "Proccesing Non-Ip packet\n"); + } + + // Checksums are only ofloaded for new descriptor types + if (TxdOp::isData(desc) && ( TxdOp::ixsm(desc) || TxdOp::txsm(desc)) ) { + DPRINTF(EthernetDesc, "Calculating checksums for packet\n"); + IpPtr ip(pktPtr); + assert(ip); + if (TxdOp::ixsm(desc)) { + ip->sum(0); + ip->sum(cksum(ip)); + igbe->txIpChecksums++; + DPRINTF(EthernetDesc, "Calculated IP checksum\n"); + } + if (TxdOp::txsm(desc)) { + TcpPtr tcp(ip); + UdpPtr udp(ip); + if (tcp) { + tcp->sum(0); + tcp->sum(cksum(tcp)); + igbe->txTcpChecksums++; + DPRINTF(EthernetDesc, "Calculated TCP checksum\n"); + } else if (udp) { + assert(udp); + udp->sum(0); + udp->sum(cksum(udp)); + igbe->txUdpChecksums++; + DPRINTF(EthernetDesc, "Calculated UDP checksum\n"); + } else { + panic("Told to checksum, but don't know how\n"); + } + } + } + + if (TxdOp::ide(desc)) { + // Deal with the rx timer interrupts + DPRINTF(EthernetDesc, "Descriptor had IDE set\n"); + if (igbe->regs.tidv.idv()) { + Tick delay = igbe->regs.tidv.idv() * igbe->intClock(); + DPRINTF(EthernetDesc, "setting tidv\n"); + igbe->reschedule(igbe->tidvEvent, curTick() + delay, true); + } + + if (igbe->regs.tadv.idv() && igbe->regs.tidv.idv()) { + Tick delay = igbe->regs.tadv.idv() * igbe->intClock(); + DPRINTF(EthernetDesc, "setting tadv\n"); + if (!igbe->tadvEvent.scheduled()) { + igbe->schedule(igbe->tadvEvent, curTick() + delay); + } + } + } + + + if (!useTso || TxdOp::getLen(desc) == tsoDescBytesUsed) { + 
DPRINTF(EthernetDesc, "Descriptor Done\n"); + igbe->anDq("TXS", annUnusedCacheQ); + unusedCache.pop_front(); + igbe->anQ("TXS", annUsedCacheQ); + usedCache.push_back(desc); + tsoDescBytesUsed = 0; + } + + if (useTso && tsoUsedLen == tsoTotalLen) + useTso = false; + + + DPRINTF(EthernetDesc, + "------Packet of %d bytes ready for transmission-------\n", + pktPtr->length); + pktDone = true; + pktWaiting = false; + pktPtr = NULL; + tsoPktHasHeader = false; + + if (igbe->regs.txdctl.wthresh() == 0) { + igbe->anBegin("TXS", "Desc Writeback"); + DPRINTF(EthernetDesc, "WTHRESH == 0, writing back descriptor\n"); + writeback(0); + } else if (!igbe->regs.txdctl.gran() && igbe->regs.txdctl.wthresh() <= + descInBlock(usedCache.size())) { + DPRINTF(EthernetDesc, "used > WTHRESH, writing back descriptor\n"); + igbe->anBegin("TXS", "Desc Writeback"); + writeback((igbe->cacheBlockSize()-1)>>4); + } else if (igbe->regs.txdctl.wthresh() <= usedCache.size()) { + DPRINTF(EthernetDesc, "used > WTHRESH, writing back descriptor\n"); + igbe->anBegin("TXS", "Desc Writeback"); + writeback((igbe->cacheBlockSize()-1)>>4); + } + + enableSm(); + igbe->checkDrain(); +} + +void +IGbE::TxDescCache::actionAfterWb() +{ + DPRINTF(EthernetDesc, "actionAfterWb() completionEnabled: %d\n", + completionEnabled); + igbe->postInterrupt(iGbReg::IT_TXDW); + if (completionEnabled) { + descEnd = igbe->regs.tdh(); + DPRINTF(EthernetDesc, + "Completion writing back value: %d to addr: %#x\n", descEnd, + completionAddress); + igbe->dmaWrite(pciToDma(mbits(completionAddress, 63, 2)), + sizeof(descEnd), &nullEvent, (uint8_t*)&descEnd, 0); + } +} + +void +IGbE::TxDescCache::serialize(CheckpointOut &cp) const +{ + DescCache::serialize(cp); + + SERIALIZE_SCALAR(pktDone); + SERIALIZE_SCALAR(isTcp); + SERIALIZE_SCALAR(pktWaiting); + SERIALIZE_SCALAR(pktMultiDesc); + + SERIALIZE_SCALAR(useTso); + SERIALIZE_SCALAR(tsoHeaderLen); + SERIALIZE_SCALAR(tsoMss); + SERIALIZE_SCALAR(tsoTotalLen); + SERIALIZE_SCALAR(tsoUsedLen); + 
SERIALIZE_SCALAR(tsoPrevSeq);; + SERIALIZE_SCALAR(tsoPktPayloadBytes); + SERIALIZE_SCALAR(tsoLoadedHeader); + SERIALIZE_SCALAR(tsoPktHasHeader); + SERIALIZE_ARRAY(tsoHeader, 256); + SERIALIZE_SCALAR(tsoDescBytesUsed); + SERIALIZE_SCALAR(tsoCopyBytes); + SERIALIZE_SCALAR(tsoPkts); + + SERIALIZE_SCALAR(completionAddress); + SERIALIZE_SCALAR(completionEnabled); + SERIALIZE_SCALAR(descEnd); +} + +void +IGbE::TxDescCache::unserialize(CheckpointIn &cp) +{ + DescCache::unserialize(cp); + + UNSERIALIZE_SCALAR(pktDone); + UNSERIALIZE_SCALAR(isTcp); + UNSERIALIZE_SCALAR(pktWaiting); + UNSERIALIZE_SCALAR(pktMultiDesc); + + UNSERIALIZE_SCALAR(useTso); + UNSERIALIZE_SCALAR(tsoHeaderLen); + UNSERIALIZE_SCALAR(tsoMss); + UNSERIALIZE_SCALAR(tsoTotalLen); + UNSERIALIZE_SCALAR(tsoUsedLen); + UNSERIALIZE_SCALAR(tsoPrevSeq);; + UNSERIALIZE_SCALAR(tsoPktPayloadBytes); + UNSERIALIZE_SCALAR(tsoLoadedHeader); + UNSERIALIZE_SCALAR(tsoPktHasHeader); + UNSERIALIZE_ARRAY(tsoHeader, 256); + UNSERIALIZE_SCALAR(tsoDescBytesUsed); + UNSERIALIZE_SCALAR(tsoCopyBytes); + UNSERIALIZE_SCALAR(tsoPkts); + + UNSERIALIZE_SCALAR(completionAddress); + UNSERIALIZE_SCALAR(completionEnabled); + UNSERIALIZE_SCALAR(descEnd); +} + +bool +IGbE::TxDescCache::packetAvailable() +{ + if (pktDone) { + pktDone = false; + return true; + } + return false; +} + +void +IGbE::TxDescCache::enableSm() +{ + if (igbe->drainState() != DrainState::Draining) { + igbe->txTick = true; + igbe->restartClock(); + } +} + +bool +IGbE::TxDescCache::hasOutstandingEvents() +{ + return pktEvent.scheduled() || wbEvent.scheduled() || + fetchEvent.scheduled(); +} + + +///////////////////////////////////// IGbE ///////////////////////////////// + +void +IGbE::restartClock() +{ + if (!tickEvent.scheduled() && (rxTick || txTick || txFifoTick) && + drainState() == DrainState::Running) + schedule(tickEvent, clockEdge(Cycles(1))); +} + +DrainState +IGbE::drain() +{ + unsigned int count(0); + if (rxDescCache.hasOutstandingEvents() || + 
txDescCache.hasOutstandingEvents()) { + count++; + } + + txFifoTick = false; + txTick = false; + rxTick = false; + + if (tickEvent.scheduled()) + deschedule(tickEvent); + + if (count) { + DPRINTF(Drain, "IGbE not drained\n"); + return DrainState::Draining; + } else + return DrainState::Drained; +} + +void +IGbE::drainResume() +{ + Drainable::drainResume(); + + txFifoTick = true; + txTick = true; + rxTick = true; + + restartClock(); + DPRINTF(EthernetSM, "resuming from drain"); +} + +void +IGbE::checkDrain() +{ + if (drainState() != DrainState::Draining) + return; + + txFifoTick = false; + txTick = false; + rxTick = false; + if (!rxDescCache.hasOutstandingEvents() && + !txDescCache.hasOutstandingEvents()) { + DPRINTF(Drain, "IGbE done draining, processing drain event\n"); + signalDrainDone(); + } +} + +void +IGbE::txStateMachine() +{ + if (!regs.tctl.en()) { + txTick = false; + DPRINTF(EthernetSM, "TXS: TX disabled, stopping ticking\n"); + return; + } + + // If we have a packet available and it's length is not 0 (meaning it's not + // a multidescriptor packet) put it in the fifo, otherwise an the next + // iteration we'll get the rest of the data + if (txPacket && txDescCache.packetAvailable() + && !txDescCache.packetMultiDesc() && txPacket->length) { + anQ("TXS", "TX FIFO Q"); + DPRINTF(EthernetSM, "TXS: packet placed in TX FIFO\n"); +#ifndef NDEBUG + bool success = +#endif + txFifo.push(txPacket); + txFifoTick = true && drainState() != DrainState::Draining; + assert(success); + txPacket = NULL; + anBegin("TXS", "Desc Writeback"); + txDescCache.writeback((cacheBlockSize()-1)>>4); + return; + } + + // Only support descriptor granularity + if (regs.txdctl.lwthresh() && + txDescCache.descLeft() < (regs.txdctl.lwthresh() * 8)) { + DPRINTF(EthernetSM, "TXS: LWTHRESH caused posting of TXDLOW\n"); + postInterrupt(IT_TXDLOW); + } + + if (!txPacket) { + txPacket = std::make_shared(16384); + } + + if (!txDescCache.packetWaiting()) { + if (txDescCache.descLeft() == 0) { + 
postInterrupt(IT_TXQE); + anBegin("TXS", "Desc Writeback"); + txDescCache.writeback(0); + anBegin("TXS", "Desc Fetch"); + anWe("TXS", txDescCache.annUnusedCacheQ); + txDescCache.fetchDescriptors(); + DPRINTF(EthernetSM, "TXS: No descriptors left in ring, forcing " + "writeback stopping ticking and posting TXQE\n"); + txTick = false; + return; + } + + + if (!(txDescCache.descUnused())) { + anBegin("TXS", "Desc Fetch"); + txDescCache.fetchDescriptors(); + anWe("TXS", txDescCache.annUnusedCacheQ); + DPRINTF(EthernetSM, "TXS: No descriptors available in cache, " + "fetching and stopping ticking\n"); + txTick = false; + return; + } + anPq("TXS", txDescCache.annUnusedCacheQ); + + + txDescCache.processContextDesc(); + if (txDescCache.packetWaiting()) { + DPRINTF(EthernetSM, + "TXS: Fetching TSO header, stopping ticking\n"); + txTick = false; + return; + } + + unsigned size = txDescCache.getPacketSize(txPacket); + if (size > 0 && txFifo.avail() > size) { + anRq("TXS", "TX FIFO Q"); + anBegin("TXS", "DMA Packet"); + DPRINTF(EthernetSM, "TXS: Reserving %d bytes in FIFO and " + "beginning DMA of next packet\n", size); + txFifo.reserve(size); + txDescCache.getPacketData(txPacket); + } else if (size == 0) { + DPRINTF(EthernetSM, "TXS: getPacketSize returned: %d\n", size); + DPRINTF(EthernetSM, + "TXS: No packets to get, writing back used descriptors\n"); + anBegin("TXS", "Desc Writeback"); + txDescCache.writeback(0); + } else { + anWf("TXS", "TX FIFO Q"); + DPRINTF(EthernetSM, "TXS: FIFO full, stopping ticking until space " + "available in FIFO\n"); + txTick = false; + } + + + return; + } + DPRINTF(EthernetSM, "TXS: Nothing to do, stopping ticking\n"); + txTick = false; +} + +bool +IGbE::ethRxPkt(EthPacketPtr pkt) +{ + rxBytes += pkt->length; + rxPackets++; + + DPRINTF(Ethernet, "RxFIFO: Receiving pcakte from wire\n"); + anBegin("RXQ", "Wire Recv"); + + + if (!regs.rctl.en()) { + DPRINTF(Ethernet, "RxFIFO: RX not enabled, dropping\n"); + anBegin("RXQ", "FIFO Drop", 
CPA::FL_BAD); + return true; + } + + // restart the state machines if they are stopped + rxTick = true && drainState() != DrainState::Draining; + if ((rxTick || txTick) && !tickEvent.scheduled()) { + DPRINTF(EthernetSM, + "RXS: received packet into fifo, starting ticking\n"); + restartClock(); + } + + if (!rxFifo.push(pkt)) { + DPRINTF(Ethernet, "RxFIFO: Packet won't fit in fifo... dropped\n"); + postInterrupt(IT_RXO, true); + anBegin("RXQ", "FIFO Drop", CPA::FL_BAD); + return false; + } + + if (CPA::available() && cpa->enabled()) { + assert(sys->numSystemsRunning <= 2); + System *other_sys; + if (sys->systemList[0] == sys) + other_sys = sys->systemList[1]; + else + other_sys = sys->systemList[0]; + + cpa->hwDq(CPA::FL_NONE, sys, macAddr, "RXQ", "WireQ", 0, other_sys); + anQ("RXQ", "RX FIFO Q"); + cpa->hwWe(CPA::FL_NONE, sys, macAddr, "RXQ", "WireQ", 0, other_sys); + } + + return true; +} + + +void +IGbE::rxStateMachine() +{ + if (!regs.rctl.en()) { + rxTick = false; + DPRINTF(EthernetSM, "RXS: RX disabled, stopping ticking\n"); + return; + } + + // If the packet is done check for interrupts/descriptors/etc + if (rxDescCache.packetDone()) { + rxDmaPacket = false; + DPRINTF(EthernetSM, "RXS: Packet completed DMA to memory\n"); + int descLeft = rxDescCache.descLeft(); + DPRINTF(EthernetSM, "RXS: descLeft: %d rdmts: %d rdlen: %d\n", + descLeft, regs.rctl.rdmts(), regs.rdlen()); + switch (regs.rctl.rdmts()) { + case 2: if (descLeft > .125 * regs.rdlen()) break; + case 1: if (descLeft > .250 * regs.rdlen()) break; + case 0: if (descLeft > .500 * regs.rdlen()) break; + DPRINTF(Ethernet, "RXS: Interrupting (RXDMT) " + "because of descriptors left\n"); + postInterrupt(IT_RXDMT); + break; + } + + if (rxFifo.empty()) + rxDescCache.writeback(0); + + if (descLeft == 0) { + anBegin("RXS", "Writeback Descriptors"); + rxDescCache.writeback(0); + DPRINTF(EthernetSM, "RXS: No descriptors left in ring, forcing" + " writeback and stopping ticking\n"); + rxTick = false; + } + + // 
only support descriptor granulaties + assert(regs.rxdctl.gran()); + + if (regs.rxdctl.wthresh() >= rxDescCache.descUsed()) { + DPRINTF(EthernetSM, + "RXS: Writing back because WTHRESH >= descUsed\n"); + anBegin("RXS", "Writeback Descriptors"); + if (regs.rxdctl.wthresh() < (cacheBlockSize()>>4)) + rxDescCache.writeback(regs.rxdctl.wthresh()-1); + else + rxDescCache.writeback((cacheBlockSize()-1)>>4); + } + + if ((rxDescCache.descUnused() < regs.rxdctl.pthresh()) && + ((rxDescCache.descLeft() - rxDescCache.descUnused()) > + regs.rxdctl.hthresh())) { + DPRINTF(EthernetSM, "RXS: Fetching descriptors because " + "descUnused < PTHRESH\n"); + anBegin("RXS", "Fetch Descriptors"); + rxDescCache.fetchDescriptors(); + } + + if (rxDescCache.descUnused() == 0) { + anBegin("RXS", "Fetch Descriptors"); + rxDescCache.fetchDescriptors(); + anWe("RXS", rxDescCache.annUnusedCacheQ); + DPRINTF(EthernetSM, "RXS: No descriptors available in cache, " + "fetching descriptors and stopping ticking\n"); + rxTick = false; + } + return; + } + + if (rxDmaPacket) { + DPRINTF(EthernetSM, + "RXS: stopping ticking until packet DMA completes\n"); + rxTick = false; + return; + } + + if (!rxDescCache.descUnused()) { + anBegin("RXS", "Fetch Descriptors"); + rxDescCache.fetchDescriptors(); + anWe("RXS", rxDescCache.annUnusedCacheQ); + DPRINTF(EthernetSM, "RXS: No descriptors available in cache, " + "stopping ticking\n"); + rxTick = false; + DPRINTF(EthernetSM, "RXS: No descriptors available, fetching\n"); + return; + } + anPq("RXS", rxDescCache.annUnusedCacheQ); + + if (rxFifo.empty()) { + anWe("RXS", "RX FIFO Q"); + DPRINTF(EthernetSM, "RXS: RxFIFO empty, stopping ticking\n"); + rxTick = false; + return; + } + anPq("RXS", "RX FIFO Q"); + anBegin("RXS", "Get Desc"); + + EthPacketPtr pkt; + pkt = rxFifo.front(); + + + pktOffset = rxDescCache.writePacket(pkt, pktOffset); + DPRINTF(EthernetSM, "RXS: Writing packet into memory\n"); + if (pktOffset == pkt->length) { + anBegin( "RXS", "FIFO Dequeue"); + 
DPRINTF(EthernetSM, "RXS: Removing packet from FIFO\n"); + pktOffset = 0; + anDq("RXS", "RX FIFO Q"); + rxFifo.pop(); + } + + DPRINTF(EthernetSM, "RXS: stopping ticking until packet DMA completes\n"); + rxTick = false; + rxDmaPacket = true; + anBegin("RXS", "DMA Packet"); +} + +void +IGbE::txWire() +{ + if (txFifo.empty()) { + anWe("TXQ", "TX FIFO Q"); + txFifoTick = false; + return; + } + + + anPq("TXQ", "TX FIFO Q"); + if (etherInt->sendPacket(txFifo.front())) { + anQ("TXQ", "WireQ"); + if (DTRACE(EthernetSM)) { + IpPtr ip(txFifo.front()); + if (ip) + DPRINTF(EthernetSM, "Transmitting Ip packet with Id=%d\n", + ip->id()); + else + DPRINTF(EthernetSM, "Transmitting Non-Ip packet\n"); + } + anDq("TXQ", "TX FIFO Q"); + anBegin("TXQ", "Wire Send"); + DPRINTF(EthernetSM, + "TxFIFO: Successful transmit, bytes available in fifo: %d\n", + txFifo.avail()); + + txBytes += txFifo.front()->length; + txPackets++; + txFifoTick = false; + + txFifo.pop(); + } else { + // We'll get woken up when the packet ethTxDone() gets called + txFifoTick = false; + } +} + +void +IGbE::tick() +{ + DPRINTF(EthernetSM, "IGbE: -------------- Cycle --------------\n"); + + if (rxTick) + rxStateMachine(); + + if (txTick) + txStateMachine(); + + if (txFifoTick) + txWire(); + + + if (rxTick || txTick || txFifoTick) + schedule(tickEvent, curTick() + clockPeriod()); +} + +void +IGbE::ethTxDone() +{ + anBegin("TXQ", "Send Done"); + // restart the tx state machines if they are stopped + // fifo to send another packet + // tx sm to put more data into the fifo + txFifoTick = true && drainState() != DrainState::Draining; + if (txDescCache.descLeft() != 0 && drainState() != DrainState::Draining) + txTick = true; + + restartClock(); + txWire(); + DPRINTF(EthernetSM, "TxFIFO: Transmission complete\n"); +} + +void +IGbE::serialize(CheckpointOut &cp) const +{ + PciDevice::serialize(cp); + + regs.serialize(cp); + SERIALIZE_SCALAR(eeOpBits); + SERIALIZE_SCALAR(eeAddrBits); + SERIALIZE_SCALAR(eeDataBits); + 
SERIALIZE_SCALAR(eeOpcode); + SERIALIZE_SCALAR(eeAddr); + SERIALIZE_SCALAR(lastInterrupt); + SERIALIZE_ARRAY(flash,iGbReg::EEPROM_SIZE); + + rxFifo.serialize("rxfifo", cp); + txFifo.serialize("txfifo", cp); + + bool txPktExists = txPacket != nullptr; + SERIALIZE_SCALAR(txPktExists); + if (txPktExists) + txPacket->serialize("txpacket", cp); + + Tick rdtr_time = 0, radv_time = 0, tidv_time = 0, tadv_time = 0, + inter_time = 0; + + if (rdtrEvent.scheduled()) + rdtr_time = rdtrEvent.when(); + SERIALIZE_SCALAR(rdtr_time); + + if (radvEvent.scheduled()) + radv_time = radvEvent.when(); + SERIALIZE_SCALAR(radv_time); + + if (tidvEvent.scheduled()) + tidv_time = tidvEvent.when(); + SERIALIZE_SCALAR(tidv_time); + + if (tadvEvent.scheduled()) + tadv_time = tadvEvent.when(); + SERIALIZE_SCALAR(tadv_time); + + if (interEvent.scheduled()) + inter_time = interEvent.when(); + SERIALIZE_SCALAR(inter_time); + + SERIALIZE_SCALAR(pktOffset); + + txDescCache.serializeSection(cp, "TxDescCache"); + rxDescCache.serializeSection(cp, "RxDescCache"); +} + +void +IGbE::unserialize(CheckpointIn &cp) +{ + PciDevice::unserialize(cp); + + regs.unserialize(cp); + UNSERIALIZE_SCALAR(eeOpBits); + UNSERIALIZE_SCALAR(eeAddrBits); + UNSERIALIZE_SCALAR(eeDataBits); + UNSERIALIZE_SCALAR(eeOpcode); + UNSERIALIZE_SCALAR(eeAddr); + UNSERIALIZE_SCALAR(lastInterrupt); + UNSERIALIZE_ARRAY(flash,iGbReg::EEPROM_SIZE); + + rxFifo.unserialize("rxfifo", cp); + txFifo.unserialize("txfifo", cp); + + bool txPktExists; + UNSERIALIZE_SCALAR(txPktExists); + if (txPktExists) { + txPacket = std::make_shared(16384); + txPacket->unserialize("txpacket", cp); + } + + rxTick = true; + txTick = true; + txFifoTick = true; + + Tick rdtr_time, radv_time, tidv_time, tadv_time, inter_time; + UNSERIALIZE_SCALAR(rdtr_time); + UNSERIALIZE_SCALAR(radv_time); + UNSERIALIZE_SCALAR(tidv_time); + UNSERIALIZE_SCALAR(tadv_time); + UNSERIALIZE_SCALAR(inter_time); + + if (rdtr_time) + schedule(rdtrEvent, rdtr_time); + + if (radv_time) + 
schedule(radvEvent, radv_time); + + if (tidv_time) + schedule(tidvEvent, tidv_time); + + if (tadv_time) + schedule(tadvEvent, tadv_time); + + if (inter_time) + schedule(interEvent, inter_time); + + UNSERIALIZE_SCALAR(pktOffset); + + txDescCache.unserializeSection(cp, "TxDescCache"); + rxDescCache.unserializeSection(cp, "RxDescCache"); +} + +IGbE * +IGbEParams::create() +{ + return new IGbE(this); +} diff --git a/src/dev/net/i8254xGBe.hh b/src/dev/net/i8254xGBe.hh new file mode 100644 index 000000000..e35744459 --- /dev/null +++ b/src/dev/net/i8254xGBe.hh @@ -0,0 +1,560 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + */ + +/* @file + * Device model for Intel's 8254x line of gigabit ethernet controllers. + */ + +#ifndef __DEV_NET_I8254XGBE_HH__ +#define __DEV_NET_I8254XGBE_HH__ + +#include +#include + +#include "base/cp_annotate.hh" +#include "base/inet.hh" +#include "debug/EthernetDesc.hh" +#include "debug/EthernetIntr.hh" +#include "dev/net/etherdevice.hh" +#include "dev/net/etherint.hh" +#include "dev/net/etherpkt.hh" +#include "dev/net/i8254xGBe_defs.hh" +#include "dev/net/pktfifo.hh" +#include "dev/pci/device.hh" +#include "params/IGbE.hh" +#include "sim/eventq.hh" + +class IGbEInt; + +class IGbE : public EtherDevice +{ + private: + IGbEInt *etherInt; + CPA *cpa; + + // device registers + iGbReg::Regs regs; + + // eeprom data, status and control bits + int eeOpBits, eeAddrBits, eeDataBits; + uint8_t eeOpcode, eeAddr; + uint16_t flash[iGbReg::EEPROM_SIZE]; + + // packet fifos + PacketFifo rxFifo; + PacketFifo txFifo; + + // Packet that we are currently putting into the txFifo + EthPacketPtr txPacket; + + // Should to Rx/Tx State machine tick? 
+ bool rxTick; + bool txTick; + bool txFifoTick; + + bool rxDmaPacket; + + // Number of bytes copied from current RX packet + unsigned pktOffset; + + // Delays in managaging descriptors + Tick fetchDelay, wbDelay; + Tick fetchCompDelay, wbCompDelay; + Tick rxWriteDelay, txReadDelay; + + // Event and function to deal with RDTR timer expiring + void rdtrProcess() { + rxDescCache.writeback(0); + DPRINTF(EthernetIntr, + "Posting RXT interrupt because RDTR timer expired\n"); + postInterrupt(iGbReg::IT_RXT); + } + + //friend class EventWrapper; + EventWrapper rdtrEvent; + + // Event and function to deal with RADV timer expiring + void radvProcess() { + rxDescCache.writeback(0); + DPRINTF(EthernetIntr, + "Posting RXT interrupt because RADV timer expired\n"); + postInterrupt(iGbReg::IT_RXT); + } + + //friend class EventWrapper; + EventWrapper radvEvent; + + // Event and function to deal with TADV timer expiring + void tadvProcess() { + txDescCache.writeback(0); + DPRINTF(EthernetIntr, + "Posting TXDW interrupt because TADV timer expired\n"); + postInterrupt(iGbReg::IT_TXDW); + } + + //friend class EventWrapper; + EventWrapper tadvEvent; + + // Event and function to deal with TIDV timer expiring + void tidvProcess() { + txDescCache.writeback(0); + DPRINTF(EthernetIntr, + "Posting TXDW interrupt because TIDV timer expired\n"); + postInterrupt(iGbReg::IT_TXDW); + } + //friend class EventWrapper; + EventWrapper tidvEvent; + + // Main event to tick the device + void tick(); + //friend class EventWrapper; + EventWrapper tickEvent; + + + uint64_t macAddr; + + void rxStateMachine(); + void txStateMachine(); + void txWire(); + + /** Write an interrupt into the interrupt pending register and check mask + * and interrupt limit timer before sending interrupt to CPU + * @param t the type of interrupt we are posting + * @param now should we ignore the interrupt limiting timer + */ + void postInterrupt(iGbReg::IntTypes t, bool now = false); + + /** Check and see if changes to the mask 
register have caused an interrupt + * to need to be sent or perhaps removed an interrupt cause. + */ + void chkInterrupt(); + + /** Send an interrupt to the cpu + */ + void delayIntEvent(); + void cpuPostInt(); + // Event to moderate interrupts + EventWrapper interEvent; + + /** Clear the interupt line to the cpu + */ + void cpuClearInt(); + + Tick intClock() { return SimClock::Int::ns * 1024; } + + /** This function is used to restart the clock so it can handle things like + * draining and resume in one place. */ + void restartClock(); + + /** Check if all the draining things that need to occur have occured and + * handle the drain event if so. + */ + void checkDrain(); + + void anBegin(std::string sm, std::string st, int flags = CPA::FL_NONE) { + if (cpa) + cpa->hwBegin((CPA::flags)flags, sys, macAddr, sm, st); + } + + void anQ(std::string sm, std::string q) { + if (cpa) + cpa->hwQ(CPA::FL_NONE, sys, macAddr, sm, q, macAddr); + } + + void anDq(std::string sm, std::string q) { + if (cpa) + cpa->hwDq(CPA::FL_NONE, sys, macAddr, sm, q, macAddr); + } + + void anPq(std::string sm, std::string q, int num = 1) { + if (cpa) + cpa->hwPq(CPA::FL_NONE, sys, macAddr, sm, q, macAddr, NULL, num); + } + + void anRq(std::string sm, std::string q, int num = 1) { + if (cpa) + cpa->hwRq(CPA::FL_NONE, sys, macAddr, sm, q, macAddr, NULL, num); + } + + void anWe(std::string sm, std::string q) { + if (cpa) + cpa->hwWe(CPA::FL_NONE, sys, macAddr, sm, q, macAddr); + } + + void anWf(std::string sm, std::string q) { + if (cpa) + cpa->hwWf(CPA::FL_NONE, sys, macAddr, sm, q, macAddr); + } + + + template + class DescCache : public Serializable + { + protected: + virtual Addr descBase() const = 0; + virtual long descHead() const = 0; + virtual long descTail() const = 0; + virtual long descLen() const = 0; + virtual void updateHead(long h) = 0; + virtual void enableSm() = 0; + virtual void actionAfterWb() {} + virtual void fetchAfterWb() = 0; + + typedef std::deque CacheType; + CacheType 
usedCache; + CacheType unusedCache; + + T *fetchBuf; + T *wbBuf; + + // Pointer to the device we cache for + IGbE *igbe; + + // Name of this descriptor cache + std::string _name; + + // How far we've cached + int cachePnt; + + // The size of the descriptor cache + int size; + + // How many descriptors we are currently fetching + int curFetching; + + // How many descriptors we are currently writing back + int wbOut; + + // if the we wrote back to the end of the descriptor ring and are going + // to have to wrap and write more + bool moreToWb; + + // What the alignment is of the next descriptor writeback + Addr wbAlignment; + + /** The packet that is currently being dmad to memory if any */ + EthPacketPtr pktPtr; + + /** Shortcut for DMA address translation */ + Addr pciToDma(Addr a) { return igbe->pciToDma(a); } + + public: + /** Annotate sm*/ + std::string annSmFetch, annSmWb, annUnusedDescQ, annUsedCacheQ, + annUsedDescQ, annUnusedCacheQ, annDescQ; + + DescCache(IGbE *i, const std::string n, int s); + virtual ~DescCache(); + + std::string name() { return _name; } + + /** If the address/len/head change when we've got descriptors that are + * dirty that is very bad. This function checks that we don't and if we + * do panics. + */ + void areaChanged(); + + void writeback(Addr aMask); + void writeback1(); + EventWrapper wbDelayEvent; + + /** Fetch a chunk of descriptors into the descriptor cache. + * Calls fetchComplete when the memory system returns the data + */ + void fetchDescriptors(); + void fetchDescriptors1(); + EventWrapper fetchDelayEvent; + + /** Called by event when dma to read descriptors is completed + */ + void fetchComplete(); + EventWrapper fetchEvent; + + /** Called by event when dma to writeback descriptors is completed + */ + void wbComplete(); + EventWrapper wbEvent; + + /* Return the number of descriptors left in the ring, so the device has + * a way to figure out if it needs to interrupt. 
+ */ + unsigned + descLeft() const + { + unsigned left = unusedCache.size(); + if (cachePnt > descTail()) + left += (descLen() - cachePnt + descTail()); + else + left += (descTail() - cachePnt); + + return left; + } + + /* Return the number of descriptors used and not written back. + */ + unsigned descUsed() const { return usedCache.size(); } + + /* Return the number of cache unused descriptors we have. */ + unsigned descUnused() const { return unusedCache.size(); } + + /* Get into a state where the descriptor address/head/etc colud be + * changed */ + void reset(); + + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + + virtual bool hasOutstandingEvents() { + return wbEvent.scheduled() || fetchEvent.scheduled(); + } + + }; + + + class RxDescCache : public DescCache + { + protected: + Addr descBase() const override { return igbe->regs.rdba(); } + long descHead() const override { return igbe->regs.rdh(); } + long descLen() const override { return igbe->regs.rdlen() >> 4; } + long descTail() const override { return igbe->regs.rdt(); } + void updateHead(long h) override { igbe->regs.rdh(h); } + void enableSm() override; + void fetchAfterWb() override { + if (!igbe->rxTick && igbe->drainState() == DrainState::Running) + fetchDescriptors(); + } + + bool pktDone; + + /** Variable to head with header/data completion events */ + int splitCount; + + /** Bytes of packet that have been copied, so we know when to + set EOP */ + unsigned bytesCopied; + + public: + RxDescCache(IGbE *i, std::string n, int s); + + /** Write the given packet into the buffer(s) pointed to by the + * descriptor and update the book keeping. Should only be called when + * there are no dma's pending. 
+ * @param packet ethernet packet to write + * @param pkt_offset bytes already copied from the packet to memory + * @return pkt_offset + number of bytes copied during this call + */ + int writePacket(EthPacketPtr packet, int pkt_offset); + + /** Called by event when dma to write packet is completed + */ + void pktComplete(); + + /** Check if the dma on the packet has completed and RX state machine + * can continue + */ + bool packetDone(); + + EventWrapper pktEvent; + + // Event to handle issuing header and data write at the same time + // and only callking pktComplete() when both are completed + void pktSplitDone(); + EventWrapper pktHdrEvent; + EventWrapper pktDataEvent; + + bool hasOutstandingEvents() override; + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + }; + friend class RxDescCache; + + RxDescCache rxDescCache; + + class TxDescCache : public DescCache + { + protected: + Addr descBase() const override { return igbe->regs.tdba(); } + long descHead() const override { return igbe->regs.tdh(); } + long descTail() const override { return igbe->regs.tdt(); } + long descLen() const override { return igbe->regs.tdlen() >> 4; } + void updateHead(long h) override { igbe->regs.tdh(h); } + void enableSm() override; + void actionAfterWb() override; + void fetchAfterWb() override { + if (!igbe->txTick && igbe->drainState() == DrainState::Running) + fetchDescriptors(); + } + + + + bool pktDone; + bool isTcp; + bool pktWaiting; + bool pktMultiDesc; + Addr completionAddress; + bool completionEnabled; + uint32_t descEnd; + + + // tso variables + bool useTso; + Addr tsoHeaderLen; + Addr tsoMss; + Addr tsoTotalLen; + Addr tsoUsedLen; + Addr tsoPrevSeq; + Addr tsoPktPayloadBytes; + bool tsoLoadedHeader; + bool tsoPktHasHeader; + uint8_t tsoHeader[256]; + Addr tsoDescBytesUsed; + Addr tsoCopyBytes; + int tsoPkts; + + public: + TxDescCache(IGbE *i, std::string n, int s); + + /** Tell the cache to DMA a packet from main memory 
into its buffer and + * return the size the of the packet to reserve space in tx fifo. + * @return size of the packet + */ + unsigned getPacketSize(EthPacketPtr p); + void getPacketData(EthPacketPtr p); + void processContextDesc(); + + /** Return the number of dsecriptors in a cache block for threshold + * operations. + */ + unsigned + descInBlock(unsigned num_desc) + { + return num_desc / igbe->cacheBlockSize() / sizeof(iGbReg::TxDesc); + } + + /** Ask if the packet has been transfered so the state machine can give + * it to the fifo. + * @return packet available in descriptor cache + */ + bool packetAvailable(); + + /** Ask if we are still waiting for the packet to be transfered. + * @return packet still in transit. + */ + bool packetWaiting() { return pktWaiting; } + + /** Ask if this packet is composed of multiple descriptors + * so even if we've got data, we need to wait for more before + * we can send it out. + * @return packet can't be sent out because it's a multi-descriptor + * packet + */ + bool packetMultiDesc() { return pktMultiDesc;} + + /** Called by event when dma to write packet is completed + */ + void pktComplete(); + EventWrapper pktEvent; + + void headerComplete(); + EventWrapper headerEvent; + + + void completionWriteback(Addr a, bool enabled) { + DPRINTF(EthernetDesc, + "Completion writeback Addr: %#x enabled: %d\n", + a, enabled); + completionAddress = a; + completionEnabled = enabled; + } + + bool hasOutstandingEvents() override; + + void nullCallback() { + DPRINTF(EthernetDesc, "Completion writeback complete\n"); + } + EventWrapper nullEvent; + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + }; + + friend class TxDescCache; + + TxDescCache txDescCache; + + public: + typedef IGbEParams Params; + const Params * + params() const { + return dynamic_cast(_params); + } + + IGbE(const Params *params); + ~IGbE(); + void init() override; + + EtherInt *getEthPort(const std::string &if_name, int 
idx) override; + + Tick lastInterrupt; + + Tick read(PacketPtr pkt) override; + Tick write(PacketPtr pkt) override; + + Tick writeConfig(PacketPtr pkt) override; + + bool ethRxPkt(EthPacketPtr packet); + void ethTxDone(); + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + + DrainState drain() override; + void drainResume() override; + +}; + +class IGbEInt : public EtherInt +{ + private: + IGbE *dev; + + public: + IGbEInt(const std::string &name, IGbE *d) + : EtherInt(name), dev(d) + { } + + virtual bool recvPacket(EthPacketPtr pkt) { return dev->ethRxPkt(pkt); } + virtual void sendDone() { dev->ethTxDone(); } +}; + +#endif //__DEV_NET_I8254XGBE_HH__ diff --git a/src/dev/net/i8254xGBe_defs.hh b/src/dev/net/i8254xGBe_defs.hh new file mode 100644 index 000000000..79a9413da --- /dev/null +++ b/src/dev/net/i8254xGBe_defs.hh @@ -0,0 +1,854 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + */ + +/* @file + * Register and structure descriptions for Intel's 8254x line of gigabit ethernet controllers. + */ +#include "base/bitfield.hh" + +namespace iGbReg { + + +// Registers used by the Intel GbE NIC +const uint32_t REG_CTRL = 0x00000; +const uint32_t REG_STATUS = 0x00008; +const uint32_t REG_EECD = 0x00010; +const uint32_t REG_EERD = 0x00014; +const uint32_t REG_CTRL_EXT = 0x00018; +const uint32_t REG_MDIC = 0x00020; +const uint32_t REG_FCAL = 0x00028; +const uint32_t REG_FCAH = 0x0002C; +const uint32_t REG_FCT = 0x00030; +const uint32_t REG_VET = 0x00038; +const uint32_t REG_PBA = 0x01000; +const uint32_t REG_ICR = 0x000C0; +const uint32_t REG_ITR = 0x000C4; +const uint32_t REG_ICS = 0x000C8; +const uint32_t REG_IMS = 0x000D0; +const uint32_t REG_IMC = 0x000D8; +const uint32_t REG_IAM = 0x000E0; +const uint32_t REG_RCTL = 0x00100; +const uint32_t REG_FCTTV = 0x00170; +const uint32_t REG_TIPG = 0x00410; +const uint32_t REG_AIFS = 0x00458; +const uint32_t REG_LEDCTL = 0x00e00; +const uint32_t REG_EICR = 0x01580; +const uint32_t REG_IVAR0 = 0x01700; +const uint32_t REG_FCRTL = 0x02160; +const uint32_t REG_FCRTH = 0x02168; +const uint32_t REG_RDBAL = 0x02800; +const uint32_t REG_RDBAH = 0x02804; +const uint32_t REG_RDLEN = 0x02808; +const uint32_t REG_SRRCTL = 0x0280C; +const uint32_t REG_RDH = 0x02810; +const uint32_t REG_RDT = 0x02818; +const uint32_t REG_RDTR = 0x02820; +const uint32_t 
REG_RXDCTL = 0x02828; +const uint32_t REG_RADV = 0x0282C; +const uint32_t REG_TCTL = 0x00400; +const uint32_t REG_TDBAL = 0x03800; +const uint32_t REG_TDBAH = 0x03804; +const uint32_t REG_TDLEN = 0x03808; +const uint32_t REG_TDH = 0x03810; +const uint32_t REG_TXDCA_CTL = 0x03814; +const uint32_t REG_TDT = 0x03818; +const uint32_t REG_TIDV = 0x03820; +const uint32_t REG_TXDCTL = 0x03828; +const uint32_t REG_TADV = 0x0382C; +const uint32_t REG_TDWBAL = 0x03838; +const uint32_t REG_TDWBAH = 0x0383C; +const uint32_t REG_CRCERRS = 0x04000; +const uint32_t REG_RXCSUM = 0x05000; +const uint32_t REG_RLPML = 0x05004; +const uint32_t REG_RFCTL = 0x05008; +const uint32_t REG_MTA = 0x05200; +const uint32_t REG_RAL = 0x05400; +const uint32_t REG_RAH = 0x05404; +const uint32_t REG_VFTA = 0x05600; + +const uint32_t REG_WUC = 0x05800; +const uint32_t REG_MANC = 0x05820; +const uint32_t REG_SWSM = 0x05B50; +const uint32_t REG_FWSM = 0x05B54; +const uint32_t REG_SWFWSYNC = 0x05B5C; + +const uint8_t EEPROM_READ_OPCODE_SPI = 0x03; +const uint8_t EEPROM_RDSR_OPCODE_SPI = 0x05; +const uint8_t EEPROM_SIZE = 64; +const uint16_t EEPROM_CSUM = 0xBABA; + +const uint8_t VLAN_FILTER_TABLE_SIZE = 128; +const uint8_t RCV_ADDRESS_TABLE_SIZE = 24; +const uint8_t MULTICAST_TABLE_SIZE = 128; +const uint32_t STATS_REGS_SIZE = 0x228; + + +// Registers in that are accessed in the PHY +const uint8_t PHY_PSTATUS = 0x1; +const uint8_t PHY_PID = 0x2; +const uint8_t PHY_EPID = 0x3; +const uint8_t PHY_GSTATUS = 10; +const uint8_t PHY_EPSTATUS = 15; +const uint8_t PHY_AGC = 18; + +// Receive Descriptor Status Flags +const uint16_t RXDS_DYNINT = 0x800; +const uint16_t RXDS_UDPV = 0x400; +const uint16_t RXDS_CRCV = 0x100; +const uint16_t RXDS_PIF = 0x080; +const uint16_t RXDS_IPCS = 0x040; +const uint16_t RXDS_TCPCS = 0x020; +const uint16_t RXDS_UDPCS = 0x010; +const uint16_t RXDS_VP = 0x008; +const uint16_t RXDS_IXSM = 0x004; +const uint16_t RXDS_EOP = 0x002; +const uint16_t RXDS_DD = 0x001; + +// Receive 
Descriptor Error Flags +const uint8_t RXDE_RXE = 0x80; +const uint8_t RXDE_IPE = 0x40; +const uint8_t RXDE_TCPE = 0x20; +const uint8_t RXDE_SEQ = 0x04; +const uint8_t RXDE_SE = 0x02; +const uint8_t RXDE_CE = 0x01; + +// Receive Descriptor Extended Error Flags +const uint16_t RXDEE_HBO = 0x008; +const uint16_t RXDEE_CE = 0x010; +const uint16_t RXDEE_LE = 0x020; +const uint16_t RXDEE_PE = 0x080; +const uint16_t RXDEE_OSE = 0x100; +const uint16_t RXDEE_USE = 0x200; +const uint16_t RXDEE_TCPE = 0x400; +const uint16_t RXDEE_IPE = 0x800; + + +// Receive Descriptor Types +const uint8_t RXDT_LEGACY = 0x00; +const uint8_t RXDT_ADV_ONEBUF = 0x01; +const uint8_t RXDT_ADV_SPLIT_A = 0x05; + +// Receive Descriptor Packet Types +const uint16_t RXDP_IPV4 = 0x001; +const uint16_t RXDP_IPV4E = 0x002; +const uint16_t RXDP_IPV6 = 0x004; +const uint16_t RXDP_IPV6E = 0x008; +const uint16_t RXDP_TCP = 0x010; +const uint16_t RXDP_UDP = 0x020; +const uint16_t RXDP_SCTP = 0x040; +const uint16_t RXDP_NFS = 0x080; + +// Interrupt types +enum IntTypes +{ + IT_NONE = 0x00000, //dummy value + IT_TXDW = 0x00001, + IT_TXQE = 0x00002, + IT_LSC = 0x00004, + IT_RXSEQ = 0x00008, + IT_RXDMT = 0x00010, + IT_RXO = 0x00040, + IT_RXT = 0x00080, + IT_MADC = 0x00200, + IT_RXCFG = 0x00400, + IT_GPI0 = 0x02000, + IT_GPI1 = 0x04000, + IT_TXDLOW = 0x08000, + IT_SRPD = 0x10000, + IT_ACK = 0x20000 +}; + +// Receive Descriptor struct +struct RxDesc { + union { + struct { + Addr buf; + uint16_t len; + uint16_t csum; + uint8_t status; + uint8_t errors; + uint16_t vlan; + } legacy; + struct { + Addr pkt; + Addr hdr; + } adv_read; + struct { + uint16_t rss_type:4; + uint16_t pkt_type:12; + uint16_t __reserved1:5; + uint16_t header_len:10; + uint16_t sph:1; + union { + struct { + uint16_t id; + uint16_t csum; + }; + uint32_t rss_hash; + }; + uint32_t status:20; + uint32_t errors:12; + uint16_t pkt_len; + uint16_t vlan_tag; + } adv_wb ; + }; +}; + +struct TxDesc { + uint64_t d1; + uint64_t d2; +}; + +namespace TxdOp { 
+const uint8_t TXD_CNXT = 0x0; +const uint8_t TXD_DATA = 0x1; +const uint8_t TXD_ADVCNXT = 0x2; +const uint8_t TXD_ADVDATA = 0x3; + +inline bool isLegacy(TxDesc *d) { return !bits(d->d2,29,29); } +inline uint8_t getType(TxDesc *d) { return bits(d->d2, 23,20); } +inline bool isType(TxDesc *d, uint8_t type) { return getType(d) == type; } +inline bool isTypes(TxDesc *d, uint8_t t1, uint8_t t2) { return isType(d, t1) || isType(d, t2); } +inline bool isAdvDesc(TxDesc *d) { return !isLegacy(d) && isTypes(d, TXD_ADVDATA,TXD_ADVCNXT); } +inline bool isContext(TxDesc *d) { return !isLegacy(d) && isTypes(d,TXD_CNXT, TXD_ADVCNXT); } +inline bool isData(TxDesc *d) { return !isLegacy(d) && isTypes(d, TXD_DATA, TXD_ADVDATA); } + +inline Addr getBuf(TxDesc *d) { assert(isLegacy(d) || isData(d)); return d->d1; } +inline Addr getLen(TxDesc *d) { if (isLegacy(d)) return bits(d->d2,15,0); else return bits(d->d2, 19,0); } +inline void setDd(TxDesc *d) { replaceBits(d->d2, 35, 32, ULL(1)); } + +inline bool ide(TxDesc *d) { return bits(d->d2, 31,31) && (getType(d) == TXD_DATA || isLegacy(d)); } +inline bool vle(TxDesc *d) { assert(isLegacy(d) || isData(d)); return bits(d->d2, 30,30); } +inline bool rs(TxDesc *d) { return bits(d->d2, 27,27); } +inline bool ic(TxDesc *d) { assert(isLegacy(d) || isData(d)); return isLegacy(d) && bits(d->d2, 26,26); } +inline bool tse(TxDesc *d) { + if (isTypes(d, TXD_CNXT, TXD_DATA)) + return bits(d->d2, 26,26); + if (isType(d, TXD_ADVDATA)) + return bits(d->d2, 31, 31); + return false; +} + +inline bool ifcs(TxDesc *d) { assert(isLegacy(d) || isData(d)); return bits(d->d2, 25,25); } +inline bool eop(TxDesc *d) { assert(isLegacy(d) || isData(d)); return bits(d->d2, 24,24); } +inline bool ip(TxDesc *d) { assert(isContext(d)); return bits(d->d2, 25,25); } +inline bool tcp(TxDesc *d) { assert(isContext(d)); return bits(d->d2, 24,24); } + +inline uint8_t getCso(TxDesc *d) { assert(isLegacy(d)); return bits(d->d2, 23,16); } +inline uint8_t getCss(TxDesc *d) { 
assert(isLegacy(d)); return bits(d->d2, 47,40); } + +inline bool ixsm(TxDesc *d) { return isData(d) && bits(d->d2, 40,40); } +inline bool txsm(TxDesc *d) { return isData(d) && bits(d->d2, 41,41); } + +inline int tucse(TxDesc *d) { assert(isContext(d)); return bits(d->d1,63,48); } +inline int tucso(TxDesc *d) { assert(isContext(d)); return bits(d->d1,47,40); } +inline int tucss(TxDesc *d) { assert(isContext(d)); return bits(d->d1,39,32); } +inline int ipcse(TxDesc *d) { assert(isContext(d)); return bits(d->d1,31,16); } +inline int ipcso(TxDesc *d) { assert(isContext(d)); return bits(d->d1,15,8); } +inline int ipcss(TxDesc *d) { assert(isContext(d)); return bits(d->d1,7,0); } +inline int mss(TxDesc *d) { assert(isContext(d)); return bits(d->d2,63,48); } +inline int hdrlen(TxDesc *d) { + assert(isContext(d)); + if (!isAdvDesc(d)) + return bits(d->d2,47,40); + return bits(d->d2, 47,40) + bits(d->d1, 8,0) + bits(d->d1, 15, 9); +} + +inline int getTsoLen(TxDesc *d) { assert(isType(d, TXD_ADVDATA)); return bits(d->d2, 63,46); } +inline int utcmd(TxDesc *d) { assert(isContext(d)); return bits(d->d2,31,24); } // command byte, bits 31:24 of d2; bits() takes (high bit, low bit) like every other accessor here +} // namespace TxdOp + + +#define ADD_FIELD32(NAME, OFFSET, BITS) \ + inline uint32_t NAME() { return bits(_data, OFFSET+BITS-1, OFFSET); } \ + inline void NAME(uint32_t d) { replaceBits(_data, OFFSET+BITS-1, OFFSET,d); } + +#define ADD_FIELD64(NAME, OFFSET, BITS) \ + inline uint64_t NAME() { return bits(_data, OFFSET+BITS-1, OFFSET); } \ + inline void NAME(uint64_t d) { replaceBits(_data, OFFSET+BITS-1, OFFSET,d); } + +struct Regs : public Serializable { + template + struct Reg { + T _data; + T operator()() { return _data; } + const Reg &operator=(T d) { _data = d; return *this;} + bool operator==(T d) { return d == _data; } + void operator()(T d) { _data = d; } + Reg() { _data = 0; } + void serialize(CheckpointOut &cp) const + { + SERIALIZE_SCALAR(_data); + } + void unserialize(CheckpointIn &cp) + { + UNSERIALIZE_SCALAR(_data); + } + }; + + struct CTRL : public Reg
{ // 0x0000 CTRL Register + using Reg::operator=; + ADD_FIELD32(fd,0,1); // full duplex + ADD_FIELD32(bem,1,1); // big endian mode + ADD_FIELD32(pcipr,2,1); // PCI priority + ADD_FIELD32(lrst,3,1); // link reset + ADD_FIELD32(tme,4,1); // test mode enable + ADD_FIELD32(asde,5,1); // Auto-speed detection + ADD_FIELD32(slu,6,1); // Set link up + ADD_FIELD32(ilos,7,1); // invert los-of-signal + ADD_FIELD32(speed,8,2); // speed selection bits + ADD_FIELD32(be32,10,1); // big endian mode 32 + ADD_FIELD32(frcspd,11,1); // force speed + ADD_FIELD32(frcdpx,12,1); // force duplex + ADD_FIELD32(duden,13,1); // dock/undock enable + ADD_FIELD32(dudpol,14,1); // dock/undock polarity + ADD_FIELD32(fphyrst,15,1); // force phy reset + ADD_FIELD32(extlen,16,1); // external link status enable + ADD_FIELD32(rsvd,17,1); // reserved + ADD_FIELD32(sdp0d,18,1); // software controlled pin data + ADD_FIELD32(sdp1d,19,1); // software controlled pin data + ADD_FIELD32(sdp2d,20,1); // software controlled pin data + ADD_FIELD32(sdp3d,21,1); // software controlled pin data + ADD_FIELD32(sdp0i,22,1); // software controlled pin dir + ADD_FIELD32(sdp1i,23,1); // software controlled pin dir + ADD_FIELD32(sdp2i,24,1); // software controlled pin dir + ADD_FIELD32(sdp3i,25,1); // software controlled pin dir + ADD_FIELD32(rst,26,1); // reset + ADD_FIELD32(rfce,27,1); // receive flow control enable + ADD_FIELD32(tfce,28,1); // transmit flow control enable + ADD_FIELD32(rte,29,1); // routing tag enable + ADD_FIELD32(vme,30,1); // vlan enable + ADD_FIELD32(phyrst,31,1); // phy reset + }; + CTRL ctrl; + + struct STATUS : public Reg { // 0x0008 STATUS Register + using Reg::operator=; + ADD_FIELD32(fd,0,1); // full duplex + ADD_FIELD32(lu,1,1); // link up + ADD_FIELD32(func,2,2); // function id + ADD_FIELD32(txoff,4,1); // transmission paused + ADD_FIELD32(tbimode,5,1); // tbi mode + ADD_FIELD32(speed,6,2); // link speed + ADD_FIELD32(asdv,8,2); // auto speed detection value + ADD_FIELD32(mtxckok,10,1); // 
mtx clock running ok + ADD_FIELD32(pci66,11,1); // In 66Mhz pci slot + ADD_FIELD32(bus64,12,1); // in 64 bit slot + ADD_FIELD32(pcix,13,1); // Pci mode + ADD_FIELD32(pcixspd,14,2); // pci x speed + }; + STATUS sts; + + struct EECD : public Reg { // 0x0010 EECD Register + using Reg::operator=; + ADD_FIELD32(sk,0,1); // clock input to the eeprom + ADD_FIELD32(cs,1,1); // chip select to eeprom + ADD_FIELD32(din,2,1); // data input to eeprom + ADD_FIELD32(dout,3,1); // data output bit + ADD_FIELD32(fwe,4,2); // flash write enable + ADD_FIELD32(ee_req,6,1); // request eeprom access + ADD_FIELD32(ee_gnt,7,1); // grant eeprom access + ADD_FIELD32(ee_pres,8,1); // eeprom present + ADD_FIELD32(ee_size,9,1); // eeprom size + ADD_FIELD32(ee_sz1,10,1); // eeprom size + ADD_FIELD32(rsvd,11,2); // reserved + ADD_FIELD32(ee_type,13,1); // type of eeprom + } ; + EECD eecd; + + struct EERD : public Reg { // 0x0014 EERD Register + using Reg::operator=; + ADD_FIELD32(start,0,1); // start read + ADD_FIELD32(done,1,1); // done read + ADD_FIELD32(addr,2,14); // address + ADD_FIELD32(data,16,16); // data + }; + EERD eerd; + + struct CTRL_EXT : public Reg { // 0x0018 CTRL_EXT Register + using Reg::operator=; + ADD_FIELD32(gpi_en,0,4); // enable interrupts from gpio + ADD_FIELD32(phyint,5,1); // reads the phy internal int status + ADD_FIELD32(sdp2_data,6,1); // data from gpio sdp + ADD_FIELD32(spd3_data,7,1); // data from gpio sdp + ADD_FIELD32(spd2_iodir,10,1); // direction of sdp2 + ADD_FIELD32(spd3_iodir,11,1); // direction of sdp3 + ADD_FIELD32(asdchk,12,1); // initiate auto-speed-detection + ADD_FIELD32(eerst,13,1); // reset the eeprom + ADD_FIELD32(spd_byps,15,1); // bypass speed select + ADD_FIELD32(ro_dis,17,1); // disable relaxed memory ordering + ADD_FIELD32(vreg,21,1); // power down the voltage regulator + ADD_FIELD32(link_mode,22,2); // interface to talk to the link + ADD_FIELD32(iame, 27,1); // interrupt acknowledge auto-mask ??
+ ADD_FIELD32(drv_loaded, 28,1);// driver is loaded and in charge of device + ADD_FIELD32(timer_clr, 29,1); // clear interrupt timers after IMS clear ?? + }; + CTRL_EXT ctrl_ext; + + struct MDIC : public Reg { // 0x0020 MDIC Register + using Reg::operator=; + ADD_FIELD32(data,0,16); // data + ADD_FIELD32(regadd,16,5); // register address + ADD_FIELD32(phyadd,21,5); // phy addresses + ADD_FIELD32(op,26,2); // opcode + ADD_FIELD32(r,28,1); // ready + ADD_FIELD32(i,29,1); // interrupt + ADD_FIELD32(e,30,1); // error + }; + MDIC mdic; + + struct ICR : public Reg { // 0x00C0 ICR Register + using Reg::operator=; + ADD_FIELD32(txdw,0,1) // tx descr written back + ADD_FIELD32(txqe,1,1) // tx queue empty + ADD_FIELD32(lsc,2,1) // link status change + ADD_FIELD32(rxseq,3,1) // rcv sequence error + ADD_FIELD32(rxdmt0,4,1) // rcv descriptor min thresh + ADD_FIELD32(rsvd1,5,1) // reserved + ADD_FIELD32(rxo,6,1) // receive overrun + ADD_FIELD32(rxt0,7,1) // receiver timer interrupt + ADD_FIELD32(mdac,9,1) // mdi/o access complete + ADD_FIELD32(rxcfg,10,1) // recv /c/ ordered sets + ADD_FIELD32(phyint,12,1) // phy interrupt + ADD_FIELD32(gpi1,13,1) // gpi int 1 + ADD_FIELD32(gpi2,14,1) // gpi int 2 + ADD_FIELD32(txdlow,15,1) // transmit desc low thresh + ADD_FIELD32(srpd,16,1) // small receive packet detected + ADD_FIELD32(ack,17,1); // receive ack frame + ADD_FIELD32(int_assert, 31,1); // interrupt caused a system interrupt + }; + ICR icr; + + uint32_t imr; // register that contains the current interrupt mask + + struct ITR : public Reg { // 0x00C4 ITR Register + using Reg::operator=; + ADD_FIELD32(interval, 0,16); // minimum inter-interrupt interval + // specified in 256ns increments + }; + ITR itr; + + // When CTRL_EXT.IAME and the ICR.INT_ASSERT is 1 an ICR read or write + // causes the IAM register contents to be written into the IMC + // automatically clearing all interrupts that have a bit in the IAM set + uint32_t iam; + + struct RCTL : public Reg { // 0x0100 RCTL Register
+ using Reg::operator=; + ADD_FIELD32(rst,0,1); // Reset + ADD_FIELD32(en,1,1); // Enable + ADD_FIELD32(sbp,2,1); // Store bad packets + ADD_FIELD32(upe,3,1); // Unicast Promiscuous enabled + ADD_FIELD32(mpe,4,1); // Multicast promiscuous enabled + ADD_FIELD32(lpe,5,1); // long packet reception enabled + ADD_FIELD32(lbm,6,2); // + ADD_FIELD32(rdmts,8,2); // + ADD_FIELD32(mo,12,2); // + ADD_FIELD32(mdr,14,1); // + ADD_FIELD32(bam,15,1); // + ADD_FIELD32(bsize,16,2); // + ADD_FIELD32(vfe,18,1); // + ADD_FIELD32(cfien,19,1); // + ADD_FIELD32(cfi,20,1); // + ADD_FIELD32(dpf,22,1); // discard pause frames + ADD_FIELD32(pmcf,23,1); // pass mac control frames + ADD_FIELD32(bsex,25,1); // buffer size extension + ADD_FIELD32(secrc,26,1); // strip ethernet crc from incoming packet + unsigned descSize() + { + switch(bsize()) { + case 0: return bsex() == 0 ? 2048 : 0; + case 1: return bsex() == 0 ? 1024 : 16384; + case 2: return bsex() == 0 ? 512 : 8192; + case 3: return bsex() == 0 ? 256 : 4096; + default: + return 0; + } + } + }; + RCTL rctl; + + struct FCTTV : public Reg { // 0x0170 FCTTV + using Reg::operator=; + ADD_FIELD32(ttv,0,16); // Transmit Timer Value + }; + FCTTV fcttv; + + struct TCTL : public Reg { // 0x0400 TCTL Register + using Reg::operator=; + ADD_FIELD32(rst,0,1); // Reset + ADD_FIELD32(en,1,1); // Enable + ADD_FIELD32(bce,2,1); // busy check enable + ADD_FIELD32(psp,3,1); // pad short packets + ADD_FIELD32(ct,4,8); // collision threshold + ADD_FIELD32(cold,12,10); // collision distance + ADD_FIELD32(swxoff,22,1); // software xoff transmission + ADD_FIELD32(pbe,23,1); // packet burst enable + ADD_FIELD32(rtlc,24,1); // retransmit late collisions + ADD_FIELD32(nrtu,25,1); // on underrun no TX + ADD_FIELD32(mulr,26,1); // multiple request + }; + TCTL tctl; + + struct PBA : public Reg { // 0x1000 PBA Register + using Reg::operator=; + ADD_FIELD32(rxa,0,16); + ADD_FIELD32(txa,16,16); + }; + PBA pba; + + struct FCRTL : public Reg { // 0x2160 FCRTL Register + 
using Reg::operator=; + ADD_FIELD32(rtl,3,28); // make this bigger than the spec so we can have + // a larger buffer + ADD_FIELD32(xone, 31,1); + }; + FCRTL fcrtl; + + struct FCRTH : public Reg { // 0x2168 FCRTH Register + using Reg::operator=; + ADD_FIELD32(rth,3,13); // make this bigger than the spec so we can have + // a larger buffer. NOTE(review): only 13 bits here vs 28 for FCRTL.rtl -- confirm + ADD_FIELD32(xfce, 31,1); + }; + FCRTH fcrth; + + struct RDBA : public Reg { // 0x2800 RDBA Register + using Reg::operator=; + ADD_FIELD64(rdbal,0,32); // low 32 bits of the rx descriptor ring base address + ADD_FIELD64(rdbah,32,32); // high 32 bits of the rx descriptor ring base address + }; + RDBA rdba; + + struct RDLEN : public Reg { // 0x2808 RDLEN Register + using Reg::operator=; + ADD_FIELD32(len,7,13); // number of bytes in the descriptor buffer + }; + RDLEN rdlen; + + struct SRRCTL : public Reg { // 0x280C SRRCTL Register + using Reg::operator=; + ADD_FIELD32(pktlen, 0, 8); + ADD_FIELD32(hdrlen, 8, 8); // guess based on header, not documented + ADD_FIELD32(desctype, 25,3); // type of descriptor 000 legacy, 001 adv, + //101 hdr split + unsigned bufLen() { return pktlen() << 10; } + unsigned hdrLen() { return hdrlen() << 6; } + }; + SRRCTL srrctl; + + struct RDH : public Reg { // 0x2810 RDH Register + using Reg::operator=; + ADD_FIELD32(rdh,0,16); // head of the descriptor ring + }; + RDH rdh; + + struct RDT : public Reg { // 0x2818 RDT Register + using Reg::operator=; + ADD_FIELD32(rdt,0,16); // tail of the descriptor ring + }; + RDT rdt; + + struct RDTR : public Reg { // 0x2820 RDTR Register + using Reg::operator=; + ADD_FIELD32(delay,0,16); // receive delay timer + ADD_FIELD32(fpd, 31,1); // flush partial descriptor block ??
+ }; + RDTR rdtr; + + struct RXDCTL : public Reg { // 0x2828 RXDCTL Register + using Reg::operator=; + ADD_FIELD32(pthresh,0,6); // prefetch threshold, less than this + // consider prefetch + ADD_FIELD32(hthresh,8,6); // number of descriptors in host mem to + // consider prefetch + ADD_FIELD32(wthresh,16,6); // writeback threshold + ADD_FIELD32(gran,24,1); // granularity 0 = desc, 1 = cacheline; NOTE(review): TXDCTL.gran is documented with the opposite polarity -- confirm + }; + RXDCTL rxdctl; + + struct RADV : public Reg { // 0x282C RADV Register + using Reg::operator=; + ADD_FIELD32(idv,0,16); // absolute interrupt delay + }; + RADV radv; + + struct RSRPD : public Reg { // 0x2C00 RSRPD Register + using Reg::operator=; + ADD_FIELD32(idv,0,12); // size to interrupt on small packets + }; + RSRPD rsrpd; + + struct TDBA : public Reg { // 0x3800 TDBAL Register + using Reg::operator=; + ADD_FIELD64(tdbal,0,32); // low 32 bits of the transmit descriptor ring base address + ADD_FIELD64(tdbah,32,32); // high 32 bits of the transmit descriptor ring base address + }; + TDBA tdba; + + struct TDLEN : public Reg { // 0x3808 TDLEN Register + using Reg::operator=; + ADD_FIELD32(len,7,13); // number of bytes in the descriptor buffer + }; + TDLEN tdlen; + + struct TDH : public Reg { // 0x3810 TDH Register + using Reg::operator=; + ADD_FIELD32(tdh,0,16); // head of the descriptor ring + }; + TDH tdh; + + struct TXDCA_CTL : public Reg { // 0x3814 TXDCA_CTL Register + using Reg::operator=; + ADD_FIELD32(cpu_mask, 0, 5); + ADD_FIELD32(enabled, 5,1); + ADD_FIELD32(relax_ordering, 6, 1); + }; + TXDCA_CTL txdca_ctl; + + struct TDT : public Reg { // 0x3818 TDT Register + using Reg::operator=; + ADD_FIELD32(tdt,0,16); // tail of the descriptor ring + }; + TDT tdt; + + struct TIDV : public Reg { // 0x3820 TIDV Register + using Reg::operator=; + ADD_FIELD32(idv,0,16); // interrupt delay + }; + TIDV tidv; + + struct TXDCTL : public Reg { // 0x3828 TXDCTL Register + using Reg::operator=; + ADD_FIELD32(pthresh, 0,6); // if number of descriptors control has is + // below this number, a prefetch is
considered + ADD_FIELD32(hthresh,8,8); // number of valid descriptors in host memory + // before a prefetch is considered + ADD_FIELD32(wthresh,16,6); // number of descriptors to keep until + // writeback is considered + ADD_FIELD32(gran, 24,1); // granularity of above values (0 = cacheline, + // 1 == descriptor) + ADD_FIELD32(lwthresh,25,7); // xmit descriptor low thresh, interrupt + // below this level + }; + TXDCTL txdctl; + + struct TADV : public Reg { // 0x382C TADV Register + using Reg::operator=; + ADD_FIELD32(idv,0,16); // absolute interrupt delay + }; + TADV tadv; +/* + struct TDWBA : public Reg { // 0x3838 TDWBA Register + using Reg::operator=; + ADD_FIELD64(en,0,1); // enable transmit description ring address writeback + ADD_FIELD64(tdwbal,2,32); // base address of transmit descriptor ring address writeback + ADD_FIELD64(tdwbah,32,32); // base address of transmit descriptor ring + }; + TDWBA tdwba;*/ + uint64_t tdwba; + + struct RXCSUM : public Reg { // 0x5000 RXCSUM Register + using Reg::operator=; + ADD_FIELD32(pcss,0,8); + ADD_FIELD32(ipofld,8,1); + ADD_FIELD32(tuofld,9,1); + ADD_FIELD32(pcsd, 13,1); + }; + RXCSUM rxcsum; + + uint32_t rlpml; // 0x5004 RLPML probably maximum accepted packet size + + struct RFCTL : public Reg { // 0x5008 RFCTL Register + using Reg::operator=; + ADD_FIELD32(iscsi_dis,0,1); + ADD_FIELD32(iscsi_dwc,1,5); + ADD_FIELD32(nfsw_dis,6,1); + ADD_FIELD32(nfsr_dis,7,1); + ADD_FIELD32(nfs_ver,8,2); + ADD_FIELD32(ipv6_dis,10,1); + ADD_FIELD32(ipv6xsum_dis,11,1); + ADD_FIELD32(ackdis,13,1); + ADD_FIELD32(ipfrsp_dis,14,1); + ADD_FIELD32(exsten,15,1); + }; + RFCTL rfctl; + + struct MANC : public Reg { // 0x5820 MANC Register + using Reg::operator=; + ADD_FIELD32(smbus,0,1); // SMBus enabled ##### + ADD_FIELD32(asf,1,1); // ASF enabled ##### + ADD_FIELD32(ronforce,2,1); // reset of force + ADD_FIELD32(rsvd,3,5); // reserved + ADD_FIELD32(rmcp1,8,1); // rcmp1 filtering + ADD_FIELD32(rmcp2,9,1); // rcmp2 filtering +
ADD_FIELD32(ipv4,10,1); // enable ipv4 + ADD_FIELD32(ipv6,11,1); // enable ipv6 + ADD_FIELD32(snap,12,1); // accept snap + ADD_FIELD32(arp,13,1); // filter arp ##### + ADD_FIELD32(neighbor,14,1); // neighbor discovery + ADD_FIELD32(arp_resp,15,1); // arp response + ADD_FIELD32(tcorst,16,1); // tco reset happened + ADD_FIELD32(rcvtco,17,1); // receive tco enabled ###### + ADD_FIELD32(blkphyrst,18,1);// block phy resets ######## + ADD_FIELD32(rcvall,19,1); // receive all + ADD_FIELD32(macaddrfltr,20,1); // mac address filtering ###### + ADD_FIELD32(mng2host,21,1); // mng2 host packets ####### + ADD_FIELD32(ipaddrfltr,22,1); // ip address filtering + ADD_FIELD32(xsumfilter,23,1); // checksum filtering + ADD_FIELD32(brfilter,24,1); // broadcast filtering + ADD_FIELD32(smbreq,25,1); // smb request + ADD_FIELD32(smbgnt,26,1); // smb grant + ADD_FIELD32(smbclkin,27,1); // smbclkin + ADD_FIELD32(smbdatain,28,1); // smbdatain + ADD_FIELD32(smbdataout,29,1); // smb data out + ADD_FIELD32(smbclkout,30,1); // smb clock out + }; + MANC manc; + + struct SWSM : public Reg { // 0x5B50 SWSM register + using Reg::operator=; + ADD_FIELD32(smbi,0,1); // Semaphone bit + ADD_FIELD32(swesmbi, 1,1); // Software eeporm semaphore + ADD_FIELD32(wmng, 2,1); // Wake MNG clock + ADD_FIELD32(reserved, 3, 29); + }; + SWSM swsm; + + struct FWSM : public Reg { // 0x5B54 FWSM register + using Reg::operator=; + ADD_FIELD32(eep_fw_semaphore,0,1); + ADD_FIELD32(fw_mode, 1,3); + ADD_FIELD32(ide, 4,1); + ADD_FIELD32(sol, 5,1); + ADD_FIELD32(eep_roload, 6,1); + ADD_FIELD32(reserved, 7,8); + ADD_FIELD32(fw_val_bit, 15, 1); + ADD_FIELD32(reset_cnt, 16, 3); + ADD_FIELD32(ext_err_ind, 19, 6); + ADD_FIELD32(reserved2, 25, 7); + }; + FWSM fwsm; + + uint32_t sw_fw_sync; + + void serialize(CheckpointOut &cp) const override + { + paramOut(cp, "ctrl", ctrl._data); + paramOut(cp, "sts", sts._data); + paramOut(cp, "eecd", eecd._data); + paramOut(cp, "eerd", eerd._data); + paramOut(cp, "ctrl_ext", ctrl_ext._data); + 
paramOut(cp, "mdic", mdic._data); + paramOut(cp, "icr", icr._data); + SERIALIZE_SCALAR(imr); + paramOut(cp, "itr", itr._data); + SERIALIZE_SCALAR(iam); + paramOut(cp, "rctl", rctl._data); + paramOut(cp, "fcttv", fcttv._data); + paramOut(cp, "tctl", tctl._data); + paramOut(cp, "pba", pba._data); + paramOut(cp, "fcrtl", fcrtl._data); + paramOut(cp, "fcrth", fcrth._data); + paramOut(cp, "rdba", rdba._data); + paramOut(cp, "rdlen", rdlen._data); + paramOut(cp, "srrctl", srrctl._data); + paramOut(cp, "rdh", rdh._data); + paramOut(cp, "rdt", rdt._data); + paramOut(cp, "rdtr", rdtr._data); + paramOut(cp, "rxdctl", rxdctl._data); + paramOut(cp, "radv", radv._data); + paramOut(cp, "rsrpd", rsrpd._data); + paramOut(cp, "tdba", tdba._data); + paramOut(cp, "tdlen", tdlen._data); + paramOut(cp, "tdh", tdh._data); + paramOut(cp, "txdca_ctl", txdca_ctl._data); + paramOut(cp, "tdt", tdt._data); + paramOut(cp, "tidv", tidv._data); + paramOut(cp, "txdctl", txdctl._data); + paramOut(cp, "tadv", tadv._data); + //paramOut(cp, "tdwba", tdwba._data); + SERIALIZE_SCALAR(tdwba); + paramOut(cp, "rxcsum", rxcsum._data); + SERIALIZE_SCALAR(rlpml); + paramOut(cp, "rfctl", rfctl._data); + paramOut(cp, "manc", manc._data); + paramOut(cp, "swsm", swsm._data); + paramOut(cp, "fwsm", fwsm._data); + SERIALIZE_SCALAR(sw_fw_sync); + } + + void unserialize(CheckpointIn &cp) override + { + paramIn(cp, "ctrl", ctrl._data); + paramIn(cp, "sts", sts._data); + paramIn(cp, "eecd", eecd._data); + paramIn(cp, "eerd", eerd._data); + paramIn(cp, "ctrl_ext", ctrl_ext._data); + paramIn(cp, "mdic", mdic._data); + paramIn(cp, "icr", icr._data); + UNSERIALIZE_SCALAR(imr); + paramIn(cp, "itr", itr._data); + UNSERIALIZE_SCALAR(iam); + paramIn(cp, "rctl", rctl._data); + paramIn(cp, "fcttv", fcttv._data); + paramIn(cp, "tctl", tctl._data); + paramIn(cp, "pba", pba._data); + paramIn(cp, "fcrtl", fcrtl._data); + paramIn(cp, "fcrth", fcrth._data); + paramIn(cp, "rdba", rdba._data); + paramIn(cp, "rdlen", rdlen._data); + 
paramIn(cp, "srrctl", srrctl._data); + paramIn(cp, "rdh", rdh._data); + paramIn(cp, "rdt", rdt._data); + paramIn(cp, "rdtr", rdtr._data); + paramIn(cp, "rxdctl", rxdctl._data); + paramIn(cp, "radv", radv._data); + paramIn(cp, "rsrpd", rsrpd._data); + paramIn(cp, "tdba", tdba._data); + paramIn(cp, "tdlen", tdlen._data); + paramIn(cp, "tdh", tdh._data); + paramIn(cp, "txdca_ctl", txdca_ctl._data); + paramIn(cp, "tdt", tdt._data); + paramIn(cp, "tidv", tidv._data); + paramIn(cp, "txdctl", txdctl._data); + paramIn(cp, "tadv", tadv._data); + UNSERIALIZE_SCALAR(tdwba); + //paramIn(cp, "tdwba", tdwba._data); + paramIn(cp, "rxcsum", rxcsum._data); + UNSERIALIZE_SCALAR(rlpml); + paramIn(cp, "rfctl", rfctl._data); + paramIn(cp, "manc", manc._data); + paramIn(cp, "swsm", swsm._data); + paramIn(cp, "fwsm", fwsm._data); + UNSERIALIZE_SCALAR(sw_fw_sync); + } +}; +} // namespace iGbReg diff --git a/src/dev/net/multi_etherlink.cc b/src/dev/net/multi_etherlink.cc new file mode 100644 index 000000000..cf4300ddf --- /dev/null +++ b/src/dev/net/multi_etherlink.cc @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabor Dozsa + */ + +/* @file + * Device module for a full duplex ethernet link for multi gem5 simulations. 
+ */ + +#include "dev/net/multi_etherlink.hh" + +#include +#include +#include + +#include +#include +#include +#include + +#include "base/random.hh" +#include "base/trace.hh" +#include "debug/EthernetData.hh" +#include "debug/MultiEthernet.hh" +#include "debug/MultiEthernetPkt.hh" +#include "dev/net/etherdump.hh" +#include "dev/net/etherint.hh" +#include "dev/net/etherlink.hh" +#include "dev/net/etherobject.hh" +#include "dev/net/etherpkt.hh" +#include "dev/net/multi_iface.hh" +#include "dev/net/tcp_iface.hh" +#include "params/EtherLink.hh" +#include "sim/core.hh" +#include "sim/serialize.hh" +#include "sim/system.hh" + +using namespace std; + +MultiEtherLink::MultiEtherLink(const Params *p) + : EtherObject(p) +{ + DPRINTF(MultiEthernet,"MultiEtherLink::MultiEtherLink() " + "link delay:%llu\n", p->delay); + + txLink = new TxLink(name() + ".link0", this, p->speed, p->delay_var, + p->dump); + rxLink = new RxLink(name() + ".link1", this, p->delay, p->dump); + + // create the multi (TCP) interface to talk to the peer gem5 processes. 
+ multiIface = new TCPIface(p->server_name, p->server_port, p->multi_rank, + p->sync_start, p->sync_repeat, this); + + localIface = new LocalIface(name() + ".int0", txLink, rxLink, multiIface); +} + +MultiEtherLink::~MultiEtherLink() +{ + delete txLink; + delete rxLink; + delete localIface; + delete multiIface; +} + +EtherInt* +MultiEtherLink::getEthPort(const std::string &if_name, int idx) +{ + if (if_name != "int0") { + return nullptr; + } else { + panic_if(localIface->getPeer(), "interface already connected to"); + } + return localIface; +} + +void MultiEtherLink::memWriteback() +{ + DPRINTF(MultiEthernet,"MultiEtherLink::memWriteback() called\n"); + multiIface->drainDone(); +} + +void +MultiEtherLink::serialize(CheckpointOut &cp) const +{ + multiIface->serialize("multiIface", cp); + txLink->serialize("txLink", cp); + rxLink->serialize("rxLink", cp); +} + +void +MultiEtherLink::unserialize(CheckpointIn &cp) +{ + multiIface->unserialize("multiIface", cp); + txLink->unserialize("txLink", cp); + rxLink->unserialize("rxLink", cp); +} + +void +MultiEtherLink::init() +{ + DPRINTF(MultiEthernet,"MultiEtherLink::init() called\n"); + multiIface->initRandom(); +} + +void +MultiEtherLink::startup() +{ + DPRINTF(MultiEthernet,"MultiEtherLink::startup() called\n"); + multiIface->startPeriodicSync(); +} + +void +MultiEtherLink::RxLink::setMultiInt(MultiIface *m) +{ + assert(!multiIface); + multiIface = m; + // Spawn a new receiver thread that will process messages + // coming in from peer gem5 processes. + // The receive thread will also schedule a (receive) doneEvent + // for each incoming data packet. 
+ multiIface->spawnRecvThread(&doneEvent, linkDelay); +} + +void +MultiEtherLink::RxLink::rxDone() +{ + assert(!busy()); + + // retrieve the packet that triggered the receive done event + packet = multiIface->packetIn(); + + if (dump) + dump->dump(packet); + + DPRINTF(MultiEthernetPkt, "MultiEtherLink::MultiLink::rxDone() " + "packet received: len=%d\n", packet->length); + DDUMP(EthernetData, packet->data, packet->length); + + localIface->sendPacket(packet); + + packet = nullptr; +} + +void +MultiEtherLink::TxLink::txDone() +{ + if (dump) + dump->dump(packet); + + packet = nullptr; + assert(!busy()); + + localIface->sendDone(); +} + +bool +MultiEtherLink::TxLink::transmit(EthPacketPtr pkt) +{ + if (busy()) { + DPRINTF(MultiEthernet, "packet not sent, link busy\n"); + return false; + } + + packet = pkt; + Tick delay = (Tick)ceil(((double)pkt->length * ticksPerByte) + 1.0); + if (delayVar != 0) + delay += random_mt.random(0, delayVar); + + // send the packet to the peers + assert(multiIface); + multiIface->packetOut(pkt, delay); + + // schedule the send done event + parent->schedule(doneEvent, curTick() + delay); + + return true; +} + +void +MultiEtherLink::Link::serialize(const string &base, CheckpointOut &cp) const +{ + bool packet_exists = (packet != nullptr); + paramOut(cp, base + ".packet_exists", packet_exists); + if (packet_exists) + packet->serialize(base + ".packet", cp); + + bool event_scheduled = event->scheduled(); + paramOut(cp, base + ".event_scheduled", event_scheduled); + if (event_scheduled) { + Tick event_time = event->when(); + paramOut(cp, base + ".event_time", event_time); + } +} + +void +MultiEtherLink::Link::unserialize(const string &base, CheckpointIn &cp) +{ + bool packet_exists; + paramIn(cp, base + ".packet_exists", packet_exists); + if (packet_exists) { + packet = make_shared(16384); + packet->unserialize(base + ".packet", cp); + } + + bool event_scheduled; + paramIn(cp, base + ".event_scheduled", event_scheduled); + if (event_scheduled) { 
+ Tick event_time; + paramIn(cp, base + ".event_time", event_time); + parent->schedule(*event, event_time); + } +} + +MultiEtherLink::LocalIface::LocalIface(const std::string &name, + TxLink *tx, + RxLink *rx, + MultiIface *m) : + EtherInt(name), txLink(tx) +{ + tx->setLocalInt(this); + rx->setLocalInt(this); + tx->setMultiInt(m); + rx->setMultiInt(m); +} + +MultiEtherLink * +MultiEtherLinkParams::create() +{ + return new MultiEtherLink(this); +} + + diff --git a/src/dev/net/multi_etherlink.hh b/src/dev/net/multi_etherlink.hh new file mode 100644 index 000000000..0a3e39bd7 --- /dev/null +++ b/src/dev/net/multi_etherlink.hh @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabor Dozsa + */ + +/* @file + * Device module for a full duplex ethernet link for multi gem5 simulations. + * + * See comments in dev/net/multi_iface.hh for a generic description of multi + * gem5 simulations. + * + * This class is meant to be a drop in replacement for the EtherLink class for + * multi gem5 runs. + * + */ +#ifndef __DEV_NET_MULTIETHERLINK_HH__ +#define __DEV_NET_MULTIETHERLINK_HH__ + +#include + +#include "dev/net/etherlink.hh" +#include "params/MultiEtherLink.hh" + +class MultiIface; +class EthPacketData; + +/** + * Model for a fixed bandwidth full duplex ethernet link. + */ +class MultiEtherLink : public EtherObject +{ + protected: + class LocalIface; + + /** + * Model base class for a single uni-directional link. + * + * The link will encapsulate and transfer Ethernet packets to/from + * the message server. 
+ */ + class Link + { + protected: + std::string objName; + MultiEtherLink *parent; + LocalIface *localIface; + EtherDump *dump; + MultiIface *multiIface; + Event *event; + EthPacketPtr packet; + + public: + Link(const std::string &name, MultiEtherLink *p, + EtherDump *d, Event *e) : + objName(name), parent(p), localIface(nullptr), dump(d), + multiIface(nullptr), event(e) {} + + ~Link() {} + + const std::string name() const { return objName; } + bool busy() const { return (bool)packet; } + void setLocalInt(LocalIface *i) { assert(!localIface); localIface=i; } + + void serialize(const std::string &base, CheckpointOut &cp) const; + void unserialize(const std::string &base, CheckpointIn &cp); + }; + + /** + * Model for a send link. + */ + class TxLink : public Link + { + protected: + /** + * Per byte send delay + */ + double ticksPerByte; + /** + * Random component of the send delay + */ + Tick delayVar; + + /** + * Send done callback. Called from doneEvent. + */ + void txDone(); + typedef EventWrapper DoneEvent; + friend void DoneEvent::process(); + DoneEvent doneEvent; + + public: + TxLink(const std::string &name, MultiEtherLink *p, + double invBW, Tick delay_var, EtherDump *d) : + Link(name, p, d, &doneEvent), ticksPerByte(invBW), + delayVar(delay_var), doneEvent(this) {} + ~TxLink() {} + + /** + * Register the multi interface to be used to talk to the + * peer gem5 processes. + */ + void setMultiInt(MultiIface *m) { assert(!multiIface); multiIface=m; } + + /** + * Initiate sending of a packet via this link. + * + * @param packet Ethernet packet to send + */ + bool transmit(EthPacketPtr packet); + }; + + /** + * Model for a receive link. + */ + class RxLink : public Link + { + protected: + + /** + * Transmission delay for the simulated Ethernet link. + */ + Tick linkDelay; + + /** + * Receive done callback method. Called from doneEvent. 
+ */ + void rxDone() ; + typedef EventWrapper DoneEvent; + friend void DoneEvent::process(); + DoneEvent doneEvent; + + public: + + RxLink(const std::string &name, MultiEtherLink *p, + Tick delay, EtherDump *d) : + Link(name, p, d, &doneEvent), + linkDelay(delay), doneEvent(this) {} + ~RxLink() {} + + /** + * Register our multi interface to talk to the peer gem5 processes. + */ + void setMultiInt(MultiIface *m); + }; + + /** + * Interface to the local simulated system + */ + class LocalIface : public EtherInt + { + private: + TxLink *txLink; + + public: + LocalIface(const std::string &name, TxLink *tx, RxLink *rx, + MultiIface *m); + + bool recvPacket(EthPacketPtr pkt) { return txLink->transmit(pkt); } + void sendDone() { peer->sendDone(); } + bool isBusy() { return txLink->busy(); } + }; + + + protected: + /** + * Interface to talk to the peer gem5 processes. + */ + MultiIface *multiIface; + /** + * Send link + */ + TxLink *txLink; + /** + * Receive link + */ + RxLink *rxLink; + LocalIface *localIface; + + public: + typedef MultiEtherLinkParams Params; + MultiEtherLink(const Params *p); + ~MultiEtherLink(); + + const Params * + params() const + { + return dynamic_cast(_params); + } + + virtual EtherInt *getEthPort(const std::string &if_name, + int idx) override; + + void memWriteback() override; + void init() override; + void startup() override; + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; +}; + +#endif // __DEV_NET_MULTIETHERLINK_HH__ diff --git a/src/dev/net/multi_iface.cc b/src/dev/net/multi_iface.cc new file mode 100644 index 000000000..15f69f2ac --- /dev/null +++ b/src/dev/net/multi_iface.cc @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware 
implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabor Dozsa + */ + +/* @file + * The interface class for multi gem5 simulations. 
+ */ + +#include "dev/net/multi_iface.hh" + +#include +#include + +#include "base/random.hh" +#include "base/trace.hh" +#include "debug/MultiEthernet.hh" +#include "debug/MultiEthernetPkt.hh" +#include "dev/net/etherpkt.hh" +#include "sim/sim_exit.hh" +#include "sim/sim_object.hh" + + +MultiIface::Sync *MultiIface::sync = nullptr; +MultiIface::SyncEvent *MultiIface::syncEvent = nullptr; +unsigned MultiIface::recvThreadsNum = 0; +MultiIface *MultiIface::master = nullptr; + +bool +MultiIface::Sync::run(SyncTrigger t, Tick sync_tick) +{ + std::unique_lock sync_lock(lock); + + trigger = t; + if (trigger != SyncTrigger::periodic) { + DPRINTF(MultiEthernet,"MultiIface::Sync::run() trigger:%d\n", + (unsigned)trigger); + } + + switch (state) { + case SyncState::asyncCkpt: + switch (trigger) { + case SyncTrigger::ckpt: + assert(MultiIface::syncEvent->interrupted == false); + state = SyncState::busy; + break; + case SyncTrigger::periodic: + if (waitNum == 0) { + // So all recv threads got an async checkpoint request already + // and a simExit is scheduled at the end of the current tick + // (i.e. it is a periodic sync scheduled at the same tick as + // the simExit). + state = SyncState::idle; + DPRINTF(MultiEthernet,"MultiIface::Sync::run() interrupted " + "due to async ckpt scheduled\n"); + return false; + } else { + // we still need to wait for some receiver thread to get the + // async ckpt request. We are going to proceed as 'interrupted' + // periodic sync. 
+ state = SyncState::interrupted; + DPRINTF(MultiEthernet,"MultiIface::Sync::run() interrupted " + "due to ckpt request is coming in\n"); + } + break; + case SyncTrigger::atomic: + assert(trigger != SyncTrigger::atomic); + } + break; + case SyncState::idle: + state = SyncState::busy; + break; + // Only one sync can be active at any time + case SyncState::interrupted: + case SyncState::busy: + assert(state != SyncState::interrupted); + assert(state != SyncState::busy); + break; + } + // Kick-off the sync unless we are in the middle of an interrupted + // periodic sync + if (state != SyncState::interrupted) { + assert(waitNum == 0); + waitNum = MultiIface::recvThreadsNum; + // initiate the global synchronisation + assert(MultiIface::master != nullptr); + MultiIface::master->syncRaw(triggerToMsg[(unsigned)trigger], sync_tick); + } + // now wait until all receiver threads complete the synchronisation + auto lf = [this]{ return waitNum == 0; }; + cv.wait(sync_lock, lf); + + // we are done + assert(state == SyncState::busy || state == SyncState::interrupted); + bool ret = (state != SyncState::interrupted); + state = SyncState::idle; + return ret; +} + +void +MultiIface::Sync::progress(MsgType msg) +{ + std::unique_lock sync_lock(lock); + + switch (msg) { + case MsgType::cmdAtomicSyncAck: + assert(state == SyncState::busy && trigger == SyncTrigger::atomic); + break; + case MsgType::cmdPeriodicSyncAck: + assert(state == SyncState::busy && trigger == SyncTrigger::periodic); + break; + case MsgType::cmdCkptSyncAck: + assert(state == SyncState::busy && trigger == SyncTrigger::ckpt); + break; + case MsgType::cmdCkptSyncReq: + switch (state) { + case SyncState::busy: + if (trigger == SyncTrigger::ckpt) { + // We are already in a checkpoint sync but got another ckpt + // sync request. This may happen if two (or more) peer gem5 + // processes try to start a ckpt nearly at the same time. 
+ // Incrementing waitNum here (before decrementing it below) + // effectively results in ignoring this new ckpt sync request. + waitNum++; + break; + } + assert (waitNum == recvThreadsNum); + state = SyncState::interrupted; + // we need to fall through here to handle "recvThreadsNum == 1" case + case SyncState::interrupted: + assert(trigger == SyncTrigger::periodic); + assert(waitNum >= 1); + if (waitNum == 1) { + exitSimLoop("checkpoint"); + } + break; + case SyncState::idle: + // There is no on-going sync so we got an async ckpt request. If we + // are the only receiver thread then we need to schedule the + // checkpoint. Otherwise, only change the state to 'asyncCkpt' and + // let the last receiver thread schedule the checkpoint at the + // 'asyncCkpt' case. + // Note that a periodic or resume sync may start later and that can + // trigger a state change to 'interrupted' (so the checkpoint may + // get scheduled at 'interrupted' case finally). + assert(waitNum == 0); + state = SyncState::asyncCkpt; + waitNum = MultiIface::recvThreadsNum; + // we need to fall through here to handle "recvThreadsNum == 1" case + case SyncState::asyncCkpt: + assert(waitNum >= 1); + if (waitNum == 1) + exitSimLoop("checkpoint"); + break; + default: + panic("Unexpected state for checkpoint request message"); + break; + } + break; + default: + panic("Unknown msg type"); + break; + } + waitNum--; + assert(state != SyncState::idle); + // Notify the simulation thread if there is an on-going sync. + if (state != SyncState::asyncCkpt) { + sync_lock.unlock(); + cv.notify_one(); + } +} + +void MultiIface::SyncEvent::start(Tick start, Tick interval) +{ + assert(!scheduled()); + if (interval == 0) + panic("Multi synchronisation period must be greater than zero"); + repeat = interval; + schedule(start); +} + +void +MultiIface::SyncEvent::adjust(Tick start_tick, Tick repeat_tick) +{ + // The new multi interface may require earlier start of the + // synchronisation. 
+ assert(scheduled() == true); + if (start_tick < when()) + reschedule(start_tick); + // A more demanding interface needs a shorter period: keep the minimum. + if (repeat_tick == 0) + panic("Multi synchronisation period must be greater than zero"); + if (repeat_tick < repeat) + repeat = repeat_tick; +} + +void +MultiIface::SyncEvent::process() +{ + /* + * Note that this is a global event so this process method will be called + * by only exactly one thread. + */ + // if we are draining the system then we must not start a periodic sync (as + // it is not sure that all peer gem5 will reach this tick before taking + // the checkpoint). + if (isDraining == true) { + assert(interrupted == false); + interrupted = true; + DPRINTF(MultiEthernet,"MultiIface::SyncEvent::process() interrupted " + "due to draining\n"); + return; + } + if (interrupted == false) + scheduledAt = curTick(); + /* + * We hold the eventq lock at this point but the receiver thread may + * need the lock to schedule new recv events while waiting for the + * multi sync to complete. + * Note that the other simulation threads also release their eventq + * locks while waiting for us due to the global event semantics. + */ + curEventQueue()->unlock(); + // we do a global sync here + interrupted = !MultiIface::sync->run(SyncTrigger::periodic, scheduledAt); + // Global sync completed or got interrupted. + // we are expected to exit with the eventq lock held + curEventQueue()->lock(); + // schedule the next global sync event if this one completed. Otherwise + // (i.e. this one was interrupted by a checkpoint request), we will + // reschedule this one after the draining is complete. + if (!interrupted) + schedule(scheduledAt + repeat); +} + +void MultiIface::SyncEvent::resume() +{ + Tick sync_tick; + assert(!scheduled()); + if (interrupted) { + assert(curTick() >= scheduledAt); + // We have to complete the interrupted periodic sync asap. 
+ // Note that this sync might be interrupted now again with a checkpoint + // request from a peer gem5... + sync_tick = curTick(); + schedule(sync_tick); + } else { + // So we completed the last periodic sync, let's find out the tick for + // next one + assert(curTick() > scheduledAt); + sync_tick = scheduledAt + repeat; + if (sync_tick < curTick()) + panic("Cannot resume periodic synchronisation"); + schedule(sync_tick); + } + DPRINTF(MultiEthernet, + "MultiIface::SyncEvent periodic sync resumed at %lld " + "(curTick:%lld)\n", sync_tick, curTick()); +} + +void MultiIface::SyncEvent::serialize(const std::string &base, + CheckpointOut &cp) const +{ + // Save the periodic multi sync schedule information + paramOut(cp, base + ".periodicSyncRepeat", repeat); + paramOut(cp, base + ".periodicSyncInterrupted", interrupted); + paramOut(cp, base + ".periodicSyncAt", scheduledAt); +} + +void MultiIface::SyncEvent::unserialize(const std::string &base, + CheckpointIn &cp) +{ + paramIn(cp, base + ".periodicSyncRepeat", repeat); + paramIn(cp, base + ".periodicSyncInterrupted", interrupted); + paramIn(cp, base + ".periodicSyncAt", scheduledAt); +} + +MultiIface::MultiIface(unsigned multi_rank, + Tick sync_start, + Tick sync_repeat, + EventManager *em) : + syncStart(sync_start), syncRepeat(sync_repeat), + recvThread(nullptr), eventManager(em), recvDone(nullptr), + scheduledRecvPacket(nullptr), linkDelay(0), rank(multi_rank) +{ + DPRINTF(MultiEthernet, "MultiIface() ctor rank:%d\n",multi_rank); + if (master == nullptr) { + assert(sync == nullptr); + assert(syncEvent == nullptr); + sync = new Sync(); + syncEvent = new SyncEvent(); + master = this; + } +} + +MultiIface::~MultiIface() +{ + assert(recvThread); + delete recvThread; + if (this == master) { + assert(syncEvent); + delete syncEvent; + assert(sync); + delete sync; + } +} + +void +MultiIface::packetOut(EthPacketPtr pkt, Tick send_delay) +{ + MultiHeaderPkt::Header header_pkt; + unsigned address_length = 
MultiHeaderPkt::maxAddressLength(); + + // Prepare a multi header packet for the Ethernet packet we want to + // send out. + header_pkt.msgType = MsgType::dataDescriptor; + header_pkt.sendTick = curTick(); + header_pkt.sendDelay = send_delay; + + // Store also the source and destination addresses. + pkt->packAddress(header_pkt.srcAddress, header_pkt.dstAddress, + address_length); + + header_pkt.dataPacketLength = pkt->size(); + + // Send out the multi header packet followed by the Ethernet packet. + sendRaw(&header_pkt, sizeof(header_pkt), header_pkt.dstAddress); + sendRaw(pkt->data, pkt->size(), header_pkt.dstAddress); + DPRINTF(MultiEthernetPkt, + "MultiIface::sendDataPacket() done size:%d send_delay:%llu " + "src:0x%02x%02x%02x%02x%02x%02x " + "dst:0x%02x%02x%02x%02x%02x%02x\n", + pkt->size(), send_delay, + header_pkt.srcAddress[0], header_pkt.srcAddress[1], + header_pkt.srcAddress[2], header_pkt.srcAddress[3], + header_pkt.srcAddress[4], header_pkt.srcAddress[5], + header_pkt.dstAddress[0], header_pkt.dstAddress[1], + header_pkt.dstAddress[2], header_pkt.dstAddress[3], + header_pkt.dstAddress[4], header_pkt.dstAddress[5]); +} + +bool +MultiIface::recvHeader(MultiHeaderPkt::Header &header_pkt) +{ + // Blocking receive of an incoming multi header packet. + return recvRaw((void *)&header_pkt, sizeof(header_pkt)); +} + +void +MultiIface::recvData(const MultiHeaderPkt::Header &header_pkt) +{ + // We are here because a header packet has been received implying + // that an Ethernet (data) packet is coming in next. + assert(header_pkt.msgType == MsgType::dataDescriptor); + // Allocate storage for the incoming Ethernet packet. + EthPacketPtr new_packet(new EthPacketData(header_pkt.dataPacketLength)); + // Now execute the blocking receive and store the incoming data directly + // in the new EthPacketData object. + if (! 
recvRaw((void *)(new_packet->data), header_pkt.dataPacketLength)) + panic("Missing data packet"); + + new_packet->length = header_pkt.dataPacketLength; + // Grab the event queue lock to schedule a new receive event for the + // data packet. + curEventQueue()->lock(); + // Compute the receive tick. It includes the send delay and the + // simulated link delay. + Tick recv_tick = header_pkt.sendTick + header_pkt.sendDelay + linkDelay; + DPRINTF(MultiEthernetPkt, "MultiIface::recvThread() packet receive, " + "send_tick:%llu send_delay:%llu link_delay:%llu recv_tick:%llu\n", + header_pkt.sendTick, header_pkt.sendDelay, linkDelay, recv_tick); + + if (recv_tick <= curTick()) { + panic("Simulators out of sync - missed packet receive by %llu ticks", + curTick() - recv_tick); + } + // Now we are about to schedule a recvDone event for the new data packet. + // We use the same recvDone object for all incoming data packets. If + // that is already scheduled - i.e. a receive event for a previous + // data packet is already pending - then we have to check whether the + // receive tick for the new packet is earlier than that of the currently + // pending event. Packets may arrive out-of-order with respect to + // simulated receive time. If that is the case, we need to re-schedule the + // recvDone event for the new packet. Otherwise, we save the packet + // pointer and the recv tick for the new packet in the recvQueue. See + // the implementation of the packetIn() method for comments on how this + // information is retrieved from the recvQueue by the simulation thread. 
+ if (!recvDone->scheduled()) { + assert(recvQueue.size() == 0); + assert(scheduledRecvPacket == nullptr); + scheduledRecvPacket = new_packet; + eventManager->schedule(recvDone, recv_tick); + } else if (recvDone->when() > recv_tick) { + recvQueue.emplace(scheduledRecvPacket, recvDone->when()); + eventManager->reschedule(recvDone, recv_tick); + scheduledRecvPacket = new_packet; + } else { + recvQueue.emplace(new_packet, recv_tick); + } + curEventQueue()->unlock(); +} + +void +MultiIface::recvThreadFunc() +{ + EthPacketPtr new_packet; + MultiHeaderPkt::Header header; + + // The new receiver thread shares the event queue with the simulation + // thread (associated with the simulated Ethernet link). + curEventQueue(eventManager->eventQueue()); + // Main loop to wait for and process any incoming message. + for (;;) { + // recvHeader() blocks until the next multi header packet comes in. + if (!recvHeader(header)) { + // We lost connection to the peer gem5 processes most likely + // because one of them called m5 exit. So we stop here. + exit_message("info", 0, "Message server closed connection, " + "simulation is exiting"); + } + // We got a valid multi header packet, let's process it + if (header.msgType == MsgType::dataDescriptor) { + recvData(header); + } else { + // everything else must be a synchronisation related command + sync->progress(header.msgType); + } + } +} + +EthPacketPtr +MultiIface::packetIn() +{ + // We are called within the process() method of the recvDone event. We + // return the packet that triggered the current receive event. + // If there are further packets in the recvQueue, we also have to schedule + // the recvDone event for the next packet with the smallest receive tick. + // The priority queue container ensures that the smallest receive tick is + // always on the top of the queue. + assert(scheduledRecvPacket != nullptr); + EthPacketPtr next_packet = scheduledRecvPacket; + + if (! 
recvQueue.empty()) { + eventManager->schedule(recvDone, recvQueue.top().second); + scheduledRecvPacket = recvQueue.top().first; + recvQueue.pop(); + } else { + scheduledRecvPacket = nullptr; + } + + return next_packet; +} + +void +MultiIface::spawnRecvThread(Event *recv_done, Tick link_delay) +{ + assert(recvThread == nullptr); + // all receive thread must be spawned before simulation starts + assert(eventManager->eventQueue()->getCurTick() == 0); + + recvDone = recv_done; + linkDelay = link_delay; + + recvThread = new std::thread(&MultiIface::recvThreadFunc, this); + + recvThreadsNum++; +} + +DrainState +MultiIface::drain() +{ + DPRINTF(MultiEthernet,"MultiIFace::drain() called\n"); + + // This can be called multiple times in the same drain cycle. + if (master == this) { + syncEvent->isDraining = true; + } + + return DrainState::Drained; +} + +void MultiIface::drainDone() { + if (master == this) { + assert(syncEvent->isDraining == true); + syncEvent->isDraining = false; + // We need to resume the interrupted periodic sync here now that the + // draining is done. If the last periodic sync completed before the + // checkpoint then the next one is already scheduled. + if (syncEvent->interrupted) + syncEvent->resume(); + } +} + +void MultiIface::serialize(const std::string &base, CheckpointOut &cp) const +{ + // Drain the multi interface before the checkpoint is taken. We cannot call + // this as part of the normal drain cycle because this multi sync has to be + // called exactly once after the system is fully drained. + // Note that every peer will take a checkpoint but they may take it at + // different ticks. + // This sync request may interrupt an on-going periodic sync in some peers. 
+    sync->run(SyncTrigger::ckpt, curTick());
+
+    // Save the periodic multi sync status
+    syncEvent->serialize(base, cp);
+
+    unsigned n_rx_packets = recvQueue.size();
+    if (scheduledRecvPacket != nullptr)
+        n_rx_packets++;
+
+    paramOut(cp, base + ".nRxPackets", n_rx_packets);
+
+    if (n_rx_packets > 0) {
+        assert(recvDone->scheduled());
+        scheduledRecvPacket->serialize(base + ".rxPacket[0]", cp);
+    }
+
+    for (unsigned i=1; i < n_rx_packets; i++) {
+        const RecvInfo recv_info = recvQueue.impl().at(i-1);
+        recv_info.first->serialize(base + csprintf(".rxPacket[%d]", i), cp);
+        Tick rx_tick = recv_info.second;
+        paramOut(cp, base + csprintf(".rxTick[%d]", i), rx_tick);
+    }
+}
+
+/**
+ * Restore the receive state from a checkpoint.
+ *
+ * Reads back the periodic sync event, the packet stored as "rxPacket[0]"
+ * (it becomes scheduledRecvPacket) and the packets stored as
+ * "rxPacket[i]" / "rxTick[i]" for i >= 1, which are pushed onto recvQueue.
+ * The interface must be pristine when this is called: empty queue, no
+ * scheduled packet and no pending recvDone event (all three are asserted).
+ *
+ * @param base Prefix of the checkpoint parameter names
+ * @param cp Checkpoint to read from
+ */
+void MultiIface::unserialize(const std::string &base, CheckpointIn &cp)
+{
+    assert(recvQueue.size() == 0);
+    assert(scheduledRecvPacket == nullptr);
+    assert(recvDone->scheduled() == false);
+
+    // restore periodic sync info
+    syncEvent->unserialize(base, cp);
+
+    unsigned n_rx_packets;
+    paramIn(cp, base + ".nRxPackets", n_rx_packets);
+
+    if (n_rx_packets > 0) {
+        // NOTE(review): 16384 is presumably the packet buffer size in
+        // bytes -- confirm against the EthPacketData constructor.
+        scheduledRecvPacket = std::make_shared<EthPacketData>(16384);
+        scheduledRecvPacket->unserialize(base + ".rxPacket[0]", cp);
+        // Note: receive event will be scheduled when the link is unserialized
+    }
+
+    for (unsigned i=1; i < n_rx_packets; i++) {
+        EthPacketPtr rx_packet = std::make_shared<EthPacketData>(16384);
+        rx_packet->unserialize(base + csprintf(".rxPacket[%d]", i), cp);
+        Tick rx_tick = 0;
+        paramIn(cp, base + csprintf(".rxTick[%d]", i), rx_tick);
+        // a queued packet must carry a non-zero receive tick
+        assert(rx_tick > 0);
+        recvQueue.emplace(rx_packet,rx_tick);
+    }
+}
+
+/**
+ * Seed the random number generator. Only the master interface does this,
+ * and the seed is derived from the rank of the process.
+ */
+void MultiIface::initRandom()
+{
+    // Initialize the seed for random generator to avoid the same sequence
+    // in all gem5 peer processes
+    assert(master != nullptr);
+    if (this == master)
+        random_mt.init(5489 * (rank+1) + 257);
+}
+
+/**
+ * Run an atomic global sync, then start (fresh simulation, curTick() == 0)
+ * or resume (restored from a checkpoint) the periodic sync event.
+ * Non-master interfaces only adjust the schedule on a fresh start.
+ */
+void MultiIface::startPeriodicSync()
+{
+    DPRINTF(MultiEthernet, "MultiIface::startPeriodicSync started\n");
+    // Do a global sync here to ensure that peer gem5 processes are around
+    // (actually this may not be needed...)
+    sync->run(SyncTrigger::atomic, curTick());
+
+    // Start the periodic sync if it is a fresh simulation from scratch
+    if (curTick() == 0) {
+        if (this == master) {
+            syncEvent->start(syncStart, syncRepeat);
+            inform("Multi synchronisation activated: start at %lld, "
+                   "repeat at every %lld ticks.\n",
+                   syncStart, syncRepeat);
+        } else {
+            // In case another multiIface object requires different schedule
+            // for periodic sync than the master does.
+            syncEvent->adjust(syncStart, syncRepeat);
+        }
+    } else {
+        // Schedule the next periodic sync if resuming from a checkpoint
+        if (this == master)
+            syncEvent->resume();
+    }
+    DPRINTF(MultiEthernet, "MultiIface::startPeriodicSync done\n");
+}
diff --git a/src/dev/net/multi_iface.hh b/src/dev/net/multi_iface.hh
new file mode 100644
index 000000000..f8ce2abf7
--- /dev/null
+++ b/src/dev/net/multi_iface.hh
@@ -0,0 +1,492 @@
+/*
+ * Copyright (c) 2015 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabor Dozsa + */ + +/* @file + * The interface class for multi gem5 simulations. + * + * Multi gem5 is an extension to gem5 to enable parallel simulation of a + * distributed system (e.g. simulation of a pool of machines + * connected by Ethernet links). A multi gem5 run consists of seperate gem5 + * processes running in parallel. Each gem5 process executes + * the simulation of a component of the simulated distributed system. 
+ * (An example component can be a multi-core board with an Ethernet NIC.)
+ * The MultiIface class below provides services to transfer data and
+ * control messages among the gem5 processes. The main such services are
+ * as follows.
+ *
+ * 1. Send a data packet coming from a simulated Ethernet link. The packet
+ * will be transferred to (all) the target(s) gem5 processes. The send
+ * operation is always performed by the simulation thread, i.e. the gem5
+ * thread that is processing the event queue associated with the simulated
+ * Ethernet link.
+ *
+ * 2. Spawn a receiver thread to process messages coming in from
+ * other gem5 processes. Each simulated Ethernet link has its own
+ * associated receiver thread. The receiver thread saves the incoming packet
+ * and schedules an appropriate receive event in the event queue.
+ *
+ * 3. Schedule a global barrier event periodically to keep the peer gem5
+ * processes in sync. The basic idea is that no gem5 process can run ahead
+ * of its peers by more than the simulated link transmission delay, which
+ * ensures that a corresponding receive event can always be scheduled for
+ * any message coming in from a peer gem5 process.
+ *
+ *
+ *
+ * This interface is an abstract class (the sendRaw(), recvRaw() and
+ * syncRaw() methods are pure virtual). It can work with various low level
+ * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP
+ * stream socket version is implemented in dev/net/tcp_iface.[hh,cc].
+ */
+#ifndef __DEV_NET_MULTI_IFACE_HH__
+#define __DEV_NET_MULTI_IFACE_HH__
+
+#include <array>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <utility>
+
+#include "dev/net/etherpkt.hh"
+#include "dev/net/multi_packet.hh"
+#include "sim/core.hh"
+#include "sim/drain.hh"
+#include "sim/global_event.hh"
+
+class EventManager;
+
+/**
+ * The interface class to talk to peer gem5 processes.
+ */
+class MultiIface : public Drainable
+{
+  public:
+    /*!
+ * The possible reasons a multi sync among gem5 peers is needed for. + */ + enum + class SyncTrigger { + periodic, /*!< Regular periodic sync. This can be interrupted by a + checkpoint sync request */ + ckpt, /*!< sync before taking a checkpoint */ + atomic /*!< sync that cannot be interrupted (e.g. sync at startup) */ + }; + + private: + typedef MultiHeaderPkt::MsgType MsgType; + + /** Sync State-Machine + \dot + digraph Sync { + node [shape=box, fontsize=10]; + idle -> busy + [ label="new trigger\n by run()" fontsize=8 ]; + busy -> busy + [ label="new message by progress():\n(msg == SyncAck &&\nwaitNum > 1) || \n(msg==CkptSyncReq &&\ntrigger == ckpt)" fontsize=8 ]; + busy -> idle + [ label="new message by progress():\n(msg == SyncAck &&\nwaitNum == 1)" fontsize=8 ]; + busy -> interrupted + [ label="new message by progress():\n(msg == CkptSyncReq &&\ntrigger == periodic)" fontsize=8 ]; + idle -> asyncCkpt + [ label="new message by progress():\nmsg == CkptSyncReq" fontsize=8 ]; + asyncCkpt -> asyncCkpt + [ label="new message by progress():\nmsg == CkptSyncReq" fontsize=8 ]; + asyncCkpt -> busy + [ label="new trigger by run():\ntrigger == ckpt" fontsize=8 ]; + asyncCkpt -> idle + [ label="new trigger by run():\n(trigger == periodic &&\nwaitNum == 0) " fontsize=8 ]; + asyncCkpt -> interrupted + [ label="new trigger by run():\n(trigger == periodic &&\nwaitNum > 0) " fontsize=8 ]; + interrupted -> interrupted + [ label="new message by progress():\n(msg == CkptSyncReq &&\nwaitNum > 1)" fontsize=8 ]; + interrupted -> idle + [ label="new message by progress():\n(msg == CkptSyncReq &&\nwaitNum == 1)" fontsize=8 ]; + } + \enddot + */ + /** @class Sync + * This class implements global sync operations among gem5 peer processes. + * + * @note This class is used as a singleton object (shared by all MultiIface + * objects). + */ + class Sync + { + private: + /*! + * Internal state of the sync singleton object. 
+         */
+        enum class SyncState {
+            busy,        /*!< There is an on-going sync. */
+            interrupted, /*!< An on-going periodic sync was interrupted. */
+            asyncCkpt,   /*!< A checkpoint (sim_exit) is already scheduled */
+            idle         /*!< There is no active sync. */
+        };
+        /**
+         * The lock to protect access to the Sync object.
+         */
+        std::mutex lock;
+        /**
+         * Condition variable for the simulation thread to wait on
+         * until all receiver threads complete the current global
+         * synchronisation.
+         */
+        std::condition_variable cv;
+        /**
+         * Number of receiver threads that have not yet completed the
+         * current global synchronisation.
+         */
+        unsigned waitNum;
+        /**
+         * The trigger for the most recent sync.
+         */
+        SyncTrigger trigger;
+        /**
+         * Map sync triggers to request messages. The entries are listed
+         * in the declaration order of SyncTrigger (periodic, ckpt, atomic).
+         */
+        std::array<MsgType, 3> triggerToMsg = {{
+            MsgType::cmdPeriodicSyncReq,
+            MsgType::cmdCkptSyncReq,
+            MsgType::cmdAtomicSyncReq
+        }};
+
+        /**
+         * Current sync state.
+         */
+        SyncState state;
+
+      public:
+        /**
+         * Core method to perform a full multi sync.
+         *
+         * @param t Sync trigger.
+         * @param sync_tick The tick the sync was expected to happen at.
+         * @return true if the sync completed, false if it was interrupted.
+         *
+         * @note In case of an interrupted periodic sync, sync_tick can be less
+         * than curTick() when we resume (i.e. re-run) it
+         */
+        bool run(SyncTrigger t, Tick sync_tick);
+        /**
+         * Callback when the receiver thread gets a sync message.
+         *
+         * @param m Type of the sync message that was received.
+         */
+        void progress(MsgType m);
+
+        /** Start idle, with no receiver thread being waited for. */
+        Sync() : waitNum(0), state(SyncState::idle) {}
+        ~Sync() {}
+    };
+
+
+    /**
+     * The global event to schedule periodic multi sync. It is used as a
+     * singleton object.
+     *
+     * The periodic synchronisation works as follows.
+     * 1. A SyncEvent is scheduled as a global event when startup() is
+     * called.
+     * 2. The process() method of the SyncEvent initiates a new barrier
+     * for each simulated Ethernet link.
+     * 3. Simulation thread(s) then wait until all receiver threads
+     * complete the ongoing barrier.
The global sync event is done.
+     */
+    class SyncEvent : public GlobalSyncEvent
+    {
+      public:
+        /**
+         * Flag to indicate that the most recent periodic sync was interrupted
+         * (by a checkpoint request).
+         */
+        bool interrupted;
+        /**
+         * The tick at which the most recent periodic synchronisation was
+         * scheduled.
+         */
+        Tick scheduledAt;
+        /**
+         * Flag to indicate an on-going drain cycle.
+         */
+         bool isDraining;
+
+      public:
+        /**
+         * Only the first instantiated MultiIface object will
+         * call this constructor.
+         */
+        SyncEvent() : GlobalSyncEvent(Default_Pri, 0), interrupted(false),
+                      scheduledAt(0), isDraining(false) {}
+
+        /** The event must not be scheduled any more when it is destroyed. */
+        ~SyncEvent() { assert (scheduled() == false); }
+        /**
+         * Schedule the first periodic sync event.
+         *
+         * @param start Start tick for multi synchronisation
+         * @param repeat Frequency of multi synchronisation
+         *
+         */
+        void start(Tick start, Tick repeat);
+        /**
+         * Reschedule (if necessary) the periodic sync event.
+         *
+         * @param start Start tick for multi synchronisation
+         * @param repeat Frequency of multi synchronisation
+         *
+         * @note Useful if we have multiple MultiIface objects with
+         * different 'start' and 'repeat' values for global sync.
+         */
+        void adjust(Tick start, Tick repeat);
+        /**
+         * This is a global event so process() will be called by each
+         * simulation thread. (See further comments in the .cc file.)
+         */
+        void process() override;
+        /**
+         * Schedule periodic sync when resuming from a checkpoint.
+         */
+        void resume();
+
+        /**
+         * Checkpoint support for the periodic sync event.
+         *
+         * @param base Prefix of the checkpoint parameter names
+         * @param cp Checkpoint to write to / read from
+         */
+        void serialize(const std::string &base, CheckpointOut &cp) const;
+        void unserialize(const std::string &base, CheckpointIn &cp);
+    };
+
+    /**
+     * The receive thread needs to store the packet pointer and the computed
+     * receive tick for each incoming data packet. This information is used
+     * by the simulation thread when it processes the corresponding receive
+     * event. (See more comments at the implementation of the recvThreadFunc()
+     * and packetIn() methods.)
+     */
+    typedef std::pair<EthPacketPtr, Tick> RecvInfo;
+
+    /**
+     * Comparison predicate for RecvInfo, needed by the recvQueue.
+     * An item compares "less" when its receive tick is larger, so the
+     * priority queue keeps the item with the smallest tick at the top.
+     */
+    struct RecvInfoCompare {
+        bool operator()(const RecvInfo &lhs, const RecvInfo &rhs) const
+        {
+            return lhs.second > rhs.second;
+        }
+    };
+
+    /**
+     * Customized priority queue used to store incoming data packets info by
+     * the receiver thread. We need to expose the underlying container to
+     * enable iterator access for serializing.
+     */
+    class RecvQueue : public std::priority_queue<RecvInfo,
+                                                 std::vector<RecvInfo>,
+                                                 RecvInfoCompare>
+    {
+      public:
+        /** Access the underlying container (in heap order, not sorted). */
+        std::vector<RecvInfo> &impl() { return c; }
+        const std::vector<RecvInfo> &impl() const { return c; }
+    };
+
+    /**
+     * The priority queue to store RecvInfo items ordered by receive ticks.
+     */
+    RecvQueue recvQueue;
+    /**
+     * The singleton Sync object to perform multi synchronisation.
+     */
+    static Sync *sync;
+    /**
+     * The singleton SyncEvent object to schedule periodic multi sync.
+     */
+    static SyncEvent *syncEvent;
+    /**
+     * Tick to schedule the first multi sync event.
+     * This is just an optimization: we do not need any multi sync
+     * event until the simulated NIC is brought up by the OS.
+     */
+    Tick syncStart;
+    /**
+     * Frequency of multi sync events in ticks.
+     */
+    Tick syncRepeat;
+    /**
+     * Receiver thread pointer.
+     * Each MultiIface object must have exactly one receiver thread.
+     */
+    std::thread *recvThread;
+    /**
+     * The event manager associated with the MultiIface object.
+     */
+    EventManager *eventManager;
+
+    /**
+     * The receive done event for the simulated Ethernet link.
+     * It is scheduled by the receiver thread for each incoming data
+     * packet.
+     */
+    Event *recvDone;
+
+    /**
+     * The packet that belongs to the currently scheduled recvDone event.
+     */
+    EthPacketPtr scheduledRecvPacket;
+
+    /**
+     * The link delay in ticks for the simulated Ethernet link.
+     */
+    Tick linkDelay;
+
+    /**
+     * The rank of this process among the gem5 peers.
+     */
+    unsigned rank;
+    /**
+     * Total number of receiver threads (in this gem5 process).
+ * During the simulation it should be constant and equal to the + * number of MultiIface objects (i.e. simulated Ethernet + * links). + */ + static unsigned recvThreadsNum; + /** + * The very first MultiIface object created becomes the master. We need + * a master to co-ordinate the global synchronisation. + */ + static MultiIface *master; + + protected: + /** + * Low level generic send routine. + * @param buf buffer that holds the data to send out + * @param length number of bytes to send + * @param dest_addr address of the target (simulated NIC). This may be + * used by a subclass for optimization (e.g. optimize broadcast) + */ + virtual void sendRaw(void *buf, + unsigned length, + const MultiHeaderPkt::AddressType dest_addr) = 0; + /** + * Low level generic receive routine. + * @param buf the buffer to store the incoming message + * @param length buffer size (in bytes) + */ + virtual bool recvRaw(void *buf, unsigned length) = 0; + /** + * Low level request for synchronisation among gem5 processes. Only one + * MultiIface object needs to call this (in each gem5 process) to trigger + * a multi sync. + * + * @param sync_req Sync request command. + * @param sync_tick The tick when sync is expected to happen in the sender. + */ + virtual void syncRaw(MsgType sync_req, Tick sync_tick) = 0; + /** + * The function executed by a receiver thread. + */ + void recvThreadFunc(); + /** + * Receive a multi header packet. Called by the receiver thread. + * @param header the structure to store the incoming header packet. + * @return false if any error occured during the receive, true otherwise + * + * A header packet can carry a control command (e.g. 'barrier leave') or + * information about a data packet that is following the header packet + * back to back. + */ + bool recvHeader(MultiHeaderPkt::Header &header); + /** + * Receive a data packet. Called by the receiver thread. + * @param data_header The packet descriptor for the expected incoming data + * packet. 
+ */ + void recvData(const MultiHeaderPkt::Header &data_header); + + public: + + /** + * ctor + * @param multi_rank Rank of this gem5 process within the multi run + * @param sync_start Start tick for multi synchronisation + * @param sync_repeat Frequency for multi synchronisation + * @param em The event manager associated with the simulated Ethernet link + */ + MultiIface(unsigned multi_rank, + Tick sync_start, + Tick sync_repeat, + EventManager *em); + + virtual ~MultiIface(); + /** + * Send out an Ethernet packet. + * @param pkt The Ethernet packet to send. + * @param send_delay The delay in ticks for the send completion event. + */ + void packetOut(EthPacketPtr pkt, Tick send_delay); + /** + * Fetch the next packet from the receive queue. + */ + EthPacketPtr packetIn(); + + /** + * spawn the receiver thread. + * @param recv_done The receive done event associated with the simulated + * Ethernet link. + * @param link_delay The link delay for the simulated Ethernet link. + */ + void spawnRecvThread(Event *recv_done, + Tick link_delay); + /** + * Initialize the random number generator with a different seed in each + * peer gem5 process. + */ + void initRandom(); + + DrainState drain() override; + + /** + * Callback when draining is complete. + */ + void drainDone(); + + /** + * Initialize the periodic synchronisation among peer gem5 processes. 
+ */ + void startPeriodicSync(); + + void serialize(const std::string &base, CheckpointOut &cp) const; + void unserialize(const std::string &base, CheckpointIn &cp); + +}; + + +#endif // __DEV_NET_MULTI_IFACE_HH__ diff --git a/src/dev/net/multi_packet.cc b/src/dev/net/multi_packet.cc new file mode 100644 index 000000000..85f76b0c4 --- /dev/null +++ b/src/dev/net/multi_packet.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabor Dozsa
+ */
+
+/* @file
+ * MultiHeaderPkt class to encapsulate multi-gem5 header packets
+ *
+ */
+
+#include "dev/net/multi_packet.hh"
+
+#include <cstddef>
+#include <cstring>
+
+#include "base/inet.hh"
+
+/** Size in bytes of an address field (AddressType). */
+unsigned
+MultiHeaderPkt::maxAddressLength()
+{
+    return sizeof(AddressType);
+}
+
+/** Set every byte of @p addr to zero. */
+void
+MultiHeaderPkt::clearAddress(AddressType &addr)
+{
+    // addr is a reference to an array, so sizeof yields the array size
+    std::memset(addr, 0, sizeof(addr));
+}
+
+/** @return true if the two addresses are byte-wise identical. */
+bool
+MultiHeaderPkt::isAddressEqual(const AddressType &addr1,
+                               const AddressType &addr2)
+{
+    return (std::memcmp(addr1, addr2, sizeof(addr1)) == 0);
+}
+
+/** @return true if @p addr1 orders before @p addr2 in memcmp() order. */
+bool
+MultiHeaderPkt::isAddressLess(const AddressType &addr1,
+                              const AddressType &addr2)
+{
+    return (std::memcmp(addr1, addr2, sizeof(addr1)) < 0);
+}
+
+/** Copy all bytes of @p src into @p dest. */
+void
+MultiHeaderPkt::copyAddress(AddressType &dest, const AddressType &src)
+{
+    std::memcpy(dest, src, sizeof(dest));
+}
+
+// The three predicates below view the raw address bytes as a Net::EthAddr.
+// The cast keeps the const qualifier of the argument.
+
+/** @return the result of Net::EthAddr::broadcast() for @p addr. */
+bool
+MultiHeaderPkt::isBroadcastAddress(const AddressType &addr)
+{
+    return reinterpret_cast<const Net::EthAddr *>(&addr)->broadcast();
+}
+
+/** @return the result of Net::EthAddr::multicast() for @p addr. */
+bool
+MultiHeaderPkt::isMulticastAddress(const AddressType &addr)
+{
+    return reinterpret_cast<const Net::EthAddr *>(&addr)->multicast();
+}
+
+/** @return the result of Net::EthAddr::unicast() for @p addr. */
+bool
+MultiHeaderPkt::isUnicastAddress(const AddressType &addr)
+{
+    return reinterpret_cast<const Net::EthAddr *>(&addr)->unicast();
+}
diff --git a/src/dev/net/multi_packet.hh b/src/dev/net/multi_packet.hh
new file mode 100644
index 000000000..3d8e85dfa
--- /dev/null
+++ b/src/dev/net/multi_packet.hh
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2015 ARM Limited
+ * All rights reserved
+ *
+ * The
license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabor Dozsa
+ */
+
+/* @file
+ * Header packet class for multi gem5 runs.
+ *
+ * For a high level description about multi gem5 see comments in
+ * header file multi_iface.hh.
+ *
+ * The MultiHeaderPkt class defines the format of message headers
+ * sent among gem5 processes during a multi gem5 simulation. A header packet
+ * can either carry the description of a data packet (i.e. a simulated
+ * Ethernet packet) or a synchronisation related control command. In case of
+ * data packet description, the corresponding data packet always follows
+ * the header packet back-to-back.
+ */
+#ifndef __DEV_NET_MULTI_PACKET_HH__
+#define __DEV_NET_MULTI_PACKET_HH__
+
+#include <cstdint>
+
+#include "base/types.hh"
+
+class MultiHeaderPkt
+{
+  private:
+    // Constructor and destructor are private: the class cannot be
+    // instantiated from outside.
+    MultiHeaderPkt() {}
+    ~MultiHeaderPkt() {}
+
+  public:
+    /**
+     * Simple type to help with calculating space requirements for
+     * the corresponding header field.
+     */
+    typedef uint8_t AddressType[6];
+
+    /**
+     * The msg type defines what information a multi header packet carries.
+     */
+    enum class MsgType
+    {
+        dataDescriptor,
+        cmdPeriodicSyncReq,
+        cmdPeriodicSyncAck,
+        cmdCkptSyncReq,
+        cmdCkptSyncAck,
+        cmdAtomicSyncReq,
+        cmdAtomicSyncAck,
+        unknown
+    };
+
+    struct Header
+    {
+        /**
+         * The msg type field is valid for all header packets. In case of
+         * a synchronisation control command this is the only valid field.
+ */ + MsgType msgType; + Tick sendTick; + Tick sendDelay; + /** + * Actual length of the simulated Ethernet packet. + */ + unsigned dataPacketLength; + /** + * Source MAC address. + */ + AddressType srcAddress; + /** + * Destination MAC address. + */ + AddressType dstAddress; + }; + + static unsigned maxAddressLength(); + + /** + * Static functions for manipulating and comparing MAC addresses. + */ + static void clearAddress(AddressType &addr); + static bool isAddressEqual(const AddressType &addr1, + const AddressType &addr2); + static bool isAddressLess(const AddressType &addr1, + const AddressType &addr2); + + static void copyAddress(AddressType &dest, + const AddressType &src); + + static bool isUnicastAddress(const AddressType &addr); + static bool isMulticastAddress(const AddressType &addr); + static bool isBroadcastAddress(const AddressType &addr); +}; + +#endif // __DEV_NET_MULTI_PACKET_HH__ diff --git a/src/dev/net/ns_gige.cc b/src/dev/net/ns_gige.cc new file mode 100644 index 000000000..a1dc23b50 --- /dev/null +++ b/src/dev/net/ns_gige.cc @@ -0,0 +1,2483 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Nathan Binkert + * Lisa Hsu + */ + +/** @file + * Device module for modelling the National Semiconductor + * DP83820 ethernet controller. Does not support priority queueing + */ + +#include "dev/net/ns_gige.hh" + +#include +#include +#include + +#include "base/debug.hh" +#include "base/inet.hh" +#include "base/types.hh" +#include "config/the_isa.hh" +#include "debug/EthernetAll.hh" +#include "dev/net/etherlink.hh" +#include "mem/packet.hh" +#include "mem/packet_access.hh" +#include "params/NSGigE.hh" +#include "sim/system.hh" + +// clang complains about std::set being overloaded with Packet::set if +// we open up the entire namespace std +using std::make_shared; +using std::min; +using std::ostream; +using std::string; + +const char *NsRxStateStrings[] = +{ + "rxIdle", + "rxDescRefr", + "rxDescRead", + "rxFifoBlock", + "rxFragWrite", + "rxDescWrite", + "rxAdvance" +}; + +const char *NsTxStateStrings[] = +{ + "txIdle", + "txDescRefr", + "txDescRead", + "txFifoBlock", + "txFragRead", + "txDescWrite", + "txAdvance" +}; + +const char *NsDmaState[] = +{ + "dmaIdle", + "dmaReading", + "dmaWriting", + "dmaReadWaiting", + "dmaWriteWaiting" +}; + +using namespace Net; 
+using namespace TheISA; + +/////////////////////////////////////////////////////////////////////// +// +// NSGigE PCI Device +// +NSGigE::NSGigE(Params *p) + : EtherDevBase(p), ioEnable(false), + txFifo(p->tx_fifo_size), rxFifo(p->rx_fifo_size), + txPacket(0), rxPacket(0), txPacketBufPtr(NULL), rxPacketBufPtr(NULL), + txXferLen(0), rxXferLen(0), rxDmaFree(false), txDmaFree(false), + txState(txIdle), txEnable(false), CTDD(false), txHalt(false), + txFragPtr(0), txDescCnt(0), txDmaState(dmaIdle), rxState(rxIdle), + rxEnable(false), CRDD(false), rxPktBytes(0), rxHalt(false), + rxFragPtr(0), rxDescCnt(0), rxDmaState(dmaIdle), extstsEnable(false), + eepromState(eepromStart), eepromClk(false), eepromBitsToRx(0), + eepromOpcode(0), eepromAddress(0), eepromData(0), + dmaReadDelay(p->dma_read_delay), dmaWriteDelay(p->dma_write_delay), + dmaReadFactor(p->dma_read_factor), dmaWriteFactor(p->dma_write_factor), + rxDmaData(NULL), rxDmaAddr(0), rxDmaLen(0), + txDmaData(NULL), txDmaAddr(0), txDmaLen(0), + rxDmaReadEvent(this), rxDmaWriteEvent(this), + txDmaReadEvent(this), txDmaWriteEvent(this), + dmaDescFree(p->dma_desc_free), dmaDataFree(p->dma_data_free), + txDelay(p->tx_delay), rxDelay(p->rx_delay), + rxKickTick(0), rxKickEvent(this), txKickTick(0), txKickEvent(this), + txEvent(this), rxFilterEnable(p->rx_filter), + acceptBroadcast(false), acceptMulticast(false), acceptUnicast(false), + acceptPerfect(false), acceptArp(false), multicastHashEnable(false), + intrDelay(p->intr_delay), intrTick(0), cpuPendingIntr(false), + intrEvent(0), interface(0) +{ + + + interface = new NSGigEInt(name() + ".int0", this); + + regsReset(); + memcpy(&rom.perfectMatch, p->hardware_address.bytes(), ETH_ADDR_LEN); + + memset(&rxDesc32, 0, sizeof(rxDesc32)); + memset(&txDesc32, 0, sizeof(txDesc32)); + memset(&rxDesc64, 0, sizeof(rxDesc64)); + memset(&txDesc64, 0, sizeof(txDesc64)); +} + +NSGigE::~NSGigE() +{ + delete interface; +} + +/** + * This is to write to the PCI general configuration registers 
+ */ +Tick +NSGigE::writeConfig(PacketPtr pkt) +{ + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; + if (offset < PCI_DEVICE_SPECIFIC) + PciDevice::writeConfig(pkt); + else + panic("Device specific PCI config space not implemented!\n"); + + switch (offset) { + // seems to work fine without all these PCI settings, but i + // put in the IO to double check, an assertion will fail if we + // need to properly implement it + case PCI_COMMAND: + if (config.data[offset] & PCI_CMD_IOSE) + ioEnable = true; + else + ioEnable = false; + break; + } + + return configDelay; +} + +EtherInt* +NSGigE::getEthPort(const std::string &if_name, int idx) +{ + if (if_name == "interface") { + if (interface->getPeer()) + panic("interface already connected to\n"); + return interface; + } + return NULL; +} + +/** + * This reads the device registers, which are detailed in the NS83820 + * spec sheet + */ +Tick +NSGigE::read(PacketPtr pkt) +{ + assert(ioEnable); + + //The mask is to give you only the offset into the device register file + Addr daddr = pkt->getAddr() & 0xfff; + DPRINTF(EthernetPIO, "read da=%#x pa=%#x size=%d\n", + daddr, pkt->getAddr(), pkt->getSize()); + + + // there are some reserved registers, you can see ns_gige_reg.h and + // the spec sheet for details + if (daddr > LAST && daddr <= RESERVED) { + panic("Accessing reserved register"); + } else if (daddr > RESERVED && daddr <= 0x3FC) { + return readConfig(pkt); + } else if (daddr >= MIB_START && daddr <= MIB_END) { + // don't implement all the MIB's. 
hopefully the kernel
+        // doesn't actually DEPEND upon their values
+        // MIB are just hardware stats keepers
+        pkt->set<uint32_t>(0);
+        pkt->makeAtomicResponse();
+        return pioDelay;
+    } else if (daddr > 0x3FC)
+        panic("Something is messed up!\n");
+
+    // Every register handled below is accessed as one 32-bit word; 'reg'
+    // aliases the packet's data buffer, so assigning to it fills the reply.
+    assert(pkt->getSize() == sizeof(uint32_t));
+        uint32_t &reg = *pkt->getPtr<uint32_t>();
+        uint16_t rfaddr;
+
+        switch (daddr) {
+          case CR:
+            reg = regs.command;
+            //these are supposed to be cleared on a read
+            reg &= ~(CR_RXD | CR_TXD | CR_TXR | CR_RXR);
+            break;
+
+          case CFGR:
+            reg = regs.config;
+            break;
+
+          case MEAR:
+            reg = regs.mear;
+            break;
+
+          case PTSCR:
+            reg = regs.ptscr;
+            break;
+
+          case ISR:
+            // reading the interrupt status also clears all interrupts
+            reg = regs.isr;
+            devIntrClear(ISR_ALL);
+            break;
+
+          case IMR:
+            reg = regs.imr;
+            break;
+
+          case IER:
+            reg = regs.ier;
+            break;
+
+          case IHR:
+            reg = regs.ihr;
+            break;
+
+          case TXDP:
+            reg = regs.txdp;
+            break;
+
+          case TXDP_HI:
+            reg = regs.txdp_hi;
+            break;
+
+          case TX_CFG:
+            reg = regs.txcfg;
+            break;
+
+          case GPIOR:
+            reg = regs.gpior;
+            break;
+
+          case RXDP:
+            reg = regs.rxdp;
+            break;
+
+          case RXDP_HI:
+            reg = regs.rxdp_hi;
+            break;
+
+          case RX_CFG:
+            reg = regs.rxcfg;
+            break;
+
+          case PQCR:
+            reg = regs.pqcr;
+            break;
+
+          case WCSR:
+            reg = regs.wcsr;
+            break;
+
+          case PCR:
+            reg = regs.pcr;
+            break;
+
+            // see the spec sheet for how RFCR and RFDR work
+            // basically, you write to RFCR to tell the machine
+            // what you want to do next, then you act upon RFDR,
+            // and the device will be prepared b/c of what you
+            // wrote to RFCR
+          case RFCR:
+            reg = regs.rfcr;
+            break;
+
+          case RFDR:
+            rfaddr = (uint16_t)(regs.rfcr & RFCR_RFADDR);
+            switch (rfaddr) {
+              // Read from perfect match ROM octets
+              // (two octets per read, the lower-indexed octet in bits 7:0)
+              case 0x000:
+                reg = rom.perfectMatch[1];
+                reg = reg << 8;
+                reg += rom.perfectMatch[0];
+                break;
+              case 0x002:
+                reg = rom.perfectMatch[3] << 8;
+                reg += rom.perfectMatch[2];
+                break;
+              case 0x004:
+                reg = rom.perfectMatch[5] << 8;
+                reg += rom.perfectMatch[4];
+                break;
+              default:
+                // Read filter hash table
+                if (rfaddr >=
FHASH_ADDR && + rfaddr < FHASH_ADDR + FHASH_SIZE) { + + // Only word-aligned reads supported + if (rfaddr % 2) + panic("unaligned read from filter hash table!"); + + reg = rom.filterHash[rfaddr - FHASH_ADDR + 1] << 8; + reg += rom.filterHash[rfaddr - FHASH_ADDR]; + break; + } + + panic("reading RFDR for something other than pattern" + " matching or hashing! %#x\n", rfaddr); + } + break; + + case SRR: + reg = regs.srr; + break; + + case MIBC: + reg = regs.mibc; + reg &= ~(MIBC_MIBS | MIBC_ACLR); + break; + + case VRCR: + reg = regs.vrcr; + break; + + case VTCR: + reg = regs.vtcr; + break; + + case VDR: + reg = regs.vdr; + break; + + case CCSR: + reg = regs.ccsr; + break; + + case TBICR: + reg = regs.tbicr; + break; + + case TBISR: + reg = regs.tbisr; + break; + + case TANAR: + reg = regs.tanar; + break; + + case TANLPAR: + reg = regs.tanlpar; + break; + + case TANER: + reg = regs.taner; + break; + + case TESR: + reg = regs.tesr; + break; + + case M5REG: + reg = 0; + if (params()->rx_thread) + reg |= M5REG_RX_THREAD; + if (params()->tx_thread) + reg |= M5REG_TX_THREAD; + if (params()->rss) + reg |= M5REG_RSS; + break; + + default: + panic("reading unimplemented register: addr=%#x", daddr); + } + + DPRINTF(EthernetPIO, "read from %#x: data=%d data=%#x\n", + daddr, reg, reg); + + pkt->makeAtomicResponse(); + return pioDelay; +} + +Tick +NSGigE::write(PacketPtr pkt) +{ + assert(ioEnable); + + Addr daddr = pkt->getAddr() & 0xfff; + DPRINTF(EthernetPIO, "write da=%#x pa=%#x size=%d\n", + daddr, pkt->getAddr(), pkt->getSize()); + + if (daddr > LAST && daddr <= RESERVED) { + panic("Accessing reserved register"); + } else if (daddr > RESERVED && daddr <= 0x3FC) { + return writeConfig(pkt); + } else if (daddr > 0x3FC) + panic("Something is messed up!\n"); + + if (pkt->getSize() == sizeof(uint32_t)) { + uint32_t reg = pkt->get(); + uint16_t rfaddr; + + DPRINTF(EthernetPIO, "write data=%d data=%#x\n", reg, reg); + + switch (daddr) { + case CR: + regs.command = reg; + if (reg & 
CR_TXD) { + txEnable = false; + } else if (reg & CR_TXE) { + txEnable = true; + + // the kernel is enabling the transmit machine + if (txState == txIdle) + txKick(); + } + + if (reg & CR_RXD) { + rxEnable = false; + } else if (reg & CR_RXE) { + rxEnable = true; + + if (rxState == rxIdle) + rxKick(); + } + + if (reg & CR_TXR) + txReset(); + + if (reg & CR_RXR) + rxReset(); + + if (reg & CR_SWI) + devIntrPost(ISR_SWI); + + if (reg & CR_RST) { + txReset(); + rxReset(); + + regsReset(); + } + break; + + case CFGR: + if (reg & CFGR_LNKSTS || + reg & CFGR_SPDSTS || + reg & CFGR_DUPSTS || + reg & CFGR_RESERVED || + reg & CFGR_T64ADDR || + reg & CFGR_PCI64_DET) { + // First clear all writable bits + regs.config &= CFGR_LNKSTS | CFGR_SPDSTS | CFGR_DUPSTS | + CFGR_RESERVED | CFGR_T64ADDR | + CFGR_PCI64_DET; + // Now set the appropriate writable bits + regs.config |= reg & ~(CFGR_LNKSTS | CFGR_SPDSTS | CFGR_DUPSTS | + CFGR_RESERVED | CFGR_T64ADDR | + CFGR_PCI64_DET); + } + +// all these #if 0's are because i don't THINK the kernel needs to +// have these implemented. if there is a problem relating to one of +// these, you may need to add functionality in. 
+ +// grouped together and #if 0'ed to avoid empty if body and make clang happy +#if 0 + if (reg & CFGR_TBI_EN) ; + if (reg & CFGR_MODE_1000) ; + + if (reg & CFGR_PINT_DUPSTS || + reg & CFGR_PINT_LNKSTS || + reg & CFGR_PINT_SPDSTS) + ; + + if (reg & CFGR_TMRTEST) ; + if (reg & CFGR_MRM_DIS) ; + if (reg & CFGR_MWI_DIS) ; + + if (reg & CFGR_DATA64_EN) ; + if (reg & CFGR_M64ADDR) ; + if (reg & CFGR_PHY_RST) ; + if (reg & CFGR_PHY_DIS) ; + + if (reg & CFGR_REQALG) ; + if (reg & CFGR_SB) ; + if (reg & CFGR_POW) ; + if (reg & CFGR_EXD) ; + if (reg & CFGR_PESEL) ; + if (reg & CFGR_BROM_DIS) ; + if (reg & CFGR_EXT_125) ; + if (reg & CFGR_BEM) ; + + if (reg & CFGR_T64ADDR) ; + // panic("CFGR_T64ADDR is read only register!\n"); +#endif + if (reg & CFGR_AUTO_1000) + panic("CFGR_AUTO_1000 not implemented!\n"); + + if (reg & CFGR_PCI64_DET) + panic("CFGR_PCI64_DET is read only register!\n"); + + if (reg & CFGR_EXTSTS_EN) + extstsEnable = true; + else + extstsEnable = false; + break; + + case MEAR: + // Clear writable bits + regs.mear &= MEAR_EEDO; + // Set appropriate writable bits + regs.mear |= reg & ~MEAR_EEDO; + + // FreeBSD uses the EEPROM to read PMATCH (for the MAC address) + // even though it could get it through RFDR + if (reg & MEAR_EESEL) { + // Rising edge of clock + if (reg & MEAR_EECLK && !eepromClk) + eepromKick(); + } + else { + eepromState = eepromStart; + regs.mear &= ~MEAR_EEDI; + } + + eepromClk = reg & MEAR_EECLK; + + // since phy is completely faked, MEAR_MD* don't matter + +// grouped together and #if 0'ed to avoid empty if body and make clang happy +#if 0 + if (reg & MEAR_MDIO) ; + if (reg & MEAR_MDDIR) ; + if (reg & MEAR_MDC) ; +#endif + break; + + case PTSCR: + regs.ptscr = reg & ~(PTSCR_RBIST_RDONLY); + // these control BISTs for various parts of chip - we + // don't care or do just fake that the BIST is done + if (reg & PTSCR_RBIST_EN) + regs.ptscr |= PTSCR_RBIST_DONE; + if (reg & PTSCR_EEBIST_EN) + regs.ptscr &= ~PTSCR_EEBIST_EN; + if (reg & 
PTSCR_EELOAD_EN) + regs.ptscr &= ~PTSCR_EELOAD_EN; + break; + + case ISR: /* writing to the ISR has no effect */ + panic("ISR is a read only register!\n"); + + case IMR: + regs.imr = reg; + devIntrChangeMask(); + break; + + case IER: + regs.ier = reg; + break; + + case IHR: + regs.ihr = reg; + /* not going to implement real interrupt holdoff */ + break; + + case TXDP: + regs.txdp = (reg & 0xFFFFFFFC); + assert(txState == txIdle); + CTDD = false; + break; + + case TXDP_HI: + regs.txdp_hi = reg; + break; + + case TX_CFG: + regs.txcfg = reg; +#if 0 + if (reg & TX_CFG_CSI) ; + if (reg & TX_CFG_HBI) ; + if (reg & TX_CFG_MLB) ; + if (reg & TX_CFG_ATP) ; + if (reg & TX_CFG_ECRETRY) { + /* + * this could easily be implemented, but considering + * the network is just a fake pipe, wouldn't make + * sense to do this + */ + } + + if (reg & TX_CFG_BRST_DIS) ; +#endif + +#if 0 + /* we handle our own DMA, ignore the kernel's exhortations */ + if (reg & TX_CFG_MXDMA) ; +#endif + + // also, we currently don't care about fill/drain + // thresholds though this may change in the future with + // more realistic networks or a driver which changes it + // according to feedback + + break; + + case GPIOR: + // Only write writable bits + regs.gpior &= GPIOR_UNUSED | GPIOR_GP5_IN | GPIOR_GP4_IN + | GPIOR_GP3_IN | GPIOR_GP2_IN | GPIOR_GP1_IN; + regs.gpior |= reg & ~(GPIOR_UNUSED | GPIOR_GP5_IN | GPIOR_GP4_IN + | GPIOR_GP3_IN | GPIOR_GP2_IN | GPIOR_GP1_IN); + /* these just control general purpose i/o pins, don't matter */ + break; + + case RXDP: + regs.rxdp = reg; + CRDD = false; + break; + + case RXDP_HI: + regs.rxdp_hi = reg; + break; + + case RX_CFG: + regs.rxcfg = reg; +#if 0 + if (reg & RX_CFG_AEP) ; + if (reg & RX_CFG_ARP) ; + if (reg & RX_CFG_STRIPCRC) ; + if (reg & RX_CFG_RX_RD) ; + if (reg & RX_CFG_ALP) ; + if (reg & RX_CFG_AIRL) ; + + /* we handle our own DMA, ignore what kernel says about it */ + if (reg & RX_CFG_MXDMA) ; + + //also, we currently don't care about fill/drain 
thresholds + //though this may change in the future with more realistic + //networks or a driver which changes it according to feedback + if (reg & (RX_CFG_DRTH | RX_CFG_DRTH0)) ; +#endif + break; + + case PQCR: + /* there is no priority queueing used in the linux 2.6 driver */ + regs.pqcr = reg; + break; + + case WCSR: + /* not going to implement wake on LAN */ + regs.wcsr = reg; + break; + + case PCR: + /* not going to implement pause control */ + regs.pcr = reg; + break; + + case RFCR: + regs.rfcr = reg; + + rxFilterEnable = (reg & RFCR_RFEN) ? true : false; + acceptBroadcast = (reg & RFCR_AAB) ? true : false; + acceptMulticast = (reg & RFCR_AAM) ? true : false; + acceptUnicast = (reg & RFCR_AAU) ? true : false; + acceptPerfect = (reg & RFCR_APM) ? true : false; + acceptArp = (reg & RFCR_AARP) ? true : false; + multicastHashEnable = (reg & RFCR_MHEN) ? true : false; + +#if 0 + if (reg & RFCR_APAT) + panic("RFCR_APAT not implemented!\n"); +#endif + if (reg & RFCR_UHEN) + panic("Unicast hash filtering not used by drivers!\n"); + + if (reg & RFCR_ULM) + panic("RFCR_ULM not implemented!\n"); + + break; + + case RFDR: + rfaddr = (uint16_t)(regs.rfcr & RFCR_RFADDR); + switch (rfaddr) { + case 0x000: + rom.perfectMatch[0] = (uint8_t)reg; + rom.perfectMatch[1] = (uint8_t)(reg >> 8); + break; + case 0x002: + rom.perfectMatch[2] = (uint8_t)reg; + rom.perfectMatch[3] = (uint8_t)(reg >> 8); + break; + case 0x004: + rom.perfectMatch[4] = (uint8_t)reg; + rom.perfectMatch[5] = (uint8_t)(reg >> 8); + break; + default: + + if (rfaddr >= FHASH_ADDR && + rfaddr < FHASH_ADDR + FHASH_SIZE) { + + // Only word-aligned writes supported + if (rfaddr % 2) + panic("unaligned write to filter hash table!"); + + rom.filterHash[rfaddr - FHASH_ADDR] = (uint8_t)reg; + rom.filterHash[rfaddr - FHASH_ADDR + 1] + = (uint8_t)(reg >> 8); + break; + } + panic("writing RFDR for something other than pattern matching " + "or hashing! 
%#x\n", rfaddr); + } + + case BRAR: + regs.brar = reg; + break; + + case BRDR: + panic("the driver never uses BRDR, something is wrong!\n"); + + case SRR: + panic("SRR is read only register!\n"); + + case MIBC: + panic("the driver never uses MIBC, something is wrong!\n"); + + case VRCR: + regs.vrcr = reg; + break; + + case VTCR: + regs.vtcr = reg; + break; + + case VDR: + panic("the driver never uses VDR, something is wrong!\n"); + + case CCSR: + /* not going to implement clockrun stuff */ + regs.ccsr = reg; + break; + + case TBICR: + regs.tbicr = reg; + if (reg & TBICR_MR_LOOPBACK) + panic("TBICR_MR_LOOPBACK never used, something wrong!\n"); + + if (reg & TBICR_MR_AN_ENABLE) { + regs.tanlpar = regs.tanar; + regs.tbisr |= (TBISR_MR_AN_COMPLETE | TBISR_MR_LINK_STATUS); + } + +#if 0 + if (reg & TBICR_MR_RESTART_AN) ; +#endif + + break; + + case TBISR: + panic("TBISR is read only register!\n"); + + case TANAR: + // Only write the writable bits + regs.tanar &= TANAR_RF1 | TANAR_RF2 | TANAR_UNUSED; + regs.tanar |= reg & ~(TANAR_RF1 | TANAR_RF2 | TANAR_UNUSED); + + // Pause capability unimplemented +#if 0 + if (reg & TANAR_PS2) ; + if (reg & TANAR_PS1) ; +#endif + + break; + + case TANLPAR: + panic("this should only be written to by the fake phy!\n"); + + case TANER: + panic("TANER is read only register!\n"); + + case TESR: + regs.tesr = reg; + break; + + default: + panic("invalid register access daddr=%#x", daddr); + } + } else { + panic("Invalid Request Size"); + } + pkt->makeAtomicResponse(); + return pioDelay; +} + +void +NSGigE::devIntrPost(uint32_t interrupts) +{ + if (interrupts & ISR_RESERVE) + panic("Cannot set a reserved interrupt"); + + if (interrupts & ISR_NOIMPL) + warn("interrupt not implemented %#x\n", interrupts); + + interrupts &= ISR_IMPL; + regs.isr |= interrupts; + + if (interrupts & regs.imr) { + if (interrupts & ISR_SWI) { + totalSwi++; + } + if (interrupts & ISR_RXIDLE) { + totalRxIdle++; + } + if (interrupts & ISR_RXOK) { + totalRxOk++; + } + if 
(interrupts & ISR_RXDESC) { + totalRxDesc++; + } + if (interrupts & ISR_TXOK) { + totalTxOk++; + } + if (interrupts & ISR_TXIDLE) { + totalTxIdle++; + } + if (interrupts & ISR_TXDESC) { + totalTxDesc++; + } + if (interrupts & ISR_RXORN) { + totalRxOrn++; + } + } + + DPRINTF(EthernetIntr, + "interrupt written to ISR: intr=%#x isr=%#x imr=%#x\n", + interrupts, regs.isr, regs.imr); + + if ((regs.isr & regs.imr)) { + Tick when = curTick(); + if ((regs.isr & regs.imr & ISR_NODELAY) == 0) + when += intrDelay; + postedInterrupts++; + cpuIntrPost(when); + } +} + +/* writing this interrupt counting stats inside this means that this function + is now limited to being used to clear all interrupts upon the kernel + reading isr and servicing. just telling you in case you were thinking + of expanding use. +*/ +void +NSGigE::devIntrClear(uint32_t interrupts) +{ + if (interrupts & ISR_RESERVE) + panic("Cannot clear a reserved interrupt"); + + if (regs.isr & regs.imr & ISR_SWI) { + postedSwi++; + } + if (regs.isr & regs.imr & ISR_RXIDLE) { + postedRxIdle++; + } + if (regs.isr & regs.imr & ISR_RXOK) { + postedRxOk++; + } + if (regs.isr & regs.imr & ISR_RXDESC) { + postedRxDesc++; + } + if (regs.isr & regs.imr & ISR_TXOK) { + postedTxOk++; + } + if (regs.isr & regs.imr & ISR_TXIDLE) { + postedTxIdle++; + } + if (regs.isr & regs.imr & ISR_TXDESC) { + postedTxDesc++; + } + if (regs.isr & regs.imr & ISR_RXORN) { + postedRxOrn++; + } + + interrupts &= ~ISR_NOIMPL; + regs.isr &= ~interrupts; + + DPRINTF(EthernetIntr, + "interrupt cleared from ISR: intr=%x isr=%x imr=%x\n", + interrupts, regs.isr, regs.imr); + + if (!(regs.isr & regs.imr)) + cpuIntrClear(); +} + +void +NSGigE::devIntrChangeMask() +{ + DPRINTF(EthernetIntr, "interrupt mask changed: isr=%x imr=%x masked=%x\n", + regs.isr, regs.imr, regs.isr & regs.imr); + + if (regs.isr & regs.imr) + cpuIntrPost(curTick()); + else + cpuIntrClear(); +} + +void +NSGigE::cpuIntrPost(Tick when) +{ + // If the interrupt you want to post is later 
than an interrupt + // already scheduled, just let it post in the coming one and don't + // schedule another. + // HOWEVER, must be sure that the scheduled intrTick is in the + // future (this was formerly the source of a bug) + /** + * @todo this warning should be removed and the intrTick code should + * be fixed. + */ + assert(when >= curTick()); + assert(intrTick >= curTick() || intrTick == 0); + if (when > intrTick && intrTick != 0) { + DPRINTF(EthernetIntr, "don't need to schedule event...intrTick=%d\n", + intrTick); + return; + } + + intrTick = when; + if (intrTick < curTick()) { + Debug::breakpoint(); + intrTick = curTick(); + } + + DPRINTF(EthernetIntr, "going to schedule an interrupt for intrTick=%d\n", + intrTick); + + if (intrEvent) + intrEvent->squash(); + intrEvent = new IntrEvent(this, true); + schedule(intrEvent, intrTick); +} + +void +NSGigE::cpuInterrupt() +{ + assert(intrTick == curTick()); + + // Whether or not there's a pending interrupt, we don't care about + // it anymore + intrEvent = 0; + intrTick = 0; + + // Don't send an interrupt if there's already one + if (cpuPendingIntr) { + DPRINTF(EthernetIntr, + "would send an interrupt now, but there's already pending\n"); + } else { + // Send interrupt + cpuPendingIntr = true; + + DPRINTF(EthernetIntr, "posting interrupt\n"); + intrPost(); + } +} + +void +NSGigE::cpuIntrClear() +{ + if (!cpuPendingIntr) + return; + + if (intrEvent) { + intrEvent->squash(); + intrEvent = 0; + } + + intrTick = 0; + + cpuPendingIntr = false; + + DPRINTF(EthernetIntr, "clearing interrupt\n"); + intrClear(); +} + +bool +NSGigE::cpuIntrPending() const +{ return cpuPendingIntr; } + +void +NSGigE::txReset() +{ + + DPRINTF(Ethernet, "transmit reset\n"); + + CTDD = false; + txEnable = false;; + txFragPtr = 0; + assert(txDescCnt == 0); + txFifo.clear(); + txState = txIdle; + assert(txDmaState == dmaIdle); +} + +void +NSGigE::rxReset() +{ + DPRINTF(Ethernet, "receive reset\n"); + + CRDD = false; + assert(rxPktBytes == 0); + 
rxEnable = false; + rxFragPtr = 0; + assert(rxDescCnt == 0); + assert(rxDmaState == dmaIdle); + rxFifo.clear(); + rxState = rxIdle; +} + +void +NSGigE::regsReset() +{ + memset(®s, 0, sizeof(regs)); + regs.config = (CFGR_LNKSTS | CFGR_TBI_EN | CFGR_MODE_1000); + regs.mear = 0x12; + regs.txcfg = 0x120; // set drain threshold to 1024 bytes and + // fill threshold to 32 bytes + regs.rxcfg = 0x4; // set drain threshold to 16 bytes + regs.srr = 0x0103; // set the silicon revision to rev B or 0x103 + regs.mibc = MIBC_FRZ; + regs.vdr = 0x81; // set the vlan tag type to 802.1q + regs.tesr = 0xc000; // TBI capable of both full and half duplex + regs.brar = 0xffffffff; + + extstsEnable = false; + acceptBroadcast = false; + acceptMulticast = false; + acceptUnicast = false; + acceptPerfect = false; + acceptArp = false; +} + +bool +NSGigE::doRxDmaRead() +{ + assert(rxDmaState == dmaIdle || rxDmaState == dmaReadWaiting); + rxDmaState = dmaReading; + + if (dmaPending() || drainState() != DrainState::Running) + rxDmaState = dmaReadWaiting; + else + dmaRead(rxDmaAddr, rxDmaLen, &rxDmaReadEvent, (uint8_t*)rxDmaData); + + return true; +} + +void +NSGigE::rxDmaReadDone() +{ + assert(rxDmaState == dmaReading); + rxDmaState = dmaIdle; + + DPRINTF(EthernetDMA, "rx dma read paddr=%#x len=%d\n", + rxDmaAddr, rxDmaLen); + DDUMP(EthernetDMA, rxDmaData, rxDmaLen); + + // If the transmit state machine has a pending DMA, let it go first + if (txDmaState == dmaReadWaiting || txDmaState == dmaWriteWaiting) + txKick(); + + rxKick(); +} + +bool +NSGigE::doRxDmaWrite() +{ + assert(rxDmaState == dmaIdle || rxDmaState == dmaWriteWaiting); + rxDmaState = dmaWriting; + + if (dmaPending() || drainState() != DrainState::Running) + rxDmaState = dmaWriteWaiting; + else + dmaWrite(rxDmaAddr, rxDmaLen, &rxDmaWriteEvent, (uint8_t*)rxDmaData); + return true; +} + +void +NSGigE::rxDmaWriteDone() +{ + assert(rxDmaState == dmaWriting); + rxDmaState = dmaIdle; + + DPRINTF(EthernetDMA, "rx dma write paddr=%#x 
len=%d\n", + rxDmaAddr, rxDmaLen); + DDUMP(EthernetDMA, rxDmaData, rxDmaLen); + + // If the transmit state machine has a pending DMA, let it go first + if (txDmaState == dmaReadWaiting || txDmaState == dmaWriteWaiting) + txKick(); + + rxKick(); +} + +void +NSGigE::rxKick() +{ + bool is64bit = (bool)(regs.config & CFGR_M64ADDR); + + DPRINTF(EthernetSM, + "receive kick rxState=%s (rxBuf.size=%d) %d-bit\n", + NsRxStateStrings[rxState], rxFifo.size(), is64bit ? 64 : 32); + + Addr link, bufptr; + uint32_t &cmdsts = is64bit ? rxDesc64.cmdsts : rxDesc32.cmdsts; + uint32_t &extsts = is64bit ? rxDesc64.extsts : rxDesc32.extsts; + + next: + if (rxKickTick > curTick()) { + DPRINTF(EthernetSM, "receive kick exiting, can't run till %d\n", + rxKickTick); + + goto exit; + } + + // Go to the next state machine clock tick. + rxKickTick = clockEdge(Cycles(1)); + + switch(rxDmaState) { + case dmaReadWaiting: + if (doRxDmaRead()) + goto exit; + break; + case dmaWriteWaiting: + if (doRxDmaWrite()) + goto exit; + break; + default: + break; + } + + link = is64bit ? (Addr)rxDesc64.link : (Addr)rxDesc32.link; + bufptr = is64bit ? (Addr)rxDesc64.bufptr : (Addr)rxDesc32.bufptr; + + // see state machine from spec for details + // the way this works is, if you finish work on one state and can + // go directly to another, you do that through jumping to the + // label "next". however, if you have intermediate work, like DMA + // so that you can't go to the next state yet, you go to exit and + // exit the loop. however, when the DMA is done it will trigger + // an event and come back to this loop. + switch (rxState) { + case rxIdle: + if (!rxEnable) { + DPRINTF(EthernetSM, "Receive Disabled! Nothing to do.\n"); + goto exit; + } + + if (CRDD) { + rxState = rxDescRefr; + + rxDmaAddr = regs.rxdp & 0x3fffffff; + rxDmaData = + is64bit ? (void *)&rxDesc64.link : (void *)&rxDesc32.link; + rxDmaLen = is64bit ? 
sizeof(rxDesc64.link) : sizeof(rxDesc32.link); + rxDmaFree = dmaDescFree; + + descDmaReads++; + descDmaRdBytes += rxDmaLen; + + if (doRxDmaRead()) + goto exit; + } else { + rxState = rxDescRead; + + rxDmaAddr = regs.rxdp & 0x3fffffff; + rxDmaData = is64bit ? (void *)&rxDesc64 : (void *)&rxDesc32; + rxDmaLen = is64bit ? sizeof(rxDesc64) : sizeof(rxDesc32); + rxDmaFree = dmaDescFree; + + descDmaReads++; + descDmaRdBytes += rxDmaLen; + + if (doRxDmaRead()) + goto exit; + } + break; + + case rxDescRefr: + if (rxDmaState != dmaIdle) + goto exit; + + rxState = rxAdvance; + break; + + case rxDescRead: + if (rxDmaState != dmaIdle) + goto exit; + + DPRINTF(EthernetDesc, "rxDesc: addr=%08x read descriptor\n", + regs.rxdp & 0x3fffffff); + DPRINTF(EthernetDesc, + "rxDesc: link=%#x bufptr=%#x cmdsts=%08x extsts=%08x\n", + link, bufptr, cmdsts, extsts); + + if (cmdsts & CMDSTS_OWN) { + devIntrPost(ISR_RXIDLE); + rxState = rxIdle; + goto exit; + } else { + rxState = rxFifoBlock; + rxFragPtr = bufptr; + rxDescCnt = cmdsts & CMDSTS_LEN_MASK; + } + break; + + case rxFifoBlock: + if (!rxPacket) { + /** + * @todo in reality, we should be able to start processing + * the packet as it arrives, and not have to wait for the + * full packet ot be in the receive fifo. + */ + if (rxFifo.empty()) + goto exit; + + DPRINTF(EthernetSM, "****processing receive of new packet****\n"); + + // If we don't have a packet, grab a new one from the fifo. 
+ rxPacket = rxFifo.front(); + rxPktBytes = rxPacket->length; + rxPacketBufPtr = rxPacket->data; + +#if TRACING_ON + if (DTRACE(Ethernet)) { + IpPtr ip(rxPacket); + if (ip) { + DPRINTF(Ethernet, "ID is %d\n", ip->id()); + TcpPtr tcp(ip); + if (tcp) { + DPRINTF(Ethernet, + "Src Port=%d, Dest Port=%d, Seq=%d, Ack=%d\n", + tcp->sport(), tcp->dport(), tcp->seq(), + tcp->ack()); + } + } + } +#endif + + // sanity check - i think the driver behaves like this + assert(rxDescCnt >= rxPktBytes); + rxFifo.pop(); + } + + + // dont' need the && rxDescCnt > 0 if driver sanity check + // above holds + if (rxPktBytes > 0) { + rxState = rxFragWrite; + // don't need min<>(rxPktBytes,rxDescCnt) if above sanity + // check holds + rxXferLen = rxPktBytes; + + rxDmaAddr = rxFragPtr & 0x3fffffff; + rxDmaData = rxPacketBufPtr; + rxDmaLen = rxXferLen; + rxDmaFree = dmaDataFree; + + if (doRxDmaWrite()) + goto exit; + + } else { + rxState = rxDescWrite; + + //if (rxPktBytes == 0) { /* packet is done */ + assert(rxPktBytes == 0); + DPRINTF(EthernetSM, "done with receiving packet\n"); + + cmdsts |= CMDSTS_OWN; + cmdsts &= ~CMDSTS_MORE; + cmdsts |= CMDSTS_OK; + cmdsts &= 0xffff0000; + cmdsts += rxPacket->length; //i.e. set CMDSTS_SIZE + +#if 0 + /* + * all the driver uses these are for its own stats keeping + * which we don't care about, aren't necessary for + * functionality and doing this would just slow us down. 
+ * if they end up using this in a later version for + * functional purposes, just undef + */ + if (rxFilterEnable) { + cmdsts &= ~CMDSTS_DEST_MASK; + const EthAddr &dst = rxFifoFront()->dst(); + if (dst->unicast()) + cmdsts |= CMDSTS_DEST_SELF; + if (dst->multicast()) + cmdsts |= CMDSTS_DEST_MULTI; + if (dst->broadcast()) + cmdsts |= CMDSTS_DEST_MASK; + } +#endif + + IpPtr ip(rxPacket); + if (extstsEnable && ip) { + extsts |= EXTSTS_IPPKT; + rxIpChecksums++; + if (cksum(ip) != 0) { + DPRINTF(EthernetCksum, "Rx IP Checksum Error\n"); + extsts |= EXTSTS_IPERR; + } + TcpPtr tcp(ip); + UdpPtr udp(ip); + if (tcp) { + extsts |= EXTSTS_TCPPKT; + rxTcpChecksums++; + if (cksum(tcp) != 0) { + DPRINTF(EthernetCksum, "Rx TCP Checksum Error\n"); + extsts |= EXTSTS_TCPERR; + + } + } else if (udp) { + extsts |= EXTSTS_UDPPKT; + rxUdpChecksums++; + if (cksum(udp) != 0) { + DPRINTF(EthernetCksum, "Rx UDP Checksum Error\n"); + extsts |= EXTSTS_UDPERR; + } + } + } + rxPacket = 0; + + /* + * the driver seems to always receive into desc buffers + * of size 1514, so you never have a pkt that is split + * into multiple descriptors on the receive side, so + * i don't implement that case, hence the assert above. 
+ */ + + DPRINTF(EthernetDesc, + "rxDesc: addr=%08x writeback cmdsts extsts\n", + regs.rxdp & 0x3fffffff); + DPRINTF(EthernetDesc, + "rxDesc: link=%#x bufptr=%#x cmdsts=%08x extsts=%08x\n", + link, bufptr, cmdsts, extsts); + + rxDmaAddr = regs.rxdp & 0x3fffffff; + rxDmaData = &cmdsts; + if (is64bit) { + rxDmaAddr += offsetof(ns_desc64, cmdsts); + rxDmaLen = sizeof(rxDesc64.cmdsts) + sizeof(rxDesc64.extsts); + } else { + rxDmaAddr += offsetof(ns_desc32, cmdsts); + rxDmaLen = sizeof(rxDesc32.cmdsts) + sizeof(rxDesc32.extsts); + } + rxDmaFree = dmaDescFree; + + descDmaWrites++; + descDmaWrBytes += rxDmaLen; + + if (doRxDmaWrite()) + goto exit; + } + break; + + case rxFragWrite: + if (rxDmaState != dmaIdle) + goto exit; + + rxPacketBufPtr += rxXferLen; + rxFragPtr += rxXferLen; + rxPktBytes -= rxXferLen; + + rxState = rxFifoBlock; + break; + + case rxDescWrite: + if (rxDmaState != dmaIdle) + goto exit; + + assert(cmdsts & CMDSTS_OWN); + + assert(rxPacket == 0); + devIntrPost(ISR_RXOK); + + if (cmdsts & CMDSTS_INTR) + devIntrPost(ISR_RXDESC); + + if (!rxEnable) { + DPRINTF(EthernetSM, "Halting the RX state machine\n"); + rxState = rxIdle; + goto exit; + } else + rxState = rxAdvance; + break; + + case rxAdvance: + if (link == 0) { + devIntrPost(ISR_RXIDLE); + rxState = rxIdle; + CRDD = true; + goto exit; + } else { + if (rxDmaState != dmaIdle) + goto exit; + rxState = rxDescRead; + regs.rxdp = link; + CRDD = false; + + rxDmaAddr = regs.rxdp & 0x3fffffff; + rxDmaData = is64bit ? (void *)&rxDesc64 : (void *)&rxDesc32; + rxDmaLen = is64bit ? sizeof(rxDesc64) : sizeof(rxDesc32); + rxDmaFree = dmaDescFree; + + if (doRxDmaRead()) + goto exit; + } + break; + + default: + panic("Invalid rxState!"); + } + + DPRINTF(EthernetSM, "entering next rxState=%s\n", + NsRxStateStrings[rxState]); + goto next; + + exit: + /** + * @todo do we want to schedule a future kick? 
+ */ + DPRINTF(EthernetSM, "rx state machine exited rxState=%s\n", + NsRxStateStrings[rxState]); + + if (!rxKickEvent.scheduled()) + schedule(rxKickEvent, rxKickTick); +} + +void +NSGigE::transmit() +{ + if (txFifo.empty()) { + DPRINTF(Ethernet, "nothing to transmit\n"); + return; + } + + DPRINTF(Ethernet, "Attempt Pkt Transmit: txFifo length=%d\n", + txFifo.size()); + if (interface->sendPacket(txFifo.front())) { +#if TRACING_ON + if (DTRACE(Ethernet)) { + IpPtr ip(txFifo.front()); + if (ip) { + DPRINTF(Ethernet, "ID is %d\n", ip->id()); + TcpPtr tcp(ip); + if (tcp) { + DPRINTF(Ethernet, + "Src Port=%d, Dest Port=%d, Seq=%d, Ack=%d\n", + tcp->sport(), tcp->dport(), tcp->seq(), + tcp->ack()); + } + } + } +#endif + + DDUMP(EthernetData, txFifo.front()->data, txFifo.front()->length); + txBytes += txFifo.front()->length; + txPackets++; + + DPRINTF(Ethernet, "Successful Xmit! now txFifoAvail is %d\n", + txFifo.avail()); + txFifo.pop(); + + /* + * normally do a writeback of the descriptor here, and ONLY + * after that is done, send this interrupt. but since our + * stuff never actually fails, just do this interrupt here, + * otherwise the code has to stray from this nice format. + * besides, it's functionally the same. 
+ */ + devIntrPost(ISR_TXOK); + } + + if (!txFifo.empty() && !txEvent.scheduled()) { + DPRINTF(Ethernet, "reschedule transmit\n"); + schedule(txEvent, curTick() + retryTime); + } +} + +bool +NSGigE::doTxDmaRead() +{ + assert(txDmaState == dmaIdle || txDmaState == dmaReadWaiting); + txDmaState = dmaReading; + + if (dmaPending() || drainState() != DrainState::Running) + txDmaState = dmaReadWaiting; + else + dmaRead(txDmaAddr, txDmaLen, &txDmaReadEvent, (uint8_t*)txDmaData); + + return true; +} + +void +NSGigE::txDmaReadDone() +{ + assert(txDmaState == dmaReading); + txDmaState = dmaIdle; + + DPRINTF(EthernetDMA, "tx dma read paddr=%#x len=%d\n", + txDmaAddr, txDmaLen); + DDUMP(EthernetDMA, txDmaData, txDmaLen); + + // If the receive state machine has a pending DMA, let it go first + if (rxDmaState == dmaReadWaiting || rxDmaState == dmaWriteWaiting) + rxKick(); + + txKick(); +} + +bool +NSGigE::doTxDmaWrite() +{ + assert(txDmaState == dmaIdle || txDmaState == dmaWriteWaiting); + txDmaState = dmaWriting; + + if (dmaPending() || drainState() != DrainState::Running) + txDmaState = dmaWriteWaiting; + else + dmaWrite(txDmaAddr, txDmaLen, &txDmaWriteEvent, (uint8_t*)txDmaData); + return true; +} + +void +NSGigE::txDmaWriteDone() +{ + assert(txDmaState == dmaWriting); + txDmaState = dmaIdle; + + DPRINTF(EthernetDMA, "tx dma write paddr=%#x len=%d\n", + txDmaAddr, txDmaLen); + DDUMP(EthernetDMA, txDmaData, txDmaLen); + + // If the receive state machine has a pending DMA, let it go first + if (rxDmaState == dmaReadWaiting || rxDmaState == dmaWriteWaiting) + rxKick(); + + txKick(); +} + +void +NSGigE::txKick() +{ + bool is64bit = (bool)(regs.config & CFGR_M64ADDR); + + DPRINTF(EthernetSM, "transmit kick txState=%s %d-bit\n", + NsTxStateStrings[txState], is64bit ? 64 : 32); + + Addr link, bufptr; + uint32_t &cmdsts = is64bit ? txDesc64.cmdsts : txDesc32.cmdsts; + uint32_t &extsts = is64bit ? 
txDesc64.extsts : txDesc32.extsts; + + next: + if (txKickTick > curTick()) { + DPRINTF(EthernetSM, "transmit kick exiting, can't run till %d\n", + txKickTick); + goto exit; + } + + // Go to the next state machine clock tick. + txKickTick = clockEdge(Cycles(1)); + + switch(txDmaState) { + case dmaReadWaiting: + if (doTxDmaRead()) + goto exit; + break; + case dmaWriteWaiting: + if (doTxDmaWrite()) + goto exit; + break; + default: + break; + } + + link = is64bit ? (Addr)txDesc64.link : (Addr)txDesc32.link; + bufptr = is64bit ? (Addr)txDesc64.bufptr : (Addr)txDesc32.bufptr; + switch (txState) { + case txIdle: + if (!txEnable) { + DPRINTF(EthernetSM, "Transmit disabled. Nothing to do.\n"); + goto exit; + } + + if (CTDD) { + txState = txDescRefr; + + txDmaAddr = regs.txdp & 0x3fffffff; + txDmaData = + is64bit ? (void *)&txDesc64.link : (void *)&txDesc32.link; + txDmaLen = is64bit ? sizeof(txDesc64.link) : sizeof(txDesc32.link); + txDmaFree = dmaDescFree; + + descDmaReads++; + descDmaRdBytes += txDmaLen; + + if (doTxDmaRead()) + goto exit; + + } else { + txState = txDescRead; + + txDmaAddr = regs.txdp & 0x3fffffff; + txDmaData = is64bit ? (void *)&txDesc64 : (void *)&txDesc32; + txDmaLen = is64bit ? 
sizeof(txDesc64) : sizeof(txDesc32); + txDmaFree = dmaDescFree; + + descDmaReads++; + descDmaRdBytes += txDmaLen; + + if (doTxDmaRead()) + goto exit; + } + break; + + case txDescRefr: + if (txDmaState != dmaIdle) + goto exit; + + txState = txAdvance; + break; + + case txDescRead: + if (txDmaState != dmaIdle) + goto exit; + + DPRINTF(EthernetDesc, "txDesc: addr=%08x read descriptor\n", + regs.txdp & 0x3fffffff); + DPRINTF(EthernetDesc, + "txDesc: link=%#x bufptr=%#x cmdsts=%#08x extsts=%#08x\n", + link, bufptr, cmdsts, extsts); + + if (cmdsts & CMDSTS_OWN) { + txState = txFifoBlock; + txFragPtr = bufptr; + txDescCnt = cmdsts & CMDSTS_LEN_MASK; + } else { + devIntrPost(ISR_TXIDLE); + txState = txIdle; + goto exit; + } + break; + + case txFifoBlock: + if (!txPacket) { + DPRINTF(EthernetSM, "****starting the tx of a new packet****\n"); + txPacket = make_shared(16384); + txPacketBufPtr = txPacket->data; + } + + if (txDescCnt == 0) { + DPRINTF(EthernetSM, "the txDescCnt == 0, done with descriptor\n"); + if (cmdsts & CMDSTS_MORE) { + DPRINTF(EthernetSM, "there are more descriptors to come\n"); + txState = txDescWrite; + + cmdsts &= ~CMDSTS_OWN; + + txDmaAddr = regs.txdp & 0x3fffffff; + txDmaData = &cmdsts; + if (is64bit) { + txDmaAddr += offsetof(ns_desc64, cmdsts); + txDmaLen = sizeof(txDesc64.cmdsts); + } else { + txDmaAddr += offsetof(ns_desc32, cmdsts); + txDmaLen = sizeof(txDesc32.cmdsts); + } + txDmaFree = dmaDescFree; + + if (doTxDmaWrite()) + goto exit; + + } else { /* this packet is totally done */ + DPRINTF(EthernetSM, "This packet is done, let's wrap it up\n"); + /* deal with the the packet that just finished */ + if ((regs.vtcr & VTCR_PPCHK) && extstsEnable) { + IpPtr ip(txPacket); + if (extsts & EXTSTS_UDPPKT) { + UdpPtr udp(ip); + if (udp) { + udp->sum(0); + udp->sum(cksum(udp)); + txUdpChecksums++; + } else { + Debug::breakpoint(); + warn_once("UDPPKT set, but not UDP!\n"); + } + } else if (extsts & EXTSTS_TCPPKT) { + TcpPtr tcp(ip); + if (tcp) { + 
tcp->sum(0); + tcp->sum(cksum(tcp)); + txTcpChecksums++; + } else { + Debug::breakpoint(); + warn_once("TCPPKT set, but not UDP!\n"); + } + } + if (extsts & EXTSTS_IPPKT) { + if (ip) { + ip->sum(0); + ip->sum(cksum(ip)); + txIpChecksums++; + } else { + Debug::breakpoint(); + warn_once("IPPKT set, but not UDP!\n"); + } + } + } + + txPacket->length = txPacketBufPtr - txPacket->data; + // this is just because the receive can't handle a + // packet bigger want to make sure + if (txPacket->length > 1514) + panic("transmit packet too large, %s > 1514\n", + txPacket->length); + +#ifndef NDEBUG + bool success = +#endif + txFifo.push(txPacket); + assert(success); + + /* + * this following section is not tqo spec, but + * functionally shouldn't be any different. normally, + * the chip will wait til the transmit has occurred + * before writing back the descriptor because it has + * to wait to see that it was successfully transmitted + * to decide whether to set CMDSTS_OK or not. + * however, in the simulator since it is always + * successfully transmitted, and writing it exactly to + * spec would complicate the code, we just do it here + */ + + cmdsts &= ~CMDSTS_OWN; + cmdsts |= CMDSTS_OK; + + DPRINTF(EthernetDesc, + "txDesc writeback: cmdsts=%08x extsts=%08x\n", + cmdsts, extsts); + + txDmaFree = dmaDescFree; + txDmaAddr = regs.txdp & 0x3fffffff; + txDmaData = &cmdsts; + if (is64bit) { + txDmaAddr += offsetof(ns_desc64, cmdsts); + txDmaLen = + sizeof(txDesc64.cmdsts) + sizeof(txDesc64.extsts); + } else { + txDmaAddr += offsetof(ns_desc32, cmdsts); + txDmaLen = + sizeof(txDesc32.cmdsts) + sizeof(txDesc32.extsts); + } + + descDmaWrites++; + descDmaWrBytes += txDmaLen; + + transmit(); + txPacket = 0; + + if (!txEnable) { + DPRINTF(EthernetSM, "halting TX state machine\n"); + txState = txIdle; + goto exit; + } else + txState = txAdvance; + + if (doTxDmaWrite()) + goto exit; + } + } else { + DPRINTF(EthernetSM, "this descriptor isn't done yet\n"); + if (!txFifo.full()) { + 
txState = txFragRead; + + /* + * The number of bytes transferred is either whatever + * is left in the descriptor (txDescCnt), or if there + * is not enough room in the fifo, just whatever room + * is left in the fifo + */ + txXferLen = min(txDescCnt, txFifo.avail()); + + txDmaAddr = txFragPtr & 0x3fffffff; + txDmaData = txPacketBufPtr; + txDmaLen = txXferLen; + txDmaFree = dmaDataFree; + + if (doTxDmaRead()) + goto exit; + } else { + txState = txFifoBlock; + transmit(); + + goto exit; + } + + } + break; + + case txFragRead: + if (txDmaState != dmaIdle) + goto exit; + + txPacketBufPtr += txXferLen; + txFragPtr += txXferLen; + txDescCnt -= txXferLen; + txFifo.reserve(txXferLen); + + txState = txFifoBlock; + break; + + case txDescWrite: + if (txDmaState != dmaIdle) + goto exit; + + if (cmdsts & CMDSTS_INTR) + devIntrPost(ISR_TXDESC); + + if (!txEnable) { + DPRINTF(EthernetSM, "halting TX state machine\n"); + txState = txIdle; + goto exit; + } else + txState = txAdvance; + break; + + case txAdvance: + if (link == 0) { + devIntrPost(ISR_TXIDLE); + txState = txIdle; + goto exit; + } else { + if (txDmaState != dmaIdle) + goto exit; + txState = txDescRead; + regs.txdp = link; + CTDD = false; + + txDmaAddr = link & 0x3fffffff; + txDmaData = is64bit ? (void *)&txDesc64 : (void *)&txDesc32; + txDmaLen = is64bit ? sizeof(txDesc64) : sizeof(txDesc32); + txDmaFree = dmaDescFree; + + if (doTxDmaRead()) + goto exit; + } + break; + + default: + panic("invalid state"); + } + + DPRINTF(EthernetSM, "entering next txState=%s\n", + NsTxStateStrings[txState]); + goto next; + + exit: + /** + * @todo do we want to schedule a future kick? 
+ */ + DPRINTF(EthernetSM, "tx state machine exited txState=%s\n", + NsTxStateStrings[txState]); + + if (!txKickEvent.scheduled()) + schedule(txKickEvent, txKickTick); +} + +/** + * Advance the EEPROM state machine + * Called on rising edge of EEPROM clock bit in MEAR + */ +void +NSGigE::eepromKick() +{ + switch (eepromState) { + + case eepromStart: + + // Wait for start bit + if (regs.mear & MEAR_EEDI) { + // Set up to get 2 opcode bits + eepromState = eepromGetOpcode; + eepromBitsToRx = 2; + eepromOpcode = 0; + } + break; + + case eepromGetOpcode: + eepromOpcode <<= 1; + eepromOpcode += (regs.mear & MEAR_EEDI) ? 1 : 0; + --eepromBitsToRx; + + // Done getting opcode + if (eepromBitsToRx == 0) { + if (eepromOpcode != EEPROM_READ) + panic("only EEPROM reads are implemented!"); + + // Set up to get address + eepromState = eepromGetAddress; + eepromBitsToRx = 6; + eepromAddress = 0; + } + break; + + case eepromGetAddress: + eepromAddress <<= 1; + eepromAddress += (regs.mear & MEAR_EEDI) ? 1 : 0; + --eepromBitsToRx; + + // Done getting address + if (eepromBitsToRx == 0) { + + if (eepromAddress >= EEPROM_SIZE) + panic("EEPROM read access out of range!"); + + switch (eepromAddress) { + + case EEPROM_PMATCH2_ADDR: + eepromData = rom.perfectMatch[5]; + eepromData <<= 8; + eepromData += rom.perfectMatch[4]; + break; + + case EEPROM_PMATCH1_ADDR: + eepromData = rom.perfectMatch[3]; + eepromData <<= 8; + eepromData += rom.perfectMatch[2]; + break; + + case EEPROM_PMATCH0_ADDR: + eepromData = rom.perfectMatch[1]; + eepromData <<= 8; + eepromData += rom.perfectMatch[0]; + break; + + default: + panic("FreeBSD driver only uses EEPROM to read PMATCH!"); + } + // Set up to read data + eepromState = eepromRead; + eepromBitsToRx = 16; + + // Clear data in bit + regs.mear &= ~MEAR_EEDI; + } + break; + + case eepromRead: + // Clear Data Out bit + regs.mear &= ~MEAR_EEDO; + // Set bit to value of current EEPROM bit + regs.mear |= (eepromData & 0x8000) ? 
MEAR_EEDO : 0x0; + + eepromData <<= 1; + --eepromBitsToRx; + + // All done + if (eepromBitsToRx == 0) { + eepromState = eepromStart; + } + break; + + default: + panic("invalid EEPROM state"); + } + +} + +void +NSGigE::transferDone() +{ + if (txFifo.empty()) { + DPRINTF(Ethernet, "transfer complete: txFifo empty...nothing to do\n"); + return; + } + + DPRINTF(Ethernet, "transfer complete: data in txFifo...schedule xmit\n"); + + reschedule(txEvent, clockEdge(Cycles(1)), true); +} + +bool +NSGigE::rxFilter(const EthPacketPtr &packet) +{ + EthPtr eth = packet; + bool drop = true; + string type; + + const EthAddr &dst = eth->dst(); + if (dst.unicast()) { + // If we're accepting all unicast addresses + if (acceptUnicast) + drop = false; + + // If we make a perfect match + if (acceptPerfect && dst == rom.perfectMatch) + drop = false; + + if (acceptArp && eth->type() == ETH_TYPE_ARP) + drop = false; + + } else if (dst.broadcast()) { + // if we're accepting broadcasts + if (acceptBroadcast) + drop = false; + + } else if (dst.multicast()) { + // if we're accepting all multicasts + if (acceptMulticast) + drop = false; + + // Multicast hashing faked - all packets accepted + if (multicastHashEnable) + drop = false; + } + + if (drop) { + DPRINTF(Ethernet, "rxFilter drop\n"); + DDUMP(EthernetData, packet->data, packet->length); + } + + return drop; +} + +bool +NSGigE::recvPacket(EthPacketPtr packet) +{ + rxBytes += packet->length; + rxPackets++; + + DPRINTF(Ethernet, "Receiving packet from wire, rxFifoAvail=%d\n", + rxFifo.avail()); + + if (!rxEnable) { + DPRINTF(Ethernet, "receive disabled...packet dropped\n"); + return true; + } + + if (!rxFilterEnable) { + DPRINTF(Ethernet, + "receive packet filtering disabled . . . 
packet dropped\n"); + return true; + } + + if (rxFilter(packet)) { + DPRINTF(Ethernet, "packet filtered...dropped\n"); + return true; + } + + if (rxFifo.avail() < packet->length) { +#if TRACING_ON + IpPtr ip(packet); + TcpPtr tcp(ip); + if (ip) { + DPRINTF(Ethernet, + "packet won't fit in receive buffer...pkt ID %d dropped\n", + ip->id()); + if (tcp) { + DPRINTF(Ethernet, "Seq=%d\n", tcp->seq()); + } + } +#endif + droppedPackets++; + devIntrPost(ISR_RXORN); + return false; + } + + rxFifo.push(packet); + + rxKick(); + return true; +} + + +void +NSGigE::drainResume() +{ + Drainable::drainResume(); + + // During drain we could have left the state machines in a waiting state and + // they wouldn't get out until some other event occured to kick them. + // This way they'll get out immediately + txKick(); + rxKick(); +} + + +//===================================================================== +// +// +void +NSGigE::serialize(CheckpointOut &cp) const +{ + // Serialize the PciDevice base class + PciDevice::serialize(cp); + + /* + * Finalize any DMA events now. + */ + // @todo will mem system save pending dma? 
+ + /* + * Serialize the device registers + */ + SERIALIZE_SCALAR(regs.command); + SERIALIZE_SCALAR(regs.config); + SERIALIZE_SCALAR(regs.mear); + SERIALIZE_SCALAR(regs.ptscr); + SERIALIZE_SCALAR(regs.isr); + SERIALIZE_SCALAR(regs.imr); + SERIALIZE_SCALAR(regs.ier); + SERIALIZE_SCALAR(regs.ihr); + SERIALIZE_SCALAR(regs.txdp); + SERIALIZE_SCALAR(regs.txdp_hi); + SERIALIZE_SCALAR(regs.txcfg); + SERIALIZE_SCALAR(regs.gpior); + SERIALIZE_SCALAR(regs.rxdp); + SERIALIZE_SCALAR(regs.rxdp_hi); + SERIALIZE_SCALAR(regs.rxcfg); + SERIALIZE_SCALAR(regs.pqcr); + SERIALIZE_SCALAR(regs.wcsr); + SERIALIZE_SCALAR(regs.pcr); + SERIALIZE_SCALAR(regs.rfcr); + SERIALIZE_SCALAR(regs.rfdr); + SERIALIZE_SCALAR(regs.brar); + SERIALIZE_SCALAR(regs.brdr); + SERIALIZE_SCALAR(regs.srr); + SERIALIZE_SCALAR(regs.mibc); + SERIALIZE_SCALAR(regs.vrcr); + SERIALIZE_SCALAR(regs.vtcr); + SERIALIZE_SCALAR(regs.vdr); + SERIALIZE_SCALAR(regs.ccsr); + SERIALIZE_SCALAR(regs.tbicr); + SERIALIZE_SCALAR(regs.tbisr); + SERIALIZE_SCALAR(regs.tanar); + SERIALIZE_SCALAR(regs.tanlpar); + SERIALIZE_SCALAR(regs.taner); + SERIALIZE_SCALAR(regs.tesr); + + SERIALIZE_ARRAY(rom.perfectMatch, ETH_ADDR_LEN); + SERIALIZE_ARRAY(rom.filterHash, FHASH_SIZE); + + SERIALIZE_SCALAR(ioEnable); + + /* + * Serialize the data Fifos + */ + rxFifo.serialize("rxFifo", cp); + txFifo.serialize("txFifo", cp); + + /* + * Serialize the various helper variables + */ + bool txPacketExists = txPacket != nullptr; + SERIALIZE_SCALAR(txPacketExists); + if (txPacketExists) { + txPacket->length = txPacketBufPtr - txPacket->data; + txPacket->serialize("txPacket", cp); + uint32_t txPktBufPtr = (uint32_t) (txPacketBufPtr - txPacket->data); + SERIALIZE_SCALAR(txPktBufPtr); + } + + bool rxPacketExists = rxPacket != nullptr; + SERIALIZE_SCALAR(rxPacketExists); + if (rxPacketExists) { + rxPacket->serialize("rxPacket", cp); + uint32_t rxPktBufPtr = (uint32_t) (rxPacketBufPtr - rxPacket->data); + SERIALIZE_SCALAR(rxPktBufPtr); + } + + 
SERIALIZE_SCALAR(txXferLen); + SERIALIZE_SCALAR(rxXferLen); + + /* + * Serialize Cached Descriptors + */ + SERIALIZE_SCALAR(rxDesc64.link); + SERIALIZE_SCALAR(rxDesc64.bufptr); + SERIALIZE_SCALAR(rxDesc64.cmdsts); + SERIALIZE_SCALAR(rxDesc64.extsts); + SERIALIZE_SCALAR(txDesc64.link); + SERIALIZE_SCALAR(txDesc64.bufptr); + SERIALIZE_SCALAR(txDesc64.cmdsts); + SERIALIZE_SCALAR(txDesc64.extsts); + SERIALIZE_SCALAR(rxDesc32.link); + SERIALIZE_SCALAR(rxDesc32.bufptr); + SERIALIZE_SCALAR(rxDesc32.cmdsts); + SERIALIZE_SCALAR(rxDesc32.extsts); + SERIALIZE_SCALAR(txDesc32.link); + SERIALIZE_SCALAR(txDesc32.bufptr); + SERIALIZE_SCALAR(txDesc32.cmdsts); + SERIALIZE_SCALAR(txDesc32.extsts); + SERIALIZE_SCALAR(extstsEnable); + + /* + * Serialize tx state machine + */ + int txState = this->txState; + SERIALIZE_SCALAR(txState); + SERIALIZE_SCALAR(txEnable); + SERIALIZE_SCALAR(CTDD); + SERIALIZE_SCALAR(txFragPtr); + SERIALIZE_SCALAR(txDescCnt); + int txDmaState = this->txDmaState; + SERIALIZE_SCALAR(txDmaState); + SERIALIZE_SCALAR(txKickTick); + + /* + * Serialize rx state machine + */ + int rxState = this->rxState; + SERIALIZE_SCALAR(rxState); + SERIALIZE_SCALAR(rxEnable); + SERIALIZE_SCALAR(CRDD); + SERIALIZE_SCALAR(rxPktBytes); + SERIALIZE_SCALAR(rxFragPtr); + SERIALIZE_SCALAR(rxDescCnt); + int rxDmaState = this->rxDmaState; + SERIALIZE_SCALAR(rxDmaState); + SERIALIZE_SCALAR(rxKickTick); + + /* + * Serialize EEPROM state machine + */ + int eepromState = this->eepromState; + SERIALIZE_SCALAR(eepromState); + SERIALIZE_SCALAR(eepromClk); + SERIALIZE_SCALAR(eepromBitsToRx); + SERIALIZE_SCALAR(eepromOpcode); + SERIALIZE_SCALAR(eepromAddress); + SERIALIZE_SCALAR(eepromData); + + /* + * If there's a pending transmit, store the time so we can + * reschedule it later + */ + Tick transmitTick = txEvent.scheduled() ? 
txEvent.when() - curTick() : 0; + SERIALIZE_SCALAR(transmitTick); + + /* + * receive address filter settings + */ + SERIALIZE_SCALAR(rxFilterEnable); + SERIALIZE_SCALAR(acceptBroadcast); + SERIALIZE_SCALAR(acceptMulticast); + SERIALIZE_SCALAR(acceptUnicast); + SERIALIZE_SCALAR(acceptPerfect); + SERIALIZE_SCALAR(acceptArp); + SERIALIZE_SCALAR(multicastHashEnable); + + /* + * Keep track of pending interrupt status. + */ + SERIALIZE_SCALAR(intrTick); + SERIALIZE_SCALAR(cpuPendingIntr); + Tick intrEventTick = 0; + if (intrEvent) + intrEventTick = intrEvent->when(); + SERIALIZE_SCALAR(intrEventTick); + +} + +void +NSGigE::unserialize(CheckpointIn &cp) +{ + // Unserialize the PciDevice base class + PciDevice::unserialize(cp); + + UNSERIALIZE_SCALAR(regs.command); + UNSERIALIZE_SCALAR(regs.config); + UNSERIALIZE_SCALAR(regs.mear); + UNSERIALIZE_SCALAR(regs.ptscr); + UNSERIALIZE_SCALAR(regs.isr); + UNSERIALIZE_SCALAR(regs.imr); + UNSERIALIZE_SCALAR(regs.ier); + UNSERIALIZE_SCALAR(regs.ihr); + UNSERIALIZE_SCALAR(regs.txdp); + UNSERIALIZE_SCALAR(regs.txdp_hi); + UNSERIALIZE_SCALAR(regs.txcfg); + UNSERIALIZE_SCALAR(regs.gpior); + UNSERIALIZE_SCALAR(regs.rxdp); + UNSERIALIZE_SCALAR(regs.rxdp_hi); + UNSERIALIZE_SCALAR(regs.rxcfg); + UNSERIALIZE_SCALAR(regs.pqcr); + UNSERIALIZE_SCALAR(regs.wcsr); + UNSERIALIZE_SCALAR(regs.pcr); + UNSERIALIZE_SCALAR(regs.rfcr); + UNSERIALIZE_SCALAR(regs.rfdr); + UNSERIALIZE_SCALAR(regs.brar); + UNSERIALIZE_SCALAR(regs.brdr); + UNSERIALIZE_SCALAR(regs.srr); + UNSERIALIZE_SCALAR(regs.mibc); + UNSERIALIZE_SCALAR(regs.vrcr); + UNSERIALIZE_SCALAR(regs.vtcr); + UNSERIALIZE_SCALAR(regs.vdr); + UNSERIALIZE_SCALAR(regs.ccsr); + UNSERIALIZE_SCALAR(regs.tbicr); + UNSERIALIZE_SCALAR(regs.tbisr); + UNSERIALIZE_SCALAR(regs.tanar); + UNSERIALIZE_SCALAR(regs.tanlpar); + UNSERIALIZE_SCALAR(regs.taner); + UNSERIALIZE_SCALAR(regs.tesr); + + UNSERIALIZE_ARRAY(rom.perfectMatch, ETH_ADDR_LEN); + UNSERIALIZE_ARRAY(rom.filterHash, FHASH_SIZE); + + 
UNSERIALIZE_SCALAR(ioEnable); + + /* + * unserialize the data fifos + */ + rxFifo.unserialize("rxFifo", cp); + txFifo.unserialize("txFifo", cp); + + /* + * unserialize the various helper variables + */ + bool txPacketExists; + UNSERIALIZE_SCALAR(txPacketExists); + if (txPacketExists) { + txPacket = make_shared(16384); + txPacket->unserialize("txPacket", cp); + uint32_t txPktBufPtr; + UNSERIALIZE_SCALAR(txPktBufPtr); + txPacketBufPtr = (uint8_t *) txPacket->data + txPktBufPtr; + } else + txPacket = 0; + + bool rxPacketExists; + UNSERIALIZE_SCALAR(rxPacketExists); + rxPacket = 0; + if (rxPacketExists) { + rxPacket = make_shared(16384); + rxPacket->unserialize("rxPacket", cp); + uint32_t rxPktBufPtr; + UNSERIALIZE_SCALAR(rxPktBufPtr); + rxPacketBufPtr = (uint8_t *) rxPacket->data + rxPktBufPtr; + } else + rxPacket = 0; + + UNSERIALIZE_SCALAR(txXferLen); + UNSERIALIZE_SCALAR(rxXferLen); + + /* + * Unserialize Cached Descriptors + */ + UNSERIALIZE_SCALAR(rxDesc64.link); + UNSERIALIZE_SCALAR(rxDesc64.bufptr); + UNSERIALIZE_SCALAR(rxDesc64.cmdsts); + UNSERIALIZE_SCALAR(rxDesc64.extsts); + UNSERIALIZE_SCALAR(txDesc64.link); + UNSERIALIZE_SCALAR(txDesc64.bufptr); + UNSERIALIZE_SCALAR(txDesc64.cmdsts); + UNSERIALIZE_SCALAR(txDesc64.extsts); + UNSERIALIZE_SCALAR(rxDesc32.link); + UNSERIALIZE_SCALAR(rxDesc32.bufptr); + UNSERIALIZE_SCALAR(rxDesc32.cmdsts); + UNSERIALIZE_SCALAR(rxDesc32.extsts); + UNSERIALIZE_SCALAR(txDesc32.link); + UNSERIALIZE_SCALAR(txDesc32.bufptr); + UNSERIALIZE_SCALAR(txDesc32.cmdsts); + UNSERIALIZE_SCALAR(txDesc32.extsts); + UNSERIALIZE_SCALAR(extstsEnable); + + /* + * unserialize tx state machine + */ + int txState; + UNSERIALIZE_SCALAR(txState); + this->txState = (TxState) txState; + UNSERIALIZE_SCALAR(txEnable); + UNSERIALIZE_SCALAR(CTDD); + UNSERIALIZE_SCALAR(txFragPtr); + UNSERIALIZE_SCALAR(txDescCnt); + int txDmaState; + UNSERIALIZE_SCALAR(txDmaState); + this->txDmaState = (DmaState) txDmaState; + UNSERIALIZE_SCALAR(txKickTick); + if (txKickTick) + 
schedule(txKickEvent, txKickTick); + + /* + * unserialize rx state machine + */ + int rxState; + UNSERIALIZE_SCALAR(rxState); + this->rxState = (RxState) rxState; + UNSERIALIZE_SCALAR(rxEnable); + UNSERIALIZE_SCALAR(CRDD); + UNSERIALIZE_SCALAR(rxPktBytes); + UNSERIALIZE_SCALAR(rxFragPtr); + UNSERIALIZE_SCALAR(rxDescCnt); + int rxDmaState; + UNSERIALIZE_SCALAR(rxDmaState); + this->rxDmaState = (DmaState) rxDmaState; + UNSERIALIZE_SCALAR(rxKickTick); + if (rxKickTick) + schedule(rxKickEvent, rxKickTick); + + /* + * Unserialize EEPROM state machine + */ + int eepromState; + UNSERIALIZE_SCALAR(eepromState); + this->eepromState = (EEPROMState) eepromState; + UNSERIALIZE_SCALAR(eepromClk); + UNSERIALIZE_SCALAR(eepromBitsToRx); + UNSERIALIZE_SCALAR(eepromOpcode); + UNSERIALIZE_SCALAR(eepromAddress); + UNSERIALIZE_SCALAR(eepromData); + + /* + * If there's a pending transmit, reschedule it now + */ + Tick transmitTick; + UNSERIALIZE_SCALAR(transmitTick); + if (transmitTick) + schedule(txEvent, curTick() + transmitTick); + + /* + * unserialize receive address filter settings + */ + UNSERIALIZE_SCALAR(rxFilterEnable); + UNSERIALIZE_SCALAR(acceptBroadcast); + UNSERIALIZE_SCALAR(acceptMulticast); + UNSERIALIZE_SCALAR(acceptUnicast); + UNSERIALIZE_SCALAR(acceptPerfect); + UNSERIALIZE_SCALAR(acceptArp); + UNSERIALIZE_SCALAR(multicastHashEnable); + + /* + * Keep track of pending interrupt status. + */ + UNSERIALIZE_SCALAR(intrTick); + UNSERIALIZE_SCALAR(cpuPendingIntr); + Tick intrEventTick; + UNSERIALIZE_SCALAR(intrEventTick); + if (intrEventTick) { + intrEvent = new IntrEvent(this, true); + schedule(intrEvent, intrEventTick); + } +} + +NSGigE * +NSGigEParams::create() +{ + return new NSGigE(this); +} diff --git a/src/dev/net/ns_gige.hh b/src/dev/net/ns_gige.hh new file mode 100644 index 000000000..096b2b69e --- /dev/null +++ b/src/dev/net/ns_gige.hh @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + * Lisa Hsu + */ + +/** @file + * Device module for modelling the National Semiconductor + * DP83820 ethernet controller + */ + +#ifndef __DEV_NET_NS_GIGE_HH__ +#define __DEV_NET_NS_GIGE_HH__ + +#include "base/inet.hh" +#include "dev/io_device.hh" +#include "dev/net/etherdevice.hh" +#include "dev/net/etherint.hh" +#include "dev/net/etherpkt.hh" +#include "dev/net/ns_gige_reg.h" +#include "dev/net/pktfifo.hh" +#include "params/NSGigE.hh" +#include "sim/eventq.hh" + +// Hash filtering constants +const uint16_t FHASH_ADDR = 0x100; +const uint16_t FHASH_SIZE = 0x100; + +// EEPROM constants +const uint8_t EEPROM_READ = 0x2; +const uint8_t EEPROM_SIZE = 64; // Size in words of NSC93C46 EEPROM +const uint8_t EEPROM_PMATCH2_ADDR = 0xA; // EEPROM Address of PMATCH word 2 +const uint8_t EEPROM_PMATCH1_ADDR = 0xB; // EEPROM Address of PMATCH word 1 +const uint8_t EEPROM_PMATCH0_ADDR = 0xC; // EEPROM Address of PMATCH word 0 + +/** + * Ethernet device registers + */ +struct dp_regs { + uint32_t command; + uint32_t config; + uint32_t mear; + uint32_t ptscr; + uint32_t isr; + uint32_t imr; + uint32_t ier; + uint32_t ihr; + uint32_t txdp; + uint32_t txdp_hi; + uint32_t txcfg; + uint32_t gpior; + uint32_t rxdp; + uint32_t rxdp_hi; + uint32_t rxcfg; + uint32_t pqcr; + uint32_t wcsr; + uint32_t pcr; + uint32_t rfcr; + uint32_t rfdr; + uint32_t brar; + uint32_t brdr; + uint32_t srr; + uint32_t mibc; + uint32_t vrcr; + uint32_t vtcr; + uint32_t vdr; + uint32_t ccsr; + uint32_t tbicr; + uint32_t tbisr; + uint32_t tanar; + uint32_t tanlpar; + uint32_t taner; + uint32_t tesr; +}; + +struct dp_rom { + /** + * for perfect match memory. + * the linux driver doesn't use any other ROM + */ + uint8_t perfectMatch[ETH_ADDR_LEN]; + + /** + * for hash table memory. 
+ * used by the freebsd driver + */ + uint8_t filterHash[FHASH_SIZE]; +}; + +class NSGigEInt; +class Packet; + +/** + * NS DP83820 Ethernet device model + */ +class NSGigE : public EtherDevBase +{ + public: + /** Transmit State Machine states */ + enum TxState + { + txIdle, + txDescRefr, + txDescRead, + txFifoBlock, + txFragRead, + txDescWrite, + txAdvance + }; + + /** Receive State Machine States */ + enum RxState + { + rxIdle, + rxDescRefr, + rxDescRead, + rxFifoBlock, + rxFragWrite, + rxDescWrite, + rxAdvance + }; + + enum DmaState + { + dmaIdle, + dmaReading, + dmaWriting, + dmaReadWaiting, + dmaWriteWaiting + }; + + /** EEPROM State Machine States */ + enum EEPROMState + { + eepromStart, + eepromGetOpcode, + eepromGetAddress, + eepromRead + }; + + protected: + /** device register file */ + dp_regs regs; + dp_rom rom; + + /** pci settings */ + bool ioEnable; +#if 0 + bool memEnable; + bool bmEnable; +#endif + + /*** BASIC STRUCTURES FOR TX/RX ***/ + /* Data FIFOs */ + PacketFifo txFifo; + PacketFifo rxFifo; + + /** various helper vars */ + EthPacketPtr txPacket; + EthPacketPtr rxPacket; + uint8_t *txPacketBufPtr; + uint8_t *rxPacketBufPtr; + uint32_t txXferLen; + uint32_t rxXferLen; + bool rxDmaFree; + bool txDmaFree; + + /** DescCaches */ + ns_desc32 txDesc32; + ns_desc32 rxDesc32; + ns_desc64 txDesc64; + ns_desc64 rxDesc64; + + /* tx State Machine */ + TxState txState; + bool txEnable; + + /** Current Transmit Descriptor Done */ + bool CTDD; + /** halt the tx state machine after next packet */ + bool txHalt; + /** ptr to the next byte in the current fragment */ + Addr txFragPtr; + /** count of bytes remaining in the current descriptor */ + uint32_t txDescCnt; + DmaState txDmaState; + + /** rx State Machine */ + RxState rxState; + bool rxEnable; + + /** Current Receive Descriptor Done */ + bool CRDD; + /** num of bytes in the current packet being drained from rxDataFifo */ + uint32_t rxPktBytes; + /** halt the rx state machine after current packet */ + bool 
rxHalt; + /** ptr to the next byte in current fragment */ + Addr rxFragPtr; + /** count of bytes remaining in the current descriptor */ + uint32_t rxDescCnt; + DmaState rxDmaState; + + bool extstsEnable; + + /** EEPROM State Machine */ + EEPROMState eepromState; + bool eepromClk; + uint8_t eepromBitsToRx; + uint8_t eepromOpcode; + uint8_t eepromAddress; + uint16_t eepromData; + + protected: + Tick dmaReadDelay; + Tick dmaWriteDelay; + + Tick dmaReadFactor; + Tick dmaWriteFactor; + + void *rxDmaData; + Addr rxDmaAddr; + int rxDmaLen; + bool doRxDmaRead(); + bool doRxDmaWrite(); + + void *txDmaData; + Addr txDmaAddr; + int txDmaLen; + bool doTxDmaRead(); + bool doTxDmaWrite(); + + void rxDmaReadDone(); + friend class EventWrapper; + EventWrapper rxDmaReadEvent; + + void rxDmaWriteDone(); + friend class EventWrapper; + EventWrapper rxDmaWriteEvent; + + void txDmaReadDone(); + friend class EventWrapper; + EventWrapper txDmaReadEvent; + + void txDmaWriteDone(); + friend class EventWrapper; + EventWrapper txDmaWriteEvent; + + bool dmaDescFree; + bool dmaDataFree; + + protected: + Tick txDelay; + Tick rxDelay; + + void txReset(); + void rxReset(); + void regsReset(); + + void rxKick(); + Tick rxKickTick; + typedef EventWrapper RxKickEvent; + friend void RxKickEvent::process(); + RxKickEvent rxKickEvent; + + void txKick(); + Tick txKickTick; + typedef EventWrapper TxKickEvent; + friend void TxKickEvent::process(); + TxKickEvent txKickEvent; + + void eepromKick(); + + /** + * Retransmit event + */ + void transmit(); + void txEventTransmit() + { + transmit(); + if (txState == txFifoBlock) + txKick(); + } + typedef EventWrapper TxEvent; + friend void TxEvent::process(); + TxEvent txEvent; + + void txDump() const; + void rxDump() const; + + /** + * receive address filter + */ + bool rxFilterEnable; + bool rxFilter(const EthPacketPtr &packet); + bool acceptBroadcast; + bool acceptMulticast; + bool acceptUnicast; + bool acceptPerfect; + bool acceptArp; + bool 
multicastHashEnable; + + /** + * Interrupt management + */ + void devIntrPost(uint32_t interrupts); + void devIntrClear(uint32_t interrupts); + void devIntrChangeMask(); + + Tick intrDelay; + Tick intrTick; + bool cpuPendingIntr; + void cpuIntrPost(Tick when); + void cpuInterrupt(); + void cpuIntrClear(); + + typedef EventWrapper IntrEvent; + friend void IntrEvent::process(); + IntrEvent *intrEvent; + NSGigEInt *interface; + + public: + typedef NSGigEParams Params; + const Params *params() const { + return dynamic_cast(_params); + } + + NSGigE(Params *params); + ~NSGigE(); + + EtherInt *getEthPort(const std::string &if_name, int idx) override; + + Tick writeConfig(PacketPtr pkt) override; + + Tick read(PacketPtr pkt) override; + Tick write(PacketPtr pkt) override; + + bool cpuIntrPending() const; + void cpuIntrAck() { cpuIntrClear(); } + + bool recvPacket(EthPacketPtr packet); + void transferDone(); + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + + void drainResume() override; +}; + +/* + * Ethernet Interface for an Ethernet Device + */ +class NSGigEInt : public EtherInt +{ + private: + NSGigE *dev; + + public: + NSGigEInt(const std::string &name, NSGigE *d) + : EtherInt(name), dev(d) + { } + + virtual bool recvPacket(EthPacketPtr pkt) { return dev->recvPacket(pkt); } + virtual void sendDone() { dev->transferDone(); } +}; + +#endif // __DEV_NET_NS_GIGE_HH__ diff --git a/src/dev/net/ns_gige_reg.h b/src/dev/net/ns_gige_reg.h new file mode 100644 index 000000000..c37c06aed --- /dev/null +++ b/src/dev/net/ns_gige_reg.h @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Lisa Hsu + */ + +/** @file + * Ethernet device register definitions for the National + * Semiconductor DP83820 Ethernet controller + */ + +#ifndef __DEV_NS_GIGE_REG_H__ +#define __DEV_NS_GIGE_REG_H__ + +/* Device Register Address Map */ +enum DeviceRegisterAddress { + CR = 0x00, + CFGR = 0x04, + MEAR = 0x08, + PTSCR = 0x0c, + ISR = 0x10, + IMR = 0x14, + IER = 0x18, + IHR = 0x1c, + TXDP = 0x20, + TXDP_HI = 0x24, + TX_CFG = 0x28, + GPIOR = 0x2c, + RXDP = 0x30, + RXDP_HI = 0x34, + RX_CFG = 0x38, + PQCR = 0x3c, + WCSR = 0x40, + PCR = 0x44, + RFCR = 0x48, + RFDR = 0x4c, + BRAR = 0x50, + BRDR = 0x54, + SRR = 0x58, + MIBC = 0x5c, + MIB_START = 0x60, + MIB_END = 0x88, + VRCR = 0xbc, + VTCR = 0xc0, + VDR = 0xc4, + CCSR = 0xcc, + TBICR = 0xe0, + TBISR = 0xe4, + TANAR = 0xe8, + TANLPAR = 0xec, + TANER = 0xf0, + TESR = 0xf4, + M5REG = 0xf8, + LAST = 0xf8, + RESERVED = 0xfc +}; + +/* Chip Command Register */ +enum ChipCommandRegister { + CR_TXE = 0x00000001, + CR_TXD = 0x00000002, + CR_RXE = 0x00000004, + CR_RXD = 0x00000008, + CR_TXR = 0x00000010, + CR_RXR = 0x00000020, + CR_SWI = 0x00000080, + CR_RST = 0x00000100 +}; + +/* configuration register */ +enum ConfigurationRegisters { + CFGR_LNKSTS = 0x80000000, + CFGR_SPDSTS = 0x60000000, + CFGR_SPDSTS1 = 0x40000000, + CFGR_SPDSTS0 = 0x20000000, + CFGR_DUPSTS = 0x10000000, + CFGR_TBI_EN = 0x01000000, + CFGR_RESERVED = 0x0e000000, + CFGR_MODE_1000 = 0x00400000, + CFGR_AUTO_1000 = 0x00200000, + CFGR_PINT_CTL = 0x001c0000, + CFGR_PINT_DUPSTS = 0x00100000, + CFGR_PINT_LNKSTS = 0x00080000, + CFGR_PINT_SPDSTS = 0x00040000, + CFGR_TMRTEST = 0x00020000, + CFGR_MRM_DIS = 0x00010000, + CFGR_MWI_DIS = 0x00008000, + CFGR_T64ADDR = 0x00004000, + CFGR_PCI64_DET = 0x00002000, + CFGR_DATA64_EN = 0x00001000, + CFGR_M64ADDR = 0x00000800, + CFGR_PHY_RST = 0x00000400, + CFGR_PHY_DIS = 0x00000200, + CFGR_EXTSTS_EN = 0x00000100, + CFGR_REQALG = 0x00000080, + CFGR_SB = 0x00000040, + CFGR_POW = 0x00000020, + CFGR_EXD = 0x00000010, + 
CFGR_PESEL = 0x00000008, + CFGR_BROM_DIS = 0x00000004, + CFGR_EXT_125 = 0x00000002, + CFGR_BEM = 0x00000001 +}; + +/* EEPROM access register */ +enum EEPROMAccessRegister { + MEAR_EEDI = 0x00000001, + MEAR_EEDO = 0x00000002, + MEAR_EECLK = 0x00000004, + MEAR_EESEL = 0x00000008, + MEAR_MDIO = 0x00000010, + MEAR_MDDIR = 0x00000020, + MEAR_MDC = 0x00000040, +}; + +/* PCI test control register */ +enum PCITestControlRegister { + PTSCR_EEBIST_FAIL = 0x00000001, + PTSCR_EEBIST_EN = 0x00000002, + PTSCR_EELOAD_EN = 0x00000004, + PTSCR_RBIST_FAIL = 0x000001b8, + PTSCR_RBIST_DONE = 0x00000200, + PTSCR_RBIST_EN = 0x00000400, + PTSCR_RBIST_RST = 0x00002000, + PTSCR_RBIST_RDONLY = 0x000003f9 +}; + +/* interrupt status register */ +enum InterruptStatusRegister { + ISR_RESERVE = 0x80000000, + ISR_TXDESC3 = 0x40000000, + ISR_TXDESC2 = 0x20000000, + ISR_TXDESC1 = 0x10000000, + ISR_TXDESC0 = 0x08000000, + ISR_RXDESC3 = 0x04000000, + ISR_RXDESC2 = 0x02000000, + ISR_RXDESC1 = 0x01000000, + ISR_RXDESC0 = 0x00800000, + ISR_TXRCMP = 0x00400000, + ISR_RXRCMP = 0x00200000, + ISR_DPERR = 0x00100000, + ISR_SSERR = 0x00080000, + ISR_RMABT = 0x00040000, + ISR_RTAB = 0x00020000, + ISR_RXSOVR = 0x00010000, + ISR_HIBINT = 0x00008000, + ISR_PHY = 0x00004000, + ISR_PME = 0x00002000, + ISR_SWI = 0x00001000, + ISR_MIB = 0x00000800, + ISR_TXURN = 0x00000400, + ISR_TXIDLE = 0x00000200, + ISR_TXERR = 0x00000100, + ISR_TXDESC = 0x00000080, + ISR_TXOK = 0x00000040, + ISR_RXORN = 0x00000020, + ISR_RXIDLE = 0x00000010, + ISR_RXEARLY = 0x00000008, + ISR_RXERR = 0x00000004, + ISR_RXDESC = 0x00000002, + ISR_RXOK = 0x00000001, + ISR_ALL = 0x7FFFFFFF, + ISR_DELAY = (ISR_TXIDLE|ISR_TXDESC|ISR_TXOK| + ISR_RXIDLE|ISR_RXDESC|ISR_RXOK), + ISR_NODELAY = (ISR_ALL & ~ISR_DELAY), + ISR_IMPL = (ISR_SWI|ISR_TXIDLE|ISR_TXDESC|ISR_TXOK|ISR_RXORN| + ISR_RXIDLE|ISR_RXDESC|ISR_RXOK), + ISR_NOIMPL = (ISR_ALL & ~ISR_IMPL) +}; + +/* transmit configuration register */ +enum TransmitConfigurationRegister { + TX_CFG_CSI = 0x80000000, 
+ TX_CFG_HBI = 0x40000000, + TX_CFG_MLB = 0x20000000, + TX_CFG_ATP = 0x10000000, + TX_CFG_ECRETRY = 0x00800000, + TX_CFG_BRST_DIS = 0x00080000, + TX_CFG_MXDMA1024 = 0x00000000, + TX_CFG_MXDMA512 = 0x00700000, + TX_CFG_MXDMA256 = 0x00600000, + TX_CFG_MXDMA128 = 0x00500000, + TX_CFG_MXDMA64 = 0x00400000, + TX_CFG_MXDMA32 = 0x00300000, + TX_CFG_MXDMA16 = 0x00200000, + TX_CFG_MXDMA8 = 0x00100000, + TX_CFG_MXDMA = 0x00700000, + + TX_CFG_FLTH_MASK = 0x0000ff00, + TX_CFG_DRTH_MASK = 0x000000ff +}; + +/*general purpose I/O control register */ +enum GeneralPurposeIOControlRegister { + GPIOR_UNUSED = 0xffff8000, + GPIOR_GP5_IN = 0x00004000, + GPIOR_GP4_IN = 0x00002000, + GPIOR_GP3_IN = 0x00001000, + GPIOR_GP2_IN = 0x00000800, + GPIOR_GP1_IN = 0x00000400, + GPIOR_GP5_OE = 0x00000200, + GPIOR_GP4_OE = 0x00000100, + GPIOR_GP3_OE = 0x00000080, + GPIOR_GP2_OE = 0x00000040, + GPIOR_GP1_OE = 0x00000020, + GPIOR_GP5_OUT = 0x00000010, + GPIOR_GP4_OUT = 0x00000008, + GPIOR_GP3_OUT = 0x00000004, + GPIOR_GP2_OUT = 0x00000002, + GPIOR_GP1_OUT = 0x00000001 +}; + +/* receive configuration register */ +enum ReceiveConfigurationRegister { + RX_CFG_AEP = 0x80000000, + RX_CFG_ARP = 0x40000000, + RX_CFG_STRIPCRC = 0x20000000, + RX_CFG_RX_FD = 0x10000000, + RX_CFG_ALP = 0x08000000, + RX_CFG_AIRL = 0x04000000, + RX_CFG_MXDMA512 = 0x00700000, + RX_CFG_MXDMA = 0x00700000, + RX_CFG_DRTH = 0x0000003e, + RX_CFG_DRTH0 = 0x00000002 +}; + +/* pause control status register */ +enum PauseControlStatusRegister { + PCR_PSEN = (1 << 31), + PCR_PS_MCAST = (1 << 30), + PCR_PS_DA = (1 << 29), + PCR_STHI_8 = (3 << 23), + PCR_STLO_4 = (1 << 23), + PCR_FFHI_8K = (3 << 21), + PCR_FFLO_4K = (1 << 21), + PCR_PAUSE_CNT = 0xFFFE +}; + +/*receive filter/match control register */ +enum ReceiveFilterMatchControlRegister { + RFCR_RFEN = 0x80000000, + RFCR_AAB = 0x40000000, + RFCR_AAM = 0x20000000, + RFCR_AAU = 0x10000000, + RFCR_APM = 0x08000000, + RFCR_APAT = 0x07800000, + RFCR_APAT3 = 0x04000000, + RFCR_APAT2 = 
0x02000000, + RFCR_APAT1 = 0x01000000, + RFCR_APAT0 = 0x00800000, + RFCR_AARP = 0x00400000, + RFCR_MHEN = 0x00200000, + RFCR_UHEN = 0x00100000, + RFCR_ULM = 0x00080000, + RFCR_RFADDR = 0x000003ff +}; + +/* receive filter/match data register */ +enum ReceiveFilterMatchDataRegister { + RFDR_BMASK = 0x00030000, + RFDR_RFDATA0 = 0x000000ff, + RFDR_RFDATA1 = 0x0000ff00 +}; + +/* management information base control register */ +enum ManagementInformationBaseControlRegister { + MIBC_MIBS = 0x00000008, + MIBC_ACLR = 0x00000004, + MIBC_FRZ = 0x00000002, + MIBC_WRN = 0x00000001 +}; + +/* VLAN/IP receive control register */ +enum VLANIPReceiveControlRegister { + VRCR_RUDPE = 0x00000080, + VRCR_RTCPE = 0x00000040, + VRCR_RIPE = 0x00000020, + VRCR_IPEN = 0x00000010, + VRCR_DUTF = 0x00000008, + VRCR_DVTF = 0x00000004, + VRCR_VTREN = 0x00000002, + VRCR_VTDEN = 0x00000001 +}; + +/* VLAN/IP transmit control register */ +enum VLANIPTransmitControlRegister { + VTCR_PPCHK = 0x00000008, + VTCR_GCHK = 0x00000004, + VTCR_VPPTI = 0x00000002, + VTCR_VGTI = 0x00000001 +}; + +/* Clockrun Control/Status Register */ +enum ClockrunControlStatusRegister { + CCSR_CLKRUN_EN = 0x00000001 +}; + +/* TBI control register */ +enum TBIControlRegister { + TBICR_MR_LOOPBACK = 0x00004000, + TBICR_MR_AN_ENABLE = 0x00001000, + TBICR_MR_RESTART_AN = 0x00000200 +}; + +/* TBI status register */ +enum TBIStatusRegister { + TBISR_MR_LINK_STATUS = 0x00000020, + TBISR_MR_AN_COMPLETE = 0x00000004 +}; + +/* TBI auto-negotiation advertisement register */ +enum TBIAutoNegotiationAdvertisementRegister { + TANAR_NP = 0x00008000, + TANAR_RF2 = 0x00002000, + TANAR_RF1 = 0x00001000, + TANAR_PS2 = 0x00000100, + TANAR_PS1 = 0x00000080, + TANAR_HALF_DUP = 0x00000040, + TANAR_FULL_DUP = 0x00000020, + TANAR_UNUSED = 0x00000E1F +}; + +/* M5 control register */ +enum M5ControlRegister { + M5REG_RESERVED = 0xfffffffc, + M5REG_RSS = 0x00000004, + M5REG_RX_THREAD = 0x00000002, + M5REG_TX_THREAD = 0x00000001 +}; + +struct ns_desc32 { 
+ uint32_t link; /* link field to next descriptor in linked list */ + uint32_t bufptr; /* pointer to the first fragment or buffer */ + uint32_t cmdsts; /* command/status field */ + uint32_t extsts; /* extended status field for VLAN and IP info */ +}; + +struct ns_desc64 { + uint64_t link; /* link field to next descriptor in linked list */ + uint64_t bufptr; /* pointer to the first fragment or buffer */ + uint32_t cmdsts; /* command/status field */ + uint32_t extsts; /* extended status field for VLAN and IP info */ +}; + +/* cmdsts flags for descriptors */ +enum CMDSTSFlatsForDescriptors { + CMDSTS_OWN = 0x80000000, + CMDSTS_MORE = 0x40000000, + CMDSTS_INTR = 0x20000000, + CMDSTS_ERR = 0x10000000, + CMDSTS_OK = 0x08000000, + CMDSTS_LEN_MASK = 0x0000ffff, + + CMDSTS_DEST_MASK = 0x01800000, + CMDSTS_DEST_SELF = 0x00800000, + CMDSTS_DEST_MULTI = 0x01000000 +}; + +/* extended flags for descriptors */ +enum ExtendedFlagsForDescriptors { + EXTSTS_UDPERR = 0x00400000, + EXTSTS_UDPPKT = 0x00200000, + EXTSTS_TCPERR = 0x00100000, + EXTSTS_TCPPKT = 0x00080000, + EXTSTS_IPERR = 0x00040000, + EXTSTS_IPPKT = 0x00020000 +}; + +/* speed status */ +static inline int +SPDSTS_POLARITY(int lnksts) +{ + return (CFGR_SPDSTS1 | CFGR_SPDSTS0 | CFGR_DUPSTS | + (lnksts ? CFGR_LNKSTS : 0)); +} + +#endif /* __DEV_NS_GIGE_REG_H__ */ diff --git a/src/dev/net/pktfifo.cc b/src/dev/net/pktfifo.cc new file mode 100644 index 000000000..af4dbf412 --- /dev/null +++ b/src/dev/net/pktfifo.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +#include "dev/net/pktfifo.hh" + +#include "base/misc.hh" + +using namespace std; + +bool +PacketFifo::copyout(void *dest, unsigned offset, unsigned len) +{ + char *data = (char *)dest; + if (offset + len >= size()) + return false; + + iterator i = fifo.begin(); + iterator end = fifo.end(); + while (len > 0) { + EthPacketPtr &pkt = i->packet; + while (offset >= pkt->length) { + offset -= pkt->length; + ++i; + } + + if (i == end) + panic("invalid fifo"); + + unsigned size = min(pkt->length - offset, len); + memcpy(data, pkt->data, size); + offset = 0; + len -= size; + data += size; + ++i; + } + + return true; +} + + +void +PacketFifoEntry::serialize(const string &base, CheckpointOut &cp) const +{ + packet->serialize(base + ".packet", cp); + paramOut(cp, base + ".slack", slack); + paramOut(cp, base + ".number", number); + paramOut(cp, base + ".priv", priv); +} + +void +PacketFifoEntry::unserialize(const string &base, CheckpointIn &cp) +{ + packet = make_shared(16384); + packet->unserialize(base + ".packet", cp); + paramIn(cp, base + ".slack", slack); + paramIn(cp, base + ".number", number); + paramIn(cp, base + ".priv", priv); +} + +void +PacketFifo::serialize(const string &base, CheckpointOut &cp) const +{ + paramOut(cp, base + ".size", _size); + paramOut(cp, base + ".maxsize", _maxsize); + paramOut(cp, base + ".reserved", _reserved); + paramOut(cp, base + ".packets", fifo.size()); + + int i = 0; + for (const auto &entry : fifo) + entry.serialize(csprintf("%s.entry%d", base, i++), cp); +} + +void +PacketFifo::unserialize(const string &base, CheckpointIn &cp) +{ + paramIn(cp, base + ".size", _size); +// paramIn(cp, base + ".maxsize", _maxsize); + paramIn(cp, base + ".reserved", _reserved); + int fifosize; + paramIn(cp, base + ".packets", fifosize); + + fifo.clear(); + + for (int i = 0; i < fifosize; ++i) { + PacketFifoEntry entry; + entry.unserialize(csprintf("%s.entry%d", base, i), cp); + fifo.push_back(entry); + } +} diff 
--git a/src/dev/net/pktfifo.hh b/src/dev/net/pktfifo.hh new file mode 100644 index 000000000..5ef75423c --- /dev/null +++ b/src/dev/net/pktfifo.hh @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +#ifndef __DEV_NET_PKTFIFO_HH__ +#define __DEV_NET_PKTFIFO_HH__ + +#include +#include +#include + +#include "base/misc.hh" +#include "dev/net/etherpkt.hh" +#include "sim/serialize.hh" + +class Checkpoint; + +struct PacketFifoEntry +{ + EthPacketPtr packet; + uint64_t number; + unsigned slack; + int priv; + + PacketFifoEntry() + { + clear(); + } + + PacketFifoEntry(const PacketFifoEntry &s) + : packet(s.packet), number(s.number), slack(s.slack), priv(s.priv) + { + } + + PacketFifoEntry(EthPacketPtr p, uint64_t n) + : packet(p), number(n), slack(0), priv(-1) + { + } + + void clear() + { + packet = NULL; + number = 0; + slack = 0; + priv = -1; + } + + void serialize(const std::string &base, CheckpointOut &cp) const; + void unserialize(const std::string &base, CheckpointIn &cp); +}; + +class PacketFifo +{ + public: + + typedef std::list fifo_list; + typedef fifo_list::iterator iterator; + typedef fifo_list::const_iterator const_iterator; + + protected: + std::list fifo; + uint64_t _counter; + unsigned _maxsize; + unsigned _size; + unsigned _reserved; + + public: + explicit PacketFifo(int max) + : _counter(0), _maxsize(max), _size(0), _reserved(0) {} + virtual ~PacketFifo() {} + + unsigned packets() const { return fifo.size(); } + unsigned maxsize() const { return _maxsize; } + unsigned size() const { return _size; } + unsigned reserved() const { return _reserved; } + unsigned avail() const { return _maxsize - _size - _reserved; } + bool empty() const { return size() <= 0; } + bool full() const { return avail() <= 0; } + + unsigned + reserve(unsigned len = 0) + { + _reserved += len; + assert(avail() >= 0); + return _reserved; + } + + iterator begin() { return fifo.begin(); } + iterator end() { return fifo.end(); } + + const_iterator begin() const { return fifo.begin(); } + const_iterator end() const { return fifo.end(); } + + EthPacketPtr front() { return fifo.begin()->packet; } + + bool push(EthPacketPtr ptr) + { + 
assert(ptr->length); + assert(_reserved <= ptr->length); + if (avail() < ptr->length - _reserved) + return false; + + _size += ptr->length; + + PacketFifoEntry entry; + entry.packet = ptr; + entry.number = _counter++; + fifo.push_back(entry); + _reserved = 0; + return true; + } + + void pop() + { + if (empty()) + return; + + iterator entry = fifo.begin(); + _size -= entry->packet->length; + _size -= entry->slack; + entry->packet = NULL; + fifo.pop_front(); + } + + void clear() + { + for (iterator i = begin(); i != end(); ++i) + i->clear(); + fifo.clear(); + _size = 0; + _reserved = 0; + } + + void remove(iterator i) + { + if (i != fifo.begin()) { + iterator prev = i; + --prev; + assert(prev != fifo.end()); + prev->slack += i->packet->length; + prev->slack += i->slack; + } else { + _size -= i->packet->length; + _size -= i->slack; + } + + i->clear(); + fifo.erase(i); + } + + bool copyout(void *dest, unsigned offset, unsigned len); + + int countPacketsBefore(const_iterator i) const + { + if (i == fifo.end()) + return 0; + return i->number - fifo.begin()->number; + } + + int countPacketsAfter(const_iterator i) const + { + auto end = fifo.end(); + if (i == end) + return 0; + return (--end)->number - i->number; + } + + void check() const + { + unsigned total = 0; + for (auto i = begin(); i != end(); ++i) + total += i->packet->length + i->slack; + + if (total != _size) + panic("total (%d) is not == to size (%d)\n", total, _size); + } + +/** + * Serialization stuff + */ + public: + void serialize(const std::string &base, CheckpointOut &cp) const; + void unserialize(const std::string &base, CheckpointIn &cp); +}; + +#endif // __DEV_NET_PKTFIFO_HH__ diff --git a/src/dev/net/sinic.cc b/src/dev/net/sinic.cc new file mode 100644 index 000000000..d0adb1016 --- /dev/null +++ b/src/dev/net/sinic.cc @@ -0,0 +1,1563 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +#include "dev/net/sinic.hh" + +#include +#include +#include + +#ifdef SINIC_VTOPHYS +#include "arch/vtophys.hh" + +#endif +#include "base/compiler.hh" +#include "base/debug.hh" +#include "base/inet.hh" +#include "base/types.hh" +#include "config/the_isa.hh" +#include "debug/EthernetAll.hh" +#include "dev/net/etherlink.hh" +#include "mem/packet.hh" +#include "mem/packet_access.hh" +#include "sim/eventq.hh" +#include "sim/stats.hh" + +using namespace std; +using namespace Net; +using namespace TheISA; + +namespace Sinic { + +const char *RxStateStrings[] = +{ + "rxIdle", + "rxFifoBlock", + "rxBeginCopy", + "rxCopy", + "rxCopyDone" +}; + +const char *TxStateStrings[] = +{ + "txIdle", + "txFifoBlock", + "txBeginCopy", + "txCopy", + "txCopyDone" +}; + + +/////////////////////////////////////////////////////////////////////// +// +// Sinic PCI Device +// +Base::Base(const Params *p) + : EtherDevBase(p), rxEnable(false), txEnable(false), + intrDelay(p->intr_delay), intrTick(0), cpuIntrEnable(false), + cpuPendingIntr(false), intrEvent(0), interface(NULL) +{ +} + +Device::Device(const Params *p) + : Base(p), rxUnique(0), txUnique(0), + virtualRegs(p->virtual_count < 1 ? 
1 : p->virtual_count), + rxFifo(p->rx_fifo_size), txFifo(p->tx_fifo_size), + rxKickTick(0), txKickTick(0), + txEvent(this), rxDmaEvent(this), txDmaEvent(this), + dmaReadDelay(p->dma_read_delay), dmaReadFactor(p->dma_read_factor), + dmaWriteDelay(p->dma_write_delay), dmaWriteFactor(p->dma_write_factor) +{ + interface = new Interface(name() + ".int0", this); + reset(); + +} + +Device::~Device() +{} + +void +Device::regStats() +{ + Base::regStats(); + + _maxVnicDistance = 0; + + maxVnicDistance + .name(name() + ".maxVnicDistance") + .desc("maximum vnic distance") + ; + + totalVnicDistance + .name(name() + ".totalVnicDistance") + .desc("total vnic distance") + ; + numVnicDistance + .name(name() + ".numVnicDistance") + .desc("number of vnic distance measurements") + ; + + avgVnicDistance + .name(name() + ".avgVnicDistance") + .desc("average vnic distance") + ; + + avgVnicDistance = totalVnicDistance / numVnicDistance; +} + +void +Device::resetStats() +{ + Base::resetStats(); + + _maxVnicDistance = 0; +} + +EtherInt* +Device::getEthPort(const std::string &if_name, int idx) +{ + if (if_name == "interface") { + if (interface->getPeer()) + panic("interface already connected to\n"); + + return interface; + } + return NULL; +} + + +void +Device::prepareIO(ContextID cpu, int index) +{ + int size = virtualRegs.size(); + if (index > size) + panic("Trying to access a vnic that doesn't exist %d > %d\n", + index, size); +} + +//add stats for head of line blocking +//add stats for average fifo length +//add stats for average number of vnics busy + +void +Device::prepareRead(ContextID cpu, int index) +{ + using namespace Regs; + prepareIO(cpu, index); + + VirtualReg &vnic = virtualRegs[index]; + + // update rx registers + uint64_t rxdone = vnic.RxDone; + rxdone = set_RxDone_Packets(rxdone, rxFifo.countPacketsAfter(rxFifoPtr)); + rxdone = set_RxDone_Empty(rxdone, rxFifo.empty()); + rxdone = set_RxDone_High(rxdone, rxFifo.size() > regs.RxFifoHigh); + rxdone = set_RxDone_NotHigh(rxdone, 
rxLow); + regs.RxData = vnic.RxData; + regs.RxDone = rxdone; + regs.RxWait = rxdone; + + // update tx regsiters + uint64_t txdone = vnic.TxDone; + txdone = set_TxDone_Packets(txdone, txFifo.packets()); + txdone = set_TxDone_Full(txdone, txFifo.avail() < regs.TxMaxCopy); + txdone = set_TxDone_Low(txdone, txFifo.size() < regs.TxFifoLow); + regs.TxData = vnic.TxData; + regs.TxDone = txdone; + regs.TxWait = txdone; + + int head = 0xffff; + + if (!rxFifo.empty()) { + int vnic = rxFifo.begin()->priv; + if (vnic != -1 && virtualRegs[vnic].rxPacketOffset > 0) + head = vnic; + } + + regs.RxStatus = set_RxStatus_Head(regs.RxStatus, head); + regs.RxStatus = set_RxStatus_Busy(regs.RxStatus, rxBusyCount); + regs.RxStatus = set_RxStatus_Mapped(regs.RxStatus, rxMappedCount); + regs.RxStatus = set_RxStatus_Dirty(regs.RxStatus, rxDirtyCount); +} + +void +Device::prepareWrite(ContextID cpu, int index) +{ + prepareIO(cpu, index); +} + +/** + * I/O read of device register + */ +Tick +Device::read(PacketPtr pkt) +{ + assert(config.command & PCI_CMD_MSE); + assert(pkt->getAddr() >= BARAddrs[0] && pkt->getSize() < BARSize[0]); + + ContextID cpu = pkt->req->contextId(); + Addr daddr = pkt->getAddr() - BARAddrs[0]; + Addr index = daddr >> Regs::VirtualShift; + Addr raddr = daddr & Regs::VirtualMask; + + if (!regValid(raddr)) + panic("invalid register: cpu=%d vnic=%d da=%#x pa=%#x size=%d", + cpu, index, daddr, pkt->getAddr(), pkt->getSize()); + + const Regs::Info &info = regInfo(raddr); + if (!info.read) + panic("read %s (write only): " + "cpu=%d vnic=%d da=%#x pa=%#x size=%d", + info.name, cpu, index, daddr, pkt->getAddr(), pkt->getSize()); + + panic("read %s (invalid size): " + "cpu=%d vnic=%d da=%#x pa=%#x size=%d", + info.name, cpu, index, daddr, pkt->getAddr(), pkt->getSize()); + + prepareRead(cpu, index); + + uint64_t value M5_VAR_USED = 0; + if (pkt->getSize() == 4) { + uint32_t reg = regData32(raddr); + pkt->set(reg); + value = reg; + } + + if (pkt->getSize() == 8) { + uint64_t reg 
= regData64(raddr); + pkt->set(reg); + value = reg; + } + + DPRINTF(EthernetPIO, + "read %s: cpu=%d vnic=%d da=%#x pa=%#x size=%d val=%#x\n", + info.name, cpu, index, daddr, pkt->getAddr(), pkt->getSize(), value); + + // reading the interrupt status register has the side effect of + // clearing it + if (raddr == Regs::IntrStatus) + devIntrClear(); + + return pioDelay; +} + +/** + * IPR read of device register + + Fault +Device::iprRead(Addr daddr, ContextID cpu, uint64_t &result) +{ + if (!regValid(daddr)) + panic("invalid address: da=%#x", daddr); + + const Regs::Info &info = regInfo(daddr); + if (!info.read) + panic("reading %s (write only): cpu=%d da=%#x", info.name, cpu, daddr); + + DPRINTF(EthernetPIO, "IPR read %s: cpu=%d da=%#x\n", + info.name, cpu, daddr); + + prepareRead(cpu, 0); + + if (info.size == 4) + result = regData32(daddr); + + if (info.size == 8) + result = regData64(daddr); + + DPRINTF(EthernetPIO, "IPR read %s: cpu=%s da=%#x val=%#x\n", + info.name, cpu, result); + + return NoFault; +} +*/ +/** + * I/O write of device register + */ +Tick +Device::write(PacketPtr pkt) +{ + assert(config.command & PCI_CMD_MSE); + assert(pkt->getAddr() >= BARAddrs[0] && pkt->getSize() < BARSize[0]); + + ContextID cpu = pkt->req->contextId(); + Addr daddr = pkt->getAddr() - BARAddrs[0]; + Addr index = daddr >> Regs::VirtualShift; + Addr raddr = daddr & Regs::VirtualMask; + + if (!regValid(raddr)) + panic("invalid register: cpu=%d, da=%#x pa=%#x size=%d", + cpu, daddr, pkt->getAddr(), pkt->getSize()); + + const Regs::Info &info = regInfo(raddr); + if (!info.write) + panic("write %s (read only): " + "cpu=%d vnic=%d da=%#x pa=%#x size=%d", + info.name, cpu, index, daddr, pkt->getAddr(), pkt->getSize()); + + if (pkt->getSize() != info.size) + panic("write %s (invalid size): " + "cpu=%d vnic=%d da=%#x pa=%#x size=%d", + info.name, cpu, index, daddr, pkt->getAddr(), pkt->getSize()); + + VirtualReg &vnic = virtualRegs[index]; + + DPRINTF(EthernetPIO, + "write %s vnic %d: 
cpu=%d val=%#x da=%#x pa=%#x size=%d\n", + info.name, index, cpu, info.size == 4 ? pkt->get() : + pkt->get(), daddr, pkt->getAddr(), pkt->getSize()); + + prepareWrite(cpu, index); + + switch (raddr) { + case Regs::Config: + changeConfig(pkt->get()); + break; + + case Regs::Command: + command(pkt->get()); + break; + + case Regs::IntrStatus: + devIntrClear(regs.IntrStatus & pkt->get()); + break; + + case Regs::IntrMask: + devIntrChangeMask(pkt->get()); + break; + + case Regs::RxData: + if (Regs::get_RxDone_Busy(vnic.RxDone)) + panic("receive machine busy with another request! rxState=%s", + RxStateStrings[rxState]); + + vnic.rxUnique = rxUnique++; + vnic.RxDone = Regs::RxDone_Busy; + vnic.RxData = pkt->get(); + rxBusyCount++; + + if (Regs::get_RxData_Vaddr(pkt->get())) { + panic("vtophys not implemented in newmem"); +#ifdef SINIC_VTOPHYS + Addr vaddr = Regs::get_RxData_Addr(reg64); + Addr paddr = vtophys(req->xc, vaddr); + DPRINTF(EthernetPIO, "write RxData vnic %d (rxunique %d): " + "vaddr=%#x, paddr=%#x\n", + index, vnic.rxUnique, vaddr, paddr); + + vnic.RxData = Regs::set_RxData_Addr(vnic.RxData, paddr); +#endif + } else { + DPRINTF(EthernetPIO, "write RxData vnic %d (rxunique %d)\n", + index, vnic.rxUnique); + } + + if (vnic.rxIndex == rxFifo.end()) { + DPRINTF(EthernetPIO, "request new packet...appending to rxList\n"); + rxList.push_back(index); + } else { + DPRINTF(EthernetPIO, "packet exists...appending to rxBusy\n"); + rxBusy.push_back(index); + } + + if (rxEnable && (rxState == rxIdle || rxState == rxFifoBlock)) { + rxState = rxFifoBlock; + rxKick(); + } + break; + + case Regs::TxData: + if (Regs::get_TxDone_Busy(vnic.TxDone)) + panic("transmit machine busy with another request! 
txState=%s", + TxStateStrings[txState]); + + vnic.txUnique = txUnique++; + vnic.TxDone = Regs::TxDone_Busy; + + if (Regs::get_TxData_Vaddr(pkt->get())) { + panic("vtophys won't work here in newmem.\n"); +#ifdef SINIC_VTOPHYS + Addr vaddr = Regs::get_TxData_Addr(reg64); + Addr paddr = vtophys(req->xc, vaddr); + DPRINTF(EthernetPIO, "write TxData vnic %d (txunique %d): " + "vaddr=%#x, paddr=%#x\n", + index, vnic.txUnique, vaddr, paddr); + + vnic.TxData = Regs::set_TxData_Addr(vnic.TxData, paddr); +#endif + } else { + DPRINTF(EthernetPIO, "write TxData vnic %d (txunique %d)\n", + index, vnic.txUnique); + } + + if (txList.empty() || txList.front() != index) + txList.push_back(index); + if (txEnable && txState == txIdle && txList.front() == index) { + txState = txFifoBlock; + txKick(); + } + break; + } + + return pioDelay; +} + +void +Device::devIntrPost(uint32_t interrupts) +{ + if ((interrupts & Regs::Intr_Res)) + panic("Cannot set a reserved interrupt"); + + regs.IntrStatus |= interrupts; + + DPRINTF(EthernetIntr, + "interrupt written to intStatus: intr=%#x status=%#x mask=%#x\n", + interrupts, regs.IntrStatus, regs.IntrMask); + + interrupts = regs.IntrStatus & regs.IntrMask; + + // Intr_RxHigh is special, we only signal it if we've emptied the fifo + // and then filled it above the high watermark + if (rxEmpty) + rxEmpty = false; + else + interrupts &= ~Regs::Intr_RxHigh; + + // Intr_TxLow is special, we only signal it if we've filled up the fifo + // and then dropped below the low watermark + if (txFull) + txFull = false; + else + interrupts &= ~Regs::Intr_TxLow; + + if (interrupts) { + Tick when = curTick(); + if ((interrupts & Regs::Intr_NoDelay) == 0) + when += intrDelay; + cpuIntrPost(when); + } +} + +void +Device::devIntrClear(uint32_t interrupts) +{ + if ((interrupts & Regs::Intr_Res)) + panic("Cannot clear a reserved interrupt"); + + regs.IntrStatus &= ~interrupts; + + DPRINTF(EthernetIntr, + "interrupt cleared from intStatus: intr=%x status=%x mask=%x\n", + 
interrupts, regs.IntrStatus, regs.IntrMask); + + if (!(regs.IntrStatus & regs.IntrMask)) + cpuIntrClear(); +} + +void +Device::devIntrChangeMask(uint32_t newmask) +{ + if (regs.IntrMask == newmask) + return; + + regs.IntrMask = newmask; + + DPRINTF(EthernetIntr, + "interrupt mask changed: intStatus=%x intMask=%x masked=%x\n", + regs.IntrStatus, regs.IntrMask, regs.IntrStatus & regs.IntrMask); + + if (regs.IntrStatus & regs.IntrMask) + cpuIntrPost(curTick()); + else + cpuIntrClear(); +} + +void +Base::cpuIntrPost(Tick when) +{ + // If the interrupt you want to post is later than an interrupt + // already scheduled, just let it post in the coming one and don't + // schedule another. + // HOWEVER, must be sure that the scheduled intrTick is in the + // future (this was formerly the source of a bug) + /** + * @todo this warning should be removed and the intrTick code should + * be fixed. + */ + assert(when >= curTick()); + assert(intrTick >= curTick() || intrTick == 0); + if (!cpuIntrEnable) { + DPRINTF(EthernetIntr, "interrupts not enabled.\n", + intrTick); + return; + } + + if (when > intrTick && intrTick != 0) { + DPRINTF(EthernetIntr, "don't need to schedule event...intrTick=%d\n", + intrTick); + return; + } + + intrTick = when; + if (intrTick < curTick()) { + Debug::breakpoint(); + intrTick = curTick(); + } + + DPRINTF(EthernetIntr, "going to schedule an interrupt for intrTick=%d\n", + intrTick); + + if (intrEvent) + intrEvent->squash(); + intrEvent = new IntrEvent(this, true); + schedule(intrEvent, intrTick); +} + +void +Base::cpuInterrupt() +{ + assert(intrTick == curTick()); + + // Whether or not there's a pending interrupt, we don't care about + // it anymore + intrEvent = 0; + intrTick = 0; + + // Don't send an interrupt if there's already one + if (cpuPendingIntr) { + DPRINTF(EthernetIntr, + "would send an interrupt now, but there's already pending\n"); + } else { + // Send interrupt + cpuPendingIntr = true; + + DPRINTF(EthernetIntr, "posting interrupt\n"); 
+ intrPost(); + } +} + +void +Base::cpuIntrClear() +{ + if (!cpuPendingIntr) + return; + + if (intrEvent) { + intrEvent->squash(); + intrEvent = 0; + } + + intrTick = 0; + + cpuPendingIntr = false; + + DPRINTF(EthernetIntr, "clearing cchip interrupt\n"); + intrClear(); +} + +bool +Base::cpuIntrPending() const +{ return cpuPendingIntr; } + +void +Device::changeConfig(uint32_t newconf) +{ + uint32_t changed = regs.Config ^ newconf; + if (!changed) + return; + + regs.Config = newconf; + + if ((changed & Regs::Config_IntEn)) { + cpuIntrEnable = regs.Config & Regs::Config_IntEn; + if (cpuIntrEnable) { + if (regs.IntrStatus & regs.IntrMask) + cpuIntrPost(curTick()); + } else { + cpuIntrClear(); + } + } + + if ((changed & Regs::Config_TxEn)) { + txEnable = regs.Config & Regs::Config_TxEn; + if (txEnable) + txKick(); + } + + if ((changed & Regs::Config_RxEn)) { + rxEnable = regs.Config & Regs::Config_RxEn; + if (rxEnable) + rxKick(); + } +} + +void +Device::command(uint32_t command) +{ + if (command & Regs::Command_Intr) + devIntrPost(Regs::Intr_Soft); + + if (command & Regs::Command_Reset) + reset(); +} + +void +Device::reset() +{ + using namespace Regs; + + memset(®s, 0, sizeof(regs)); + + regs.Config = 0; + if (params()->rx_thread) + regs.Config |= Config_RxThread; + if (params()->tx_thread) + regs.Config |= Config_TxThread; + if (params()->rss) + regs.Config |= Config_RSS; + if (params()->zero_copy) + regs.Config |= Config_ZeroCopy; + if (params()->delay_copy) + regs.Config |= Config_DelayCopy; + if (params()->virtual_addr) + regs.Config |= Config_Vaddr; + + if (params()->delay_copy && params()->zero_copy) + panic("Can't delay copy and zero copy"); + + regs.IntrMask = Intr_Soft | Intr_RxHigh | Intr_RxPacket | Intr_TxLow; + regs.RxMaxCopy = params()->rx_max_copy; + regs.TxMaxCopy = params()->tx_max_copy; + regs.ZeroCopySize = params()->zero_copy_size; + regs.ZeroCopyMark = params()->zero_copy_threshold; + regs.VirtualCount = params()->virtual_count; + regs.RxMaxIntr = 
params()->rx_max_intr; + regs.RxFifoSize = params()->rx_fifo_size; + regs.TxFifoSize = params()->tx_fifo_size; + regs.RxFifoLow = params()->rx_fifo_low_mark; + regs.TxFifoLow = params()->tx_fifo_threshold; + regs.RxFifoHigh = params()->rx_fifo_threshold; + regs.TxFifoHigh = params()->tx_fifo_high_mark; + regs.HwAddr = params()->hardware_address; + + if (regs.RxMaxCopy < regs.ZeroCopyMark) + panic("Must be able to copy at least as many bytes as the threshold"); + + if (regs.ZeroCopySize >= regs.ZeroCopyMark) + panic("The number of bytes to copy must be less than the threshold"); + + rxList.clear(); + rxBusy.clear(); + rxActive = -1; + txList.clear(); + rxBusyCount = 0; + rxDirtyCount = 0; + rxMappedCount = 0; + + rxState = rxIdle; + txState = txIdle; + + rxFifo.clear(); + rxFifoPtr = rxFifo.end(); + txFifo.clear(); + rxEmpty = false; + rxLow = true; + txFull = false; + + int size = virtualRegs.size(); + virtualRegs.clear(); + virtualRegs.resize(size); + for (int i = 0; i < size; ++i) + virtualRegs[i].rxIndex = rxFifo.end(); +} + +void +Device::rxDmaDone() +{ + assert(rxState == rxCopy); + rxState = rxCopyDone; + DPRINTF(EthernetDMA, "end rx dma write paddr=%#x len=%d\n", + rxDmaAddr, rxDmaLen); + DDUMP(EthernetData, rxDmaData, rxDmaLen); + + // If the transmit state machine has a pending DMA, let it go first + if (txState == txBeginCopy) + txKick(); + + rxKick(); +} + +void +Device::rxKick() +{ + VirtualReg *vnic = NULL; + + DPRINTF(EthernetSM, "rxKick: rxState=%s (rxFifo.size=%d)\n", + RxStateStrings[rxState], rxFifo.size()); + + if (rxKickTick > curTick()) { + DPRINTF(EthernetSM, "rxKick: exiting, can't run till %d\n", + rxKickTick); + return; + } + + next: + rxFifo.check(); + if (rxState == rxIdle) + goto exit; + + if (rxActive == -1) { + if (rxState != rxFifoBlock) + panic("no active vnic while in state %s", RxStateStrings[rxState]); + + DPRINTF(EthernetSM, "processing rxState=%s\n", + RxStateStrings[rxState]); + } else { + vnic = &virtualRegs[rxActive]; + 
DPRINTF(EthernetSM, + "processing rxState=%s for vnic %d (rxunique %d)\n", + RxStateStrings[rxState], rxActive, vnic->rxUnique); + } + + switch (rxState) { + case rxFifoBlock: + if (DTRACE(EthernetSM)) { + PacketFifo::iterator end = rxFifo.end(); + int size = virtualRegs.size(); + for (int i = 0; i < size; ++i) { + VirtualReg *vn = &virtualRegs[i]; + bool busy = Regs::get_RxDone_Busy(vn->RxDone); + if (vn->rxIndex != end) { +#ifndef NDEBUG + bool dirty = vn->rxPacketOffset > 0; + const char *status; + + if (busy && dirty) + status = "busy,dirty"; + else if (busy) + status = "busy"; + else if (dirty) + status = "dirty"; + else + status = "mapped"; + + DPRINTF(EthernetSM, + "vnic %d %s (rxunique %d), packet %d, slack %d\n", + i, status, vn->rxUnique, + rxFifo.countPacketsBefore(vn->rxIndex), + vn->rxIndex->slack); +#endif + } else if (busy) { + DPRINTF(EthernetSM, "vnic %d unmapped (rxunique %d)\n", + i, vn->rxUnique); + } + } + } + + if (!rxBusy.empty()) { + rxActive = rxBusy.front(); + rxBusy.pop_front(); + vnic = &virtualRegs[rxActive]; + + if (vnic->rxIndex == rxFifo.end()) + panic("continuing vnic without packet\n"); + + DPRINTF(EthernetSM, + "continue processing for vnic %d (rxunique %d)\n", + rxActive, vnic->rxUnique); + + rxState = rxBeginCopy; + + int vnic_distance = rxFifo.countPacketsBefore(vnic->rxIndex); + totalVnicDistance += vnic_distance; + numVnicDistance += 1; + if (vnic_distance > _maxVnicDistance) { + maxVnicDistance = vnic_distance; + _maxVnicDistance = vnic_distance; + } + + break; + } + + if (rxFifoPtr == rxFifo.end()) { + DPRINTF(EthernetSM, "receive waiting for data. Nothing to do.\n"); + goto exit; + } + + if (rxList.empty()) + panic("Not idle, but nothing to do!"); + + assert(!rxFifo.empty()); + + rxActive = rxList.front(); + rxList.pop_front(); + vnic = &virtualRegs[rxActive]; + + DPRINTF(EthernetSM, + "processing new packet for vnic %d (rxunique %d)\n", + rxActive, vnic->rxUnique); + + // Grab a new packet from the fifo. 
+ vnic->rxIndex = rxFifoPtr++; + vnic->rxIndex->priv = rxActive; + vnic->rxPacketOffset = 0; + vnic->rxPacketBytes = vnic->rxIndex->packet->length; + assert(vnic->rxPacketBytes); + rxMappedCount++; + + vnic->rxDoneData = 0; + /* scope for variables */ { + IpPtr ip(vnic->rxIndex->packet); + if (ip) { + DPRINTF(Ethernet, "ID is %d\n", ip->id()); + vnic->rxDoneData |= Regs::RxDone_IpPacket; + rxIpChecksums++; + if (cksum(ip) != 0) { + DPRINTF(EthernetCksum, "Rx IP Checksum Error\n"); + vnic->rxDoneData |= Regs::RxDone_IpError; + } + TcpPtr tcp(ip); + UdpPtr udp(ip); + if (tcp) { + DPRINTF(Ethernet, + "Src Port=%d, Dest Port=%d, Seq=%d, Ack=%d\n", + tcp->sport(), tcp->dport(), tcp->seq(), + tcp->ack()); + vnic->rxDoneData |= Regs::RxDone_TcpPacket; + rxTcpChecksums++; + if (cksum(tcp) != 0) { + DPRINTF(EthernetCksum, "Rx TCP Checksum Error\n"); + vnic->rxDoneData |= Regs::RxDone_TcpError; + } + } else if (udp) { + vnic->rxDoneData |= Regs::RxDone_UdpPacket; + rxUdpChecksums++; + if (cksum(udp) != 0) { + DPRINTF(EthernetCksum, "Rx UDP Checksum Error\n"); + vnic->rxDoneData |= Regs::RxDone_UdpError; + } + } + } + } + rxState = rxBeginCopy; + break; + + case rxBeginCopy: + if (dmaPending() || drainState() != DrainState::Running) + goto exit; + + rxDmaAddr = pciToDma(Regs::get_RxData_Addr(vnic->RxData)); + rxDmaLen = min(Regs::get_RxData_Len(vnic->RxData), + vnic->rxPacketBytes); + + /* + * if we're doing zero/delay copy and we're below the fifo + * threshold, see if we should try to do the zero/defer copy + */ + if ((Regs::get_Config_ZeroCopy(regs.Config) || + Regs::get_Config_DelayCopy(regs.Config)) && + !Regs::get_RxData_NoDelay(vnic->RxData) && rxLow) { + if (rxDmaLen > regs.ZeroCopyMark) + rxDmaLen = regs.ZeroCopySize; + } + rxDmaData = vnic->rxIndex->packet->data + vnic->rxPacketOffset; + rxState = rxCopy; + if (rxDmaAddr == 1LL) { + rxState = rxCopyDone; + break; + } + + dmaWrite(rxDmaAddr, rxDmaLen, &rxDmaEvent, rxDmaData); + break; + + case rxCopy: + 
DPRINTF(EthernetSM, "receive machine still copying\n"); + goto exit; + + case rxCopyDone: + vnic->RxDone = vnic->rxDoneData; + vnic->RxDone |= Regs::RxDone_Complete; + rxBusyCount--; + + if (vnic->rxPacketBytes == rxDmaLen) { + if (vnic->rxPacketOffset) + rxDirtyCount--; + + // Packet is complete. Indicate how many bytes were copied + vnic->RxDone = Regs::set_RxDone_CopyLen(vnic->RxDone, rxDmaLen); + + DPRINTF(EthernetSM, + "rxKick: packet complete on vnic %d (rxunique %d)\n", + rxActive, vnic->rxUnique); + rxFifo.remove(vnic->rxIndex); + vnic->rxIndex = rxFifo.end(); + rxMappedCount--; + } else { + if (!vnic->rxPacketOffset) + rxDirtyCount++; + + vnic->rxPacketBytes -= rxDmaLen; + vnic->rxPacketOffset += rxDmaLen; + vnic->RxDone |= Regs::RxDone_More; + vnic->RxDone = Regs::set_RxDone_CopyLen(vnic->RxDone, + vnic->rxPacketBytes); + DPRINTF(EthernetSM, + "rxKick: packet not complete on vnic %d (rxunique %d): " + "%d bytes left\n", + rxActive, vnic->rxUnique, vnic->rxPacketBytes); + } + + rxActive = -1; + rxState = rxBusy.empty() && rxList.empty() ? rxIdle : rxFifoBlock; + + if (rxFifo.empty()) { + devIntrPost(Regs::Intr_RxEmpty); + rxEmpty = true; + } + + if (rxFifo.size() < regs.RxFifoLow) + rxLow = true; + + if (rxFifo.size() > regs.RxFifoHigh) + rxLow = false; + + devIntrPost(Regs::Intr_RxDMA); + break; + + default: + panic("Invalid rxState!"); + } + + DPRINTF(EthernetSM, "entering next rxState=%s\n", + RxStateStrings[rxState]); + + goto next; + + exit: + /** + * @todo do we want to schedule a future kick? 
+ */ + DPRINTF(EthernetSM, "rx state machine exited rxState=%s\n", + RxStateStrings[rxState]); +} + +void +Device::txDmaDone() +{ + assert(txState == txCopy); + txState = txCopyDone; + DPRINTF(EthernetDMA, "tx dma read paddr=%#x len=%d\n", + txDmaAddr, txDmaLen); + DDUMP(EthernetData, txDmaData, txDmaLen); + + // If the receive state machine has a pending DMA, let it go first + if (rxState == rxBeginCopy) + rxKick(); + + txKick(); +} + +void +Device::transmit() +{ + if (txFifo.empty()) { + DPRINTF(Ethernet, "nothing to transmit\n"); + return; + } + + uint32_t interrupts; + EthPacketPtr packet = txFifo.front(); + if (!interface->sendPacket(packet)) { + DPRINTF(Ethernet, "Packet Transmit: failed txFifo available %d\n", + txFifo.avail()); + return; + } + + txFifo.pop(); +#if TRACING_ON + if (DTRACE(Ethernet)) { + IpPtr ip(packet); + if (ip) { + DPRINTF(Ethernet, "ID is %d\n", ip->id()); + TcpPtr tcp(ip); + if (tcp) { + DPRINTF(Ethernet, + "Src Port=%d, Dest Port=%d, Seq=%d, Ack=%d\n", + tcp->sport(), tcp->dport(), tcp->seq(), + tcp->ack()); + } + } + } +#endif + + DDUMP(EthernetData, packet->data, packet->length); + txBytes += packet->length; + txPackets++; + + DPRINTF(Ethernet, "Packet Transmit: successful txFifo Available %d\n", + txFifo.avail()); + + interrupts = Regs::Intr_TxPacket; + if (txFifo.size() < regs.TxFifoLow) + interrupts |= Regs::Intr_TxLow; + devIntrPost(interrupts); +} + +void +Device::txKick() +{ + VirtualReg *vnic; + DPRINTF(EthernetSM, "txKick: txState=%s (txFifo.size=%d)\n", + TxStateStrings[txState], txFifo.size()); + + if (txKickTick > curTick()) { + DPRINTF(EthernetSM, "txKick: exiting, can't run till %d\n", + txKickTick); + return; + } + + next: + if (txState == txIdle) + goto exit; + + assert(!txList.empty()); + vnic = &virtualRegs[txList.front()]; + + switch (txState) { + case txFifoBlock: + assert(Regs::get_TxDone_Busy(vnic->TxDone)); + if (!txPacket) { + // Grab a new packet from the fifo. 
+ txPacket = make_shared(16384); + txPacketOffset = 0; + } + + if (txFifo.avail() - txPacket->length < + Regs::get_TxData_Len(vnic->TxData)) { + DPRINTF(EthernetSM, "transmit fifo full. Nothing to do.\n"); + goto exit; + } + + txState = txBeginCopy; + break; + + case txBeginCopy: + if (dmaPending() || drainState() != DrainState::Running) + goto exit; + + txDmaAddr = pciToDma(Regs::get_TxData_Addr(vnic->TxData)); + txDmaLen = Regs::get_TxData_Len(vnic->TxData); + txDmaData = txPacket->data + txPacketOffset; + txState = txCopy; + + dmaRead(txDmaAddr, txDmaLen, &txDmaEvent, txDmaData); + break; + + case txCopy: + DPRINTF(EthernetSM, "transmit machine still copying\n"); + goto exit; + + case txCopyDone: + vnic->TxDone = txDmaLen | Regs::TxDone_Complete; + txPacket->length += txDmaLen; + if ((vnic->TxData & Regs::TxData_More)) { + txPacketOffset += txDmaLen; + txState = txIdle; + devIntrPost(Regs::Intr_TxDMA); + break; + } + + assert(txPacket->length <= txFifo.avail()); + if ((vnic->TxData & Regs::TxData_Checksum)) { + IpPtr ip(txPacket); + if (ip) { + TcpPtr tcp(ip); + if (tcp) { + tcp->sum(0); + tcp->sum(cksum(tcp)); + txTcpChecksums++; + } + + UdpPtr udp(ip); + if (udp) { + udp->sum(0); + udp->sum(cksum(udp)); + txUdpChecksums++; + } + + ip->sum(0); + ip->sum(cksum(ip)); + txIpChecksums++; + } + } + + txFifo.push(txPacket); + if (txFifo.avail() < regs.TxMaxCopy) { + devIntrPost(Regs::Intr_TxFull); + txFull = true; + } + txPacket = 0; + transmit(); + txList.pop_front(); + txState = txList.empty() ? txIdle : txFifoBlock; + devIntrPost(Regs::Intr_TxDMA); + break; + + default: + panic("Invalid txState!"); + } + + DPRINTF(EthernetSM, "entering next txState=%s\n", + TxStateStrings[txState]); + + goto next; + + exit: + /** + * @todo do we want to schedule a future kick? 
+ */ + DPRINTF(EthernetSM, "tx state machine exited txState=%s\n", + TxStateStrings[txState]); +} + +void +Device::transferDone() +{ + if (txFifo.empty()) { + DPRINTF(Ethernet, "transfer complete: txFifo empty...nothing to do\n"); + return; + } + + DPRINTF(Ethernet, "transfer complete: data in txFifo...schedule xmit\n"); + + reschedule(txEvent, clockEdge(Cycles(1)), true); +} + +bool +Device::rxFilter(const EthPacketPtr &packet) +{ + if (!Regs::get_Config_Filter(regs.Config)) + return false; + + panic("receive filter not implemented\n"); + bool drop = true; + +#if 0 + string type; + + EthHdr *eth = packet->eth(); + if (eth->unicast()) { + // If we're accepting all unicast addresses + if (acceptUnicast) + drop = false; + + // If we make a perfect match + if (acceptPerfect && params->eaddr == eth.dst()) + drop = false; + + if (acceptArp && eth->type() == ETH_TYPE_ARP) + drop = false; + + } else if (eth->broadcast()) { + // if we're accepting broadcasts + if (acceptBroadcast) + drop = false; + + } else if (eth->multicast()) { + // if we're accepting all multicasts + if (acceptMulticast) + drop = false; + + } + + if (drop) { + DPRINTF(Ethernet, "rxFilter drop\n"); + DDUMP(EthernetData, packet->data, packet->length); + } +#endif + return drop; +} + +bool +Device::recvPacket(EthPacketPtr packet) +{ + rxBytes += packet->length; + rxPackets++; + + DPRINTF(Ethernet, "Receiving packet from wire, rxFifo Available is %d\n", + rxFifo.avail()); + + if (!rxEnable) { + DPRINTF(Ethernet, "receive disabled...packet dropped\n"); + return true; + } + + if (rxFilter(packet)) { + DPRINTF(Ethernet, "packet filtered...dropped\n"); + return true; + } + + if (rxFifo.size() >= regs.RxFifoHigh) + devIntrPost(Regs::Intr_RxHigh); + + if (!rxFifo.push(packet)) { + DPRINTF(Ethernet, + "packet will not fit in receive buffer...packet dropped\n"); + return false; + } + + // If we were at the last element, back up one ot go to the new + // last element of the list. 
+ if (rxFifoPtr == rxFifo.end()) + --rxFifoPtr; + + devIntrPost(Regs::Intr_RxPacket); + rxKick(); + return true; +} + +void +Device::drainResume() +{ + Drainable::drainResume(); + + // During drain we could have left the state machines in a waiting state and + // they wouldn't get out until some other event occured to kick them. + // This way they'll get out immediately + txKick(); + rxKick(); +} + +//===================================================================== +// +// +void +Base::serialize(CheckpointOut &cp) const +{ + // Serialize the PciDevice base class + PciDevice::serialize(cp); + + SERIALIZE_SCALAR(rxEnable); + SERIALIZE_SCALAR(txEnable); + SERIALIZE_SCALAR(cpuIntrEnable); + + /* + * Keep track of pending interrupt status. + */ + SERIALIZE_SCALAR(intrTick); + SERIALIZE_SCALAR(cpuPendingIntr); + Tick intrEventTick = 0; + if (intrEvent) + intrEventTick = intrEvent->when(); + SERIALIZE_SCALAR(intrEventTick); +} + +void +Base::unserialize(CheckpointIn &cp) +{ + // Unserialize the PciDevice base class + PciDevice::unserialize(cp); + + UNSERIALIZE_SCALAR(rxEnable); + UNSERIALIZE_SCALAR(txEnable); + UNSERIALIZE_SCALAR(cpuIntrEnable); + + /* + * Keep track of pending interrupt status. + */ + UNSERIALIZE_SCALAR(intrTick); + UNSERIALIZE_SCALAR(cpuPendingIntr); + Tick intrEventTick; + UNSERIALIZE_SCALAR(intrEventTick); + if (intrEventTick) { + intrEvent = new IntrEvent(this, true); + schedule(intrEvent, intrEventTick); + } +} + +void +Device::serialize(CheckpointOut &cp) const +{ + int count; + + // Serialize the PciDevice base class + Base::serialize(cp); + + if (rxState == rxCopy) + panic("can't serialize with an in flight dma request rxState=%s", + RxStateStrings[rxState]); + + if (txState == txCopy) + panic("can't serialize with an in flight dma request txState=%s", + TxStateStrings[txState]); + + /* + * Serialize the device registers that could be modified by the OS. 
+ */ + SERIALIZE_SCALAR(regs.Config); + SERIALIZE_SCALAR(regs.IntrStatus); + SERIALIZE_SCALAR(regs.IntrMask); + SERIALIZE_SCALAR(regs.RxData); + SERIALIZE_SCALAR(regs.TxData); + + /* + * Serialize the virtual nic state + */ + int virtualRegsSize = virtualRegs.size(); + SERIALIZE_SCALAR(virtualRegsSize); + for (int i = 0; i < virtualRegsSize; ++i) { + const VirtualReg *vnic = &virtualRegs[i]; + + std::string reg = csprintf("vnic%d", i); + paramOut(cp, reg + ".RxData", vnic->RxData); + paramOut(cp, reg + ".RxDone", vnic->RxDone); + paramOut(cp, reg + ".TxData", vnic->TxData); + paramOut(cp, reg + ".TxDone", vnic->TxDone); + + bool rxPacketExists = vnic->rxIndex != rxFifo.end(); + paramOut(cp, reg + ".rxPacketExists", rxPacketExists); + if (rxPacketExists) { + int rxPacket = 0; + auto i = rxFifo.begin(); + while (i != vnic->rxIndex) { + assert(i != rxFifo.end()); + ++i; + ++rxPacket; + } + + paramOut(cp, reg + ".rxPacket", rxPacket); + paramOut(cp, reg + ".rxPacketOffset", vnic->rxPacketOffset); + paramOut(cp, reg + ".rxPacketBytes", vnic->rxPacketBytes); + } + paramOut(cp, reg + ".rxDoneData", vnic->rxDoneData); + } + + int rxFifoPtr = -1; + if (this->rxFifoPtr != rxFifo.end()) + rxFifoPtr = rxFifo.countPacketsBefore(this->rxFifoPtr); + SERIALIZE_SCALAR(rxFifoPtr); + + SERIALIZE_SCALAR(rxActive); + SERIALIZE_SCALAR(rxBusyCount); + SERIALIZE_SCALAR(rxDirtyCount); + SERIALIZE_SCALAR(rxMappedCount); + + VirtualList::const_iterator i, end; + for (count = 0, i = rxList.begin(), end = rxList.end(); i != end; ++i) + paramOut(cp, csprintf("rxList%d", count++), *i); + int rxListSize = count; + SERIALIZE_SCALAR(rxListSize); + + for (count = 0, i = rxBusy.begin(), end = rxBusy.end(); i != end; ++i) + paramOut(cp, csprintf("rxBusy%d", count++), *i); + int rxBusySize = count; + SERIALIZE_SCALAR(rxBusySize); + + for (count = 0, i = txList.begin(), end = txList.end(); i != end; ++i) + paramOut(cp, csprintf("txList%d", count++), *i); + int txListSize = count; + 
SERIALIZE_SCALAR(txListSize); + + /* + * Serialize rx state machine + */ + int rxState = this->rxState; + SERIALIZE_SCALAR(rxState); + SERIALIZE_SCALAR(rxEmpty); + SERIALIZE_SCALAR(rxLow); + rxFifo.serialize("rxFifo", cp); + + /* + * Serialize tx state machine + */ + int txState = this->txState; + SERIALIZE_SCALAR(txState); + SERIALIZE_SCALAR(txFull); + txFifo.serialize("txFifo", cp); + bool txPacketExists = txPacket != nullptr; + SERIALIZE_SCALAR(txPacketExists); + if (txPacketExists) { + txPacket->serialize("txPacket", cp); + SERIALIZE_SCALAR(txPacketOffset); + SERIALIZE_SCALAR(txPacketBytes); + } + + /* + * If there's a pending transmit, store the time so we can + * reschedule it later + */ + Tick transmitTick = txEvent.scheduled() ? txEvent.when() - curTick() : 0; + SERIALIZE_SCALAR(transmitTick); +} + +void +Device::unserialize(CheckpointIn &cp) +{ + // Unserialize the PciDevice base class + Base::unserialize(cp); + + /* + * Unserialize the device registers that may have been written by the OS. 
+ */ + UNSERIALIZE_SCALAR(regs.Config); + UNSERIALIZE_SCALAR(regs.IntrStatus); + UNSERIALIZE_SCALAR(regs.IntrMask); + UNSERIALIZE_SCALAR(regs.RxData); + UNSERIALIZE_SCALAR(regs.TxData); + + UNSERIALIZE_SCALAR(rxActive); + UNSERIALIZE_SCALAR(rxBusyCount); + UNSERIALIZE_SCALAR(rxDirtyCount); + UNSERIALIZE_SCALAR(rxMappedCount); + + int rxListSize; + UNSERIALIZE_SCALAR(rxListSize); + rxList.clear(); + for (int i = 0; i < rxListSize; ++i) { + int value; + paramIn(cp, csprintf("rxList%d", i), value); + rxList.push_back(value); + } + + int rxBusySize; + UNSERIALIZE_SCALAR(rxBusySize); + rxBusy.clear(); + for (int i = 0; i < rxBusySize; ++i) { + int value; + paramIn(cp, csprintf("rxBusy%d", i), value); + rxBusy.push_back(value); + } + + int txListSize; + UNSERIALIZE_SCALAR(txListSize); + txList.clear(); + for (int i = 0; i < txListSize; ++i) { + int value; + paramIn(cp, csprintf("txList%d", i), value); + txList.push_back(value); + } + + /* + * Unserialize rx state machine + */ + int rxState; + UNSERIALIZE_SCALAR(rxState); + UNSERIALIZE_SCALAR(rxEmpty); + UNSERIALIZE_SCALAR(rxLow); + this->rxState = (RxState) rxState; + rxFifo.unserialize("rxFifo", cp); + + int rxFifoPtr; + UNSERIALIZE_SCALAR(rxFifoPtr); + if (rxFifoPtr >= 0) { + this->rxFifoPtr = rxFifo.begin(); + for (int i = 0; i < rxFifoPtr; ++i) + ++this->rxFifoPtr; + } else { + this->rxFifoPtr = rxFifo.end(); + } + + /* + * Unserialize tx state machine + */ + int txState; + UNSERIALIZE_SCALAR(txState); + UNSERIALIZE_SCALAR(txFull); + this->txState = (TxState) txState; + txFifo.unserialize("txFifo", cp); + bool txPacketExists; + UNSERIALIZE_SCALAR(txPacketExists); + txPacket = 0; + if (txPacketExists) { + txPacket = make_shared(16384); + txPacket->unserialize("txPacket", cp); + UNSERIALIZE_SCALAR(txPacketOffset); + UNSERIALIZE_SCALAR(txPacketBytes); + } + + /* + * unserialize the virtual nic registers/state + * + * this must be done after the unserialization of the rxFifo + * because the packet iterators depend on the 
fifo being populated + */ + int virtualRegsSize; + UNSERIALIZE_SCALAR(virtualRegsSize); + virtualRegs.clear(); + virtualRegs.resize(virtualRegsSize); + for (int i = 0; i < virtualRegsSize; ++i) { + VirtualReg *vnic = &virtualRegs[i]; + std::string reg = csprintf("vnic%d", i); + + paramIn(cp, reg + ".RxData", vnic->RxData); + paramIn(cp, reg + ".RxDone", vnic->RxDone); + paramIn(cp, reg + ".TxData", vnic->TxData); + paramIn(cp, reg + ".TxDone", vnic->TxDone); + + vnic->rxUnique = rxUnique++; + vnic->txUnique = txUnique++; + + bool rxPacketExists; + paramIn(cp, reg + ".rxPacketExists", rxPacketExists); + if (rxPacketExists) { + int rxPacket; + paramIn(cp, reg + ".rxPacket", rxPacket); + vnic->rxIndex = rxFifo.begin(); + while (rxPacket--) + ++vnic->rxIndex; + + paramIn(cp, reg + ".rxPacketOffset", + vnic->rxPacketOffset); + paramIn(cp, reg + ".rxPacketBytes", vnic->rxPacketBytes); + } else { + vnic->rxIndex = rxFifo.end(); + } + paramIn(cp, reg + ".rxDoneData", vnic->rxDoneData); + } + + /* + * If there's a pending transmit, reschedule it now + */ + Tick transmitTick; + UNSERIALIZE_SCALAR(transmitTick); + if (transmitTick) + schedule(txEvent, curTick() + transmitTick); + + pioPort.sendRangeChange(); + +} + +} // namespace Sinic + +Sinic::Device * +SinicParams::create() +{ + return new Sinic::Device(this); +} diff --git a/src/dev/net/sinic.hh b/src/dev/net/sinic.hh new file mode 100644 index 000000000..b041cb16d --- /dev/null +++ b/src/dev/net/sinic.hh @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Nathan Binkert
+ */
+
+#ifndef __DEV_NET_SINIC_HH__
+#define __DEV_NET_SINIC_HH__
+
+#include "base/inet.hh"
+#include "base/statistics.hh"
+#include "dev/io_device.hh"
+#include "dev/net/etherdevice.hh"
+#include "dev/net/etherint.hh"
+#include "dev/net/etherpkt.hh"
+#include "dev/net/pktfifo.hh"
+#include "dev/net/sinicreg.hh"
+#include "dev/pci/device.hh"
+#include "params/Sinic.hh"
+#include "sim/eventq.hh"
+
+namespace Sinic {
+
+class Interface;
+class Base : public EtherDevBase
+{
+  protected:
+    bool rxEnable;
+    bool txEnable;
+
+  protected:
+    Tick intrDelay;
+    Tick intrTick;
+    bool cpuIntrEnable;
+    bool cpuPendingIntr;
+    void cpuIntrPost(Tick when);
+    void cpuInterrupt();
+    void cpuIntrClear();
+
+    typedef EventWrapper<Base, &Base::cpuInterrupt> IntrEvent;
+    friend void IntrEvent::process();
+    IntrEvent *intrEvent;
+    Interface *interface;
+
+    bool cpuIntrPending() const;
+    void cpuIntrAck() { cpuIntrClear(); }
+
+/**
+ * Serialization stuff
+ */
+  public:
+    void serialize(CheckpointOut &cp) const override;
+    void unserialize(CheckpointIn &cp) override;
+
+/**
+ * Construction/Destruction/Parameters
+ */
+  public:
+    typedef SinicParams Params;
+    const Params *params() const { return (const Params *)_params; }
+    Base(const Params *p);
+};
+
+class Device : public Base
+{
+  protected:
+    /** Receive State Machine States */
+    enum RxState {
+        rxIdle,
+        rxFifoBlock,
+        rxBeginCopy,
+        rxCopy,
+        rxCopyDone
+    };
+
+    /** Transmit State Machine states */
+    enum TxState {
+        txIdle,
+        txFifoBlock,
+        txBeginCopy,
+        txCopy,
+        txCopyDone
+    };
+
+    /** device register file */
+    struct {
+        uint32_t Config;       // 0x00
+        uint32_t Command;      // 0x04
+        uint32_t IntrStatus;   // 0x08
+        uint32_t IntrMask;     // 0x0c
+        uint32_t RxMaxCopy;    // 0x10
+        uint32_t TxMaxCopy;    // 0x14
+        uint32_t ZeroCopySize; // 0x18
+        uint32_t ZeroCopyMark; // 0x1c
+        uint32_t VirtualCount; // 0x20
+        uint32_t RxMaxIntr;    // 0x24
+        uint32_t RxFifoSize;   // 0x28
+        uint32_t TxFifoSize;   // 0x2c
+        uint32_t RxFifoLow;    // 0x30
+        uint32_t TxFifoLow;    // 0x34
+        uint32_t RxFifoHigh;   // 0x38
+        uint32_t TxFifoHigh;   // 0x3c
+        uint64_t RxData;       // 0x40
+        uint64_t RxDone;       // 0x48
+        uint64_t RxWait;       // 0x50
+        uint64_t TxData;       // 0x58
+        uint64_t TxDone;       // 0x60
+        uint64_t TxWait;       // 0x68
+        uint64_t HwAddr;       // 0x70
+        uint64_t RxStatus;     // 0x78
+    } regs;
+
+    struct VirtualReg {
+        uint64_t RxData;
+        uint64_t RxDone;
+        uint64_t TxData;
+        uint64_t TxDone;
+
+        PacketFifo::iterator rxIndex;
+        unsigned rxPacketOffset;
+        unsigned rxPacketBytes;
+        uint64_t rxDoneData;
+
+        Counter rxUnique;
+        Counter txUnique;
+
+        VirtualReg()
+            : RxData(0), RxDone(0), TxData(0), TxDone(0),
+              rxPacketOffset(0), rxPacketBytes(0), rxDoneData(0)
+        { }
+    };
+    typedef std::vector<VirtualReg> VirtualRegs; // indexed by vnic number
+    typedef std::list<unsigned> VirtualList; // vnic indices; NOTE(review): element type restored, confirm
+    Counter rxUnique;
+    Counter txUnique;
+    VirtualRegs virtualRegs;
+    VirtualList rxList;
+    VirtualList rxBusy;
+    int rxActive;
+    VirtualList txList;
+
+    int rxBusyCount;
+    int rxMappedCount;
+    int rxDirtyCount;
+
+    uint8_t  &regData8(Addr daddr) { return *((uint8_t *)&regs + daddr); } // byte at offset daddr in regs
+    uint32_t &regData32(Addr daddr) { return *(uint32_t *)&regData8(daddr); }
+    uint64_t &regData64(Addr daddr) { return *(uint64_t *)&regData8(daddr); }
+
+  protected:
+    RxState rxState;
+    PacketFifo rxFifo;
+    PacketFifo::iterator rxFifoPtr;
+    bool rxEmpty;
+    bool rxLow;
+    Addr rxDmaAddr;
+    uint8_t *rxDmaData;
+    unsigned rxDmaLen;
+
+    TxState txState;
+    PacketFifo txFifo;
+    bool txFull;
+    EthPacketPtr txPacket;
+    int txPacketOffset;
+    int txPacketBytes;
+    Addr txDmaAddr;
+    uint8_t *txDmaData;
+    int txDmaLen;
+
+  protected:
+    void reset();
+
+    void rxKick();
+    Tick rxKickTick;
+    typedef EventWrapper<Device, &Device::rxKick> RxKickEvent;
+    friend void RxKickEvent::process();
+
+    void txKick();
+    Tick txKickTick;
+    typedef EventWrapper<Device, &Device::txKick> TxKickEvent;
+    friend void TxKickEvent::process();
+
+    /**
+     * Retransmit event
+     */
+    void transmit();
+    void txEventTransmit()
+    {
+        transmit();
+        if (txState == txFifoBlock) // a packet left the fifo, so a blocked tx machine may proceed
+            txKick();
+    }
+    typedef EventWrapper<Device, &Device::txEventTransmit> TxEvent;
+    friend void TxEvent::process();
+    TxEvent txEvent;
+
+    void txDump() const;
+    void rxDump() const;
+
+    /**
+     * receive address filter
+     */
+    bool rxFilter(const EthPacketPtr &packet);
+
+/**
+ * device configuration
+ */
+    void changeConfig(uint32_t newconfig);
+    void command(uint32_t command);
+
+/**
+ * device ethernet interface
+ */
+  public:
+    bool recvPacket(EthPacketPtr packet);
+    void transferDone();
+    EtherInt *getEthPort(const std::string &if_name, int idx) override;
+
+/**
+ * DMA parameters
+ */
+  protected:
+    void rxDmaDone();
+    friend class EventWrapper<Device, &Device::rxDmaDone>;
+    EventWrapper<Device, &Device::rxDmaDone> rxDmaEvent;
+
+    void txDmaDone();
+    friend class EventWrapper<Device, &Device::txDmaDone>;
+    EventWrapper<Device, &Device::txDmaDone> txDmaEvent;
+
+    Tick dmaReadDelay;
+    Tick dmaReadFactor;
+    Tick dmaWriteDelay;
+    Tick dmaWriteFactor;
+
+/**
+ * Interrupt management
+ */
+  protected:
+    void devIntrPost(uint32_t interrupts);
+    void devIntrClear(uint32_t interrupts = Regs::Intr_All);
+    void devIntrChangeMask(uint32_t newmask);
+
+/**
+ * Memory Interface
+ */
+  public:
+    Tick read(PacketPtr pkt) override;
+    Tick write(PacketPtr pkt) override;
+    virtual void drainResume() override;
+
+    void prepareIO(ContextID cpu, int index);
+    void prepareRead(ContextID cpu, int index);
+    void prepareWrite(ContextID cpu, int index);
+ //   Fault iprRead(Addr daddr, ContextID cpu, uint64_t &result);
+
+/**
+ * Statistics
+ */
+  private:
+    Stats::Scalar totalVnicDistance;
+    Stats::Scalar numVnicDistance;
+    Stats::Scalar maxVnicDistance;
+    Stats::Formula avgVnicDistance;
+
+    int _maxVnicDistance;
+
+  public:
+    void regStats() override;
+    void resetStats() override;
+
+/**
+ * Serialization stuff
+ */
+  public:
+    void serialize(CheckpointOut &cp) const override;
+    void unserialize(CheckpointIn &cp) override;
+
+  public:
+    Device(const Params *p);
+    ~Device();
+};
+
+/*
+ * Ethernet Interface for an Ethernet Device
+ */
+class Interface : public EtherInt
+{
+  private:
+    Device *dev;
+
+  public:
+    Interface(const std::string &name, Device *d)
+        : EtherInt(name), dev(d)
+    { }
+
+    virtual bool recvPacket(EthPacketPtr pkt) { return dev->recvPacket(pkt); }
+    virtual void sendDone() { dev->transferDone(); }
+};
+
+} // namespace Sinic
+
+#endif // __DEV_NET_SINIC_HH__
diff --git a/src/dev/net/sinicreg.hh b/src/dev/net/sinicreg.hh
new file mode 100644
index 000000000..fc74889c1
--- /dev/null
+++ b/src/dev/net/sinicreg.hh
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * + * Authors: Nathan Binkert + */ + +#ifndef __DEV_NET_SINICREG_HH__ +#define __DEV_NET_SINICREG_HH__ + +#define __SINIC_REG32(NAME, VAL) static const uint32_t NAME = (VAL); +#define __SINIC_REG64(NAME, VAL) static const uint64_t NAME = (VAL); + +#define __SINIC_VAL32(NAME, OFFSET, WIDTH) \ + static const uint32_t NAME##_width = WIDTH; \ + static const uint32_t NAME##_offset = OFFSET; \ + static const uint32_t NAME##_mask = (1 << WIDTH) - 1; \ + static const uint32_t NAME = ((1 << WIDTH) - 1) << OFFSET; \ + static inline uint32_t get_##NAME(uint32_t reg) \ + { return (reg & NAME) >> OFFSET; } \ + static inline uint32_t set_##NAME(uint32_t reg, uint32_t val) \ + { return (reg & ~NAME) | ((val << OFFSET) & NAME); } + +#define __SINIC_VAL64(NAME, OFFSET, WIDTH) \ + static const uint64_t NAME##_width = WIDTH; \ + static const uint64_t NAME##_offset = OFFSET; \ + static const uint64_t NAME##_mask = (ULL(1) << WIDTH) - 1; \ + static const uint64_t NAME = ((ULL(1) << WIDTH) - 1) << OFFSET; \ + static inline uint64_t get_##NAME(uint64_t reg) \ + { return (reg & NAME) >> OFFSET; } \ + static inline uint64_t set_##NAME(uint64_t reg, uint64_t val) \ + { return (reg & ~NAME) | ((val << OFFSET) & NAME); } + +namespace Sinic { +namespace Regs { + +static const int VirtualShift = 8; +static const int VirtualMask = 0xff; + +// Registers +__SINIC_REG32(Config, 0x00) // 32: configuration register +__SINIC_REG32(Command, 0x04) // 32: command register +__SINIC_REG32(IntrStatus, 0x08) // 32: interrupt status +__SINIC_REG32(IntrMask, 0x0c) // 32: interrupt mask +__SINIC_REG32(RxMaxCopy, 0x10) // 32: max bytes per rx copy +__SINIC_REG32(TxMaxCopy, 0x14) // 32: max bytes per tx copy +__SINIC_REG32(ZeroCopySize, 0x18) // 32: bytes to copy if below threshold +__SINIC_REG32(ZeroCopyMark, 0x1c) // 32: only zero-copy above this threshold +__SINIC_REG32(VirtualCount, 0x20) // 32: number of virutal NICs +__SINIC_REG32(RxMaxIntr, 0x24) // 32: max receives per interrupt 
+__SINIC_REG32(RxFifoSize, 0x28) // 32: rx fifo capacity in bytes +__SINIC_REG32(TxFifoSize, 0x2c) // 32: tx fifo capacity in bytes +__SINIC_REG32(RxFifoLow, 0x30) // 32: rx fifo low watermark +__SINIC_REG32(TxFifoLow, 0x34) // 32: tx fifo low watermark +__SINIC_REG32(RxFifoHigh, 0x38) // 32: rx fifo high watermark +__SINIC_REG32(TxFifoHigh, 0x3c) // 32: tx fifo high watermark +__SINIC_REG32(RxData, 0x40) // 64: receive data +__SINIC_REG32(RxDone, 0x48) // 64: receive done +__SINIC_REG32(RxWait, 0x50) // 64: receive done (busy wait) +__SINIC_REG32(TxData, 0x58) // 64: transmit data +__SINIC_REG32(TxDone, 0x60) // 64: transmit done +__SINIC_REG32(TxWait, 0x68) // 64: transmit done (busy wait) +__SINIC_REG32(HwAddr, 0x70) // 64: mac address +__SINIC_REG32(RxStatus, 0x78) +__SINIC_REG32(Size, 0x80) // register addres space size + +// Config register bits +__SINIC_VAL32(Config_ZeroCopy, 12, 1) // enable zero copy +__SINIC_VAL32(Config_DelayCopy,11, 1) // enable delayed copy +__SINIC_VAL32(Config_RSS, 10, 1) // enable receive side scaling +__SINIC_VAL32(Config_RxThread, 9, 1) // enable receive threads +__SINIC_VAL32(Config_TxThread, 8, 1) // enable transmit thread +__SINIC_VAL32(Config_Filter, 7, 1) // enable receive filter +__SINIC_VAL32(Config_Vlan, 6, 1) // enable vlan tagging +__SINIC_VAL32(Config_Vaddr, 5, 1) // enable virtual addressing +__SINIC_VAL32(Config_Desc, 4, 1) // enable tx/rx descriptors +__SINIC_VAL32(Config_Poll, 3, 1) // enable polling +__SINIC_VAL32(Config_IntEn, 2, 1) // enable interrupts +__SINIC_VAL32(Config_TxEn, 1, 1) // enable transmit +__SINIC_VAL32(Config_RxEn, 0, 1) // enable receive + +// Command register bits +__SINIC_VAL32(Command_Intr, 1, 1) // software interrupt +__SINIC_VAL32(Command_Reset, 0, 1) // reset chip + +// Interrupt register bits +__SINIC_VAL32(Intr_Soft, 8, 1) // software interrupt +__SINIC_VAL32(Intr_TxLow, 7, 1) // tx fifo dropped below watermark +__SINIC_VAL32(Intr_TxFull, 6, 1) // tx fifo full +__SINIC_VAL32(Intr_TxDMA, 
5, 1) // tx dma completed w/ interrupt +__SINIC_VAL32(Intr_TxPacket, 4, 1) // packet transmitted +__SINIC_VAL32(Intr_RxHigh, 3, 1) // rx fifo above high watermark +__SINIC_VAL32(Intr_RxEmpty, 2, 1) // rx fifo empty +__SINIC_VAL32(Intr_RxDMA, 1, 1) // rx dma completed w/ interrupt +__SINIC_VAL32(Intr_RxPacket, 0, 1) // packet received +__SINIC_REG32(Intr_All, 0x01ff) // all valid interrupts +__SINIC_REG32(Intr_NoDelay, 0x01cc) // interrupts that aren't coalesced +__SINIC_REG32(Intr_Res, ~0x01ff) // reserved interrupt bits + +// RX Data Description +__SINIC_VAL64(RxData_NoDelay, 61, 1) // Don't Delay this copy +__SINIC_VAL64(RxData_Vaddr, 60, 1) // Addr is virtual +__SINIC_VAL64(RxData_Len, 40, 20) // 0 - 256k +__SINIC_VAL64(RxData_Addr, 0, 40) // Address 1TB + +// TX Data Description +__SINIC_VAL64(TxData_More, 63, 1) // Packet not complete (will dma more) +__SINIC_VAL64(TxData_Checksum, 62, 1) // do checksum +__SINIC_VAL64(TxData_Vaddr, 60, 1) // Addr is virtual +__SINIC_VAL64(TxData_Len, 40, 20) // 0 - 256k +__SINIC_VAL64(TxData_Addr, 0, 40) // Address 1TB + +// RX Done/Busy Information +__SINIC_VAL64(RxDone_Packets, 32, 16) // number of packets in rx fifo +__SINIC_VAL64(RxDone_Busy, 31, 1) // receive dma busy copying +__SINIC_VAL64(RxDone_Complete, 30, 1) // valid data (packet complete) +__SINIC_VAL64(RxDone_More, 29, 1) // Packet has more data (dma again) +__SINIC_VAL64(RxDone_Empty, 28, 1) // rx fifo is empty +__SINIC_VAL64(RxDone_High, 27, 1) // rx fifo is above the watermark +__SINIC_VAL64(RxDone_NotHigh, 26, 1) // rxfifo never hit the high watermark +__SINIC_VAL64(RxDone_TcpError, 25, 1) // TCP packet error (bad checksum) +__SINIC_VAL64(RxDone_UdpError, 24, 1) // UDP packet error (bad checksum) +__SINIC_VAL64(RxDone_IpError, 23, 1) // IP packet error (bad checksum) +__SINIC_VAL64(RxDone_TcpPacket, 22, 1) // this is a TCP packet +__SINIC_VAL64(RxDone_UdpPacket, 21, 1) // this is a UDP packet +__SINIC_VAL64(RxDone_IpPacket, 20, 1) // this is an IP packet 
+__SINIC_VAL64(RxDone_CopyLen, 0, 20) // up to 256k
+
+// TX Done/Busy Information
+__SINIC_VAL64(TxDone_Packets, 32, 16) // number of packets in tx fifo
+__SINIC_VAL64(TxDone_Busy, 31, 1) // transmit dma busy copying
+__SINIC_VAL64(TxDone_Complete, 30, 1) // valid data (packet complete)
+__SINIC_VAL64(TxDone_Full, 29, 1) // tx fifo is full
+__SINIC_VAL64(TxDone_Low, 28, 1) // tx fifo is below the watermark
+__SINIC_VAL64(TxDone_Res0, 27, 1) // reserved
+__SINIC_VAL64(TxDone_Res1, 26, 1) // reserved
+__SINIC_VAL64(TxDone_Res2, 25, 1) // reserved
+__SINIC_VAL64(TxDone_Res3, 24, 1) // reserved
+__SINIC_VAL64(TxDone_Res4, 23, 1) // reserved
+__SINIC_VAL64(TxDone_Res5, 22, 1) // reserved
+__SINIC_VAL64(TxDone_Res6, 21, 1) // reserved
+__SINIC_VAL64(TxDone_Res7, 20, 1) // reserved
+__SINIC_VAL64(TxDone_CopyLen, 0, 20) // up to 256k
+
+// RX Status fields
+__SINIC_VAL64(RxStatus_Dirty, 48, 16)
+__SINIC_VAL64(RxStatus_Mapped, 32, 16)
+__SINIC_VAL64(RxStatus_Busy, 16, 16)
+__SINIC_VAL64(RxStatus_Head, 0, 16)
+
+/**
+ * Static description of one device register: its width in bytes (0 marks
+ * an offset that is not the start of a register), whether it may be read
+ * and/or written, and a printable name.
+ */
+struct Info
+{
+    uint8_t size;
+    bool read;
+    bool write;
+    const char *name;
+};
+
+} // namespace Regs
+
+/**
+ * Look up the descriptor of the register at a given offset.
+ *
+ * The table holds one slot per 32-bit word of the register space; the
+ * second word of every 64-bit register is filled with the "invalid"
+ * descriptor (size 0).
+ *
+ * @param daddr Byte offset into the register space (the same offsets as
+ *              the Regs register constants).
+ * @return The descriptor for that word, or the "invalid" descriptor when
+ *         the offset lies beyond the end of the table.
+ */
+inline const Regs::Info&
+regInfo(Addr daddr)
+{
+    static const Regs::Info invalid = { 0, false, false, "invalid" };
+    static const Regs::Info info [] = {
+        { 4, true, true, "Config" },
+        { 4, false, true, "Command" },
+        { 4, true, true, "IntrStatus" },
+        { 4, true, true, "IntrMask" },
+        { 4, true, false, "RxMaxCopy" },
+        { 4, true, false, "TxMaxCopy" },
+        { 4, true, false, "ZeroCopySize" },
+        { 4, true, false, "ZeroCopyMark" },
+        { 4, true, false, "VirtualCount" },
+        { 4, true, false, "RxMaxIntr" },
+        { 4, true, false, "RxFifoSize" },
+        { 4, true, false, "TxFifoSize" },
+        { 4, true, false, "RxFifoLow" },
+        { 4, true, false, "TxFifoLow" },
+        { 4, true, false, "RxFifoHigh" },
+        { 4, true, false, "TxFifoHigh" },
+        { 8, true, true, "RxData" },
+        invalid,
+        { 8, true, false, "RxDone" },
+        invalid,
+        { 8, true, false, "RxWait" },
+        invalid,
+        { 8, true, true, "TxData" },
+        invalid,
+        { 8, true, false, "TxDone" },
+        invalid,
+        { 8, true, false, "TxWait" },
+        invalid,
+        { 8, true, false, "HwAddr" },
+        invalid,
+        { 8, true, false, "RxStatus" },
+        invalid,
+    };
+
+    // The table has exactly one entry per word below Regs::Size; never
+    // index past it, hand back the invalid descriptor instead.
+    const Addr index = daddr / 4;
+    if (index >= sizeof(info) / sizeof(info[0]))
+        return invalid;
+
+    return info[index];
+}
+
+/**
+ * Check whether an offset names the start of an implemented register.
+ *
+ * @param daddr Byte offset into the register space.
+ * @return true if the offset is inside the register space and its
+ *         descriptor has a non-zero size.
+ */
+inline bool
+regValid(Addr daddr)
+{
+    // Regs::Size is the size of the register space, i.e. the first offset
+    // past the last register, so it is itself out of range.
+    if (daddr >= Regs::Size)
+        return false;
+
+    return regInfo(daddr).size != 0;
+}
+
+} // namespace Sinic
+
+#endif // __DEV_NET_SINICREG_HH__
diff --git a/src/dev/net/tcp_iface.cc b/src/dev/net/tcp_iface.cc
new file mode 100644
index 000000000..035ec8fd0
--- /dev/null
+++ b/src/dev/net/tcp_iface.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2015 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabor Dozsa
+ */
+
+/* @file
+ * TCP stream socket based interface class implementation for multi gem5 runs.
+ */
+
+#include "dev/net/tcp_iface.hh"
+
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <cstring>
+
+#include "base/types.hh"
+#include "debug/MultiEthernet.hh"
+
+// MSG_NOSIGNAL does not exist on OS X
+// NOTE(review): SO_NOSIGPIPE is a setsockopt() option name, not a send()
+// flag; passing it as the flags argument of send() looks wrong -- confirm
+// and, if so, set the option on the socket and define MSG_NOSIGNAL as 0.
+#if defined(__APPLE__) || defined(__MACH__)
+#ifndef MSG_NOSIGNAL
+#define MSG_NOSIGNAL SO_NOSIGPIPE
+#endif
+#endif
+
+using namespace std;
+
+vector<int> TCPIface::sockRegistry;
+
+/**
+ * Create a TCP socket, connect it to the server, add it to the static
+ * socket registry and send our rank to the server.
+ *
+ * Any failure of socket(), getaddrinfo() or connect() ends in panic.
+ */
+TCPIface::TCPIface(string server_name, unsigned server_port,
+                   unsigned multi_rank, Tick sync_start, Tick sync_repeat,
+                   EventManager *em) :
+    MultiIface(multi_rank, sync_start, sync_repeat, em)
+{
+    struct addrinfo addr_hint, *addr_results;
+    int ret;
+
+    string port_str = to_string(server_port);
+
+    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+    panic_if(sock < 0, "socket() failed: %s", strerror(errno));
+
+    // only IPv4 TCP stream endpoints are acceptable
+    memset(&addr_hint, 0, sizeof(addr_hint));
+    addr_hint.ai_family = AF_INET;
+    addr_hint.ai_socktype = SOCK_STREAM;
+    addr_hint.ai_protocol = IPPROTO_TCP;
+
+    ret = getaddrinfo(server_name.c_str(), port_str.c_str(),
+                      &addr_hint, &addr_results);
+    // getaddrinfo() signals failure with a non-zero EAI_* code that has to
+    // be decoded with gai_strerror(); it does not report through errno.
+    panic_if(ret != 0, "getaddrinfo() failed: %s", gai_strerror(ret));
+
+    DPRINTF(MultiEthernet, "Connecting to %s:%u\n",
+            server_name.c_str(), server_port);
+
+    // use the first address returned by the lookup
+    ret = ::connect(sock, (struct sockaddr *)(addr_results->ai_addr),
+                    addr_results->ai_addrlen);
+    panic_if(ret < 0, "connect() failed: %s", strerror(errno));
+
+    freeaddrinfo(addr_results);
+    // add our socket to the static registry
+    sockRegistry.push_back(sock);
+    // let the server know who we are
+    sendTCP(sock, &multi_rank, sizeof(multi_rank));
+}
+
+/**
+ * Close the socket to the server.
+ *
+ * NOTE(review): the descriptor is not removed from sockRegistry, so a
+ * later syncRaw() would still try to send on it -- confirm that objects
+ * of this class are only destroyed at the end of the simulation.
+ */
+TCPIface::~TCPIface()
+{
+    int M5_VAR_USED ret;
+
+    ret = close(sock);
+    assert(ret == 0);
+}
+
+/**
+ * Send exactly 'length' bytes; panic on an error or on a short write.
+ */
+void
+TCPIface::sendTCP(int sock, void *buf, unsigned length)
+{
+    ssize_t ret;
+
+    ret = ::send(sock, buf, length, MSG_NOSIGNAL);
+    panic_if(ret < 0, "send() failed: %s", strerror(errno));
+    panic_if(ret != length, "send() failed");
+}
+
+/**
+ * Receive exactly 'length' bytes (MSG_WAITALL).
+ *
+ * A connection reset, a broken pipe or an orderly shutdown by the peer
+ * is only reported with inform(); any other error and any short read
+ * ends in panic.
+ *
+ * @return true if the whole message was received, false otherwise.
+ */
+bool
+TCPIface::recvTCP(int sock, void *buf, unsigned length)
+{
+    ssize_t ret;
+
+    ret = ::recv(sock, buf, length, MSG_WAITALL );
+    if (ret < 0) {
+        if (errno == ECONNRESET || errno == EPIPE)
+            inform("recv(): %s", strerror(errno));
+        else
+            panic("recv() failed: %s", strerror(errno));
+    } else if (ret == 0) {
+        inform("recv(): Connection closed");
+    } else if (ret != length)
+        panic("recv() failed");
+
+    return (ret == length);
+}
+
+void
+TCPIface::syncRaw(MultiHeaderPkt::MsgType sync_req, Tick sync_tick)
+{
+    /*
+     * Barrier is simply implemented by point-to-point messages to the server
+     * for now. This method is called by only one TCPIface object.
+     * The server will send back an 'ack' message when it gets the
+     * sync request from all clients.
+     */
+    MultiHeaderPkt::Header header_pkt;
+    header_pkt.msgType = sync_req;
+    header_pkt.sendTick = sync_tick;
+
+    // the request goes out on every socket this process has registered
+    for (auto s : sockRegistry)
+        sendTCP(s, (void *)&header_pkt, sizeof(header_pkt));
+}
+
diff --git a/src/dev/net/tcp_iface.hh b/src/dev/net/tcp_iface.hh
new file mode 100644
index 000000000..2eb2c1c07
--- /dev/null
+++ b/src/dev/net/tcp_iface.hh
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2015 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabor Dozsa
+ */
+
+/* @file
+ * TCP stream socket based interface class for multi gem5 runs.
+ *
+ * For a high level description about multi gem5 see comments in
+ * header file multi_iface.hh.
+ *
+ * The TCP subclass of MultiIface uses a separate server process
+ * (see tcp_server.[hh,cc] under directory gem5/util/multi). Each gem5
+ * process connects to the server via a stream socket. The server process
+ * transfers messages and co-ordinates the synchronisation among the gem5
+ * peers.
+ */
+#ifndef __DEV_NET_TCP_IFACE_HH__
+#define __DEV_NET_TCP_IFACE_HH__
+
+
+#include <string>
+#include <vector>
+
+#include "dev/net/multi_iface.hh"
+
+class EventManager;
+
+/**
+ * MultiIface implementation that exchanges all messages with the server
+ * process over one TCP stream socket per object.
+ */
+class TCPIface : public MultiIface
+{
+  private:
+    /**
+     * The stream socket to connect to the server.
+     */
+    int sock;
+
+    /**
+     * Registry for all sockets to the server opened by this gem5 process.
+     */
+    static std::vector<int> sockRegistry;
+
+  private:
+
+    /**
+     * Send out a message through a TCP stream socket.
+     *
+     * @param sock TCP stream socket.
+     * @param buf Start address of the message.
+     * @param length Size of the message in bytes.
+     */
+    void
+    sendTCP(int sock, void *buf, unsigned length);
+
+    /**
+     * Receive the next incoming message through a TCP stream socket.
+     *
+     * @param sock TCP stream socket.
+     * @param buf Start address of buffer to store the message.
+     * @param length Exact size of the expected message in bytes.
+     * @return true if exactly 'length' bytes were received, false
+     * otherwise (e.g. the connection was closed or reset).
+     */
+    bool recvTCP(int sock, void *buf, unsigned length);
+
+
+  protected:
+
+    /**
+     * Send a message to the server over this object's socket.
+     * The destination address argument is not used here.
+     */
+    virtual void
+    sendRaw(void *buf, unsigned length,
+            const MultiHeaderPkt::AddressType dest_addr=nullptr) override
+    {
+        sendTCP(sock, buf, length);
+    }
+
+    /**
+     * Receive a message of exactly 'length' bytes from this object's
+     * socket; the result of recvTCP() is passed through.
+     */
+    virtual bool recvRaw(void *buf, unsigned length) override
+    {
+        return recvTCP(sock, buf, length);
+    }
+
+    /**
+     * Send a synchronisation request, stamped with the given tick, over
+     * every socket in the registry.
+     */
+    virtual void syncRaw(MultiHeaderPkt::MsgType sync_req,
+                         Tick sync_tick) override;
+
+  public:
+    /**
+     * The ctor creates and connects the stream socket to the server.
+     * @param server_name The name (or IP address) of the host running the
+     * server process.
+     * @param server_port The port number the server is listening on for
+     * new connections.
+     * @param multi_rank The rank of this gem5 process; it is handed to
+     * MultiIface and sent to the server right after connecting.
+     * @param sync_start The tick for the first multi synchronisation.
+     * @param sync_repeat The frequency of multi synchronisation.
+     * @param em The EventManager object associated with the simulated
+     * Ethernet link.
+     */
+    TCPIface(std::string server_name, unsigned server_port,
+             unsigned multi_rank, Tick sync_start, Tick sync_repeat,
+             EventManager *em);
+
+    ~TCPIface() override;
+};
+
+#endif // __DEV_NET_TCP_IFACE_HH__
-- 
cgit v1.2.3