From f806a25c9edb3a9a9f5bc34b88340be6b24a2022 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Mon, 21 Nov 2005 23:43:15 -0500 Subject: add support for delaying pio writes until the cache access occurs dev/ns_gige.cc: add support for delaying pio writes until the cache access occurs the only write we delay are for CR_TXE and CR_RXE dev/sinic.cc: dev/sinic.hh: the txPioRequest and rxPioRequest things were more or less bogus add support for delaying pio writes until the cache access occurs dev/sinicreg.hh: Add delay_read and delay_write to the register information struct for now, we won't delay any reads, and we'll delay the writes that initiate DMAs python/m5/objects/Ethernet.py: add a parameter to delay pio writes until the timing access actually occurs. --HG-- extra : convert_revision : 79b18ea2812c2935d7d5ea6eff1f55265114d05d --- dev/ns_gige.cc | 61 +++++++++++++++++++++++++++---- dev/ns_gige.hh | 10 +++++ dev/sinic.cc | 111 +++++++++++++++++++++++++++++++++++--------------------- dev/sinic.hh | 26 ++++++++----- dev/sinicreg.hh | 48 ++++++++++++++---------- 5 files changed, 178 insertions(+), 78 deletions(-) (limited to 'dev') diff --git a/dev/ns_gige.cc b/dev/ns_gige.cc index c8ff04ec5..979bb6b7a 100644 --- a/dev/ns_gige.cc +++ b/dev/ns_gige.cc @@ -130,6 +130,7 @@ NSGigE::NSGigE(Params *p) } else if (p->payload_bus) panic("Must define a header bus if defining a payload bus"); + pioDelayWrite = p->pio_delay_write && pioInterface; intrDelay = p->intr_delay; dmaReadDelay = p->dma_read_delay; @@ -801,6 +802,13 @@ NSGigE::write(MemReqPtr &req, const uint8_t *data) } else if (daddr > 0x3FC) panic("Something is messed up!\n"); + if (pioDelayWrite) { + int cpu = (req->xc->regs.ipr[TheISA::IPR_PALtemp16] >> 8) & 0xff; + if (cpu >= writeQueue.size()) + writeQueue.resize(cpu + 1); + writeQueue[cpu].push_back(RegWriteData(daddr, *(uint32_t *)data)); + } + if (req->size == sizeof(uint32_t)) { uint32_t reg = *(uint32_t *)data; uint16_t rfaddr; @@ -813,20 +821,24 @@ NSGigE::write(MemReqPtr &req, const uint8_t *data) if (reg & CR_TXD) { txEnable = false; } else if (reg & CR_TXE) { - txEnable = true; + if (!pioDelayWrite) { + txEnable = true; - // the kernel is enabling the transmit machine - if (txState == txIdle) - txKick(); + // the kernel is enabling the transmit machine + if (txState == txIdle) + txKick(); + } } if (reg & CR_RXD) { rxEnable = false; } else if (reg & CR_RXE) { - rxEnable = true; + if (!pioDelayWrite) { + rxEnable = true; - if (rxState == rxIdle) - rxKick(); + if (rxState == rxIdle) + rxKick(); + } } if (reg & CR_TXR) @@ -2934,8 +2946,38 @@ NSGigE::unserialize(Checkpoint *cp, const std::string §ion) Tick NSGigE::cacheAccess(MemReqPtr &req) { + Addr daddr = req->paddr & 0xfff; DPRINTF(EthernetPIO, "timing access to paddr=%#x (daddr=%#x)\n", - req->paddr, req->paddr - addr); + req->paddr, daddr); + + if (!pioDelayWrite || !req->cmd.isWrite()) + return curTick + pioLatency; + + int cpu = (req->xc->regs.ipr[TheISA::IPR_PALtemp16] >> 8) & 0xff; + std::list &wq = writeQueue[cpu]; + if (wq.empty()) + panic("WriteQueue for cpu %d empty timing daddr=%#x", cpu, daddr); + + const RegWriteData &data = wq.front(); + if (data.daddr != daddr) + panic("read mismatch on cpu %d, daddr functional=%#x timing=%#x", + cpu, data.daddr, daddr); + + if (daddr == CR) { + if ((data.value & (CR_TXD | CR_TXE)) == CR_TXE) { + txEnable = true; + if (txState == txIdle) + txKick(); + } + + if ((data.value & (CR_RXD | CR_RXE)) == CR_RXE) { + rxEnable = true; + if (rxState == rxIdle) + rxKick(); + } + } + + wq.pop_front(); return curTick + pioLatency; } @@ -2995,6 +3037,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(NSGigE) Param dma_write_factor; Param dma_no_allocate; Param pio_latency; + Param pio_delay_write; Param intr_delay; Param rx_delay; @@ -3034,6 +3077,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(NSGigE) INIT_PARAM(dma_write_factor, "multiplier for dma writes"), INIT_PARAM(dma_no_allocate, "Should DMA reads allocate cache lines"), INIT_PARAM(pio_latency, "Programmed IO latency in bus cycles"), + INIT_PARAM(pio_delay_write, ""), INIT_PARAM(intr_delay, "Interrupt Delay in microseconds"), INIT_PARAM(rx_delay, "Receive Delay"), @@ -3077,6 +3121,7 @@ CREATE_SIM_OBJECT(NSGigE) params->dma_write_factor = dma_write_factor; params->dma_no_allocate = dma_no_allocate; params->pio_latency = pio_latency; + params->pio_delay_write = pio_delay_write; params->intr_delay = intr_delay; params->rx_delay = rx_delay; diff --git a/dev/ns_gige.hh b/dev/ns_gige.hh index 36fd6050a..7db833028 100644 --- a/dev/ns_gige.hh +++ b/dev/ns_gige.hh @@ -239,6 +239,15 @@ class NSGigE : public PciDev uint32_t rxDescCnt; DmaState rxDmaState; + struct RegWriteData { + Addr daddr; + uint32_t value; + RegWriteData(Addr da, uint32_t val) : daddr(da), value(val) {} + }; + + std::vector > writeQueue; + bool pioDelayWrite; + bool extstsEnable; /** EEPROM State Machine */ @@ -376,6 +385,7 @@ class NSGigE : public PciDev Tick tx_delay; Tick rx_delay; Tick pio_latency; + bool pio_delay_write; bool dma_desc_free; bool dma_data_free; Tick dma_read_delay; diff --git a/dev/sinic.cc b/dev/sinic.cc index ef2350d11..e51518b0c 100644 --- a/dev/sinic.cc +++ b/dev/sinic.cc @@ -112,6 +112,8 @@ Device::Device(Params *p) p->dma_no_allocate); } else if (p->payload_bus) panic("must define a header bus if defining a payload bus"); + + pioDelayWrite = p->pio_delay_write && pioInterface; } Device::~Device() @@ -311,7 +313,14 @@ Device::writeConfig(int offset, int size, const uint8_t *data) } void -Device::prepareRead() +Device::prepareIO(int cpu) +{ + if (cpu >= writeQueue.size()) + writeQueue.resize(cpu + 1); +} + +void +Device::prepareRead(int cpu) { using namespace Regs; @@ -328,6 +337,12 @@ Device::prepareRead() regs.TxWait = regs.TxDone; } +void +Device::prepareWrite(int cpu) +{ + prepareIO(cpu); +} + /** * I/O read of device register */ @@ -350,6 +365,8 @@ Device::read(MemReqPtr &req, uint8_t *data) Fault Device::readBar0(MemReqPtr &req, Addr daddr, uint8_t *data) { + int cpu = (req->xc->regs.ipr[TheISA::IPR_PALtemp16] >> 8) & 0xff; + if (!regValid(daddr)) panic("invalid register: da=%#x pa=%#x va=%#x size=%d", daddr, req->paddr, req->vaddr, req->size); @@ -363,7 +380,7 @@ Device::readBar0(MemReqPtr &req, Addr daddr, uint8_t *data) panic("invalid size for reg %s: da=%#x pa=%#x va=%#x size=%d", info.name, daddr, req->paddr, req->vaddr, req->size); - prepareRead(); + prepareRead(cpu); uint64_t value = 0; if (req->size == 4) { @@ -393,7 +410,7 @@ Device::readBar0(MemReqPtr &req, Addr daddr, uint8_t *data) * IPR read of device register */ Fault -Device::iprRead(Addr daddr, uint64_t &result) +Device::iprRead(Addr daddr, int cpu, uint64_t &result) { if (!regValid(daddr)) panic("invalid address: da=%#x", daddr); @@ -404,7 +421,7 @@ Device::iprRead(Addr daddr, uint64_t &result) DPRINTF(EthernetPIO, "read reg=%s da=%#x\n", info.name, daddr); - prepareRead(); + prepareRead(cpu); if (info.size == 4) result = regData32(daddr); @@ -440,6 +457,8 @@ Device::write(MemReqPtr &req, const uint8_t *data) Fault Device::writeBar0(MemReqPtr &req, Addr daddr, const uint8_t *data) { + int cpu = (req->xc->regs.ipr[TheISA::IPR_PALtemp16] >> 8) & 0xff; + if (!regValid(daddr)) panic("invalid address: da=%#x pa=%#x va=%#x size=%d", daddr, req->paddr, req->vaddr, req->size); @@ -459,6 +478,21 @@ Device::writeBar0(MemReqPtr &req, Addr daddr, const uint8_t *data) info.name, info.size == 4 ? reg32 : reg64, daddr, req->paddr, req->vaddr, req->size); + if (pioDelayWrite) + writeQueue[cpu].push_back(RegWriteData(daddr, reg64)); + + if (!pioDelayWrite || !info.delay_write) + regWrite(daddr, cpu, data); + + return No_Fault; +} + +void +Device::regWrite(Addr daddr, int cpu, const uint8_t *data) +{ + uint32_t reg32 = *(uint32_t *)data; + uint64_t reg64 = *(uint64_t *)data; + switch (daddr) { case Regs::Config: changeConfig(reg32); @@ -502,8 +536,6 @@ Device::writeBar0(MemReqPtr &req, Addr daddr, const uint8_t *data) } break; } - - return No_Fault; } void @@ -769,12 +801,6 @@ Device::rxKick() next: switch (rxState) { case rxIdle: - if (rxPioRequest) { - DPRINTF(EthernetPIO, "rxIdle: PIO waiting responding at %d\n", - curTick + pioLatency); - pioInterface->respond(rxPioRequest, curTick); - rxPioRequest = 0; - } goto exit; case rxFifoBlock: @@ -977,12 +1003,6 @@ Device::txKick() next: switch (txState) { case txIdle: - if (txPioRequest) { - DPRINTF(EthernetPIO, "txIdle: PIO waiting responding at %d\n", - curTick + pioLatency); - pioInterface->respond(txPioRequest, curTick + pioLatency); - txPioRequest = 0; - } goto exit; case txFifoBlock: @@ -1371,42 +1391,46 @@ Device::unserialize(Checkpoint *cp, const std::string §ion) /* * re-add addrRanges to bus bridges */ - if (pioInterface) + if (pioInterface) { pioInterface->addAddrRange(RangeSize(BARAddrs[0], BARSize[0])); + pioInterface->addAddrRange(RangeSize(BARAddrs[1], BARSize[1])); + } } Tick Device::cacheAccess(MemReqPtr &req) { - //The mask is to give you only the offset into the device register file - Addr daddr = req->paddr - addr; + Addr daddr; + int bar; + if (!getBAR(req->paddr, daddr, bar)) + panic("address does not map to a BAR pa=%#x va=%#x size=%d", + req->paddr, req->vaddr, req->size); - DPRINTF(EthernetPIO, "timing access to paddr=%#x (daddr=%#x)\n", - req->paddr, daddr); + DPRINTF(EthernetPIO, "timing %s to paddr=%#x bar=%d daddr=%#x\n", + req->cmd.toString(), req->paddr, bar, daddr); - Tick when = curTick + pioLatency; + if (!pioDelayWrite || !req->cmd.isWrite()) + return curTick + pioLatency; - switch (daddr) { - case Regs::RxWait: - if (rxState != rxIdle) { - DPRINTF(EthernetPIO, "rxState=%s (not idle)... waiting\n", - TxStateStrings[txState]); - rxPioRequest = req; - when = 0; - } - break; + if (bar == 0) { + int cpu = (req->xc->regs.ipr[TheISA::IPR_PALtemp16] >> 8) & 0xff; + std::list &wq = writeQueue[cpu]; + if (wq.empty()) + panic("WriteQueue for cpu %d empty timing daddr=%#x", cpu, daddr); - case Regs::TxWait: - if (txState != txIdle) { - DPRINTF(EthernetPIO, "txState=%s (not idle)... waiting\n", - TxStateStrings[txState]); - txPioRequest = req; - when = 0; - } - break; + const RegWriteData &data = wq.front(); + if (data.daddr != daddr) + panic("read mismatch on cpu %d, daddr functional=%#x timing=%#x", + cpu, data.daddr, daddr); + + const Regs::Info &info = regInfo(data.daddr); + if (info.delay_write) + regWrite(daddr, cpu, (uint8_t *)&data.value); + + wq.pop_front(); } - return when; + return curTick + pioLatency; } BEGIN_DECLARE_SIM_OBJECT_PARAMS(Interface) @@ -1463,6 +1487,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Device) Param dma_write_factor; Param dma_no_allocate; Param pio_latency; + Param pio_delay_write; Param intr_delay; Param rx_delay; @@ -1505,6 +1530,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Device) INIT_PARAM(dma_write_factor, "multiplier for dma writes"), INIT_PARAM(dma_no_allocate, "Should we allocat on read in cache"), INIT_PARAM(pio_latency, "Programmed IO latency in bus cycles"), + INIT_PARAM(pio_delay_write, ""), INIT_PARAM(intr_delay, "Interrupt Delay"), INIT_PARAM(rx_delay, "Receive Delay"), @@ -1551,6 +1577,7 @@ CREATE_SIM_OBJECT(Device) params->dma_write_factor = dma_write_factor; params->dma_no_allocate = dma_no_allocate; params->pio_latency = pio_latency; + params->pio_delay_write = pio_delay_write; params->intr_delay = intr_delay; params->tx_delay = tx_delay; diff --git a/dev/sinic.hh b/dev/sinic.hh index b9089cd53..b3255b6c0 100644 --- a/dev/sinic.hh +++ b/dev/sinic.hh @@ -234,13 +234,6 @@ class Device : public Base Tick dmaWriteDelay; Tick dmaWriteFactor; -/** - * PIO parameters - */ - protected: - MemReqPtr rxPioRequest; - MemReqPtr txPioRequest; - /** * Interrupt management */ @@ -262,12 +255,26 @@ class Device : public Base virtual Fault read(MemReqPtr &req, uint8_t *data); virtual Fault write(MemReqPtr &req, const uint8_t *data); - void prepareRead(); - Fault iprRead(Addr daddr, uint64_t &result); + void prepareIO(int cpu); + void prepareRead(int cpu); + void prepareWrite(int cpu); + Fault iprRead(Addr daddr, int cpu, uint64_t &result); Fault readBar0(MemReqPtr &req, Addr daddr, uint8_t *data); Fault writeBar0(MemReqPtr &req, Addr daddr, const uint8_t *data); + void regWrite(Addr daddr, int cpu, const uint8_t *data); Tick cacheAccess(MemReqPtr &req); + protected: + struct RegWriteData { + Addr daddr; + uint64_t value; + RegWriteData(Addr da, uint64_t val) : daddr(da), value(val) {} + }; + + std::vector > writeQueue; + + bool pioDelayWrite; + /** * Statistics */ @@ -323,6 +330,7 @@ class Device : public Base Bus *header_bus; Bus *payload_bus; Tick pio_latency; + bool pio_delay_write; PhysicalMemory *physmem; IntrControl *intctrl; bool rx_filter; diff --git a/dev/sinicreg.hh b/dev/sinicreg.hh index 12f545255..30f5b3c95 100644 --- a/dev/sinicreg.hh +++ b/dev/sinicreg.hh @@ -140,6 +140,8 @@ struct Info uint8_t size; bool read; bool write; + bool delay_read; + bool delay_write; const char *name; }; @@ -148,26 +150,34 @@ struct Info inline const Regs::Info& regInfo(Addr daddr) { + static Regs::Info invalid = { 0, false, false, false, false, "invalid" }; static Regs::Info info [] = { - { 4, true, true, "Config" }, - { 4, false, true, "Command" }, - { 4, true, true, "IntrStatus" }, - { 4, true, true, "IntrMask" }, - { 4, true, false, "RxMaxCopy" }, - { 4, true, false, "TxMaxCopy" }, - { 4, true, false, "RxMaxIntr" }, - { 0, false, false, "invalid" }, - { 4, true, false, "RxFifoSize" }, - { 4, true, false, "TxFifoSize" }, - { 4, true, false, "RxFifoMark" }, - { 4, true, false, "TxFifoMark" }, - { 8, true, true, "RxData" }, { 0, false, false, "invalid" }, - { 8, true, false, "RxDone" }, { 0, false, false, "invalid" }, - { 8, true, false, "RxWait" }, { 0, false, false, "invalid" }, - { 8, true, true, "TxData" }, { 0, false, false, "invalid" }, - { 8, true, false, "TxDone" }, { 0, false, false, "invalid" }, - { 8, true, false, "TxWait" }, { 0, false, false, "invalid" }, - { 8, true, false, "HwAddr" }, { 0, false, false, "invalid" } + { 4, true, true, false, false, "Config" }, + { 4, false, true, false, false, "Command" }, + { 4, true, true, false, false, "IntrStatus" }, + { 4, true, true, false, false, "IntrMask" }, + { 4, true, false, false, false, "RxMaxCopy" }, + { 4, true, false, false, false, "TxMaxCopy" }, + { 4, true, false, false, false, "RxMaxIntr" }, + invalid, + { 4, true, false, false, false, "RxFifoSize" }, + { 4, true, false, false, false, "TxFifoSize" }, + { 4, true, false, false, false, "RxFifoMark" }, + { 4, true, false, false, false, "TxFifoMark" }, + { 8, true, true, false, true, "RxData" }, + invalid, + { 8, true, false, false, false, "RxDone" }, + invalid, + { 8, true, false, false, false, "RxWait" }, + invalid, + { 8, true, true, false, true, "TxData" }, + invalid, + { 8, true, false, false, false, "TxDone" }, + invalid, + { 8, true, false, false, false, "TxWait" }, + invalid, + { 8, true, false, false, false, "HwAddr" }, + invalid, }; return info[daddr / 4]; -- cgit v1.2.3