diff options
Diffstat (limited to 'src/cpu/simple')
-rw-r--r-- | src/cpu/simple/atomic.cc | 88 | ||||
-rw-r--r-- | src/cpu/simple/atomic.hh | 5 | ||||
-rw-r--r-- | src/cpu/simple/timing.cc | 59 | ||||
-rw-r--r-- | src/cpu/simple/timing.hh | 10 |
4 files changed, 110 insertions, 52 deletions
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index d96adffd5..8ee91758f 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -294,9 +294,9 @@ AtomicSimpleCPU::suspendContext(int thread_num) } -template <class T> Fault -AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) +AtomicSimpleCPU::readBytes(Addr addr, uint8_t * data, + unsigned size, unsigned flags) { // use the CPU's statically allocated read request and packet objects Request *req = &data_read_req; @@ -308,21 +308,19 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) //The block size of our peer. unsigned blockSize = dcachePort.peerBlockSize(); //The size of the data we're trying to read. - int dataSize = sizeof(T); - - uint8_t * dataPtr = (uint8_t *)&data; + int fullSize = size; //The address of the second part of this access if it needs to be split //across a cache line boundary. - Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + Addr secondAddr = roundDown(addr + size - 1, blockSize); - if(secondAddr > addr) - dataSize = secondAddr - addr; + if (secondAddr > addr) + size = secondAddr - addr; dcache_latency = 0; - while(1) { - req->setVirt(0, addr, dataSize, flags, thread->readPC()); + while (1) { + req->setVirt(0, addr, size, flags, thread->readPC()); // translate to physical address Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read); @@ -332,7 +330,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) Packet pkt = Packet(req, req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq, Packet::Broadcast); - pkt.dataStatic(dataPtr); + pkt.dataStatic(data); if (req->isMmapedIpr()) dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt); @@ -363,10 +361,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) //If we don't need to access a second cache line, stop now. if (secondAddr <= addr) { - data = gtoh(data); - if (traceData) { - traceData->setData(data); - } if (req->isLocked() && fault == NoFault) { assert(!locked); locked = true; @@ -379,14 +373,30 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) */ //Move the pointer we're reading into to the correct location. - dataPtr += dataSize; + data += size; //Adjust the size to get the remaining bytes. - dataSize = addr + sizeof(T) - secondAddr; + size = addr + fullSize - secondAddr; //And access the right address. addr = secondAddr; } } + +template <class T> +Fault +AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) +{ + uint8_t *dataPtr = (uint8_t *)&data; + memset(dataPtr, 0, sizeof(data)); + Fault fault = readBytes(addr, dataPtr, sizeof(data), flags); + if (fault == NoFault) { + data = gtoh(data); + if (traceData) + traceData->setData(data); + } + return fault; +} + #ifndef DOXYGEN_SHOULD_SKIP_THIS template @@ -438,36 +448,33 @@ AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) } -template <class T> Fault -AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +AtomicSimpleCPU::writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) { // use the CPU's statically allocated write request and packet objects Request *req = &data_write_req; if (traceData) { traceData->setAddr(addr); - traceData->setData(data); } //The block size of our peer. unsigned blockSize = dcachePort.peerBlockSize(); //The size of the data we're trying to read. - int dataSize = sizeof(T); - - uint8_t * dataPtr = (uint8_t *)&data; + int fullSize = size; //The address of the second part of this access if it needs to be split //across a cache line boundary. - Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + Addr secondAddr = roundDown(addr + size - 1, blockSize); if(secondAddr > addr) - dataSize = secondAddr - addr; + size = secondAddr - addr; dcache_latency = 0; while(1) { - req->setVirt(0, addr, dataSize, flags, thread->readPC()); + req->setVirt(0, addr, size, flags, thread->readPC()); // translate to physical address Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write); @@ -490,16 +497,12 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) { Packet pkt = Packet(req, cmd, Packet::Broadcast); - pkt.dataStatic(dataPtr); + pkt.dataStatic(data); if (req->isMmapedIpr()) { dcache_latency += TheISA::handleIprWrite(thread->getTC(), &pkt); } else { - //XXX This needs to be outside of the loop in order to - //work properly for cache line boundary crossing - //accesses in transendian simulations. - data = htog(data); if (hasPhysMemPort && pkt.getAddr() == physMemAddr) dcache_latency += physmemPort.sendAtomic(&pkt); else @@ -510,7 +513,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) if (req->isSwap()) { assert(res); - *res = pkt.get<T>(); + memcpy(res, pkt.getPtr<uint8_t>(), fullSize); } } @@ -539,15 +542,32 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) */ //Move the pointer we're reading into to the correct location. - dataPtr += dataSize; + data += size; //Adjust the size to get the remaining bytes. - dataSize = addr + sizeof(T) - secondAddr; + size = addr + fullSize - secondAddr; //And access the right address. addr = secondAddr; } } +template <class T> +Fault +AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + uint8_t *dataPtr = (uint8_t *)&data; + if (traceData) + traceData->setData(data); + data = htog(data); + + Fault fault = writeBytes(dataPtr, sizeof(data), addr, flags, res); + if (fault == NoFault && data_write_req.isSwap()) { + *res = gtoh((T)*res); + } + return fault; +} + + #ifndef DOXYGEN_SHOULD_SKIP_THIS template diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 2a66e9341..5ec1970e7 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -134,9 +134,14 @@ class AtomicSimpleCPU : public BaseSimpleCPU template <class T> Fault read(Addr addr, T &data, unsigned flags); + Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags); + template <class T> Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + Fault writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + /** * Print state of address in memory system via PrintReq (for * debugging). diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 8a53aac3a..b04288ca6 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -414,26 +414,25 @@ TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2, pkt2->senderState = new SplitFragmentSenderState(pkt, 1); } -template <class T> Fault -TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) +TimingSimpleCPU::readBytes(Addr addr, uint8_t *data, + unsigned size, unsigned flags) { Fault fault; const int asid = 0; const ThreadID tid = 0; const Addr pc = thread->readPC(); unsigned block_size = dcachePort.peerBlockSize(); - int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Read; if (traceData) { traceData->setAddr(addr); } - RequestPtr req = new Request(asid, addr, data_size, + RequestPtr req = new Request(asid, addr, size, flags, pc, _cpuId, tid); - Addr split_addr = roundDown(addr + data_size - 1, block_size); + Addr split_addr = roundDown(addr + size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); _status = DTBWaitResponse; @@ -443,7 +442,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) req->splitOnVaddr(split_addr, req1, req2); WholeTranslationState *state = - new WholeTranslationState(req, req1, req2, (uint8_t *)(new T), + new WholeTranslationState(req, req1, req2, new uint8_t[size], NULL, mode); DataTranslation<TimingSimpleCPU> *trans1 = new DataTranslation<TimingSimpleCPU>(this, state, 0); @@ -454,7 +453,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) thread->dtb->translateTiming(req2, tc, trans2, mode); } else { WholeTranslationState *state = - new WholeTranslationState(req, (uint8_t *)(new T), NULL, mode); + new WholeTranslationState(req, new uint8_t[size], NULL, mode); DataTranslation<TimingSimpleCPU> *translation = new DataTranslation<TimingSimpleCPU>(this, state); thread->dtb->translateTiming(req, tc, translation, mode); @@ -463,6 +462,13 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) return NoFault; } +template <class T> +Fault +TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) +{ + return readBytes(addr, (uint8_t *)&data, sizeof(T), flags); +} + #ifndef DOXYGEN_SHOULD_SKIP_THIS template @@ -532,30 +538,26 @@ TimingSimpleCPU::handleWritePacket() return dcache_pkt == NULL; } -template <class T> Fault -TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +TimingSimpleCPU::writeTheseBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) { const int asid = 0; const ThreadID tid = 0; const Addr pc = thread->readPC(); unsigned block_size = dcachePort.peerBlockSize(); - int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Write; if (traceData) { traceData->setAddr(addr); - traceData->setData(data); } - RequestPtr req = new Request(asid, addr, data_size, + RequestPtr req = new Request(asid, addr, size, flags, pc, _cpuId, tid); - Addr split_addr = roundDown(addr + data_size - 1, block_size); + Addr split_addr = roundDown(addr + size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); - T *dataP = new T; - *dataP = TheISA::htog(data); _status = DTBWaitResponse; if (split_addr > addr) { RequestPtr req1, req2; @@ -563,8 +565,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) req->splitOnVaddr(split_addr, req1, req2); WholeTranslationState *state = - new WholeTranslationState(req, req1, req2, (uint8_t *)dataP, - res, mode); + new WholeTranslationState(req, req1, req2, data, res, mode); DataTranslation<TimingSimpleCPU> *trans1 = new DataTranslation<TimingSimpleCPU>(this, state, 0); DataTranslation<TimingSimpleCPU> *trans2 = @@ -574,7 +575,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) thread->dtb->translateTiming(req2, tc, trans2, mode); } else { WholeTranslationState *state = - new WholeTranslationState(req, (uint8_t *)dataP, res, mode); + new WholeTranslationState(req, data, res, mode); DataTranslation<TimingSimpleCPU> *translation = new DataTranslation<TimingSimpleCPU>(this, state); thread->dtb->translateTiming(req, tc, translation, mode); @@ -584,6 +585,28 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) return NoFault; } +Fault +TimingSimpleCPU::writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res) +{ + uint8_t *newData = new uint8_t[size]; + memcpy(newData, data, size); + return writeTheseBytes(newData, size, addr, flags, res); +} + +template <class T> +Fault +TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setData(data); + } + T *dataP = new T; + *dataP = TheISA::htog(data); + + return writeTheseBytes((uint8_t *)dataP, sizeof(T), addr, flags, res); +} + #ifndef DOXYGEN_SHOULD_SKIP_THIS template diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 62c105418..65cbe3098 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -251,9 +251,14 @@ class TimingSimpleCPU : public BaseSimpleCPU template <class T> Fault read(Addr addr, T &data, unsigned flags); + Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags); + template <class T> Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + Fault writeBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + void fetch(); void sendFetch(Fault fault, RequestPtr req, ThreadContext *tc); void completeIfetch(PacketPtr ); @@ -274,6 +279,11 @@ class TimingSimpleCPU : public BaseSimpleCPU private: + // The backend for writeBytes and write. It's the same as writeBytes, but + // doesn't make a copy of data. + Fault writeTheseBytes(uint8_t *data, unsigned size, + Addr addr, unsigned flags, uint64_t *res); + typedef EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch> FetchEvent; FetchEvent fetchEvent; |