summaryrefslogtreecommitdiff
path: root/src/cpu/simple
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/simple')
-rw-r--r--src/cpu/simple/atomic.cc88
-rw-r--r--src/cpu/simple/atomic.hh5
-rw-r--r--src/cpu/simple/timing.cc59
-rw-r--r--src/cpu/simple/timing.hh10
4 files changed, 110 insertions, 52 deletions
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index d96adffd5..8ee91758f 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -294,9 +294,9 @@ AtomicSimpleCPU::suspendContext(int thread_num)
}
-template <class T>
Fault
-AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
+AtomicSimpleCPU::readBytes(Addr addr, uint8_t * data,
+ unsigned size, unsigned flags)
{
// use the CPU's statically allocated read request and packet objects
Request *req = &data_read_req;
@@ -308,21 +308,19 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
//The block size of our peer.
unsigned blockSize = dcachePort.peerBlockSize();
//The size of the data we're trying to read.
- int dataSize = sizeof(T);
-
- uint8_t * dataPtr = (uint8_t *)&data;
+ int fullSize = size;
//The address of the second part of this access if it needs to be split
//across a cache line boundary.
- Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+ Addr secondAddr = roundDown(addr + size - 1, blockSize);
- if(secondAddr > addr)
- dataSize = secondAddr - addr;
+ if (secondAddr > addr)
+ size = secondAddr - addr;
dcache_latency = 0;
- while(1) {
- req->setVirt(0, addr, dataSize, flags, thread->readPC());
+ while (1) {
+ req->setVirt(0, addr, size, flags, thread->readPC());
// translate to physical address
Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
@@ -332,7 +330,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
Packet pkt = Packet(req,
req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
Packet::Broadcast);
- pkt.dataStatic(dataPtr);
+ pkt.dataStatic(data);
if (req->isMmapedIpr())
dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
@@ -363,10 +361,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
//If we don't need to access a second cache line, stop now.
if (secondAddr <= addr)
{
- data = gtoh(data);
- if (traceData) {
- traceData->setData(data);
- }
if (req->isLocked() && fault == NoFault) {
assert(!locked);
locked = true;
@@ -379,14 +373,30 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
*/
//Move the pointer we're reading into to the correct location.
- dataPtr += dataSize;
+ data += size;
//Adjust the size to get the remaining bytes.
- dataSize = addr + sizeof(T) - secondAddr;
+ size = addr + fullSize - secondAddr;
//And access the right address.
addr = secondAddr;
}
}
+
+template <class T>
+Fault
+AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
+{
+ uint8_t *dataPtr = (uint8_t *)&data;
+ memset(dataPtr, 0, sizeof(data));
+ Fault fault = readBytes(addr, dataPtr, sizeof(data), flags);
+ if (fault == NoFault) {
+ data = gtoh(data);
+ if (traceData)
+ traceData->setData(data);
+ }
+ return fault;
+}
+
#ifndef DOXYGEN_SHOULD_SKIP_THIS
template
@@ -438,36 +448,33 @@ AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
}
-template <class T>
Fault
-AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
+AtomicSimpleCPU::writeBytes(uint8_t *data, unsigned size,
+ Addr addr, unsigned flags, uint64_t *res)
{
// use the CPU's statically allocated write request and packet objects
Request *req = &data_write_req;
if (traceData) {
traceData->setAddr(addr);
- traceData->setData(data);
}
//The block size of our peer.
unsigned blockSize = dcachePort.peerBlockSize();
//The size of the data we're trying to read.
- int dataSize = sizeof(T);
-
- uint8_t * dataPtr = (uint8_t *)&data;
+ int fullSize = size;
//The address of the second part of this access if it needs to be split
//across a cache line boundary.
- Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+ Addr secondAddr = roundDown(addr + size - 1, blockSize);
if(secondAddr > addr)
- dataSize = secondAddr - addr;
+ size = secondAddr - addr;
dcache_latency = 0;
while(1) {
- req->setVirt(0, addr, dataSize, flags, thread->readPC());
+ req->setVirt(0, addr, size, flags, thread->readPC());
// translate to physical address
Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
@@ -490,16 +497,12 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
Packet pkt = Packet(req, cmd, Packet::Broadcast);
- pkt.dataStatic(dataPtr);
+ pkt.dataStatic(data);
if (req->isMmapedIpr()) {
dcache_latency +=
TheISA::handleIprWrite(thread->getTC(), &pkt);
} else {
- //XXX This needs to be outside of the loop in order to
- //work properly for cache line boundary crossing
- //accesses in transendian simulations.
- data = htog(data);
if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
dcache_latency += physmemPort.sendAtomic(&pkt);
else
@@ -510,7 +513,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
if (req->isSwap()) {
assert(res);
- *res = pkt.get<T>();
+ memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
}
}
@@ -539,15 +542,32 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
*/
//Move the pointer we're reading into to the correct location.
- dataPtr += dataSize;
+ data += size;
//Adjust the size to get the remaining bytes.
- dataSize = addr + sizeof(T) - secondAddr;
+ size = addr + fullSize - secondAddr;
//And access the right address.
addr = secondAddr;
}
}
+template <class T>
+Fault
+AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+ uint8_t *dataPtr = (uint8_t *)&data;
+ if (traceData)
+ traceData->setData(data);
+ data = htog(data);
+
+ Fault fault = writeBytes(dataPtr, sizeof(data), addr, flags, res);
+ if (fault == NoFault && data_write_req.isSwap()) {
+ *res = gtoh((T)*res);
+ }
+ return fault;
+}
+
+
#ifndef DOXYGEN_SHOULD_SKIP_THIS
template
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 2a66e9341..5ec1970e7 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -134,9 +134,14 @@ class AtomicSimpleCPU : public BaseSimpleCPU
template <class T>
Fault read(Addr addr, T &data, unsigned flags);
+ Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags);
+
template <class T>
Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+ Fault writeBytes(uint8_t *data, unsigned size,
+ Addr addr, unsigned flags, uint64_t *res);
+
/**
* Print state of address in memory system via PrintReq (for
* debugging).
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 8a53aac3a..b04288ca6 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -414,26 +414,25 @@ TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
pkt2->senderState = new SplitFragmentSenderState(pkt, 1);
}
-template <class T>
Fault
-TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
+TimingSimpleCPU::readBytes(Addr addr, uint8_t *data,
+ unsigned size, unsigned flags)
{
Fault fault;
const int asid = 0;
const ThreadID tid = 0;
const Addr pc = thread->readPC();
unsigned block_size = dcachePort.peerBlockSize();
- int data_size = sizeof(T);
BaseTLB::Mode mode = BaseTLB::Read;
if (traceData) {
traceData->setAddr(addr);
}
- RequestPtr req = new Request(asid, addr, data_size,
+ RequestPtr req = new Request(asid, addr, size,
flags, pc, _cpuId, tid);
- Addr split_addr = roundDown(addr + data_size - 1, block_size);
+ Addr split_addr = roundDown(addr + size - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
_status = DTBWaitResponse;
@@ -443,7 +442,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
req->splitOnVaddr(split_addr, req1, req2);
WholeTranslationState *state =
- new WholeTranslationState(req, req1, req2, (uint8_t *)(new T),
+ new WholeTranslationState(req, req1, req2, new uint8_t[size],
NULL, mode);
DataTranslation<TimingSimpleCPU> *trans1 =
new DataTranslation<TimingSimpleCPU>(this, state, 0);
@@ -454,7 +453,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
thread->dtb->translateTiming(req2, tc, trans2, mode);
} else {
WholeTranslationState *state =
- new WholeTranslationState(req, (uint8_t *)(new T), NULL, mode);
+ new WholeTranslationState(req, new uint8_t[size], NULL, mode);
DataTranslation<TimingSimpleCPU> *translation
= new DataTranslation<TimingSimpleCPU>(this, state);
thread->dtb->translateTiming(req, tc, translation, mode);
@@ -463,6 +462,13 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
return NoFault;
}
+template <class T>
+Fault
+TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
+{
+ return readBytes(addr, (uint8_t *)&data, sizeof(T), flags);
+}
+
#ifndef DOXYGEN_SHOULD_SKIP_THIS
template
@@ -532,30 +538,26 @@ TimingSimpleCPU::handleWritePacket()
return dcache_pkt == NULL;
}
-template <class T>
Fault
-TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
+TimingSimpleCPU::writeTheseBytes(uint8_t *data, unsigned size,
+ Addr addr, unsigned flags, uint64_t *res)
{
const int asid = 0;
const ThreadID tid = 0;
const Addr pc = thread->readPC();
unsigned block_size = dcachePort.peerBlockSize();
- int data_size = sizeof(T);
BaseTLB::Mode mode = BaseTLB::Write;
if (traceData) {
traceData->setAddr(addr);
- traceData->setData(data);
}
- RequestPtr req = new Request(asid, addr, data_size,
+ RequestPtr req = new Request(asid, addr, size,
flags, pc, _cpuId, tid);
- Addr split_addr = roundDown(addr + data_size - 1, block_size);
+ Addr split_addr = roundDown(addr + size - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
- T *dataP = new T;
- *dataP = TheISA::htog(data);
_status = DTBWaitResponse;
if (split_addr > addr) {
RequestPtr req1, req2;
@@ -563,8 +565,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
req->splitOnVaddr(split_addr, req1, req2);
WholeTranslationState *state =
- new WholeTranslationState(req, req1, req2, (uint8_t *)dataP,
- res, mode);
+ new WholeTranslationState(req, req1, req2, data, res, mode);
DataTranslation<TimingSimpleCPU> *trans1 =
new DataTranslation<TimingSimpleCPU>(this, state, 0);
DataTranslation<TimingSimpleCPU> *trans2 =
@@ -574,7 +575,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
thread->dtb->translateTiming(req2, tc, trans2, mode);
} else {
WholeTranslationState *state =
- new WholeTranslationState(req, (uint8_t *)dataP, res, mode);
+ new WholeTranslationState(req, data, res, mode);
DataTranslation<TimingSimpleCPU> *translation =
new DataTranslation<TimingSimpleCPU>(this, state);
thread->dtb->translateTiming(req, tc, translation, mode);
@@ -584,6 +585,28 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
return NoFault;
}
+Fault
+TimingSimpleCPU::writeBytes(uint8_t *data, unsigned size,
+ Addr addr, unsigned flags, uint64_t *res)
+{
+ uint8_t *newData = new uint8_t[size];
+ memcpy(newData, data, size);
+ return writeTheseBytes(newData, size, addr, flags, res);
+}
+
+template <class T>
+Fault
+TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+ if (traceData) {
+ traceData->setData(data);
+ }
+ T *dataP = new T;
+ *dataP = TheISA::htog(data);
+
+ return writeTheseBytes((uint8_t *)dataP, sizeof(T), addr, flags, res);
+}
+
#ifndef DOXYGEN_SHOULD_SKIP_THIS
template
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 62c105418..65cbe3098 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -251,9 +251,14 @@ class TimingSimpleCPU : public BaseSimpleCPU
template <class T>
Fault read(Addr addr, T &data, unsigned flags);
+ Fault readBytes(Addr addr, uint8_t *data, unsigned size, unsigned flags);
+
template <class T>
Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+ Fault writeBytes(uint8_t *data, unsigned size,
+ Addr addr, unsigned flags, uint64_t *res);
+
void fetch();
void sendFetch(Fault fault, RequestPtr req, ThreadContext *tc);
void completeIfetch(PacketPtr );
@@ -274,6 +279,11 @@ class TimingSimpleCPU : public BaseSimpleCPU
private:
+ // The backend for writeBytes and write. It's the same as writeBytes, but
+ // doesn't make a copy of data.
+ Fault writeTheseBytes(uint8_t *data, unsigned size,
+ Addr addr, unsigned flags, uint64_t *res);
+
typedef EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch> FetchEvent;
FetchEvent fetchEvent;