author    Timothy M. Jones <tjones1@inf.ed.ac.uk>  2010-02-12 19:53:20 +0000
committer Timothy M. Jones <tjones1@inf.ed.ac.uk>  2010-02-12 19:53:20 +0000
commit    29e8bcead5700f638c4848d9b5710d0ebf18d64b (patch)
tree      e85dac6557f13146ae2cb119d3ea5b515f3f9e29 /src/cpu/base_dyn_inst.hh
parent    7fe9f92cfc73147a1a024c1632c9a7619c1779d1 (diff)
download  gem5-29e8bcead5700f638c4848d9b5710d0ebf18d64b.tar.xz
O3CPU: Split loads and stores that cross cache line boundaries.
When each load or store is sent to the LSQ, we check whether it will cross a cache line boundary and, if so, split it in two. This creates two TLB translations and two memory requests. Care has to be taken if the first packet of a split load is sent but the second is blocked by the cache; similarly, for a store, if the first packet cannot be sent, we must store the second one somewhere to retry later.

This modifies the LSQSenderState class to record both packets of a split load or store. Finally, a new const variable, HasUnalignedMemAcc, is added to each ISA to indicate whether unaligned memory accesses are allowed. It is used throughout the changed code so that the compiler can optimise away the code dealing with split requests for ISAs that don't need it.
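The split decision itself is plain address arithmetic: round the last byte of the access down to its cache-block boundary, and if that boundary lies above the start address the access spans two blocks. The sketch below reproduces that check outside gem5 so it can be run standalone; roundDown(), checkSplit() and the concrete addresses are illustrative stand-ins, not part of the patch, which operates on gem5 RequestPtr objects (see splitRequest() in the diff).

// Standalone sketch of the boundary check used by splitRequest() below.
#include <cassert>
#include <cstdint>
#include <iostream>

using Addr = std::uint64_t;

// Round addr down to a multiple of align (align must be a power of two).
static Addr roundDown(Addr addr, Addr align) { return addr & ~(align - 1); }

// Returns true and sets split_addr if [addr, addr + size) spans two blocks.
static bool checkSplit(Addr addr, unsigned size, unsigned block_size,
                       Addr &split_addr)
{
    split_addr = roundDown(addr + size - 1, block_size);
    assert(split_addr <= addr || split_addr - addr < block_size);
    return split_addr > addr;   // Spans two blocks.
}

int main()
{
    Addr split;
    // A 4-byte access at 0x3e with 64-byte blocks touches 0x3e-0x41, so it
    // crosses the boundary at 0x40 and must be split there.
    if (checkSplit(0x3e, 4, 64, split))
        std::cout << "split at 0x" << std::hex << split << '\n';   // 0x40
    // A 4-byte access at 0x38 stays inside one block: no split.
    if (!checkSplit(0x38, 4, 64, split))
        std::cout << "no split needed\n";
}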
Diffstat (limited to 'src/cpu/base_dyn_inst.hh')
-rw-r--r--  src/cpu/base_dyn_inst.hh | 75
1 file changed, 64 insertions(+), 11 deletions(-)
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 7732b71f8..65578379b 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -131,8 +131,13 @@ class BaseDynInst : public FastAlloc, public RefCounted
template <class T>
Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+ /** Splits a request in two if it crosses a dcache block. */
+ void splitRequest(RequestPtr req, RequestPtr &sreqLow,
+ RequestPtr &sreqHigh);
+
/** Initiate a DTB address translation. */
- void initiateTranslation(RequestPtr req, uint64_t *res,
+ void initiateTranslation(RequestPtr req, RequestPtr sreqLow,
+ RequestPtr sreqHigh, uint64_t *res,
BaseTLB::Mode mode);
/** Finish a DTB address translation. */
@@ -870,12 +875,19 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
Request *req = new Request(asid, addr, sizeof(T), flags, this->PC,
thread->contextId(), threadNumber);
- initiateTranslation(req, NULL, BaseTLB::Read);
+ Request *sreqLow = NULL;
+ Request *sreqHigh = NULL;
+
+ // Only split the request if the ISA supports unaligned accesses.
+ if (TheISA::HasUnalignedMemAcc) {
+ splitRequest(req, sreqLow, sreqHigh);
+ }
+ initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
if (fault == NoFault) {
effAddr = req->getVaddr();
effAddrValid = true;
- cpu->read(req, data, lqIdx);
+ cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
} else {
// Return a fixed value to keep simulation deterministic even
@@ -909,12 +921,19 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
Request *req = new Request(asid, addr, sizeof(T), flags, this->PC,
thread->contextId(), threadNumber);
- initiateTranslation(req, res, BaseTLB::Write);
+ Request *sreqLow = NULL;
+ Request *sreqHigh = NULL;
+
+ // Only split the request if the ISA supports unaligned accesses.
+ if (TheISA::HasUnalignedMemAcc) {
+ splitRequest(req, sreqLow, sreqHigh);
+ }
+ initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
if (fault == NoFault) {
effAddr = req->getVaddr();
effAddrValid = true;
- cpu->write(req, data, sqIdx);
+ cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
}
return fault;
@@ -922,14 +941,48 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
template<class Impl>
inline void
-BaseDynInst<Impl>::initiateTranslation(RequestPtr req, uint64_t *res,
+BaseDynInst<Impl>::splitRequest(RequestPtr req, RequestPtr &sreqLow,
+ RequestPtr &sreqHigh)
+{
+ // Check to see if the request crosses the next level block boundary.
+ unsigned block_size = cpu->getDcachePort()->peerBlockSize();
+ Addr addr = req->getVaddr();
+ Addr split_addr = roundDown(addr + req->getSize() - 1, block_size);
+ assert(split_addr <= addr || split_addr - addr < block_size);
+
+ // Spans two blocks.
+ if (split_addr > addr) {
+ req->splitOnVaddr(split_addr, sreqLow, sreqHigh);
+ }
+}
+
+template<class Impl>
+inline void
+BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
+ RequestPtr sreqHigh, uint64_t *res,
BaseTLB::Mode mode)
{
- WholeTranslationState *state =
- new WholeTranslationState(req, NULL, res, mode);
- DataTranslation<BaseDynInst<Impl> > *trans =
- new DataTranslation<BaseDynInst<Impl> >(this, state);
- cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
+ if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
+ WholeTranslationState *state =
+ new WholeTranslationState(req, NULL, res, mode);
+
+ // One translation if the request isn't split.
+ DataTranslation<BaseDynInst<Impl> > *trans =
+ new DataTranslation<BaseDynInst<Impl> >(this, state);
+ cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
+ } else {
+ WholeTranslationState *state =
+ new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode);
+
+ // Two translations when the request is split.
+ DataTranslation<BaseDynInst<Impl> > *stransLow =
+ new DataTranslation<BaseDynInst<Impl> >(this, state, 0);
+ DataTranslation<BaseDynInst<Impl> > *stransHigh =
+ new DataTranslation<BaseDynInst<Impl> >(this, state, 1);
+
+ cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode);
+ cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode);
+ }
}
template<class Impl>
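
The other lever in the patch is TheISA::HasUnalignedMemAcc. Because it is a constant known at compile time, guarding every split-related path with it (as in the read(), write() and initiateTranslation() hunks above) lets the compiler discard that code entirely for ISAs that forbid unaligned accesses. Below is a minimal sketch of the pattern, assuming a hypothetical Request type and handleAccess() wrapper; apart from HasUnalignedMemAcc itself, none of these names come from the patch.

// Minimal sketch (not gem5 code) of how a per-ISA compile-time constant
// like HasUnalignedMemAcc removes the split path entirely.
#include <iostream>

namespace TheISA {
// An ISA that requires aligned accesses would define this as false;
// one that allows unaligned accesses would define it as true.
static const bool HasUnalignedMemAcc = false;
}

struct Request { /* hypothetical stand-in for gem5's Request */ };

static void splitRequest(Request &req, Request *&low, Request *&high)
{
    // Split logic would live here; never reached when the constant is false.
    low = new Request(req);
    high = new Request(req);
}

static void handleAccess(Request &req)
{
    Request *sreqLow = nullptr;
    Request *sreqHigh = nullptr;

    // Since HasUnalignedMemAcc is a compile-time constant, the compiler can
    // prove this branch dead for aligned-only ISAs and drop both the call
    // and the split bookkeeping that follows it.
    if (TheISA::HasUnalignedMemAcc)
        splitRequest(req, sreqLow, sreqHigh);

    if (sreqLow) {
        std::cout << "issuing two requests\n";
        delete sreqLow;
        delete sreqHigh;
    } else {
        std::cout << "issuing one request\n";
    }
}

int main()
{
    Request req;
    handleAccess(req);
}

With the constant set to false, ordinary dead-code elimination removes the splitRequest() call and the two-request path, which is the effect the commit message describes for ISAs that don't allow unaligned memory accesses.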