summaryrefslogtreecommitdiff
path: root/src/cpu/simple
diff options
context:
space:
mode:
authorGabe Black <gblack@eecs.umich.edu>2009-04-06 10:19:36 -0700
committerGabe Black <gblack@eecs.umich.edu>2009-04-06 10:19:36 -0700
commitd080581db1f9ee4e1e6d07d2b01c13c67908a391 (patch)
treecc484b289fa5a30c4631f9faa1d8b456bffeebfc /src/cpu/simple
parent7a7c4c5fca83a8d47c7e71c9c080a882ebe204a9 (diff)
parent639cb0a42d953ee32bc7e96b0cdfa96cd40e9fc1 (diff)
downloadgem5-d080581db1f9ee4e1e6d07d2b01c13c67908a391.tar.xz
Merge ARM into the head. ARM will compile but may not actually work.
Diffstat (limited to 'src/cpu/simple')
-rw-r--r--src/cpu/simple/AtomicSimpleCPU.py13
-rw-r--r--src/cpu/simple/BaseSimpleCPU.py34
-rw-r--r--src/cpu/simple/SConscript1
-rw-r--r--src/cpu/simple/TimingSimpleCPU.py10
-rw-r--r--src/cpu/simple/atomic.cc280
-rw-r--r--src/cpu/simple/atomic.hh29
-rw-r--r--src/cpu/simple/base.cc71
-rw-r--r--src/cpu/simple/base.hh49
-rw-r--r--src/cpu/simple/timing.cc606
-rw-r--r--src/cpu/simple/timing.hh195
10 files changed, 755 insertions, 533 deletions
diff --git a/src/cpu/simple/AtomicSimpleCPU.py b/src/cpu/simple/AtomicSimpleCPU.py
index 28c2aa9c9..b7174bb43 100644
--- a/src/cpu/simple/AtomicSimpleCPU.py
+++ b/src/cpu/simple/AtomicSimpleCPU.py
@@ -28,18 +28,15 @@
from m5.params import *
from m5 import build_env
-from BaseCPU import BaseCPU
+from BaseSimpleCPU import BaseSimpleCPU
-class AtomicSimpleCPU(BaseCPU):
+class AtomicSimpleCPU(BaseSimpleCPU):
type = 'AtomicSimpleCPU'
width = Param.Int(1, "CPU width")
- simulate_stalls = Param.Bool(False, "Simulate cache stall cycles")
- function_trace = Param.Bool(False, "Enable function trace")
- function_trace_start = Param.Tick(0, "Cycle to start function trace")
- if build_env['FULL_SYSTEM']:
- profile = Param.Latency('0ns', "trace the kernel stack")
+ simulate_data_stalls = Param.Bool(False, "Simulate dcache stall cycles")
+ simulate_inst_stalls = Param.Bool(False, "Simulate icache stall cycles")
icache_port = Port("Instruction Port")
dcache_port = Port("Data Port")
physmem_port = Port("Physical Memory Port")
- _mem_ports = BaseCPU._mem_ports + \
+ _mem_ports = BaseSimpleCPU._mem_ports + \
['icache_port', 'dcache_port', 'physmem_port']
diff --git a/src/cpu/simple/BaseSimpleCPU.py b/src/cpu/simple/BaseSimpleCPU.py
new file mode 100644
index 000000000..9f528bc20
--- /dev/null
+++ b/src/cpu/simple/BaseSimpleCPU.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2008 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+from m5.params import *
+from BaseCPU import BaseCPU
+
+class BaseSimpleCPU(BaseCPU):
+ type = 'BaseSimpleCPU'
+ abstract = True
diff --git a/src/cpu/simple/SConscript b/src/cpu/simple/SConscript
index c090a938c..76598666f 100644
--- a/src/cpu/simple/SConscript
+++ b/src/cpu/simple/SConscript
@@ -47,3 +47,4 @@ if 'AtomicSimpleCPU' in env['CPU_MODELS'] or \
if need_simple_base:
Source('base.cc')
+ SimObject('BaseSimpleCPU.py')
diff --git a/src/cpu/simple/TimingSimpleCPU.py b/src/cpu/simple/TimingSimpleCPU.py
index 7e777e813..ce6839241 100644
--- a/src/cpu/simple/TimingSimpleCPU.py
+++ b/src/cpu/simple/TimingSimpleCPU.py
@@ -28,14 +28,10 @@
from m5.params import *
from m5 import build_env
-from BaseCPU import BaseCPU
+from BaseSimpleCPU import BaseSimpleCPU
-class TimingSimpleCPU(BaseCPU):
+class TimingSimpleCPU(BaseSimpleCPU):
type = 'TimingSimpleCPU'
- function_trace = Param.Bool(False, "Enable function trace")
- function_trace_start = Param.Tick(0, "Cycle to start function trace")
- if build_env['FULL_SYSTEM']:
- profile = Param.Latency('0ns', "trace the kernel stack")
icache_port = Port("Instruction Port")
dcache_port = Port("Data Port")
- _mem_ports = BaseCPU._mem_ports + ['icache_port', 'dcache_port']
+ _mem_ports = BaseSimpleCPU._mem_ports + ['icache_port', 'dcache_port']
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 23bd40b9b..17f93c882 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -43,7 +43,7 @@ using namespace std;
using namespace TheISA;
AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
- : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+ : Event(CPU_Tick_Pri), cpu(c)
{
}
@@ -79,13 +79,12 @@ void
AtomicSimpleCPU::init()
{
BaseCPU::init();
- cpuId = tc->readCpuId();
#if FULL_SYSTEM
for (int i = 0; i < threadContexts.size(); ++i) {
ThreadContext *tc = threadContexts[i];
// initialize CPU, including PC
- TheISA::initCPU(tc, cpuId);
+ TheISA::initCPU(tc, tc->contextId());
}
#endif
if (hasPhysMemPort) {
@@ -94,9 +93,10 @@ AtomicSimpleCPU::init()
physmemPort.getPeerAddressRanges(pmAddrList, snoop);
physMemAddr = *pmAddrList.begin();
}
- ifetch_req.setThreadContext(cpuId, 0); // Add thread ID if we add MT
- data_read_req.setThreadContext(cpuId, 0); // Add thread ID here too
- data_write_req.setThreadContext(cpuId, 0); // Add thread ID here too
+ // Atomic doesn't do MT right now, so contextId == threadId
+ ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
+ data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+ data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}
bool
@@ -148,13 +148,14 @@ AtomicSimpleCPU::DcachePort::setPeer(Port *port)
#if FULL_SYSTEM
// Update the ThreadContext's memory ports (Functional/Virtual
// Ports)
- cpu->tcBase()->connectMemPorts();
+ cpu->tcBase()->connectMemPorts(cpu->tcBase());
#endif
}
-AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
- : BaseSimpleCPU(p), tickEvent(this),
- width(p->width), simulate_stalls(p->simulate_stalls),
+AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
+ : BaseSimpleCPU(p), tickEvent(this), width(p->width),
+ simulate_data_stalls(p->simulate_data_stalls),
+ simulate_inst_stalls(p->simulate_inst_stalls),
icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
physmemPort(name() + "-iport", this), hasPhysMemPort(false)
{
@@ -175,8 +176,6 @@ AtomicSimpleCPU::serialize(ostream &os)
{
SimObject::State so_state = SimObject::getState();
SERIALIZE_ENUM(so_state);
- Status _status = status();
- SERIALIZE_ENUM(_status);
BaseSimpleCPU::serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
@@ -187,7 +186,6 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
SimObject::State so_state;
UNSERIALIZE_ENUM(so_state);
- UNSERIALIZE_ENUM(_status);
BaseSimpleCPU::unserialize(cp, section);
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}
@@ -203,16 +201,15 @@ AtomicSimpleCPU::resume()
changeState(SimObject::Running);
if (thread->status() == ThreadContext::Active) {
- if (!tickEvent.scheduled()) {
- tickEvent.schedule(nextCycle());
- }
+ if (!tickEvent.scheduled())
+ schedule(tickEvent, nextCycle());
}
}
void
AtomicSimpleCPU::switchOut()
{
- assert(status() == Running || status() == Idle);
+ assert(_status == Running || _status == Idle);
_status = SwitchedOut;
tickEvent.squash();
@@ -232,7 +229,7 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
ThreadContext *tc = threadContexts[i];
if (tc->status() == ThreadContext::Active && _status != Running) {
_status = Running;
- tickEvent.schedule(nextCycle());
+ schedule(tickEvent, nextCycle());
break;
}
}
@@ -240,10 +237,9 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
_status = Idle;
}
assert(threadContexts.size() == 1);
- cpuId = tc->readCpuId();
- ifetch_req.setThreadContext(cpuId, 0); // Add thread ID if we add MT
- data_read_req.setThreadContext(cpuId, 0); // Add thread ID here too
- data_write_req.setThreadContext(cpuId, 0); // Add thread ID here too
+ ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
+ data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+ data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}
@@ -262,7 +258,7 @@ AtomicSimpleCPU::activateContext(int thread_num, int delay)
numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);
//Make sure ticks are still on multiples of cycles
- tickEvent.schedule(nextCycle(curTick + ticks(delay)));
+ schedule(tickEvent, nextCycle(curTick + ticks(delay)));
_status = Running;
}
@@ -280,7 +276,7 @@ AtomicSimpleCPU::suspendContext(int thread_num)
// tick event may not be scheduled if this gets called from inside
// an instruction's execution, e.g. "quiesce"
if (tickEvent.scheduled())
- tickEvent.deschedule();
+ deschedule(tickEvent);
notIdleFraction--;
_status = Idle;
@@ -318,7 +314,7 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
req->setVirt(0, addr, dataSize, flags, thread->readPC());
// translate to physical address
- Fault fault = thread->translateDataReadReq(req);
+ Fault fault = thread->dtb->translateAtomic(req, tc, false);
// Now do the access.
if (fault == NoFault) {
@@ -355,6 +351,9 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
if (secondAddr <= addr)
{
data = gtoh(data);
+ if (traceData) {
+ traceData->setData(data);
+ }
return fault;
}
@@ -371,61 +370,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
}
}
-Fault
-AtomicSimpleCPU::translateDataReadAddr(Addr vaddr, Addr & paddr,
- int size, unsigned flags)
-{
- // use the CPU's statically allocated read request and packet objects
- Request *req = &data_read_req;
-
- if (traceData) {
- traceData->setAddr(vaddr);
- }
-
- //The block size of our peer.
- int blockSize = dcachePort.peerBlockSize();
- //The size of the data we're trying to read.
- int dataSize = size;
-
- bool firstTimeThrough = true;
-
- //The address of the second part of this access if it needs to be split
- //across a cache line boundary.
- Addr secondAddr = roundDown(vaddr + dataSize - 1, blockSize);
-
- if(secondAddr > vaddr)
- dataSize = secondAddr - vaddr;
-
- while(1) {
- req->setVirt(0, vaddr, dataSize, flags, thread->readPC());
-
- // translate to physical address
- Fault fault = thread->translateDataReadReq(req);
-
- //If there's a fault, return it
- if (fault != NoFault)
- return fault;
-
- if (firstTimeThrough) {
- paddr = req->getPaddr();
- firstTimeThrough = false;
- }
-
- //If we don't need to access a second cache line, stop now.
- if (secondAddr <= vaddr)
- return fault;
-
- /*
- * Set up for accessing the second cache line.
- */
-
- //Adjust the size to get the remaining bytes.
- dataSize = vaddr + size - secondAddr;
- //And access the right address.
- vaddr = secondAddr;
- }
-}
-
#ifndef DOXYGEN_SHOULD_SKIP_THIS
template
@@ -508,7 +452,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
req->setVirt(0, addr, dataSize, flags, thread->readPC());
// translate to physical address
- Fault fault = thread->translateDataWriteReq(req);
+ Fault fault = thread->dtb->translateAtomic(req, tc, true);
// Now do the access.
if (fault == NoFault) {
@@ -568,6 +512,9 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
// If the write needs to have a fault on the access, consider
// calling changeStatus() and changing it to "bad addr write"
// or something.
+ if (traceData) {
+ traceData->setData(gtoh(data));
+ }
return fault;
}
@@ -584,64 +531,6 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
}
}
-Fault
-AtomicSimpleCPU::translateDataWriteAddr(Addr vaddr, Addr &paddr,
- int size, unsigned flags)
-{
- // use the CPU's statically allocated write request and packet objects
- Request *req = &data_write_req;
-
- if (traceData) {
- traceData->setAddr(vaddr);
- }
-
- //The block size of our peer.
- int blockSize = dcachePort.peerBlockSize();
-
- //The address of the second part of this access if it needs to be split
- //across a cache line boundary.
- Addr secondAddr = roundDown(vaddr + size - 1, blockSize);
-
- //The size of the data we're trying to read.
- int dataSize = size;
-
- bool firstTimeThrough = true;
-
- if(secondAddr > vaddr)
- dataSize = secondAddr - vaddr;
-
- dcache_latency = 0;
-
- while(1) {
- req->setVirt(0, vaddr, dataSize, flags, thread->readPC());
-
- // translate to physical address
- Fault fault = thread->translateDataWriteReq(req);
-
- //If there's a fault or we don't need to access a second cache line,
- //stop now.
- if (fault != NoFault)
- return fault;
-
- if (firstTimeThrough) {
- paddr = req->getPaddr();
- firstTimeThrough = false;
- }
-
- if (secondAddr <= vaddr)
- return fault;
-
- /*
- * Set up for accessing the second cache line.
- */
-
- //Adjust the size to get the remaining bytes.
- dataSize = vaddr + size - secondAddr;
- //And access the right address.
- vaddr = secondAddr;
- }
-}
-
#ifndef DOXYGEN_SHOULD_SKIP_THIS
@@ -705,7 +594,7 @@ AtomicSimpleCPU::tick()
{
DPRINTF(SimpleCPU, "Tick\n");
- Tick latency = ticks(1); // instruction takes one cycle by default
+ Tick latency = 0;
for (int i = 0; i < width; ++i) {
numCycles++;
@@ -715,31 +604,43 @@ AtomicSimpleCPU::tick()
checkPcEventQueue();
- Fault fault = setupFetchRequest(&ifetch_req);
+ Fault fault = NoFault;
+
+ bool fromRom = isRomMicroPC(thread->readMicroPC());
+ if (!fromRom && !curMacroStaticInst) {
+ setupFetchRequest(&ifetch_req);
+ fault = thread->itb->translateAtomic(&ifetch_req, tc);
+ }
if (fault == NoFault) {
Tick icache_latency = 0;
bool icache_access = false;
dcache_access = false; // assume no dcache access
- //Fetch more instruction memory if necessary
- //if(predecoder.needMoreBytes())
- //{
- icache_access = true;
- Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
- Packet::Broadcast);
- ifetch_pkt.dataStatic(&inst);
-
- if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
- icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
- else
- icache_latency = icachePort.sendAtomic(&ifetch_pkt);
+ if (!fromRom && !curMacroStaticInst) {
+ // This is commented out because the predecoder would act like
+ // a tiny cache otherwise. It wouldn't be flushed when needed
+ // like the I cache. It should be flushed, and when that works
+ // this code should be uncommented.
+ //Fetch more instruction memory if necessary
+ //if(predecoder.needMoreBytes())
+ //{
+ icache_access = true;
+ Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
+ Packet::Broadcast);
+ ifetch_pkt.dataStatic(&inst);
+
+ if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
+ icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
+ else
+ icache_latency = icachePort.sendAtomic(&ifetch_pkt);
- assert(!ifetch_pkt.isError());
+ assert(!ifetch_pkt.isError());
- // ifetch_req is initialized to read the instruction directly
- // into the CPU object's inst field.
- //}
+ // ifetch_req is initialized to read the instruction directly
+ // into the CPU object's inst field.
+ //}
+ }
preExecute();
@@ -763,16 +664,21 @@ AtomicSimpleCPU::tick()
curStaticInst->isFirstMicroop()))
instCnt++;
- if (simulate_stalls) {
- Tick icache_stall =
- icache_access ? icache_latency - ticks(1) : 0;
- Tick dcache_stall =
- dcache_access ? dcache_latency - ticks(1) : 0;
- Tick stall_cycles = (icache_stall + dcache_stall) / ticks(1);
- if (ticks(stall_cycles) < (icache_stall + dcache_stall))
- latency += ticks(stall_cycles+1);
- else
- latency += ticks(stall_cycles);
+ Tick stall_ticks = 0;
+ if (simulate_inst_stalls && icache_access)
+ stall_ticks += icache_latency;
+
+ if (simulate_data_stalls && dcache_access)
+ stall_ticks += dcache_latency;
+
+ if (stall_ticks) {
+ Tick stall_cycles = stall_ticks / ticks(1);
+ Tick aligned_stall_ticks = ticks(stall_cycles);
+
+ if (aligned_stall_ticks < stall_ticks)
+ aligned_stall_ticks += 1;
+
+ latency += aligned_stall_ticks;
}
}
@@ -780,8 +686,12 @@ AtomicSimpleCPU::tick()
advancePC(fault);
}
+ // instruction takes at least one cycle
+ if (latency < ticks(1))
+ latency = ticks(1);
+
if (_status != Idle)
- tickEvent.schedule(curTick + latency);
+ schedule(tickEvent, curTick + latency);
}
@@ -799,38 +709,10 @@ AtomicSimpleCPU::printAddr(Addr a)
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
- AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params();
- params->name = name;
- params->numberOfThreads = 1;
- params->max_insts_any_thread = max_insts_any_thread;
- params->max_insts_all_threads = max_insts_all_threads;
- params->max_loads_any_thread = max_loads_any_thread;
- params->max_loads_all_threads = max_loads_all_threads;
- params->progress_interval = progress_interval;
- params->deferRegistration = defer_registration;
- params->phase = phase;
- params->clock = clock;
- params->functionTrace = function_trace;
- params->functionTraceStart = function_trace_start;
- params->width = width;
- params->simulate_stalls = simulate_stalls;
- params->system = system;
- params->cpu_id = cpu_id;
- params->tracer = tracer;
-
- params->itb = itb;
- params->dtb = dtb;
-#if FULL_SYSTEM
- params->profile = profile;
- params->do_quiesce = do_quiesce;
- params->do_checkpoint_insts = do_checkpoint_insts;
- params->do_statistics_insts = do_statistics_insts;
-#else
+ numThreads = 1;
+#if !FULL_SYSTEM
if (workload.size() != 1)
panic("only one workload allowed");
- params->process = workload[0];
#endif
-
- AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params);
- return cpu;
+ return new AtomicSimpleCPU(this);
}
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 19bc0e13b..190097637 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -32,34 +32,17 @@
#define __CPU_SIMPLE_ATOMIC_HH__
#include "cpu/simple/base.hh"
+#include "params/AtomicSimpleCPU.hh"
class AtomicSimpleCPU : public BaseSimpleCPU
{
public:
- struct Params : public BaseSimpleCPU::Params {
- int width;
- bool simulate_stalls;
- };
-
- AtomicSimpleCPU(Params *params);
+ AtomicSimpleCPU(AtomicSimpleCPUParams *params);
virtual ~AtomicSimpleCPU();
virtual void init();
- public:
- //
- enum Status {
- Running,
- Idle,
- SwitchedOut
- };
-
- protected:
- Status _status;
-
- Status status() const { return _status; }
-
private:
struct TickEvent : public Event
@@ -74,7 +57,8 @@ class AtomicSimpleCPU : public BaseSimpleCPU
TickEvent tickEvent;
const int width;
- const bool simulate_stalls;
+ const bool simulate_data_stalls;
+ const bool simulate_inst_stalls;
// main simulation loop (one cycle)
void tick();
@@ -152,11 +136,6 @@ class AtomicSimpleCPU : public BaseSimpleCPU
template <class T>
Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
- Fault translateDataReadAddr(Addr vaddr, Addr &paddr,
- int size, unsigned flags);
- Fault translateDataWriteAddr(Addr vaddr, Addr &paddr,
- int size, unsigned flags);
-
/**
* Print state of address in memory system via PrintReq (for
* debugging).
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 4a91a9e12..348d2392f 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -31,6 +31,7 @@
#include "arch/utility.hh"
#include "arch/faults.hh"
#include "base/cprintf.hh"
+#include "base/cp_annotate.hh"
#include "base/inifile.hh"
#include "base/loader/symtab.hh"
#include "base/misc.hh"
@@ -65,16 +66,18 @@
#include "mem/mem_object.hh"
#endif // FULL_SYSTEM
+#include "params/BaseSimpleCPU.hh"
+
using namespace std;
using namespace TheISA;
-BaseSimpleCPU::BaseSimpleCPU(Params *p)
+BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p)
: BaseCPU(p), traceData(NULL), thread(NULL), predecoder(NULL)
{
#if FULL_SYSTEM
thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb);
#else
- thread = new SimpleThread(this, /* thread_num */ 0, p->process,
+ thread = new SimpleThread(this, /* thread_num */ 0, p->workload[0],
p->itb, p->dtb, /* asid */ 0);
#endif // !FULL_SYSTEM
@@ -174,12 +177,13 @@ void
BaseSimpleCPU::resetStats()
{
// startNumInst = numInst;
- // notIdleFraction = (_status != Idle);
+ notIdleFraction = (_status != Idle);
}
void
BaseSimpleCPU::serialize(ostream &os)
{
+ SERIALIZE_ENUM(_status);
BaseCPU::serialize(os);
// SERIALIZE_SCALAR(inst);
nameOut(os, csprintf("%s.xc.0", name()));
@@ -189,6 +193,7 @@ BaseSimpleCPU::serialize(ostream &os)
void
BaseSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
+ UNSERIALIZE_ENUM(_status);
BaseCPU::unserialize(cp, section);
// UNSERIALIZE_SCALAR(inst);
thread->unserialize(cp, csprintf("%s.xc.0", section));
@@ -299,14 +304,13 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
#if FULL_SYSTEM
void
-BaseSimpleCPU::post_interrupt(int int_num, int index)
+BaseSimpleCPU::wakeup()
{
- BaseCPU::post_interrupt(int_num, index);
+ if (thread->status() != ThreadContext::Suspended)
+ return;
- if (thread->status() == ThreadContext::Suspended) {
- DPRINTF(Quiesce,"Suspended Processor awoke\n");
- thread->activate();
- }
+ DPRINTF(Quiesce,"Suspended Processor awoke\n");
+ thread->activate();
}
#endif // FULL_SYSTEM
@@ -314,11 +318,12 @@ void
BaseSimpleCPU::checkForInterrupts()
{
#if FULL_SYSTEM
- if (check_interrupts(tc)) {
- Fault interrupt = interrupts.getInterrupt(tc);
+ if (checkInterrupts(tc)) {
+ Fault interrupt = interrupts->getInterrupt(tc);
if (interrupt != NoFault) {
- interrupts.updateIntrInfo(tc);
+ predecoder.reset();
+ interrupts->updateIntrInfo(tc);
interrupt->invoke(tc);
}
}
@@ -326,7 +331,7 @@ BaseSimpleCPU::checkForInterrupts()
}
-Fault
+void
BaseSimpleCPU::setupFetchRequest(Request *req)
{
Addr threadPC = thread->readPC();
@@ -342,10 +347,6 @@ BaseSimpleCPU::setupFetchRequest(Request *req)
Addr fetchPC = (threadPC & PCMask) + fetchOffset;
req->setVirt(0, fetchPC, sizeof(MachInst), 0, threadPC);
-
- Fault fault = thread->translateInstReq(req);
-
- return fault;
}
@@ -364,9 +365,13 @@ BaseSimpleCPU::preExecute()
// decode the instruction
inst = gtoh(inst);
- //If we're not in the middle of a macro instruction
- if (!curMacroStaticInst) {
+ MicroPC upc = thread->readMicroPC();
+ if (isRomMicroPC(upc)) {
+ stayAtPC = false;
+ curStaticInst = microcodeRom.fetchMicroop(upc, curMacroStaticInst);
+ } else if (!curMacroStaticInst) {
+ //We're not in the middle of a macro instruction
StaticInstPtr instPtr = NULL;
//Predecode, ie bundle up an ExtMachInst
@@ -397,23 +402,22 @@ BaseSimpleCPU::preExecute()
//out micro ops
if (instPtr && instPtr->isMacroop()) {
curMacroStaticInst = instPtr;
- curStaticInst = curMacroStaticInst->
- fetchMicroop(thread->readMicroPC());
+ curStaticInst = curMacroStaticInst->fetchMicroop(upc);
} else {
curStaticInst = instPtr;
}
} else {
//Read the next micro op from the macro op
- curStaticInst = curMacroStaticInst->
- fetchMicroop(thread->readMicroPC());
+ curStaticInst = curMacroStaticInst->fetchMicroop(upc);
}
//If we decoded an instruction this "tick", record information about it.
if(curStaticInst)
{
#if TRACING_ON
- traceData = tracer->getInstRecord(curTick, tc, curStaticInst,
- thread->readPC());
+ traceData = tracer->getInstRecord(curTick, tc,
+ curStaticInst, thread->readPC(),
+ curMacroStaticInst, thread->readMicroPC());
DPRINTF(Decode,"Decode: Decoded %s instruction: 0x%x\n",
curStaticInst->getName(), curStaticInst->machInst);
@@ -447,6 +451,10 @@ BaseSimpleCPU::postExecute()
comLoadEventQueue[0]->serviceEvents(numLoad);
}
+ if (CPA::available()) {
+ CPA::cpa()->swAutoBegin(tc, thread->readNextPC());
+ }
+
traceFunctions(thread->readPC());
if (traceData) {
@@ -465,22 +473,21 @@ BaseSimpleCPU::advancePC(Fault fault)
if (fault != NoFault) {
curMacroStaticInst = StaticInst::nullStaticInstPtr;
predecoder.reset();
- thread->setMicroPC(0);
- thread->setNextMicroPC(1);
fault->invoke(tc);
} else {
//If we're at the last micro op for this instruction
if (curStaticInst && curStaticInst->isLastMicroop()) {
- //We should be working with a macro op
- assert(curMacroStaticInst);
+ //We should be working with a macro op or be in the ROM
+ assert(curMacroStaticInst ||
+ isRomMicroPC(thread->readMicroPC()));
//Close out this macro op, and clean up the
//microcode state
curMacroStaticInst = StaticInst::nullStaticInstPtr;
- thread->setMicroPC(0);
- thread->setNextMicroPC(1);
+ thread->setMicroPC(normalMicroPC(0));
+ thread->setNextMicroPC(normalMicroPC(1));
}
//If we're still in a macro op
- if (curMacroStaticInst) {
+ if (curMacroStaticInst || isRomMicroPC(thread->readMicroPC())) {
//Advance the micro pc
thread->setMicroPC(thread->readNextMicroPC());
//Advance the "next" micro pc. Note that there are no delay
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 918965fdb..e80606388 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -76,6 +76,8 @@ namespace Trace {
class InstRecord;
}
+class BaseSimpleCPUParams;
+
class BaseSimpleCPU : public BaseCPU
{
@@ -96,7 +98,7 @@ class BaseSimpleCPU : public BaseCPU
}
public:
- void post_interrupt(int int_num, int index);
+ void wakeup();
void zero_fill_64(Addr addr) {
static int warned = 0;
@@ -107,15 +109,7 @@ class BaseSimpleCPU : public BaseCPU
};
public:
- struct Params : public BaseCPU::Params
- {
- TheISA::ITB *itb;
- TheISA::DTB *dtb;
-#if !FULL_SYSTEM
- Process *process;
-#endif
- };
- BaseSimpleCPU(Params *params);
+ BaseSimpleCPU(BaseSimpleCPUParams *params);
virtual ~BaseSimpleCPU();
public:
@@ -127,7 +121,22 @@ class BaseSimpleCPU : public BaseCPU
*/
ThreadContext *tc;
protected:
- int cpuId;
+
+ enum Status {
+ Idle,
+ Running,
+ ITBWaitResponse,
+ IcacheRetry,
+ IcacheWaitResponse,
+ IcacheWaitSwitch,
+ DTBWaitResponse,
+ DcacheRetry,
+ DcacheWaitResponse,
+ DcacheWaitSwitch,
+ SwitchedOut
+ };
+
+ Status _status;
public:
@@ -153,7 +162,7 @@ class BaseSimpleCPU : public BaseCPU
bool stayAtPC;
void checkForInterrupts();
- Fault setupFetchRequest(Request *req);
+ void setupFetchRequest(Request *req);
void preExecute();
void postExecute();
void advancePC(Fault fault);
@@ -168,7 +177,7 @@ class BaseSimpleCPU : public BaseCPU
// number of simulated instructions
Counter numInst;
Counter startNumInst;
- Stats::Scalar<> numInsts;
+ Stats::Scalar numInsts;
void countInst()
{
@@ -187,30 +196,30 @@ class BaseSimpleCPU : public BaseCPU
static const Addr PCMask = ~((Addr)sizeof(TheISA::MachInst) - 1);
// number of simulated memory references
- Stats::Scalar<> numMemRefs;
+ Stats::Scalar numMemRefs;
// number of simulated loads
Counter numLoad;
Counter startNumLoad;
// number of idle cycles
- Stats::Average<> notIdleFraction;
+ Stats::Average notIdleFraction;
Stats::Formula idleFraction;
// number of cycles stalled for I-cache responses
- Stats::Scalar<> icacheStallCycles;
+ Stats::Scalar icacheStallCycles;
Counter lastIcacheStall;
// number of cycles stalled for I-cache retries
- Stats::Scalar<> icacheRetryCycles;
+ Stats::Scalar icacheRetryCycles;
Counter lastIcacheRetry;
// number of cycles stalled for D-cache responses
- Stats::Scalar<> dcacheStallCycles;
+ Stats::Scalar dcacheStallCycles;
Counter lastDcacheStall;
// number of cycles stalled for D-cache retries
- Stats::Scalar<> dcacheRetryCycles;
+ Stats::Scalar dcacheRetryCycles;
Counter lastDcacheRetry;
virtual void serialize(std::ostream &os);
@@ -219,7 +228,7 @@ class BaseSimpleCPU : public BaseCPU
// These functions are only used in CPU models that split
// effective address computation from the actual memory access.
void setEA(Addr EA) { panic("BaseSimpleCPU::setEA() not implemented\n"); }
- Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n");
+ Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n");
M5_DUMMY_RETURN}
void prefetch(Addr addr, unsigned flags)
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index a76824ff3..a8f86f8d2 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -57,13 +57,12 @@ void
TimingSimpleCPU::init()
{
BaseCPU::init();
- cpuId = tc->readCpuId();
#if FULL_SYSTEM
for (int i = 0; i < threadContexts.size(); ++i) {
ThreadContext *tc = threadContexts[i];
// initialize CPU, including PC
- TheISA::initCPU(tc, cpuId);
+ TheISA::initCPU(tc, _cpuId);
}
#endif
}
@@ -101,11 +100,12 @@ void
TimingSimpleCPU::CpuPort::TickEvent::schedule(PacketPtr _pkt, Tick t)
{
pkt = _pkt;
- Event::schedule(t);
+ cpu->schedule(this, t);
}
-TimingSimpleCPU::TimingSimpleCPU(Params *p)
- : BaseSimpleCPU(p), icachePort(this, p->clock), dcachePort(this, p->clock)
+TimingSimpleCPU::TimingSimpleCPU(TimingSimpleCPUParams *p)
+ : BaseSimpleCPU(p), fetchTranslation(this), icachePort(this, p->clock),
+ dcachePort(this, p->clock), fetchEvent(this)
{
_status = Idle;
@@ -114,7 +114,6 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p)
ifetch_pkt = dcache_pkt = NULL;
drainEvent = NULL;
- fetchEvent = NULL;
previousTick = 0;
changeState(SimObject::Running);
}
@@ -145,7 +144,7 @@ TimingSimpleCPU::drain(Event *drain_event)
{
// TimingSimpleCPU is ready to drain if it's not waiting for
// an access to complete.
- if (status() == Idle || status() == Running || status() == SwitchedOut) {
+ if (_status == Idle || _status == Running || _status == SwitchedOut) {
changeState(SimObject::Drained);
return 0;
} else {
@@ -162,15 +161,10 @@ TimingSimpleCPU::resume()
if (_status != SwitchedOut && _status != Idle) {
assert(system->getMemoryMode() == Enums::timing);
- // Delete the old event if it existed.
- if (fetchEvent) {
- if (fetchEvent->scheduled())
- fetchEvent->deschedule();
+ if (fetchEvent.scheduled())
+ deschedule(fetchEvent);
- delete fetchEvent;
- }
-
- fetchEvent = new FetchEvent(this, nextCycle());
+ schedule(fetchEvent, nextCycle());
}
changeState(SimObject::Running);
@@ -179,14 +173,14 @@ TimingSimpleCPU::resume()
void
TimingSimpleCPU::switchOut()
{
- assert(status() == Running || status() == Idle);
+ assert(_status == Running || _status == Idle);
_status = SwitchedOut;
numCycles += tickToCycles(curTick - previousTick);
// If we've been scheduled to resume but are then told to switch out,
// we'll need to cancel it.
- if (fetchEvent && fetchEvent->scheduled())
- fetchEvent->deschedule();
+ if (fetchEvent.scheduled())
+ deschedule(fetchEvent);
}
@@ -209,7 +203,6 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
_status = Idle;
}
assert(threadContexts.size() == 1);
- cpuId = tc->readCpuId();
previousTick = curTick;
}
@@ -228,7 +221,7 @@ TimingSimpleCPU::activateContext(int thread_num, int delay)
_status = Running;
// kick things off by initiating the fetch of the next instruction
- fetchEvent = new FetchEvent(this, nextCycle(curTick + ticks(delay)));
+ schedule(fetchEvent, nextCycle(curTick + ticks(delay)));
}
@@ -249,74 +242,239 @@ TimingSimpleCPU::suspendContext(int thread_num)
_status = Idle;
}
+bool
+TimingSimpleCPU::handleReadPacket(PacketPtr pkt)
+{
+ RequestPtr req = pkt->req;
+ if (req->isMmapedIpr()) {
+ Tick delay;
+ delay = TheISA::handleIprRead(thread->getTC(), pkt);
+ new IprEvent(pkt, this, nextCycle(curTick + delay));
+ _status = DcacheWaitResponse;
+ dcache_pkt = NULL;
+ } else if (!dcachePort.sendTiming(pkt)) {
+ _status = DcacheRetry;
+ dcache_pkt = pkt;
+ } else {
+ _status = DcacheWaitResponse;
+ // memory system takes ownership of packet
+ dcache_pkt = NULL;
+ }
+ return dcache_pkt == NULL;
+}
-template <class T>
-Fault
-TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
+void
+TimingSimpleCPU::sendData(Fault fault, RequestPtr req,
+ uint8_t *data, uint64_t *res, bool read)
{
- Request *req =
- new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
- cpuId, /* thread ID */ 0);
+ _status = Running;
+ if (fault != NoFault) {
+ delete data;
+ delete req;
- if (traceData) {
- traceData->setAddr(req->getVaddr());
+ translationFault(fault);
+ return;
}
+ PacketPtr pkt;
+ buildPacket(pkt, req, read);
+ pkt->dataDynamic<uint8_t>(data);
+ if (req->getFlags().isSet(Request::NO_ACCESS)) {
+ assert(!dcache_pkt);
+ pkt->makeResponse();
+ completeDataAccess(pkt);
+ } else if (read) {
+ handleReadPacket(pkt);
+ } else {
+ bool do_access = true; // flag to suppress cache access
- // translate to physical address
- Fault fault = thread->translateDataReadReq(req);
+ if (req->isLocked()) {
+ do_access = TheISA::handleLockedWrite(thread, req);
+ } else if (req->isCondSwap()) {
+ assert(res);
+ req->setExtraData(*res);
+ }
- // Now do the access.
- if (fault == NoFault) {
- PacketPtr pkt =
- new Packet(req,
- (req->isLocked() ?
- MemCmd::LoadLockedReq : MemCmd::ReadReq),
- Packet::Broadcast);
- pkt->dataDynamic<T>(new T);
-
- if (req->isMmapedIpr()) {
- Tick delay;
- delay = TheISA::handleIprRead(thread->getTC(), pkt);
- new IprEvent(pkt, this, nextCycle(curTick + delay));
- _status = DcacheWaitResponse;
- dcache_pkt = NULL;
- } else if (!dcachePort.sendTiming(pkt)) {
- _status = DcacheRetry;
+ if (do_access) {
dcache_pkt = pkt;
+ handleWritePacket();
} else {
_status = DcacheWaitResponse;
- // memory system takes ownership of packet
- dcache_pkt = NULL;
+ completeDataAccess(pkt);
}
+ }
+}
- // This will need a new way to tell if it has a dcache attached.
- if (req->isUncacheable())
- recordEvent("Uncached Read");
+void
+TimingSimpleCPU::sendSplitData(Fault fault1, Fault fault2,
+ RequestPtr req1, RequestPtr req2, RequestPtr req,
+ uint8_t *data, bool read)
+{
+ _status = Running;
+ if (fault1 != NoFault || fault2 != NoFault) {
+ delete data;
+ delete req1;
+ delete req2;
+ if (fault1 != NoFault)
+ translationFault(fault1);
+ else if (fault2 != NoFault)
+ translationFault(fault2);
+ return;
+ }
+ PacketPtr pkt1, pkt2;
+ buildSplitPacket(pkt1, pkt2, req1, req2, req, data, read);
+ if (req->getFlags().isSet(Request::NO_ACCESS)) {
+ assert(!dcache_pkt);
+ pkt1->makeResponse();
+ completeDataAccess(pkt1);
+ } else if (read) {
+ if (handleReadPacket(pkt1)) {
+ SplitFragmentSenderState * send_state =
+ dynamic_cast<SplitFragmentSenderState *>(pkt1->senderState);
+ send_state->clearFromParent();
+ if (handleReadPacket(pkt2)) {
+ send_state = dynamic_cast<SplitFragmentSenderState *>(
+ pkt1->senderState);
+ send_state->clearFromParent();
+ }
+ }
} else {
- delete req;
+ dcache_pkt = pkt1;
+ if (handleWritePacket()) {
+ SplitFragmentSenderState * send_state =
+ dynamic_cast<SplitFragmentSenderState *>(pkt1->senderState);
+ send_state->clearFromParent();
+ dcache_pkt = pkt2;
+ if (handleWritePacket()) {
+ send_state = dynamic_cast<SplitFragmentSenderState *>(
+ pkt1->senderState);
+ send_state->clearFromParent();
+ }
+ }
+ }
+}
+
+void
+TimingSimpleCPU::translationFault(Fault fault)
+{
+ numCycles += tickToCycles(curTick - previousTick);
+ previousTick = curTick;
+
+ if (traceData) {
+ // Since there was a fault, we shouldn't trace this instruction.
+ delete traceData;
+ traceData = NULL;
}
- return fault;
+ postExecute();
+
+ if (getState() == SimObject::Draining) {
+ advancePC(fault);
+ completeDrain();
+ } else {
+ advanceInst(fault);
+ }
}
+void
+TimingSimpleCPU::buildPacket(PacketPtr &pkt, RequestPtr req, bool read)
+{
+ MemCmd cmd;
+ if (read) {
+ cmd = MemCmd::ReadReq;
+ if (req->isLocked())
+ cmd = MemCmd::LoadLockedReq;
+ } else {
+ cmd = MemCmd::WriteReq;
+ if (req->isLocked()) {
+ cmd = MemCmd::StoreCondReq;
+ } else if (req->isSwap()) {
+ cmd = MemCmd::SwapReq;
+ }
+ }
+ pkt = new Packet(req, cmd, Packet::Broadcast);
+}
+
+void
+TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
+ RequestPtr req1, RequestPtr req2, RequestPtr req,
+ uint8_t *data, bool read)
+{
+ pkt1 = pkt2 = NULL;
+
+ assert(!req1->isMmapedIpr() && !req2->isMmapedIpr());
+
+ if (req->getFlags().isSet(Request::NO_ACCESS)) {
+ buildPacket(pkt1, req, read);
+ return;
+ }
+
+ buildPacket(pkt1, req1, read);
+ buildPacket(pkt2, req2, read);
+
+ req->setPhys(req1->getPaddr(), req->getSize(), req1->getFlags());
+ PacketPtr pkt = new Packet(req, pkt1->cmd.responseCommand(),
+ Packet::Broadcast);
+
+ pkt->dataDynamic<uint8_t>(data);
+ pkt1->dataStatic<uint8_t>(data);
+ pkt2->dataStatic<uint8_t>(data + req1->getSize());
+
+ SplitMainSenderState * main_send_state = new SplitMainSenderState;
+ pkt->senderState = main_send_state;
+ main_send_state->fragments[0] = pkt1;
+ main_send_state->fragments[1] = pkt2;
+ main_send_state->outstanding = 2;
+ pkt1->senderState = new SplitFragmentSenderState(pkt, 0);
+ pkt2->senderState = new SplitFragmentSenderState(pkt, 1);
+}
+
+template <class T>
Fault
-TimingSimpleCPU::translateDataReadAddr(Addr vaddr, Addr &paddr,
- int size, unsigned flags)
+TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
{
- Request *req =
- new Request(0, vaddr, size, flags, thread->readPC(), cpuId, 0);
+ Fault fault;
+ const int asid = 0;
+ const int thread_id = 0;
+ const Addr pc = thread->readPC();
+ int block_size = dcachePort.peerBlockSize();
+ int data_size = sizeof(T);
+
+ RequestPtr req = new Request(asid, addr, data_size,
+ flags, pc, _cpuId, thread_id);
+
+ Addr split_addr = roundDown(addr + data_size - 1, block_size);
+ assert(split_addr <= addr || split_addr - addr < block_size);
+
+
+ _status = DTBWaitResponse;
+ if (split_addr > addr) {
+ RequestPtr req1, req2;
+ assert(!req->isLocked() && !req->isSwap());
+ req->splitOnVaddr(split_addr, req1, req2);
+
+ typedef SplitDataTranslation::WholeTranslationState WholeState;
+ WholeState *state = new WholeState(req1, req2, req,
+ (uint8_t *)(new T), true);
+ thread->dtb->translateTiming(req1, tc,
+ new SplitDataTranslation(this, 0, state), false);
+ thread->dtb->translateTiming(req2, tc,
+ new SplitDataTranslation(this, 1, state), false);
+ } else {
+ thread->dtb->translateTiming(req, tc,
+ new DataTranslation(this, (uint8_t *)(new T), NULL, true),
+ false);
+ }
if (traceData) {
- traceData->setAddr(vaddr);
+ traceData->setData(data);
+ traceData->setAddr(addr);
}
- Fault fault = thread->translateDataWriteReq(req);
+ // This will need a new way to tell if it has a dcache attached.
+ if (req->isUncacheable())
+ recordEvent("Uncached Read");
- if (fault == NoFault)
- paddr = req->getPaddr();
-
- delete req;
- return fault;
+ return NoFault;
}
#ifndef DOXYGEN_SHOULD_SKIP_THIS
@@ -369,92 +527,75 @@ TimingSimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
return read(addr, (uint32_t&)data, flags);
}
+bool
+TimingSimpleCPU::handleWritePacket()
+{
+ RequestPtr req = dcache_pkt->req;
+ if (req->isMmapedIpr()) {
+ Tick delay;
+ delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
+ new IprEvent(dcache_pkt, this, nextCycle(curTick + delay));
+ _status = DcacheWaitResponse;
+ dcache_pkt = NULL;
+ } else if (!dcachePort.sendTiming(dcache_pkt)) {
+ _status = DcacheRetry;
+ } else {
+ _status = DcacheWaitResponse;
+ // memory system takes ownership of packet
+ dcache_pkt = NULL;
+ }
+ return dcache_pkt == NULL;
+}
template <class T>
Fault
TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
{
- Request *req =
- new Request(/* asid */ 0, addr, sizeof(T), flags, thread->readPC(),
- cpuId, /* thread ID */ 0);
+ const int asid = 0;
+ const int thread_id = 0;
+ const Addr pc = thread->readPC();
+ int block_size = dcachePort.peerBlockSize();
+ int data_size = sizeof(T);
+
+ RequestPtr req = new Request(asid, addr, data_size,
+ flags, pc, _cpuId, thread_id);
+
+ Addr split_addr = roundDown(addr + data_size - 1, block_size);
+ assert(split_addr <= addr || split_addr - addr < block_size);
+
+ T *dataP = new T;
+ *dataP = TheISA::htog(data);
+ _status = DTBWaitResponse;
+ if (split_addr > addr) {
+ RequestPtr req1, req2;
+ assert(!req->isLocked() && !req->isSwap());
+ req->splitOnVaddr(split_addr, req1, req2);
+
+ typedef SplitDataTranslation::WholeTranslationState WholeState;
+ WholeState *state = new WholeState(req1, req2, req,
+ (uint8_t *)dataP, false);
+ thread->dtb->translateTiming(req1, tc,
+ new SplitDataTranslation(this, 0, state), true);
+ thread->dtb->translateTiming(req2, tc,
+ new SplitDataTranslation(this, 1, state), true);
+ } else {
+ thread->dtb->translateTiming(req, tc,
+ new DataTranslation(this, (uint8_t *)dataP, res, false),
+ true);
+ }
if (traceData) {
traceData->setAddr(req->getVaddr());
+ traceData->setData(data);
}
- // translate to physical address
- Fault fault = thread->translateDataWriteReq(req);
-
- // Now do the access.
- if (fault == NoFault) {
- MemCmd cmd = MemCmd::WriteReq; // default
- bool do_access = true; // flag to suppress cache access
-
- if (req->isLocked()) {
- cmd = MemCmd::StoreCondReq;
- do_access = TheISA::handleLockedWrite(thread, req);
- } else if (req->isSwap()) {
- cmd = MemCmd::SwapReq;
- if (req->isCondSwap()) {
- assert(res);
- req->setExtraData(*res);
- }
- }
-
- // Note: need to allocate dcache_pkt even if do_access is
- // false, as it's used unconditionally to call completeAcc().
- assert(dcache_pkt == NULL);
- dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
- dcache_pkt->allocate();
- dcache_pkt->set(data);
-
- if (do_access) {
- if (req->isMmapedIpr()) {
- Tick delay;
- dcache_pkt->set(htog(data));
- delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
- new IprEvent(dcache_pkt, this, nextCycle(curTick + delay));
- _status = DcacheWaitResponse;
- dcache_pkt = NULL;
- } else if (!dcachePort.sendTiming(dcache_pkt)) {
- _status = DcacheRetry;
- } else {
- _status = DcacheWaitResponse;
- // memory system takes ownership of packet
- dcache_pkt = NULL;
- }
- }
- // This will need a new way to tell if it's hooked up to a cache or not.
- if (req->isUncacheable())
- recordEvent("Uncached Write");
- } else {
- delete req;
- }
-
+ // This will need a new way to tell if it's hooked up to a cache or not.
+ if (req->isUncacheable())
+ recordEvent("Uncached Write");
// If the write needs to have a fault on the access, consider calling
// changeStatus() and changing it to "bad addr write" or something.
- return fault;
-}
-
-Fault
-TimingSimpleCPU::translateDataWriteAddr(Addr vaddr, Addr &paddr,
- int size, unsigned flags)
-{
- Request *req =
- new Request(0, vaddr, size, flags, thread->readPC(), cpuId, 0);
-
- if (traceData) {
- traceData->setAddr(vaddr);
- }
-
- Fault fault = thread->translateDataWriteReq(req);
-
- if (fault == NoFault)
- paddr = req->getPaddr();
-
- delete req;
- return fault;
+ return NoFault;
}
@@ -524,14 +665,31 @@ TimingSimpleCPU::fetch()
checkPcEventQueue();
- Request *ifetch_req = new Request();
- ifetch_req->setThreadContext(cpuId, /* thread ID */ 0);
- Fault fault = setupFetchRequest(ifetch_req);
+ bool fromRom = isRomMicroPC(thread->readMicroPC());
- ifetch_pkt = new Packet(ifetch_req, MemCmd::ReadReq, Packet::Broadcast);
- ifetch_pkt->dataStatic(&inst);
+ if (!fromRom && !curMacroStaticInst) {
+ Request *ifetch_req = new Request();
+ ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0);
+ setupFetchRequest(ifetch_req);
+ thread->itb->translateTiming(ifetch_req, tc,
+ &fetchTranslation);
+ } else {
+ _status = IcacheWaitResponse;
+ completeIfetch(NULL);
+ numCycles += tickToCycles(curTick - previousTick);
+ previousTick = curTick;
+ }
+}
+
+
+void
+TimingSimpleCPU::sendFetch(Fault fault, RequestPtr req, ThreadContext *tc)
+{
if (fault == NoFault) {
+ ifetch_pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+ ifetch_pkt->dataStatic(&inst);
+
if (!icachePort.sendTiming(ifetch_pkt)) {
// Need to wait for retry
_status = IcacheRetry;
@@ -542,8 +700,7 @@ TimingSimpleCPU::fetch()
ifetch_pkt = NULL;
}
} else {
- delete ifetch_req;
- delete ifetch_pkt;
+ delete req;
// fetch fault: advance directly to next instruction (fault handler)
advanceInst(fault);
}
@@ -556,7 +713,8 @@ TimingSimpleCPU::fetch()
void
TimingSimpleCPU::advanceInst(Fault fault)
{
- advancePC(fault);
+ if (fault != NoFault || !stayAtPC)
+ advancePC(fault);
if (_status == Running) {
// kick off fetch of next instruction... callback from icache
@@ -574,7 +732,8 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
// received a response from the icache: execute the received
// instruction
- assert(!pkt->isError());
+
+ assert(!pkt || !pkt->isError());
assert(_status == IcacheWaitResponse);
_status = Running;
@@ -583,41 +742,27 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
previousTick = curTick;
if (getState() == SimObject::Draining) {
- delete pkt->req;
- delete pkt;
+ if (pkt) {
+ delete pkt->req;
+ delete pkt;
+ }
completeDrain();
return;
}
preExecute();
- if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) {
+ if (curStaticInst &&
+ curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) {
// load or store: just send to dcache
Fault fault = curStaticInst->initiateAcc(this, traceData);
if (_status != Running) {
// instruction will complete in dcache response callback
- assert(_status == DcacheWaitResponse || _status == DcacheRetry);
+ assert(_status == DcacheWaitResponse ||
+ _status == DcacheRetry || DTBWaitResponse);
assert(fault == NoFault);
} else {
- if (fault == NoFault) {
- // Note that ARM can have NULL packets if the instruction gets
- // squashed due to predication
- // early fail on store conditional: complete now
- assert(dcache_pkt != NULL || THE_ISA == ARM_ISA);
-
- fault = curStaticInst->completeAcc(dcache_pkt, this,
- traceData);
- if (dcache_pkt != NULL)
- {
- delete dcache_pkt->req;
- delete dcache_pkt;
- dcache_pkt = NULL;
- }
-
- // keep an instruction count
- if (fault == NoFault)
- countInst();
- } else if (traceData) {
+ if (fault != NoFault && traceData) {
// If there was a fault, we shouldn't trace this instruction.
delete traceData;
traceData = NULL;
@@ -630,7 +775,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
instCnt++;
advanceInst(fault);
}
- } else {
+ } else if (curStaticInst) {
// non-memory instruction: execute completely now
Fault fault = curStaticInst->execute(this, traceData);
@@ -649,10 +794,14 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
curStaticInst->isFirstMicroop()))
instCnt++;
advanceInst(fault);
+ } else {
+ advanceInst(NoFault);
}
- delete pkt->req;
- delete pkt;
+ if (pkt) {
+ delete pkt->req;
+ delete pkt;
+ }
}
void
@@ -707,12 +856,38 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
// received a response from the dcache: complete the load or store
// instruction
assert(!pkt->isError());
- assert(_status == DcacheWaitResponse);
- _status = Running;
numCycles += tickToCycles(curTick - previousTick);
previousTick = curTick;
+ if (pkt->senderState) {
+ SplitFragmentSenderState * send_state =
+ dynamic_cast<SplitFragmentSenderState *>(pkt->senderState);
+ assert(send_state);
+ delete pkt->req;
+ delete pkt;
+ PacketPtr big_pkt = send_state->bigPkt;
+ delete send_state;
+
+ SplitMainSenderState * main_send_state =
+ dynamic_cast<SplitMainSenderState *>(big_pkt->senderState);
+ assert(main_send_state);
+ // Record the fact that this packet is no longer outstanding.
+ assert(main_send_state->outstanding != 0);
+ main_send_state->outstanding--;
+
+ if (main_send_state->outstanding) {
+ return;
+ } else {
+ delete main_send_state;
+ big_pkt->senderState = NULL;
+ pkt = big_pkt;
+ }
+ }
+
+ assert(_status == DcacheWaitResponse || _status == DTBWaitResponse);
+ _status = Running;
+
Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
// keep an instruction count
@@ -724,7 +899,9 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
traceData = NULL;
}
- if (pkt->isRead() && pkt->isLocked()) {
+ // the locked flag may be cleared on the response packet, so check
+ // pkt->req and not pkt to see if it was a load-locked
+ if (pkt->isRead() && pkt->req->isLocked()) {
TheISA::handleLockedRead(thread, pkt->req);
}
@@ -760,7 +937,7 @@ TimingSimpleCPU::DcachePort::setPeer(Port *port)
#if FULL_SYSTEM
// Update the ThreadContext's memory ports (Functional/Virtual
// Ports)
- cpu->tcBase()->connectMemPorts();
+ cpu->tcBase()->connectMemPorts(cpu->tcBase());
#endif
}
@@ -771,10 +948,11 @@ TimingSimpleCPU::DcachePort::recvTiming(PacketPtr pkt)
// delay processing of returned data until next CPU clock edge
Tick next_tick = cpu->nextCycle(curTick);
- if (next_tick == curTick)
+ if (next_tick == curTick) {
cpu->completeDataAccess(pkt);
- else
+ } else {
tickEvent.schedule(pkt, next_tick);
+ }
return true;
}
@@ -804,17 +982,47 @@ TimingSimpleCPU::DcachePort::recvRetry()
assert(cpu->dcache_pkt != NULL);
assert(cpu->_status == DcacheRetry);
PacketPtr tmp = cpu->dcache_pkt;
- if (sendTiming(tmp)) {
+ if (tmp->senderState) {
+ // This is a packet from a split access.
+ SplitFragmentSenderState * send_state =
+ dynamic_cast<SplitFragmentSenderState *>(tmp->senderState);
+ assert(send_state);
+ PacketPtr big_pkt = send_state->bigPkt;
+
+ SplitMainSenderState * main_send_state =
+ dynamic_cast<SplitMainSenderState *>(big_pkt->senderState);
+ assert(main_send_state);
+
+ if (sendTiming(tmp)) {
+ // If we were able to send without retrying, record that fact
+ // and try sending the other fragment.
+ send_state->clearFromParent();
+ int other_index = main_send_state->getPendingFragment();
+ if (other_index > 0) {
+ tmp = main_send_state->fragments[other_index];
+ cpu->dcache_pkt = tmp;
+ if ((big_pkt->isRead() && cpu->handleReadPacket(tmp)) ||
+ (big_pkt->isWrite() && cpu->handleWritePacket())) {
+ main_send_state->fragments[other_index] = NULL;
+ }
+ } else {
+ cpu->_status = DcacheWaitResponse;
+ // memory system takes ownership of packet
+ cpu->dcache_pkt = NULL;
+ }
+ }
+ } else if (sendTiming(tmp)) {
cpu->_status = DcacheWaitResponse;
// memory system takes ownership of packet
cpu->dcache_pkt = NULL;
}
}
-TimingSimpleCPU::IprEvent::IprEvent(Packet *_pkt, TimingSimpleCPU *_cpu, Tick t)
- : Event(&mainEventQueue), pkt(_pkt), cpu(_cpu)
+TimingSimpleCPU::IprEvent::IprEvent(Packet *_pkt, TimingSimpleCPU *_cpu,
+ Tick t)
+ : pkt(_pkt), cpu(_cpu)
{
- schedule(t);
+ cpu->schedule(this, t);
}
void
@@ -844,36 +1052,10 @@ TimingSimpleCPU::printAddr(Addr a)
TimingSimpleCPU *
TimingSimpleCPUParams::create()
{
- TimingSimpleCPU::Params *params = new TimingSimpleCPU::Params();
- params->name = name;
- params->numberOfThreads = 1;
- params->max_insts_any_thread = max_insts_any_thread;
- params->max_insts_all_threads = max_insts_all_threads;
- params->max_loads_any_thread = max_loads_any_thread;
- params->max_loads_all_threads = max_loads_all_threads;
- params->progress_interval = progress_interval;
- params->deferRegistration = defer_registration;
- params->clock = clock;
- params->phase = phase;
- params->functionTrace = function_trace;
- params->functionTraceStart = function_trace_start;
- params->system = system;
- params->cpu_id = cpu_id;
- params->tracer = tracer;
-
- params->itb = itb;
- params->dtb = dtb;
-#if FULL_SYSTEM
- params->profile = profile;
- params->do_quiesce = do_quiesce;
- params->do_checkpoint_insts = do_checkpoint_insts;
- params->do_statistics_insts = do_statistics_insts;
-#else
+ numThreads = 1;
+#if !FULL_SYSTEM
if (workload.size() != 1)
panic("only one workload allowed");
- params->process = workload[0];
#endif
-
- TimingSimpleCPU *cpu = new TimingSimpleCPU(params);
- return cpu;
+ return new TimingSimpleCPU(this);
}
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index f8b77604a..a02ec48c9 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -33,40 +33,181 @@
#include "cpu/simple/base.hh"
+#include "params/TimingSimpleCPU.hh"
+
class TimingSimpleCPU : public BaseSimpleCPU
{
public:
- struct Params : public BaseSimpleCPU::Params {
- };
-
- TimingSimpleCPU(Params *params);
+ TimingSimpleCPU(TimingSimpleCPUParams * params);
virtual ~TimingSimpleCPU();
virtual void init();
public:
- //
- enum Status {
- Idle,
- Running,
- IcacheRetry,
- IcacheWaitResponse,
- IcacheWaitSwitch,
- DcacheRetry,
- DcacheWaitResponse,
- DcacheWaitSwitch,
- SwitchedOut
+ Event *drainEvent;
+
+ private:
+
+ /*
+ * If an access needs to be broken into fragments, currently at most two,
+ * the the following two classes are used as the sender state of the
+ * packets so the CPU can keep track of everything. In the main packet
+ * sender state, there's an array with a spot for each fragment. If a
+ * fragment has already been accepted by the CPU, aka isn't waiting for
+ * a retry, it's pointer is NULL. After each fragment has successfully
+ * been processed, the "outstanding" counter is decremented. Once the
+ * count is zero, the entire larger access is complete.
+ */
+ class SplitMainSenderState : public Packet::SenderState
+ {
+ public:
+ int outstanding;
+ PacketPtr fragments[2];
+
+ int
+ getPendingFragment()
+ {
+ if (fragments[0]) {
+ return 0;
+ } else if (fragments[1]) {
+ return 1;
+ } else {
+ return -1;
+ }
+ }
};
- protected:
- Status _status;
+ class SplitFragmentSenderState : public Packet::SenderState
+ {
+ public:
+ SplitFragmentSenderState(PacketPtr _bigPkt, int _index) :
+ bigPkt(_bigPkt), index(_index)
+ {}
+ PacketPtr bigPkt;
+ int index;
+
+ void
+ clearFromParent()
+ {
+ SplitMainSenderState * main_send_state =
+ dynamic_cast<SplitMainSenderState *>(bigPkt->senderState);
+ main_send_state->fragments[index] = NULL;
+ }
+ };
- Status status() const { return _status; }
+ class FetchTranslation : public BaseTLB::Translation
+ {
+ protected:
+ TimingSimpleCPU *cpu;
- Event *drainEvent;
+ public:
+ FetchTranslation(TimingSimpleCPU *_cpu) : cpu(_cpu)
+ {}
- private:
+ void finish(Fault fault, RequestPtr req,
+ ThreadContext *tc, bool write)
+ {
+ cpu->sendFetch(fault, req, tc);
+ }
+ };
+ FetchTranslation fetchTranslation;
+
+ class DataTranslation : public BaseTLB::Translation
+ {
+ protected:
+ TimingSimpleCPU *cpu;
+ uint8_t *data;
+ uint64_t *res;
+ bool read;
+
+ public:
+ DataTranslation(TimingSimpleCPU *_cpu,
+ uint8_t *_data, uint64_t *_res, bool _read) :
+ cpu(_cpu), data(_data), res(_res), read(_read)
+ {}
+
+ void
+ finish(Fault fault, RequestPtr req,
+ ThreadContext *tc, bool write)
+ {
+ cpu->sendData(fault, req, data, res, read);
+ delete this;
+ }
+ };
+
+ class SplitDataTranslation : public BaseTLB::Translation
+ {
+ public:
+ struct WholeTranslationState
+ {
+ public:
+ int outstanding;
+ RequestPtr requests[2];
+ RequestPtr mainReq;
+ Fault faults[2];
+ uint8_t *data;
+ bool read;
+
+ WholeTranslationState(RequestPtr req1, RequestPtr req2,
+ RequestPtr main, uint8_t *_data, bool _read)
+ {
+ outstanding = 2;
+ requests[0] = req1;
+ requests[1] = req2;
+ mainReq = main;
+ faults[0] = faults[1] = NoFault;
+ data = _data;
+ read = _read;
+ }
+ };
+
+ TimingSimpleCPU *cpu;
+ int index;
+ WholeTranslationState *state;
+
+ SplitDataTranslation(TimingSimpleCPU *_cpu, int _index,
+ WholeTranslationState *_state) :
+ cpu(_cpu), index(_index), state(_state)
+ {}
+
+ void
+ finish(Fault fault, RequestPtr req,
+ ThreadContext *tc, bool write)
+ {
+ assert(state);
+ assert(state->outstanding);
+ state->faults[index] = fault;
+ if (--state->outstanding == 0) {
+ cpu->sendSplitData(state->faults[0],
+ state->faults[1],
+ state->requests[0],
+ state->requests[1],
+ state->mainReq,
+ state->data,
+ state->read);
+ delete state;
+ }
+ delete this;
+ }
+ };
+
+ void sendData(Fault fault, RequestPtr req,
+ uint8_t *data, uint64_t *res, bool read);
+ void sendSplitData(Fault fault1, Fault fault2,
+ RequestPtr req1, RequestPtr req2, RequestPtr req,
+ uint8_t *data, bool read);
+
+ void translationFault(Fault fault);
+
+ void buildPacket(PacketPtr &pkt, RequestPtr req, bool read);
+ void buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
+ RequestPtr req1, RequestPtr req2, RequestPtr req,
+ uint8_t *data, bool read);
+
+ bool handleReadPacket(PacketPtr pkt);
+ // This function always implicitly uses dcache_pkt.
+ bool handleWritePacket();
class CpuPort : public Port
{
@@ -99,8 +240,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
PacketPtr pkt;
TimingSimpleCPU *cpu;
- TickEvent(TimingSimpleCPU *_cpu)
- :Event(&mainEventQueue), cpu(_cpu) {}
+ TickEvent(TimingSimpleCPU *_cpu) : cpu(_cpu) {}
const char *description() const { return "Timing CPU tick"; }
void schedule(PacketPtr _pkt, Tick t);
};
@@ -189,18 +329,13 @@ class TimingSimpleCPU : public BaseSimpleCPU
template <class T>
Fault read(Addr addr, T &data, unsigned flags);
- Fault translateDataReadAddr(Addr vaddr, Addr &paddr,
- int size, unsigned flags);
-
template <class T>
Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
- Fault translateDataWriteAddr(Addr vaddr, Addr &paddr,
- int size, unsigned flags);
-
void fetch();
+ void sendFetch(Fault fault, RequestPtr req, ThreadContext *tc);
void completeIfetch(PacketPtr );
- void completeDataAccess(PacketPtr );
+ void completeDataAccess(PacketPtr pkt);
void advanceInst(Fault fault);
/**
@@ -212,7 +347,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
private:
typedef EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch> FetchEvent;
- FetchEvent *fetchEvent;
+ FetchEvent fetchEvent;
struct IprEvent : Event {
Packet *pkt;