summary | refs | log | tree | commit | diff
path: root/src/arch/x86/decoder.cc
diff options
context:
space:
mode:
author: Gabe Black <gblack@eecs.umich.edu> 2013-01-04 19:00:44 -0600
committer: Gabe Black <gblack@eecs.umich.edu> 2013-01-04 19:00:44 -0600
commit: d1965af22045d2a62b1cd1bc473b836413d79b46 (patch)
tree: 3b5a67ba03489fc8789923b724bc04f04070bea3 /src/arch/x86/decoder.cc
parent: 63b10907ef822aa6873d7f4f4d1ee849a188b2c4 (diff)
download: gem5-d1965af22045d2a62b1cd1bc473b836413d79b46.tar.xz
X86: Move address based decode caching in front of the predecoder.
The predecoder in x86 does a lot of work, most of which can be skipped if the decoder cache is put in front of it.
Committed by: Nilay Vaish <nilay@cs.wisc.edu>
Diffstat (limited to 'src/arch/x86/decoder.cc')
-rw-r--r-- src/arch/x86/decoder.cc 157
1 files changed, 123 insertions, 34 deletions
diff --git a/src/arch/x86/decoder.cc b/src/arch/x86/decoder.cc
index 5fb2dcaf4..9dcb02902 100644
--- a/src/arch/x86/decoder.cc
+++ b/src/arch/x86/decoder.cc
@@ -38,10 +38,15 @@
namespace X86ISA
{
-void Decoder::doReset()
+
+Decoder::State
+Decoder::doResetState()
{
origPC = basePC + offset;
DPRINTF(Decoder, "Setting origPC to %#x\n", origPC);
+ instBytes = &decodePages->lookup(origPC);
+ chunkIdx = 0;
+
emi.rex = 0;
emi.legacy = 0;
emi.opcode.num = 0;
@@ -55,12 +60,17 @@ void Decoder::doReset()
emi.modRM = 0;
emi.sib = 0;
- m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
- emi.mode.mode = m5Reg.mode;
- emi.mode.submode = m5Reg.submode;
+
+ if (instBytes->si) {
+ return FromCacheState;
+ } else {
+ instBytes->chunks.clear();
+ return PrefixState;
+ }
}
-void Decoder::process()
+void
+Decoder::process()
{
//This function drives the decoder state machine.
@@ -70,15 +80,18 @@ void Decoder::process()
assert(!outOfBytes);
assert(!instDone);
+ if (state == ResetState)
+ state = doResetState();
+ if (state == FromCacheState) {
+ state = doFromCacheState();
+ } else {
+ instBytes->chunks.push_back(fetchChunk);
+ }
+
//While there's still something to do...
- while(!instDone && !outOfBytes)
- {
+ while (!instDone && !outOfBytes) {
uint8_t nextByte = getNextByte();
- switch(state)
- {
- case ResetState:
- doReset();
- state = PrefixState;
+ switch (state) {
case PrefixState:
state = doPrefixState(nextByte);
break;
@@ -105,9 +118,42 @@ void Decoder::process()
}
}
+Decoder::State
+Decoder::doFromCacheState()
+{
+ DPRINTF(Decoder, "Looking at cache state.\n");
+ if ((fetchChunk & instBytes->masks[chunkIdx]) !=
+ instBytes->chunks[chunkIdx]) {
+ DPRINTF(Decoder, "Decode cache miss.\n");
+ // The chached chunks didn't match what was fetched. Fall back to the
+ // predecoder.
+ instBytes->chunks[chunkIdx] = fetchChunk;
+ instBytes->chunks.resize(chunkIdx + 1);
+ instBytes->si = NULL;
+ chunkIdx = 0;
+ fetchChunk = instBytes->chunks[0];
+ offset = origPC % sizeof(MachInst);
+ basePC = origPC - offset;
+ return PrefixState;
+ } else if (chunkIdx == instBytes->chunks.size() - 1) {
+ // We matched the cache, so use its value.
+ instDone = true;
+ offset = instBytes->lastOffset;
+ if (offset == sizeof(MachInst))
+ outOfBytes = true;
+ return ResetState;
+ } else {
+ // We matched so far, but need to check more chunks.
+ chunkIdx++;
+ outOfBytes = true;
+ return FromCacheState;
+ }
+}
+
//Either get a prefix and record it in the ExtMachInst, or send the
//state machine on to get the opcode(s).
-Decoder::State Decoder::doPrefixState(uint8_t nextByte)
+Decoder::State
+Decoder::doPrefixState(uint8_t nextByte)
{
uint8_t prefix = Prefixes[nextByte];
State nextState = PrefixState;
@@ -164,7 +210,8 @@ Decoder::State Decoder::doPrefixState(uint8_t nextByte)
//Load all the opcodes (currently up to 2) and then figure out
//what immediate and/or ModRM is needed.
-Decoder::State Decoder::doOpcodeState(uint8_t nextByte)
+Decoder::State
+Decoder::doOpcodeState(uint8_t nextByte)
{
State nextState = ErrorState;
emi.opcode.num++;
@@ -194,9 +241,9 @@ Decoder::State Decoder::doOpcodeState(uint8_t nextByte)
if (emi.rex.w)
logOpSize = 3; // 64 bit operand size
else if (emi.legacy.op)
- logOpSize = m5Reg.altOp;
+ logOpSize = altOp;
else
- logOpSize = m5Reg.defOp;
+ logOpSize = defOp;
//Set the actual op size
emi.opSize = 1 << logOpSize;
@@ -205,16 +252,16 @@ Decoder::State Decoder::doOpcodeState(uint8_t nextByte)
//a fixed value at the decoder level.
int logAddrSize;
if(emi.legacy.addr)
- logAddrSize = m5Reg.altAddr;
+ logAddrSize = altAddr;
else
- logAddrSize = m5Reg.defAddr;
+ logAddrSize = defAddr;
//Set the actual address size
emi.addrSize = 1 << logAddrSize;
//Figure out the effective stack width. This can be overriden to
//a fixed value at the decoder level.
- emi.stackSize = 1 << m5Reg.stack;
+ emi.stackSize = 1 << stack;
//Figure out how big of an immediate we'll retreive based
//on the opcode.
@@ -242,13 +289,14 @@ Decoder::State Decoder::doOpcodeState(uint8_t nextByte)
//Get the ModRM byte and determine what displacement, if any, there is.
//Also determine whether or not to get the SIB byte, displacement, or
//immediate next.
-Decoder::State Decoder::doModRMState(uint8_t nextByte)
+Decoder::State
+Decoder::doModRMState(uint8_t nextByte)
{
State nextState = ErrorState;
ModRM modRM;
modRM = nextByte;
DPRINTF(Decoder, "Found modrm byte %#x.\n", nextByte);
- if (m5Reg.defOp == 1) {
+ if (defOp == 1) {
//figure out 16 bit displacement size
if ((modRM.mod == 0 && modRM.rm == 6) || modRM.mod == 2)
displacementSize = 2;
@@ -297,7 +345,8 @@ Decoder::State Decoder::doModRMState(uint8_t nextByte)
//Get the SIB byte. We don't do anything with it at this point, other
//than storing it in the ExtMachInst. Determine if we need to get a
//displacement or immediate next.
-Decoder::State Decoder::doSIBState(uint8_t nextByte)
+Decoder::State
+Decoder::doSIBState(uint8_t nextByte)
{
State nextState = ErrorState;
emi.sib = nextByte;
@@ -318,7 +367,8 @@ Decoder::State Decoder::doSIBState(uint8_t nextByte)
//Gather up the displacement, or at least as much of it
//as we can get.
-Decoder::State Decoder::doDisplacementState()
+Decoder::State
+Decoder::doDisplacementState()
{
State nextState = ErrorState;
@@ -365,7 +415,8 @@ Decoder::State Decoder::doDisplacementState()
//Gather up the immediate, or at least as much of it
//as we can get
-Decoder::State Decoder::doImmediateState()
+Decoder::State
+Decoder::doImmediateState()
{
State nextState = ErrorState;
@@ -408,24 +459,62 @@ Decoder::State Decoder::doImmediateState()
return nextState;
}
-DecodeCache::InstMap Decoder::instMap;
-DecodeCache::AddrMap<StaticInstPtr> Decoder::decodePages;
+Decoder::InstBytes Decoder::dummy;
+Decoder::InstCacheMap Decoder::instCacheMap;
StaticInstPtr
Decoder::decode(ExtMachInst mach_inst, Addr addr)
{
- StaticInstPtr &si = decodePages.lookup(addr);
- if (si && (si->machInst == mach_inst))
- return si;
+ DecodeCache::InstMap::iterator iter = instMap->find(mach_inst);
+ if (iter != instMap->end())
+ return iter->second;
+
+ StaticInstPtr si = decodeInst(mach_inst);
+ (*instMap)[mach_inst] = si;
+ return si;
+}
+
+StaticInstPtr
+Decoder::decode(PCState &nextPC)
+{
+ if (!instDone)
+ return NULL;
+ instDone = false;
+ updateNPC(nextPC);
- DecodeCache::InstMap::iterator iter = instMap.find(mach_inst);
- if (iter != instMap.end()) {
- si = iter->second;
+ StaticInstPtr &si = instBytes->si;
+ if (si)
return si;
+
+ // We didn't match in the AddrMap, but we still populated an entry. Fix
+ // up its byte masks.
+ const int chunkSize = sizeof(MachInst);
+
+ instBytes->lastOffset = offset;
+
+ Addr firstBasePC = basePC - (instBytes->chunks.size() - 1) * chunkSize;
+ Addr firstOffset = origPC - firstBasePC;
+ Addr totalSize = instBytes->lastOffset - firstOffset +
+ (instBytes->chunks.size() - 1) * chunkSize;
+ int start = firstOffset;
+ instBytes->masks.clear();
+
+ while (totalSize) {
+ int end = start + totalSize;
+ end = (chunkSize < end) ? chunkSize : end;
+ int size = end - start;
+ int idx = instBytes->masks.size();
+
+ MachInst maskVal = mask(size * 8) << (start * 8);
+ assert(maskVal);
+
+ instBytes->masks.push_back(maskVal);
+ instBytes->chunks[idx] &= instBytes->masks[idx];
+ totalSize -= size;
+ start = 0;
}
- si = decodeInst(mach_inst);
- instMap[mach_inst] = si;
+ si = decode(emi, origPC);
return si;
}