x86: Rework opcode parsing to support 3 byte opcodes properly.

Instead of counting the number of opcode bytes in an instruction and recording each byte before the actual opcode, we can represent the path we took to get to the actual opcode byte by using a type code. That has a couple of advantages. First, we can disambiguate opcodes of the same length which have different properties. Second, it reduces the amount of data stored in an ExtMachInst, making them slightly easier/faster to create and process. This also adds some flexibility as far as how different types of opcodes are handled, which might come in handy if we decide to support VEX or XOP instructions. This change also adds tables to support properly decoding 3 byte opcodes. Before, we would fall off the end of some arrays, on top of the ambiguity described above. This change doesn't measurably affect performance on the twolf benchmark. --HG-- rename : src/arch/x86/isa/decoder/three_byte_opcodes.isa => src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa rename : src/arch/x86/isa/decoder/three_byte_opcodes.isa => src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa
author: Gabe Black <gabeblack@google.com> 2014-12-04 15:53:54 -0800
committer: Gabe Black <gabeblack@google.com> 2014-12-04 15:53:54 -0800
commit: 22aaa5867f2449e2a73b7891fc34072c12c199b3 (patch)
tree: 6083ecfd34307076c8d9c55a450e6cc118025b95 /src/arch/x86/decoder.cc
parent: 3069c28a021d3f8c29221e537d48ee382c30646f (diff)
download: gem5-22aaa5867f2449e2a73b7891fc34072c12c199b3.tar.xz
1 files changed, 126 insertions, 65 deletions
diff --git a/src/arch/x86/decoder.cc b/src/arch/x86/decoder.cc
index f42fb28bf..59f2e0f4f 100644
--- a/src/arch/x86/decoder.cc
+++ b/src/arch/x86/decoder.cc
@@ -48,9 +48,8 @@ Decoder::doResetState()
 
     emi.rex = 0;
     emi.legacy = 0;
-    emi.opcode.num = 0;
+    emi.opcode.type = BadOpcode;
     emi.opcode.op = 0;
-    emi.opcode.prefixA = emi.opcode.prefixB = 0;
 
     immediateCollected = 0;
     emi.immediate = 0;
@@ -94,8 +93,17 @@ Decoder::process()
           case PrefixState:
             state = doPrefixState(nextByte);
             break;
-          case OpcodeState:
-            state = doOpcodeState(nextByte);
+          case OneByteOpcodeState:
+            state = doOneByteOpcodeState(nextByte);
+            break;
+          case TwoByteOpcodeState:
+            state = doTwoByteOpcodeState(nextByte);
+            break;
+          case ThreeByte0F38OpcodeState:
+            state = doThreeByte0F38OpcodeState(nextByte);
+            break;
+          case ThreeByte0F3AOpcodeState:
+            state = doThreeByte0F3AOpcodeState(nextByte);
             break;
           case ModRMState:
             state = doModRMState(nextByte);
@@ -199,7 +207,7 @@ Decoder::doPrefixState(uint8_t nextByte)
         emi.rex = nextByte;
         break;
       case 0:
-        nextState = OpcodeState;
+        nextState = OneByteOpcodeState;
         break;
       default:
         panic("Unrecognized prefix %#x\n", nextByte);
@@ -207,79 +215,132 @@ Decoder::doPrefixState(uint8_t nextByte)
     return nextState;
 }
 
-//Load all the opcodes (currently up to 2) and then figure out
-//what immediate and/or ModRM is needed.
+// Load the first opcode byte. Determine if there are more opcode bytes, and
+// if not, what immediate and/or ModRM is needed.
 Decoder::State
-Decoder::doOpcodeState(uint8_t nextByte)
+Decoder::doOneByteOpcodeState(uint8_t nextByte)
 {
     State nextState = ErrorState;
-    emi.opcode.num++;
-    //We can't handle 3+ byte opcodes right now
-    assert(emi.opcode.num < 4);
     consumeByte();
-    if(emi.opcode.num == 1 && nextByte == 0x0f)
-    {
-        nextState = OpcodeState;
-        DPRINTF(Decoder, "Found two byte opcode.\n");
-        emi.opcode.prefixA = nextByte;
-    }
-    else if(emi.opcode.num == 2 && (nextByte == 0x38 || nextByte == 0x3A))
-    {
-        nextState = OpcodeState;
-        DPRINTF(Decoder, "Found three byte opcode.\n");
-        emi.opcode.prefixB = nextByte;
+    if (nextByte == 0x0f) {
+        nextState = TwoByteOpcodeState;
+        DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
+    } else {
+        DPRINTF(Decoder, "Found one byte opcode %#x.\n", nextByte);
+        emi.opcode.type = OneByteOpcode;
+        emi.opcode.op = nextByte;
+
+        nextState = processOpcode(ImmediateTypeOneByte, UsesModRMOneByte,
+                                  nextByte >= 0xA0 && nextByte <= 0xA3);
     }
-    else
-    {
-        DPRINTF(Decoder, "Found opcode %#x.\n", nextByte);
+    return nextState;
+}
+
+// Load the second opcode byte. Determine if there are more opcode bytes, and
+// if not, what immediate and/or ModRM is needed.
+Decoder::State
+Decoder::doTwoByteOpcodeState(uint8_t nextByte)
+{
+    State nextState = ErrorState;
+    consumeByte();
+    if (nextByte == 0x38) {
+        nextState = ThreeByte0F38OpcodeState;
+        DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
+    } else if (nextByte == 0x3a) {
+        nextState = ThreeByte0F3AOpcodeState;
+        DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
+    } else {
+        DPRINTF(Decoder, "Found two byte opcode %#x.\n", nextByte);
+        emi.opcode.type = TwoByteOpcode;
         emi.opcode.op = nextByte;
 
-        //Figure out the effective operand size. This can be overriden to
-        //a fixed value at the decoder level.
-        int logOpSize;
-        if (emi.rex.w)
-            logOpSize = 3; // 64 bit operand size
-        else if (emi.legacy.op)
-            logOpSize = altOp;
-        else
-            logOpSize = defOp;
+        nextState = processOpcode(ImmediateTypeTwoByte, UsesModRMTwoByte);
+    }
+    return nextState;
+}
 
-        //Set the actual op size
-        emi.opSize = 1 << logOpSize;
+// Load the third opcode byte and determine what immediate and/or ModRM is
+// needed.
+Decoder::State
+Decoder::doThreeByte0F38OpcodeState(uint8_t nextByte)
+{
+    consumeByte();
 
-        //Figure out the effective address size. This can be overriden to
-        //a fixed value at the decoder level.
-        int logAddrSize;
-        if(emi.legacy.addr)
-            logAddrSize = altAddr;
-        else
-            logAddrSize = defAddr;
+    DPRINTF(Decoder, "Found three byte 0F38 opcode %#x.\n", nextByte);
+    emi.opcode.type = ThreeByte0F38Opcode;
+    emi.opcode.op = nextByte;
 
-        //Set the actual address size
-        emi.addrSize = 1 << logAddrSize;
+    return processOpcode(ImmediateTypeThreeByte0F38, UsesModRMThreeByte0F38);
+}
 
-        //Figure out the effective stack width. This can be overriden to
-        //a fixed value at the decoder level.
-        emi.stackSize = 1 << stack;
+// Load the third opcode byte and determine what immediate and/or ModRM is
+// needed.
+Decoder::State
+Decoder::doThreeByte0F3AOpcodeState(uint8_t nextByte)
+{
+    consumeByte();
 
-        //Figure out how big of an immediate we'll retreive based
-        //on the opcode.
-        int immType = ImmediateType[emi.opcode.num - 1][nextByte];
-        if (emi.opcode.num == 1 && nextByte >= 0xA0 && nextByte <= 0xA3)
-            immediateSize = SizeTypeToSize[logAddrSize - 1][immType];
-        else
-            immediateSize = SizeTypeToSize[logOpSize - 1][immType];
+    DPRINTF(Decoder, "Found three byte 0F3A opcode %#x.\n", nextByte);
+    emi.opcode.type = ThreeByte0F3AOpcode;
+    emi.opcode.op = nextByte;
+
+    return processOpcode(ImmediateTypeThreeByte0F3A, UsesModRMThreeByte0F3A);
+}
+
+// Generic opcode processing which determines the immediate size, and whether
+// or not there's a modrm byte.
+Decoder::State
+Decoder::processOpcode(ByteTable &immTable, ByteTable &modrmTable,
+                       bool addrSizedImm)
+{
+    State nextState = ErrorState;
+    const uint8_t opcode = emi.opcode.op;
+
+    //Figure out the effective operand size. This can be overriden to
+    //a fixed value at the decoder level.
+    int logOpSize;
+    if (emi.rex.w)
+        logOpSize = 3; // 64 bit operand size
+    else if (emi.legacy.op)
+        logOpSize = altOp;
+    else
+        logOpSize = defOp;
 
-        //Determine what to expect next
-        if (UsesModRM[emi.opcode.num - 1][nextByte]) {
-            nextState = ModRMState;
+    //Set the actual op size
+    emi.opSize = 1 << logOpSize;
+
+    //Figure out the effective address size. This can be overriden to
+    //a fixed value at the decoder level.
+    int logAddrSize;
+    if(emi.legacy.addr)
+        logAddrSize = altAddr;
+    else
+        logAddrSize = defAddr;
+
+    //Set the actual address size
+    emi.addrSize = 1 << logAddrSize;
+
+    //Figure out the effective stack width. This can be overriden to
+    //a fixed value at the decoder level.
+    emi.stackSize = 1 << stack;
+
+    //Figure out how big of an immediate we'll retreive based
+    //on the opcode.
+    int immType = immTable[opcode];
+    if (addrSizedImm)
+        immediateSize = SizeTypeToSize[logAddrSize - 1][immType];
+    else
+        immediateSize = SizeTypeToSize[logOpSize - 1][immType];
+
+    //Determine what to expect next
+    if (modrmTable[opcode]) {
+        nextState = ModRMState;
+    } else {
+        if(immediateSize) {
+            nextState = ImmediateState;
         } else {
-            if(immediateSize) {
-                nextState = ImmediateState;
-            } else {
-                instDone = true;
-                nextState = ResetState;
-            }
+            instDone = true;
+            nextState = ResetState;
         }
     }
     return nextState;
@@ -315,7 +376,7 @@ Decoder::doModRMState(uint8_t nextByte)
 
     // The "test" instruction in group 3 needs an immediate, even though
     // the other instructions with the same actual opcode don't.
-    if (emi.opcode.num == 1 && (modRM.reg & 0x6) == 0) {
+    if (emi.opcode.type == OneByteOpcode && (modRM.reg & 0x6) == 0) {
        if (emi.opcode.op == 0xF6)
            immediateSize = 1;
        else if (emi.opcode.op == 0xF7)
author	Gabe Black <gabeblack@google.com>	2014-12-04 15:53:54 -0800
committer	Gabe Black <gabeblack@google.com>	2014-12-04 15:53:54 -0800
commit	22aaa5867f2449e2a73b7891fc34072c12c199b3 (patch)
tree	6083ecfd34307076c8d9c55a450e6cc118025b95 /src/arch/x86/decoder.cc
parent	3069c28a021d3f8c29221e537d48ee382c30646f (diff)
download	gem5-22aaa5867f2449e2a73b7891fc34072c12c199b3.tar.xz