x86: Rework how VEX prefixes are decoded.

Remove redundant information from the ExtMachInst, hash the vex information to ensure the decode cache works properly, print the vex info when printing an ExtMachInst, consider the vex info when comparing two ExtMachInsts, fold the info from the vex prefixes into existing settings, remove redundant decode code, handle vex prefixes one byte at a time and don't bother building up the entire prefix, and let instructions that care about vex use it in their implementation, instead of developing an entire parallel decode tree.

This also eliminates the error-prone vex immediate decode table which was incomplete and would result in an out-of-bounds access for incorrectly encoded instructions or when the CPU was misspeculating, as it was (as far as I can tell) redundant with the tables that already existed for two and three byte opcodes. There were differences, but I think those may have been mistakes based on the documentation I found.

Also, in 32-bit mode, the VEX prefixes might actually be LDS or LES instructions which are still legal in that mode. A valid VEX prefix would look like an LDS/LES with an otherwise invalid modrm encoding, so use that as a signal to abort processing the VEX and turn the instruction into an LES/LDS as appropriate.

Change-Id: Icb367eaaa35590692df1c98862f315da4c139f5c
Reviewed-on: https://gem5-review.googlesource.com/3501
Reviewed-by: Joe Gross <joe.gross@amd.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
author: Gabe Black <gabeblack@google.com> 2017-05-24 03:09:56 -0700
committer: Gabe Black <gabeblack@google.com> 2017-05-26 20:01:03 +0000
commit: 7159ea669825a2a876dc3f0f2022336b517299a0 (patch)
tree: 24775f9c47c96ccfcf82dcebb815bf69caafbfc2 /src/arch/x86/decoder.cc
parent: 91228e9b222513ffc8008558fd4b3f468cccdbbe (diff)
download: gem5-7159ea669825a2a876dc3f0f2022336b517299a0.tar.xz
1 files changed, 126 insertions, 93 deletions
diff --git a/src/arch/x86/decoder.cc b/src/arch/x86/decoder.cc
index 324eb0219..930c2b951 100644
--- a/src/arch/x86/decoder.cc
+++ b/src/arch/x86/decoder.cc
@@ -96,19 +96,18 @@ Decoder::process()
           case PrefixState:
             state = doPrefixState(nextByte);
             break;
-
-          case TwoByteVexState:
-            state = doTwoByteVexState(nextByte);
+          case Vex2Of2State:
+            state = doVex2Of2State(nextByte);
             break;
-
-          case ThreeByteVexFirstState:
-            state = doThreeByteVexFirstState(nextByte);
+          case Vex2Of3State:
+            state = doVex2Of3State(nextByte);
             break;
-
-          case ThreeByteVexSecondState:
-            state = doThreeByteVexSecondState(nextByte);
+          case Vex3Of3State:
+            state = doVex3Of3State(nextByte);
+            break;
+          case VexOpcodeState:
+            state = doVexOpcodeState(nextByte);
             break;
-
           case OneByteOpcodeState:
             state = doOneByteOpcodeState(nextByte);
             break;
@@ -222,19 +221,16 @@ Decoder::doPrefixState(uint8_t nextByte)
         DPRINTF(Decoder, "Found Rex prefix %#x.\n", nextByte);
         emi.rex = nextByte;
         break;
-
       case Vex2Prefix:
         DPRINTF(Decoder, "Found VEX two-byte prefix %#x.\n", nextByte);
-        emi.vex.zero = nextByte;
-        nextState = TwoByteVexState;
+        emi.vex.present = 1;
+        nextState = Vex2Of2State;
         break;
-
       case Vex3Prefix:
         DPRINTF(Decoder, "Found VEX three-byte prefix %#x.\n", nextByte);
-        emi.vex.zero = nextByte;
-        nextState = ThreeByteVexFirstState;
+        emi.vex.present = 1;
+        nextState = Vex2Of3State;
         break;
-
       case 0:
         nextState = OneByteOpcodeState;
         break;
@@ -246,42 +242,132 @@ Decoder::doPrefixState(uint8_t nextByte)
 }
 
 Decoder::State
-Decoder::doTwoByteVexState(uint8_t nextByte)
+Decoder::doVex2Of2State(uint8_t nextByte)
 {
-    assert(emi.vex.zero == 0xc5);
     consumeByte();
-    TwoByteVex tbe = 0;
-    tbe.first = nextByte;
+    Vex2Of2 vex = nextByte;
+
+    emi.rex.r = !vex.r;
 
-    emi.vex.first.r = tbe.first.r;
-    emi.vex.first.x = 1;
-    emi.vex.first.b = 1;
-    emi.vex.first.map_select = 1;
+    emi.vex.l = vex.l;
+    emi.vex.v = ~vex.v;
+
+    switch (vex.p) {
+      case 0:
+        break;
+      case 1:
+        emi.legacy.op = 1;
+        break;
+      case 2:
+        emi.legacy.rep = 1;
+        break;
+      case 3:
+        emi.legacy.repne = 1;
+        break;
+    }
 
-    emi.vex.second.w = 0;
-    emi.vex.second.vvvv = tbe.first.vvvv;
-    emi.vex.second.l = tbe.first.l;
-    emi.vex.second.pp = tbe.first.pp;
+    emi.opcode.type = TwoByteOpcode;
 
-    emi.opcode.type = Vex;
-    return OneByteOpcodeState;
+    return VexOpcodeState;
 }
 
 Decoder::State
-Decoder::doThreeByteVexFirstState(uint8_t nextByte)
+Decoder::doVex2Of3State(uint8_t nextByte)
 {
+    if (emi.mode.submode != SixtyFourBitMode && bits(nextByte, 7, 6) == 0x3) {
+        // This was actually an LDS instruction. Reroute to that path.
+        emi.vex.present = 0;
+        emi.opcode.type = OneByteOpcode;
+        emi.opcode.op = 0xC4;
+        return processOpcode(ImmediateTypeOneByte, UsesModRMOneByte,
+                             nextByte >= 0xA0 && nextByte <= 0xA3);
+    }
+
     consumeByte();
-    emi.vex.first = nextByte;
-    return ThreeByteVexSecondState;
+    Vex2Of3 vex = nextByte;
+
+    emi.rex.r = !vex.r;
+    emi.rex.x = !vex.x;
+    emi.rex.b = !vex.b;
+
+    switch (vex.m) {
+      case 1:
+        emi.opcode.type = TwoByteOpcode;
+        break;
+      case 2:
+        emi.opcode.type = ThreeByte0F38Opcode;
+        break;
+      case 3:
+        emi.opcode.type = ThreeByte0F3AOpcode;
+        break;
+      default:
+        // These encodings are reserved. Pretend this was an undefined
+        // instruction so the main decoder will behave correctly, and stop
+        // trying to interpret bytes.
+        emi.opcode.type = TwoByteOpcode;
+        emi.opcode.op = 0x0B;
+        instDone = true;
+        return ResetState;
+    }
+    return Vex3Of3State;
 }
 
 Decoder::State
-Decoder::doThreeByteVexSecondState(uint8_t nextByte)
+Decoder::doVex3Of3State(uint8_t nextByte)
 {
+    if (emi.mode.submode != SixtyFourBitMode && bits(nextByte, 7, 6) == 0x3) {
+        // This was actually an LES instruction. Reroute to that path.
+        emi.vex.present = 0;
+        emi.opcode.type = OneByteOpcode;
+        emi.opcode.op = 0xC5;
+        return processOpcode(ImmediateTypeOneByte, UsesModRMOneByte,
+                             nextByte >= 0xA0 && nextByte <= 0xA3);
+    }
+
     consumeByte();
-    emi.vex.second = nextByte;
-    emi.opcode.type = Vex;
-    return OneByteOpcodeState;
+    Vex3Of3 vex = nextByte;
+
+    emi.rex.w = vex.w;
+
+    emi.vex.l = vex.l;
+    emi.vex.v = ~vex.v;
+
+    switch (vex.p) {
+      case 0:
+        break;
+      case 1:
+        emi.legacy.op = 1;
+        break;
+      case 2:
+        emi.legacy.rep = 1;
+        break;
+      case 3:
+        emi.legacy.repne = 1;
+        break;
+    }
+
+    return VexOpcodeState;
+}
+
+Decoder::State
+Decoder::doVexOpcodeState(uint8_t nextByte)
+{
+    DPRINTF(Decoder, "Found VEX opcode %#x.\n", nextByte);
+
+    emi.opcode.op = nextByte;
+
+    switch (emi.opcode.type) {
+      case TwoByteOpcode:
+        return processOpcode(ImmediateTypeTwoByte, UsesModRMTwoByte);
+      case ThreeByte0F38Opcode:
+        return processOpcode(ImmediateTypeThreeByte0F38,
+                             UsesModRMThreeByte0F38);
+      case ThreeByte0F3AOpcode:
+        return processOpcode(ImmediateTypeThreeByte0F3A,
+                             UsesModRMThreeByte0F3A);
+      default:
+        panic("Unrecognized opcode type %d.\n", emi.opcode.type);
+    }
 }
 
 // Load the first opcode byte. Determine if there are more opcode bytes, and
@@ -292,14 +378,9 @@ Decoder::doOneByteOpcodeState(uint8_t nextByte)
     State nextState = ErrorState;
     consumeByte();
 
-    if (emi.vex.zero != 0) {
-        DPRINTF(Decoder, "Found VEX opcode %#x.\n", nextByte);
-        emi.opcode.op = nextByte;
-        const uint8_t opcode_map = emi.vex.first.map_select;
-        nextState = processExtendedOpcode(ImmediateTypeVex[opcode_map]);
-    } else if (nextByte == 0x0f) {
-        nextState = TwoByteOpcodeState;
+    if (nextByte == 0x0f) {
         DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
+        nextState = TwoByteOpcodeState;
     } else {
         DPRINTF(Decoder, "Found one byte opcode %#x.\n", nextByte);
         emi.opcode.type = OneByteOpcode;
@@ -421,54 +502,6 @@ Decoder::processOpcode(ByteTable &immTable, ByteTable &modrmTable,
     return nextState;
 }
 
-Decoder::State
-Decoder::processExtendedOpcode(ByteTable &immTable)
-{
-    //Figure out the effective operand size. This can be overriden to
-    //a fixed value at the decoder level.
-    int logOpSize;
-    if (emi.vex.second.w)
-        logOpSize = 3; // 64 bit operand size
-    else if (emi.vex.second.pp == 1)
-        logOpSize = altOp;
-    else
-        logOpSize = defOp;
-
-    //Set the actual op size
-    emi.opSize = 1 << logOpSize;
-
-    //Figure out the effective address size. This can be overriden to
-    //a fixed value at the decoder level.
-    int logAddrSize;
-    if (emi.legacy.addr)
-        logAddrSize = altAddr;
-    else
-        logAddrSize = defAddr;
-
-    //Set the actual address size
-    emi.addrSize = 1 << logAddrSize;
-
-    //Figure out the effective stack width. This can be overriden to
-    //a fixed value at the decoder level.
-    emi.stackSize = 1 << stack;
-
-    //Figure out how big of an immediate we'll retreive based
-    //on the opcode.
-    const uint8_t opcode = emi.opcode.op;
-
-    if (emi.vex.zero == 0xc5 || emi.vex.zero == 0xc4) {
-        int immType = immTable[opcode];
-        // Assume 64-bit mode;
-        immediateSize = SizeTypeToSize[2][immType];
-    }
-
-    if (opcode == 0x77) {
-        instDone = true;
-        return ResetState;
-    }
-    return ModRMState;
-}
-
 //Get the ModRM byte and determine what displacement, if any, there is.
 //Also determine whether or not to get the SIB byte, displacement, or
 //immediate next.
author	Gabe Black <gabeblack@google.com>	2017-05-24 03:09:56 -0700
committer	Gabe Black <gabeblack@google.com>	2017-05-26 20:01:03 +0000
commit	7159ea669825a2a876dc3f0f2022336b517299a0 (patch)
tree	24775f9c47c96ccfcf82dcebb815bf69caafbfc2 /src/arch/x86/decoder.cc
parent	91228e9b222513ffc8008558fd4b3f468cccdbbe (diff)
download	gem5-7159ea669825a2a876dc3f0f2022336b517299a0.tar.xz