summaryrefslogtreecommitdiff
path: root/src/arch
diff options
context:
space:
mode:
Diffstat (limited to 'src/arch')
-rw-r--r--src/arch/x86/SConscript1
-rw-r--r--src/arch/x86/insts/microldstop.hh4
-rw-r--r--src/arch/x86/insts/micromediaop.cc63
-rw-r--r--src/arch/x86/insts/micromediaop.hh113
-rw-r--r--src/arch/x86/isa/decoder/decoder.isa20
-rw-r--r--src/arch/x86/isa/decoder/locked_opcodes.isa4
-rw-r--r--src/arch/x86/isa/decoder/three_byte_opcodes.isa151
-rw-r--r--src/arch/x86/isa/decoder/two_byte_opcodes.isa1346
-rw-r--r--src/arch/x86/isa/decoder/x87.isa4
-rw-r--r--src/arch/x86/isa/includes.isa2
-rw-r--r--src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py447
-rw-r--r--src/arch/x86/isa/insts/general_purpose/data_transfer/move.py11
-rw-r--r--src/arch/x86/isa/insts/general_purpose/semaphores.py134
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py73
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py73
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py73
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py73
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py73
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py73
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py147
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py63
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py74
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py71
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py73
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py89
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py43
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py74
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py206
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py13
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py17
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py91
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py46
-rw-r--r--src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py45
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py167
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py61
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py83
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py32
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py167
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py20
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py125
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py83
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py33
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py39
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py34
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py24
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py65
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py53
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py154
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py39
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py6
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py10
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py22
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/logical/pand.py45
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/logical/por.py22
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py23
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py73
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py47
-rw-r--r--src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py73
-rw-r--r--src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py15
-rw-r--r--src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py31
-rw-r--r--src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py5
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py127
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py31
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py79
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py21
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py127
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py15
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py95
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py63
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/data_conversion.py3
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py20
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py47
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py18
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py95
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py51
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py5
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py7
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py15
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/logical/pand.py31
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/logical/por.py15
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py59
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py39
-rw-r--r--src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py59
-rw-r--r--src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py8
-rw-r--r--src/arch/x86/isa/microasm.isa4
-rw-r--r--src/arch/x86/isa/microops/ldstop.isa3
-rw-r--r--src/arch/x86/isa/microops/limmop.isa31
-rw-r--r--src/arch/x86/isa/microops/mediaop.isa1554
-rw-r--r--src/arch/x86/isa/microops/microops.isa3
-rw-r--r--src/arch/x86/isa/specialize.isa27
-rw-r--r--src/arch/x86/predecoder.cc7
-rw-r--r--src/arch/x86/process.cc4
-rw-r--r--src/arch/x86/types.hh6
93 files changed, 6463 insertions, 1547 deletions
diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript
index 3d1f6f8e3..97868986f 100644
--- a/src/arch/x86/SConscript
+++ b/src/arch/x86/SConscript
@@ -91,6 +91,7 @@ if env['TARGET_ISA'] == 'x86':
Source('faults.cc')
Source('insts/microfpop.cc')
Source('insts/microldstop.cc')
+ Source('insts/micromediaop.cc')
Source('insts/microop.cc')
Source('insts/microregop.cc')
Source('insts/static_inst.cc')
diff --git a/src/arch/x86/insts/microldstop.hh b/src/arch/x86/insts/microldstop.hh
index 309a2e6b7..048535a27 100644
--- a/src/arch/x86/insts/microldstop.hh
+++ b/src/arch/x86/insts/microldstop.hh
@@ -64,8 +64,8 @@
namespace X86ISA
{
- static const Request::FlagsType SegmentFlagMask = mask(4);
- static const int FlagShift = 4;
+ const Request::FlagsType SegmentFlagMask = mask(4);
+ const int FlagShift = 4;
enum FlagBit {
CPL0FlagBit = 1,
AddrSizeFlagBit = 2,
diff --git a/src/arch/x86/insts/micromediaop.cc b/src/arch/x86/insts/micromediaop.cc
new file mode 100644
index 000000000..07ae360ee
--- /dev/null
+++ b/src/arch/x86/insts/micromediaop.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2009 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/x86/insts/micromediaop.hh"
+#include "arch/x86/miscregs.hh"
+#include <string>
+
+namespace X86ISA
+{
+ // Builds the disassembly text: the mnemonic, then destination register 0, source register 0 and source register 1, with ", " between operands.
+ std::string MediaOpReg::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ const char *const separator = ", ";
+ std::stringstream text;
+ printMnemonic(text, instMnem, mnemonic);
+ printDestReg(text, 0, destSize);
+ text << separator;
+ printSrcReg(text, 0, srcSize);
+ text << separator;
+ printSrcReg(text, 1, srcSize);
+ return text.str();
+ }
+
+ // Builds the disassembly text for the immediate form: the mnemonic, destination register 0, source register 0, then imm8 formatted with "%#x".
+ std::string MediaOpImm::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ std::stringstream text;
+ printMnemonic(text, instMnem, mnemonic);
+ printDestReg(text, 0, destSize);
+ text << ", ";
+ printSrcReg(text, 0, srcSize);
+ // The immediate takes the place of the second source operand.
+ ccprintf(text, ", %#x", imm8);
+ return text.str();
+ }
+}
diff --git a/src/arch/x86/insts/micromediaop.hh b/src/arch/x86/insts/micromediaop.hh
new file mode 100644
index 000000000..508ef4e26
--- /dev/null
+++ b/src/arch/x86/insts/micromediaop.hh
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2009 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_X86_INSTS_MICROMEDIAOP_HH__
+#define __ARCH_X86_INSTS_MICROMEDIAOP_HH__
+
+#include "arch/x86/insts/microop.hh"
+
+namespace X86ISA
+{
+ class MediaOpBase : public X86MicroopBase // Common state for media microops: one source index, one destination index, operand sizes and an ext byte.
+ {
+ protected:
+ const RegIndex src1; // Copied from _src1.idx in the constructor.
+ const RegIndex dest; // Copied from _dest.idx in the constructor.
+ const uint8_t srcSize; // Source operand size as supplied by the caller; units are not visible in this file.
+ const uint8_t destSize; // Destination operand size as supplied by the caller; units are not visible in this file.
+ const uint8_t ext; // Extension value stored verbatim from _ext; its bit meanings are defined by users of this class.
+ static const RegIndex foldOBit = 0; // NOTE(review): fixed at 0 here; its consumers are not visible in this file -- confirm the intended use.
+
+ // Constructor: forwards the generic microop arguments to X86MicroopBase and records the register indices, operand sizes and ext value.
+ MediaOpBase(ExtMachInst _machInst,
+ const char *mnem, const char *_instMnem,
+ bool isMicro, bool isDelayed,
+ bool isFirst, bool isLast,
+ InstRegIndex _src1, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint8_t _ext,
+ OpClass __opClass) :
+ X86MicroopBase(_machInst, mnem, _instMnem,
+ isMicro, isDelayed, isFirst, isLast,
+ __opClass),
+ src1(_src1.idx), dest(_dest.idx),
+ srcSize(_srcSize), destSize(_destSize), ext(_ext)
+ {}
+ };
+
+ class MediaOpReg : public MediaOpBase // Media microop variant whose second operand is a register index.
+ {
+ protected:
+ const RegIndex src2; // Copied from _src2.idx in the constructor.
+
+ // Constructor: passes the shared arguments through to MediaOpBase and records the second source register index.
+ MediaOpReg(ExtMachInst _machInst,
+ const char *mnem, const char *_instMnem,
+ bool isMicro, bool isDelayed,
+ bool isFirst, bool isLast,
+ InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint8_t _ext,
+ OpClass __opClass) :
+ MediaOpBase(_machInst, mnem, _instMnem,
+ isMicro, isDelayed, isFirst, isLast,
+ _src1, _dest, _srcSize, _destSize, _ext,
+ __opClass),
+ src2(_src2.idx)
+ {}
+
+ std::string generateDisassembly(Addr pc, // Returns the textual disassembly of this microop; defined in micromediaop.cc.
+ const SymbolTable *symtab) const;
+ };
+
+ class MediaOpImm : public MediaOpBase // Media microop variant whose second operand is an 8 bit immediate.
+ {
+ protected:
+ uint8_t imm8; // Immediate operand, stored from _imm8; unlike the other operand fields it is not declared const.
+
+ // Constructor: passes the shared arguments through to MediaOpBase and records the immediate value.
+ MediaOpImm(ExtMachInst _machInst,
+ const char *mnem, const char *_instMnem,
+ bool isMicro, bool isDelayed,
+ bool isFirst, bool isLast,
+ InstRegIndex _src1, uint8_t _imm8, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint8_t _ext,
+ OpClass __opClass) :
+ MediaOpBase(_machInst, mnem, _instMnem,
+ isMicro, isDelayed, isFirst, isLast,
+ _src1, _dest, _srcSize, _destSize, _ext,
+ __opClass),
+ imm8(_imm8)
+ {}
+
+ std::string generateDisassembly(Addr pc, // Returns the textual disassembly of this microop; defined in micromediaop.cc.
+ const SymbolTable *symtab) const;
+ };
+}
+
+#endif //__ARCH_X86_INSTS_MICROMEDIAOP_HH__
diff --git a/src/arch/x86/isa/decoder/decoder.isa b/src/arch/x86/isa/decoder/decoder.isa
index dcf6ce089..f757abef9 100644
--- a/src/arch/x86/isa/decoder/decoder.isa
+++ b/src/arch/x86/isa/decoder/decoder.isa
@@ -70,25 +70,7 @@ decode LEGACY_LOCK default Unknown::unknown()
//2 byte opcodes
##include "two_byte_opcodes.isa"
//3 byte opcodes
- 0x3: decode OPCODE_PREFIXA {
- 0xF0: decode OPCODE_PREFIXB {
- //We don't handle these properly in the predecoder yet, so
- //there's no reason to implement them for now.
- 0x38: decode OPCODE_OP {
- default: FailUnimpl::sseThreeEight();
- }
- 0x3A: decode OPCODE_OP {
- default: FailUnimpl::sseThreeA();
- }
- 0xF0: decode OPCODE_OP {
- default: FailUnimpl::threednow();
- }
- default: M5InternalError::error(
- {{"Unexpected second opcode byte in three byte opcode!"}});
- }
- default: M5InternalError::error(
- {{"Unexpected first opcode byte in three byte opcode!"}});
- }
+ ##include "three_byte_opcodes.isa"
}
//Lock prefix
##include "locked_opcodes.isa"
diff --git a/src/arch/x86/isa/decoder/locked_opcodes.isa b/src/arch/x86/isa/decoder/locked_opcodes.isa
index 14d5e58a3..e776d1320 100644
--- a/src/arch/x86/isa/decoder/locked_opcodes.isa
+++ b/src/arch/x86/isa/decoder/locked_opcodes.isa
@@ -139,6 +139,10 @@
}
0x2: decode OPCODE_PREFIXA {
0x0F: decode OPCODE_OP_TOP5 {
+ 0x04: decode OPCODE_OP_BOTTOM3 {
+ 0x0: WarnUnimpl::mov_Rd_CR8D();
+ 0x2: WarnUnimpl::mov_CR8D_Rd();
+ }
0x15: decode OPCODE_OP_BOTTOM3 {
0x3: BTS_LOCKED(Mv,Gv);
}
diff --git a/src/arch/x86/isa/decoder/three_byte_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_opcodes.isa
new file mode 100644
index 000000000..7587e3dad
--- /dev/null
+++ b/src/arch/x86/isa/decoder/three_byte_opcodes.isa
@@ -0,0 +1,151 @@
+// Copyright (c) 2008 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+// Decode the three byte opcodes
+//
+0x3: decode OPCODE_PREFIXA {
+ 0x0F: decode OPCODE_PREFIXB {
+ 0x38: decode LEGACY_OP { // Three byte opcodes whose second opcode byte is 0x38, split first on the LEGACY_OP prefix field.
+ format WarnUnimpl { // Entries without an explicit Inst:: prefix use the WarnUnimpl format.
+ 1: decode OPCODE_OP { // LEGACY_OP == 1: forms named with Vdq/Wdq operands (presumably the 0x66 prefixed XMM encodings -- confirm).
+ 0x00: pshufb_Vdq_Wdq();
+ 0x01: phaddw_Vdq_Wdq();
+ 0x02: phaddd_Vdq_Wdq();
+ 0x03: phaddsw_Vdq_Wdq();
+ 0x04: pmaddubsw_Vdq_Wdq();
+ 0x05: phsubw_Vdq_Wdq();
+ 0x06: phsubd_Vdq_Wdq();
+ 0x07: phsubsw_Vdq_Wdq();
+ 0x08: psignb_Vdq_Wdq();
+ 0x09: psignw_Vdq_Wdq();
+ 0x0A: psignd_Vdq_Wdq();
+ 0x0B: pmulhrsw_Vdq_Wdq();
+ 0x10: pblendvb_Vdq_Wdq();
+ 0x14: blendvps_Vdq_Wdq();
+ 0x15: blendvpd_Vdq_Wdq();
+ 0x17: ptest_Vdq_Wdq();
+ 0x1C: pabsb_Vdq_Wdq();
+ 0x1D: pabsw_Vdq_Wdq();
+ 0x1E: pabsd_Vdq_Wdq();
+ 0x20: pmovsxbw_Vdq_Udq_or_Mq();
+ 0x21: pmovsxbd_Vdq_Udq_or_Md();
+ 0x22: pmovsxbq_Vdq_Udq_or_Mw();
+ 0x23: pmovsxwd_Vdq_Udq_or_Mq();
+ 0x24: pmovsxwq_Vdq_Udq_or_Md();
+ 0x25: pmovsxdq_Vdq_Udq_or_Mq();
+ 0x28: pmuldq_Vdq_Wdq();
+ 0x29: pcmpeqq_Vdq_Wdq();
+ 0x2A: movntdqa_Vdq_Mdq();
+ 0x2B: packusdw_Vdq_Wdq();
+ 0x30: pmovzxbw_Vdq_Udq_or_Mq();
+ 0x31: pmovzxbd_Vdq_Udq_or_Md();
+ 0x32: pmovzxbq_Vdq_Udq_or_Mw();
+ 0x33: pmovzxwd_Vdq_Udq_or_Mq();
+ 0x34: pmovzxwq_Vdq_Udq_or_Md();
+ 0x35: pmovzxdq_Vdq_Udq_or_Mq();
+ 0x37: pcmpgtq_Vdq_Wdq();
+ 0x38: pminsb_Vdq_Wdq();
+ 0x39: pminsd_Vdq_Wdq();
+ 0x3A: pminuw_Vdq_Wdq();
+ 0x3B: pminud_Vdq_Wdq();
+ 0x3C: pmaxsb_Vdq_Wdq();
+ 0x3D: pmaxsd_Vdq_Wdq();
+ 0x3E: pmaxuw_Vdq_Wdq();
+ 0x3F: pmaxud_Vdq_Wdq();
+ 0x40: pmulld_Vdq_Wdq();
+ 0x41: phminposuw_Vdq_Wdq();
+ default: Inst::UD2(); // Any other opcode byte in this group decodes to UD2.
+ }
+ default: decode LEGACY_REPNE { // LEGACY_OP != 1: split again on the LEGACY_REPNE prefix field.
+ 1: decode OPCODE_OP { // LEGACY_REPNE == 1: only the two crc32 encodings are recognized.
+ 0xF0: crc32_Gd_Eb();
+ 0xF1: crc32_Gd_Ev();
+ default: Inst::UD2();
+ }
+ default: decode OPCODE_OP { // Neither prefix field matched: forms named with Pq/Qq operands.
+ 0x00: pshufb_Pq_Qq();
+ 0x01: phaddw_Pq_Qq();
+ 0x02: phaddd_Pq_Qq();
+ 0x03: phaddsw_Pq_Qq();
+ 0x04: pmaddubsw_Pq_Qq();
+ 0x05: phsubw_Pq_Qq();
+ 0x06: phsubd_Pq_Qq();
+ 0x07: phsubsw_Pq_Qq();
+ 0x08: psignb_Pq_Qq();
+ 0x09: psignw_Pq_Qq();
+ 0x0A: psignd_Pq_Qq();
+ 0x0B: pmulhrsw_Pq_Qq();
+ 0x1C: pabsb_Pq_Qq();
+ 0x1D: pabsw_Pq_Qq();
+ 0x1E: pabsd_Pq_Qq();
+ default: Inst::UD2();
+ }
+ }
+ }
+ }
+ 0x3A: decode LEGACY_OP { // Three byte opcodes whose second opcode byte is 0x3A; every entry names an Ib (immediate byte) operand.
+ format WarnUnimpl { // Entries without an explicit Inst:: prefix use the WarnUnimpl format.
+ 1: decode OPCODE_OP { // LEGACY_OP == 1: forms named with Vdq/Wdq style operands.
+ 0x08: roundps_Vdq_Wdq_Ib();
+ 0x09: roundpd_Vdq_Wdq_Ib();
+ 0x0A: roundss_Vss_Wss_Ib();
+ 0x0B: roundsd_Vsd_Wsd_Ib();
+ 0x0C: blendps_Vdq_Wdq_Ib();
+ 0x0D: blendpd_Vdq_Wdq_Ib();
+ 0x0E: pblendw_Vdq_Wdq_Ib();
+ 0x0F: palignr_Vdq_Wdq_Ib();
+ 0x14: pextrb_Rd_or_Mb_Vdq_Ib();
+ 0x15: decode MODRM_MOD {
+ 0x3: Inst::PEXTRW(Rd,Vdq,Ib);
+ default: pextrw_Mw_Vdq_Ib();
+ }
+ 0x16: pextrd_pextrq_Ed_or_Eq_Vdq_Ib();
+ 0x17: extractps_Ed_Vdq_Ib();
+ 0x20: pinsrb_Vdq_Rd_or_Rq_or_Mb_Ib();
+ 0x21: insertps_Vdq_Udq_or_Md_Ib();
+ 0x22: pinsrd_pinsrq_Vdq_Ed_or_Eq_Ib();
+ 0x40: dpps_Vdq_Wdq_Ib();
+ 0x41: dppd_Vdq_Wdq_Ib();
+ // Opcode 0x42 is MPSADBW. The string compare instructions
+ // live at 0x60-0x63, not 0x42/0x43.
+ // NOTE(review): confirm the predecoder's immediate size table
+ // gives 0x60-0x63 a one byte immediate.
+ 0x42: mpsadbw_Vdq_Wdq_Ib();
+ 0x60: pcmpestrm_Vdq_Wdq_Ib();
+ 0x61: pcmpestri_Vdq_Wdq_Ib();
+ 0x62: pcmpistrm_Vdq_Wdq_Ib();
+ 0x63: pcmpistri_Vdq_Wdq_Ib();
+ default: Inst::UD2();
+ }
+ default: decode OPCODE_OP { // LEGACY_OP != 1: only the Pq/Qq form of palignr is recognized.
+ 0x0F: palignr_Pq_Qq_Ib();
+ default: Inst::UD2();
+ }
+ }
+ }
+ default: M5InternalError::error(
+ {{"Unexpected second opcode byte in three byte opcode!"}});
+ }
+ default: M5InternalError::error(
+ {{"Unexpected first opcode byte in three byte opcode!"}});
+}
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index 55056da81..c23eeccab 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -101,43 +101,31 @@
default: Inst::UD2();
}
//0x01: group7(); // Ugly, ugly, ugly...
- 0x01: decode MODRM_MOD {
- 0x3: decode MODRM_REG {
- 0x0: decode MODRM_RM {
+ 0x01: decode MODRM_REG {
+ 0x0: decode MODRM_MOD {
+ 0x3: decode MODRM_RM {
0x1: vmcall();
0x2: vmlaunch();
0x3: vmresume();
0x4: vmxoff();
default: Inst::UD2();
}
- 0x1: decode MODRM_RM {
+ default: sgdt_Ms();
+ }
+ 0x1: decode MODRM_MOD {
+ 0x3: decode MODRM_RM {
0x0: monitor();
0x1: mwait();
default: Inst::UD2();
}
+ default: sidt_Ms();
+ }
+ 0x2: decode MODRM_MOD {
0x3: decode MODRM_RM {
- 0x0: vmrun();
- 0x1: vmmcall();
- 0x2: vmload();
- 0x3: vmsave();
- 0x4: stgi();
- 0x5: clgi();
- 0x6: skinit();
- 0x7: invlpga();
- }
- 0x4: Inst::SMSW(Rv);
- 0x6: Inst::LMSW(Rv);
- 0x7: decode MODRM_RM {
- 0x0: Inst::SWAPGS();
- 0x1: rdtscp();
- default: Inst::UD2();
+ 0x0: xgetbv();
+ 0x1: xsetbv();
}
- default: Inst::UD2();
- }
- default: decode MODRM_REG {
- 0x0: sgdt_Ms();
- 0x1: sidt_Ms();
- 0x2: decode MODE_SUBMODE {
+ default: decode MODE_SUBMODE {
0x0: Inst::LGDT(M);
default: decode OPSIZE {
// 16 bit operand sizes are special, but only
@@ -146,7 +134,19 @@
default: Inst::LGDT(M);
}
}
- 0x3: decode MODE_SUBMODE {
+ }
+ 0x3: decode MODRM_MOD {
+ 0x3: decode MODRM_RM {
+ 0x0: vmrun();
+ 0x1: vmmcall();
+ 0x2: vmload();
+ 0x3: vmsave();
+ 0x4: stgi();
+ 0x5: clgi();
+ 0x6: skinit();
+ 0x7: invlpga();
+ }
+ default: decode MODE_SUBMODE {
0x0: Inst::LIDT(M);
default: decode OPSIZE {
// 16 bit operand sizes are special, but only
@@ -155,10 +155,19 @@
default: Inst::LIDT(M);
}
}
- 0x4: Inst::SMSW(Mw);
- 0x6: Inst::LMSW(Mw);
- 0x7: Inst::INVLPG(M);
- default: Inst::UD2();
+ }
+ 0x4: decode MODRM_MOD {
+ 0x3: Inst::SMSW(Rv);
+ default: Inst::SMSW(Mw);
+ }
+ 0x6: Inst::LMSW(Ew);
+ 0x7: decode MODRM_MOD {
+ 0x3: decode MODRM_RM {
+ 0x0: Inst::SWAPGS();
+ 0x1: rdtscp();
+ default: Inst::UD2();
+ }
+ default: Inst::INVLPG(M);
}
}
0x02: lar_Gv_Ew();
@@ -283,137 +292,152 @@
0x4: Inst::UD2();
0x5: Inst::PREFETCH(Mb);
0x6: FailUnimpl::femms();
- 0x7: FailUnimpl::threednow();
- }
- 0x02: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: movups_Vo_Wo();
- 0x1: movups_Wo_Vo();
- 0x2: decode MODRM_MOD {
- 0x3: movhlps_Vq_VRq();
- default: movlps_Vq_Mq();
- }
- 0x3: movlps_Mq_Vq();
- 0x4: unpcklps();
- 0x5: unpckhps();
- 0x6: decode MODRM_MOD {
- 0x3: movlhps_Vq_VRq();
- default: movhps_Vq_Mq();
- }
- 0x7: movhps_Mq_Vq();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x0: movss_Vd_Wd();
- 0x1: movss_Wd_Vd();
- 0x2: movsldup_Vo_Wo();
- 0x6: movshdup_Vo_Wo();
- default: Inst::UD2();
- }
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: movupd_Vo_Wo();
- 0x1: movupd_Wo_Vo();
- 0x2: Inst::MOVLPD(Vq,Mq);
- 0x3: Inst::MOVLPD(Mq,Vq);
- 0x4: unpcklpd_Vo_Wq();
- 0x5: unpckhpd_Vo_Wo();
- 0x6: movhpd_Vq_Mq();
- 0x7: movhpd_Mq_Vq();
- }
- // repne (0xF2)
- 0x8: decode OPCODE_OP_BOTTOM3 {
- 0x0: Inst::MOVSD(Vq,Wq);
- 0x1: Inst::MOVSD(Wq,Vq);
- 0x2: movddup_Vo_Wq();
- default: Inst::UD2();
- }
- default: Inst::UD2();
- }
- 0x03: decode OPCODE_OP_BOTTOM3 {
- //group17();
- 0x0: decode MODRM_REG {
- 0x0: prefetch_nta();
- 0x1: Inst::PREFETCH_T0(Mb);
- 0x2: prefetch_t1();
- 0x3: prefetch_t2();
- default: Inst::HINT_NOP();
- }
- 0x1: Inst::HINT_NOP();
- 0x2: Inst::HINT_NOP();
- 0x3: Inst::HINT_NOP();
- 0x4: Inst::HINT_NOP();
- 0x5: Inst::HINT_NOP();
- 0x6: Inst::HINT_NOP();
- 0x7: Inst::HINT_NOP();
- }
- 0x04: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: Inst::MOV(Rd,Cd);
- 0x1: Inst::MOV(Rd,Dd);
- 0x2: Inst::MOV(Cd,Rd);
- 0x3: Inst::MOV(Dd,Rd);
- 0x4: mov_Rd_Td();
- 0x6: mov_Td_Rd();
+ 0x7: decode IMMEDIATE {
+ 0x0C: pi2fw_Pq_Qq();
+ 0x0D: pi2fd_Pq_Qq();
+ 0x1C: pf2iw_Pq_Qq();
+ 0x1D: pf2id_Pq_Qq();
+ 0x8A: pfnacc_Pq_Qq();
+ 0x8E: pfpnacc_Pq_Qq();
+ 0x90: pfcmpge_Pq_Qq();
+ 0x94: pfmin_Pq_Qq();
+ 0x96: pfrcp_Pq_Qq();
+ 0x97: pfrsqrt_Pq_Qq();
+ 0x9A: Inst::PFSUB(Pq,Qq);
+ 0x9E: pfadd_Pq_Qq();
+ 0xA0: pfcmpgt_Pq_Qq();
+ 0xA4: pfmax_Pq_Qq();
+ 0xA6: pfrcpit1_Pq_Qq();
+ 0xA7: pfrsqit1_Pq_Qq();
+ 0xAA: Inst::PFSUBR(Pq,Qq);
+ 0xAE: pfacc_Pq_Qq();
+ 0xB0: pfcmpeq_Pq_Qq();
+ 0xB4: Inst::PFMUL(Pq,Qq);
+ 0xB6: pfrcpit2_Pq_Qq();
+ 0xB7: Inst::PMULHRW(Pq,Qq);
+ 0xBB: pswapd_Pq_Qq();
+ 0xBF: pavgusb_Pq_Qq();
default: Inst::UD2();
}
- // lock prefix (0xF0)
- 0x2: decode OPCODE_OP_BOTTOM3 {
- 0x0: mov_Rd_CR8D();
- 0x2: mov_CR8D_Rd();
- }
- default: Inst::UD2();
}
- 0x05: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- //These moves should really use size o (octword), but
- //because they are split in two, they use q (quadword).
- 0x0: Inst::MOVAPS(Vq,Wq);
- 0x1: Inst::MOVAPS(Wq,Vq);
- 0x2: decode MODRM_MOD {
- 0x3: cvtpi2pS_Vq_Pq();
- default: cvtpi2ps_Vq_Mq();
+ format Inst{
+ 0x02: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: MOVUPS(Vo,Wo);
+ 0x1: MOVUPS(Wo,Vo);
+ 0x2: decode MODRM_MOD {
+ 0x3: MOVHLPS(Vps,VRq);
+ default: MOVLPS(Vps,Mq);
+ }
+ 0x3: MOVLPS(Mq,Vps);
+ 0x4: UNPCKLPS(Vps,Wq);
+ 0x5: UNPCKHPS(Vps,Wq);
+ 0x6: decode MODRM_MOD {
+ 0x3: MOVLHPS(Vps,VRq);
+ default: MOVHPS(Vps,Mq);
+ }
+ 0x7: MOVHPS(Mq,Vq);
}
- 0x3: movntps_Mo_Vo();
- 0x4: cvttps2pi_Pq_Wq();
- 0x5: cvtpS2pi_Pq_Wq();
- 0x6: ucomiss_Vd_Wd();
- 0x7: comiss_Vd_Wd();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x2: cvtsi2ss_Vd_Ed();
- 0x4: cvttss2si_Gd_Wd();
- 0x5: cvtss2si_Gd_Wd();
- default: Inst::UD2();
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x0: MOVSS(Vd,Wd);
+ 0x1: MOVSS(Wd,Vd);
+ 0x2: WarnUnimpl::movsldup_Vo_Wo();
+ 0x6: WarnUnimpl::movshdup_Vo_Wo();
+ default: UD2();
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: MOVUPD(Vo,Wo);
+ 0x1: MOVUPD(Wo,Vo);
+ 0x2: MOVLPD(Vq,Mq);
+ 0x3: MOVLPD(Mq,Vq);
+ 0x4: UNPCKLPD(Vo,Wq);
+ 0x5: UNPCKHPD(Vo,Wo);
+ 0x6: MOVHPD(Vq,Mq);
+ 0x7: MOVHPD(Mq,Vq);
+ }
+ // repne (0xF2)
+ 0x8: decode OPCODE_OP_BOTTOM3 {
+ 0x0: MOVSD(Vq,Wq);
+ 0x1: MOVSD(Wq,Vq);
+ 0x2: MOVDDUP(Vo,Wq);
+ default: UD2();
+ }
+ default: UD2();
}
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: movapd_Vo_Wo();
- 0x1: movapd_Wo_Vo();
- 0x2: decode MODRM_MOD {
- 0x3: cvtpi2pd_Vo_Pq();
- default: cvtpi2pd_Vo_Mq();
+ 0x03: decode OPCODE_OP_BOTTOM3 {
+ //group16();
+ 0x0: decode MODRM_REG {
+ 0x0: WarnUnimpl::prefetch_nta();
+ 0x1: PREFETCH_T0(Mb);
+ 0x2: WarnUnimpl::prefetch_t1();
+ 0x3: WarnUnimpl::prefetch_t2();
+ default: HINT_NOP();
+ }
+ 0x1: HINT_NOP();
+ 0x2: HINT_NOP();
+ 0x3: HINT_NOP();
+ 0x4: HINT_NOP();
+ 0x5: HINT_NOP();
+ 0x6: HINT_NOP();
+ 0x7: HINT_NOP();
+ }
+ 0x04: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: MOV(Rd,Cd);
+ 0x1: MOV(Rd,Dd);
+ 0x2: MOV(Cd,Rd);
+ 0x3: MOV(Dd,Rd);
+ default: UD2();
}
- 0x3: movntpd_Mo_Vo();
- 0x4: cvttpd2pi_Pq_Wo();
- 0x5: cvtpd2pi_Pq_Wo();
- 0x6: Inst::UCOMISD(Vq,Wq);
- 0x7: comisd_Vq_Wq();
+ default: UD2();
}
- // repne (0xF2)
- 0x8: decode OPCODE_OP_BOTTOM3 {
- // The size of the V operand should be q, not dp
- 0x2: Inst::CVTSI2SD(Vdp,Edp);
- // The size of the W operand should be q, not dp
- 0x4: Inst::CVTTSD2SI(Gdp,Wdp);
- 0x5: cvtsd2si_Gd_Wq();
- default: Inst::UD2();
+ 0x05: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ //These moves should really use size o (octword), but
+ //because they are split in two, they use q (quadword).
+ 0x0: MOVAPS(Vq,Wq);
+ 0x1: MOVAPS(Wq,Vq);
+ 0x2: CVTPI2PS(Vq,Qq);
+ 0x3: WarnUnimpl::movntps_Mo_Vo();
+ 0x4: CVTTPS2PI(Pq,Wq);
+ 0x5: CVTPS2PI(Pq,Wq);
+ 0x6: UCOMISS(Vd,Wd);
+ 0x7: COMISS(Vd,Wd);
+ }
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x2: CVTSI2SS(Vd,Ed);
+ 0x4: CVTTSS2SI(Gd,Wd);
+ 0x5: CVTSS2SI(Gd,Wd);
+ default: UD2();
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: MOVAPD(Vo,Wo);
+ 0x1: MOVAPD(Wo,Vo);
+ 0x2: CVTPI2PD(Vo,Qq);
+ 0x3: WarnUnimpl::movntpd_Mo_Vo();
+ 0x4: CVTTPD2PI(Pq,Wo);
+ 0x5: CVTPD2PI(Pq,Wo);
+ 0x6: UCOMISD(Vq,Wq);
+ 0x7: COMISD(Vq,Wq);
+ }
+ // repne (0xF2)
+ 0x8: decode OPCODE_OP_BOTTOM3 {
+ // The size of the V operand should be q, not dp
+ 0x2: CVTSI2SD(Vdp,Edp);
+ // The size of the W operand should be q, not dp
+ 0x4: CVTTSD2SI(Gdp,Wdp);
+ 0x5: CVTSD2SI(Gd,Wq);
+ default: UD2();
+ }
+ default: UD2();
}
- default: Inst::UD2();
}
0x06: decode OPCODE_OP_BOTTOM3 {
0x0: Inst::WRMSR();
@@ -430,14 +454,13 @@
0x7: getsec();
}
0x07: decode OPCODE_OP_BOTTOM3 {
- 0x0: three_byte_opcode();
- 0x1: three_byte_opcode();
- 0x2: three_byte_opcode();
- 0x3: three_byte_opcode();
- 0x4: three_byte_opcode();
- 0x5: three_byte_opcode();
- 0x6: three_byte_opcode();
- 0x7: three_byte_opcode();
+ 0x0: M5InternalError::error(
+ {{"Three byte opcode shouldn't be handled by "
+ "two_byte_opcodes.isa!"}});
+ 0x2: M5InternalError::error(
+ {{"Three byte opcode shouldn't be handled by "
+ "two_byte_opcodes.isa!"}});
+ default: UD2();
}
format Inst {
0x08: decode OPCODE_OP_BOTTOM3 {
@@ -460,39 +483,35 @@
0x6: CMOVLE(Gv,Ev);
0x7: CMOVNLE(Gv,Ev);
}
- }
- 0x0A: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: movmskps_Gd_VRo();
- 0x1: sqrtps_Vo_Wo();
- 0x2: rqsrtps_Vo_Wo();
- 0x3: rcpps_Vo_Wo();
- 0x4: andps_Vo_Wo();
- 0x5: andnps_Vo_Wo();
- 0x6: orps_Vo_Wo();
- 0x7: xorps_Vo_Wo();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x1: sqrtss_Vd_Wd();
- 0x2: rsqrtss_Vd_Wd();
- 0x3: rcpss_Vd_Wd();
- default: Inst::UD2();
- }
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: movmskpd_Gd_VRo();
- 0x1: sqrtpd_Vo_Wo();
- 0x4: andpd_Vo_Wo();
- 0x5: andnpd_Vo_Wo();
- 0x6: orpd_Vo_Wo();
- //This really should be type o, but it works on q sized
- //chunks at a time.
- 0x7: Inst::XORPD(Vq,Wq);
- default: Inst::UD2();
- }
- format Inst {
+ 0x0A: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: MOVMSKPS(Gd,VRo);
+ 0x1: SQRTPS(Vo,Wo);
+ 0x2: WarnUnimpl::rqsrtps_Vo_Wo();
+ 0x3: WarnUnimpl::rcpps_Vo_Wo();
+ 0x4: ANDPS(Vo,Wo);
+ 0x5: ANDNPS(Vo,Wo);
+ 0x6: ORPS(Vo,Wo);
+ 0x7: XORPS(Vo,Wo);
+ }
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x1: SQRTSS(Vd,Wd);
+ 0x2: WarnUnimpl::rsqrtss_Vd_Wd();
+ 0x3: WarnUnimpl::rcpss_Vd_Wd();
+ default: UD2();
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: MOVMSKPD(Gd,VRo);
+ 0x1: SQRTPD(Vo,Wo);
+ 0x4: ANDPD(Vo,Wo);
+ 0x5: ANDNPD(Vo,Wo);
+ 0x6: ORPD(Vo,Wo);
+ 0x7: XORPD(Vo,Wo);
+ default: UD2();
+ }
// repne (0xF2)
0x8: decode OPCODE_OP_BOTTOM3 {
0x1: SQRTSD(Vq,Wq);
@@ -500,273 +519,208 @@
}
default: UD2();
}
- }
- 0x0B: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: addps_Vo_Wo();
- 0x1: mulps_Vo_Wo();
- 0x2: cvtps2pd_Vo_Wq();
- 0x3: cvtdq2ps_Vo_Wo();
- 0x4: subps_Vo_Wo();
- 0x5: minps_Vo_Wo();
- 0x6: divps_Vo_Wo();
- 0x7: maxps_Vo_Wo();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x0: addss_Vd_Wd();
- 0x1: mulss_Vd_Wd();
- 0x2: cvtss2sd_Vq_Wd();
- 0x3: cvttps2dq_Vo_Wo();
- 0x4: subss_Vd_Wd();
- 0x5: minss_Vd_Wd();
- 0x6: divss_Vd_Wd();
- 0x7: maxss_Vd_Wd();
- }
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: addpd_Vo_Wo();
- 0x1: mulpd_Vo_Wo();
- 0x2: cvtpd2ps_Vo_Wo();
- 0x3: cvtps2dq_Vo_Wo();
- 0x4: subpd_Vo_Wo();
- 0x5: minpd_Vo_Wo();
- 0x6: divpd_Vo_Wo();
- 0x7: maxpd_Vo_Wo();
- }
- // repne (0xF2)
- 0x8: decode OPCODE_OP_BOTTOM3 {
- 0x0: Inst::ADDSD(Vq,Wq);
- 0x1: Inst::MULSD(Vq,Wq);
- 0x2: cvtsd2ss_Vd_Wq();
- 0x4: Inst::SUBSD(Vq,Wq);
- 0x5: minsd_Vq_Wq();
- 0x6: Inst::DIVSD(Vq,Wq);
- 0x7: maxsd_Vq_Wq();
- default: Inst::UD2();
- }
- default: Inst::UD2();
- }
- 0x0C: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: punpcklbw_Pq_Qd();
- 0x1: punpcklwd_Pq_Qd();
- 0x2: punpckldq_Pq_Qd();
- 0x3: packsswb_Pq_Qq();
- 0x4: pcmpgtb_Pq_Qq();
- 0x5: pcmpgtw_Pq_Qq();
- 0x6: pcmpgtd_Pq_Qq();
- 0x7: packuswb_Pq_Qq();
- }
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: punpcklbw_Vo_Wq();
- 0x1: punpcklwd_Vo_Wq();
- 0x2: punpckldq_Vo_Wq();
- 0x3: packsswb_Vo_Wo();
- 0x4: pcmpgtb_Vo_Wo();
- 0x5: pcmpgtw_Vo_Wo();
- 0x6: pcmpgtd_Vo_Wo();
- 0x7: packuswb_Vo_Wo();
- }
- default: Inst::UD2();
- }
- 0x0D: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: punpckhbw_Pq_Qq();
- 0x1: punpckhwd_Pq_Qq();
- 0x2: punpckhdq_Pq_Qq();
- 0x3: packssdw_Pq_Qq();
- 0x6: movd_Pq_Ed();
- 0x7: movq_Pq_Qq();
- default: Inst::UD2();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x7: movdqu_Vo_Wo();
- default: Inst::UD2();
+ 0x0B: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: ADDPS(Vo,Wo);
+ 0x1: MULPS(Vo,Wo);
+ 0x2: CVTPS2PD(Vo,Wq);
+ 0x3: CVTDQ2PS(Vo,Wo);
+ 0x4: SUBPS(Vo,Wo);
+ 0x5: MINPS(Vo,Wo);
+ 0x6: DIVPS(Vo,Wo);
+ 0x7: MAXPS(Vo,Wo);
+ }
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x0: ADDSS(Vd,Wd);
+ 0x1: MULSS(Vd,Wd);
+ 0x2: CVTSS2SD(Vq,Wd);
+ 0x3: CVTTPS2DQ(Vo,Wo);
+ 0x4: SUBSS(Vd,Wd);
+ 0x5: MINSS(Vd,Wd);
+ 0x6: DIVSS(Vd,Wd);
+ 0x7: MAXSS(Vd,Wd);
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: ADDPD(Vo,Wo);
+ 0x1: MULPD(Vo,Wo);
+ 0x2: CVTPD2PS(Vo,Wo);
+ 0x3: CVTPS2DQ(Vo,Wo);
+ 0x4: SUBPD(Vo,Wo);
+ 0x5: MINPD(Vo,Wo);
+ 0x6: DIVPD(Vo,Wo);
+ 0x7: MAXPD(Vo,Wo);
+ }
+ // repne (0xF2)
+ 0x8: decode OPCODE_OP_BOTTOM3 {
+ 0x0: ADDSD(Vq,Wq);
+ 0x1: MULSD(Vq,Wq);
+ 0x2: CVTSD2SS(Vd,Wq);
+ 0x4: SUBSD(Vq,Wq);
+ 0x5: MINSD(Vq,Wq);
+ 0x6: DIVSD(Vq,Wq);
+ 0x7: MAXSD(Vq,Wq);
+ default: UD2();
+ }
+ default: UD2();
}
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: punpckhbw_Vo_Wo();
- 0x1: punpckhwd_Vo_Wo();
- 0x2: punpckhdq_Vo_Wo();
- 0x3: packssdw_Vo_Wo();
- 0x4: punpcklqdq_Vo_Wq();
- 0x5: punpcklqdq_Vo_Wq();
- 0x6: movd_Vo_Ed();
- 0x7: movdqa_Vo_Wo();
+ 0x0C: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PUNPCKLBW(Pq,Qd);
+ 0x1: PUNPCKLWD(Pq,Qd);
+ 0x2: PUNPCKLDQ(Pq,Qd);
+ 0x3: PACKSSWB(Pq,Qq);
+ 0x4: PCMPGTB(Pq,Qq);
+ 0x5: PCMPGTW(Pq,Qq);
+ 0x6: PCMPGTD(Pq,Qq);
+ 0x7: PACKUSWB(Pq,Qq);
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PUNPCKLBW(Vo,Wq);
+ 0x1: PUNPCKLWD(Vo,Wq);
+ 0x2: PUNPCKLDQ(Vo,Wq);
+ 0x3: PACKSSWB(Vo,Wo);
+ 0x4: PCMPGTB(Vo,Wo);
+ 0x5: PCMPGTW(Vo,Wo);
+ 0x6: PCMPGTD(Vo,Wo);
+ 0x7: PACKUSWB(Vo,Wo);
+ }
+ default: UD2();
}
- default: Inst::UD2();
- }
- 0x0E: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: pshufw_Pq_Qq_Ib();
- //0x1: group13_pshimw();
- 0x1: decode MODRM_REG {
- 0x2: decode LEGACY_OP {
- 0x0: psrlw_PRq_Ib();
- 0x1: psrlw_VRo_Ib();
- }
- 0x4: decode LEGACY_OP {
- 0x0: psraw_PRq_Ib();
- 0x1: psraw_VRo_Ib();
- }
- 0x6: decode LEGACY_OP {
- 0x0: psllw_PRq_Ib();
- 0x1: psllw_VRo_Ib();
- }
- default: Inst::UD2();
+ 0x0D: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PUNPCKHBW(Pq,Qq);
+ 0x1: PUNPCKHWD(Pq,Qq);
+ 0x2: PUNPCKHDQ(Pq,Qq);
+ 0x3: PACKSSDW(Pq,Qq);
+ 0x6: MOVD(Pq,Edp);
+ 0x7: MOVQ(Pq,Qq);
+ default: UD2();
}
- //0x2: group14_pshimd();
- 0x2: decode MODRM_REG {
- 0x2: decode LEGACY_OP {
- 0x0: psrld_PRq_Ib();
- 0x1: psrld_VRo_Ib();
- }
- 0x4: decode LEGACY_OP {
- 0x0: psrad_PRq_Ib();
- 0x1: psrad_VRo_Ib();
- }
- 0x6: decode LEGACY_OP {
- 0x0: pslld_PRq_Ib();
- 0x1: pslld_VRo_Ib();
- }
- default: Inst::UD2();
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x7: WarnUnimpl::movdqu_Vo_Wo();
+ default: UD2();
}
- //0x3: group15_pshimq();
- 0x3: decode MODRM_REG {
- 0x2: decode LEGACY_OP {
- 0x0: psrlq_PRq_Ib();
- 0x1: psrlq_VRo_Ib();
- }
- 0x3: decode LEGACY_OP {
- 0x0: Inst::UD2();
- 0x1: psrldq_VRo_Ib();
- }
- 0x6: decode LEGACY_OP {
- 0x0: psllq_PRq_Ib();
- 0x1: psllq_VRo_Ib();
- }
- 0x7: decode LEGACY_OP {
- 0x0: Inst::UD2();
- 0x1: pslldq_VRo_Ib();
- }
- default: Inst::UD2();
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PUNPCKHBW(Vo,Wo);
+ 0x1: PUNPCKHWD(Vo,Wo);
+ 0x2: PUNPCKHDQ(Vo,Wo);
+ 0x3: PACKSSDW(Vo,Wo);
+ 0x4: PUNPCKLQDQ(Vo,Wq);
+ 0x5: PUNPCKHQDQ(Vo,Wq);
+ 0x6: WarnUnimpl::movd_Vo_Ed();
+ 0x7: WarnUnimpl::movdqa_Vo_Wo();
}
- 0x4: pcmpeqb_Pq_Qq();
- 0x5: pcmpeqw_Pq_Qq();
- 0x6: pcmpeqd_Pq_Qq();
- 0x7: emms();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x0: pshufhw_Vo_Wo_Ib();
- default: Inst::UD2();
+ default: UD2();
}
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: pshufd_Vo_Wo_Ib();
- //0x1: group13_pshimw();
- 0x1: decode MODRM_REG {
- 0x2: decode LEGACY_OP {
- 0x0: psrlw_PRq_Ib();
- 0x1: psrlw_VRo_Ib();
+ 0x0E: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSHUFW(Pq,Qq,Ib);
+ //0x1: group12_pshimw();
+ 0x1: decode MODRM_REG {
+ 0x2: PSRLW(PRq,Ib);
+ 0x4: PSRAW(PRq,Ib);
+ 0x6: PSLLW(PRq,Ib);
+ default: UD2();
}
- 0x4: decode LEGACY_OP {
- 0x0: psraw_PRq_Ib();
- 0x1: psraw_VRo_Ib();
+ //0x2: group13_pshimd();
+ 0x2: decode MODRM_REG {
+ 0x2: PSRLD(PRq,Ib);
+ 0x4: PSRAD(PRq,Ib);
+ 0x6: PSLLD(PRq,Ib);
+ default: UD2();
}
- 0x6: decode LEGACY_OP {
- 0x0: psllw_PRq_Ib();
- 0x1: psllw_VRo_Ib();
+ //0x3: group14_pshimq();
+ 0x3: decode MODRM_REG {
+ 0x2: PSRLQ(PRq,Ib);
+ 0x6: PSLLQ(PRq,Ib);
+ default: UD2();
}
- default: Inst::UD2();
+ 0x4: PCMPEQB(Pq,Qq);
+ 0x5: PCMPEQW(Pq,Qq);
+ 0x6: PCMPEQD(Pq,Qq);
+ 0x7: WarnUnimpl::emms();
}
- //0x2: group14_pshimd();
- 0x2: decode MODRM_REG {
- 0x2: decode LEGACY_OP {
- 0x0: psrld_PRq_Ib();
- 0x1: psrld_VRo_Ib();
- }
- 0x4: decode LEGACY_OP {
- 0x0: psrad_PRq_Ib();
- 0x1: psrad_VRo_Ib();
- }
- 0x6: decode LEGACY_OP {
- 0x0: pslld_PRq_Ib();
- 0x1: pslld_VRo_Ib();
- }
- default: Inst::UD2();
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSHUFHW(Vo,Wo,Ib);
+ default: UD2();
}
- //0x3: group15_pshimq();
- 0x3: decode MODRM_REG {
- 0x2: decode LEGACY_OP {
- 0x0: psrlq_PRq_Ib();
- 0x1: psrlq_VRo_Ib();
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSHUFD(Vo,Wo,Ib);
+ //0x1: group12_pshimw();
+ 0x1: decode MODRM_REG {
+ 0x2: PSRLW(VRo,Ib);
+ 0x4: PSRAW(VRo,Ib);
+ 0x6: PSLLW(VRo,Ib);
}
- 0x3: decode LEGACY_OP {
- 0x0: Inst::UD2();
- 0x1: psrldq_VRo_Ib();
+ //0x2: group13_pshimd();
+ 0x2: decode MODRM_REG {
+ 0x2: PSRLD(VRo,Ib);
+ 0x4: PSRAD(VRo,Ib);
+ 0x6: PSLLD(VRo,Ib);
+ default: UD2();
}
- 0x6: decode LEGACY_OP {
- 0x0: psllq_PRq_Ib();
- 0x1: psllq_VRo_Ib();
+ //0x3: group14_pshimq();
+ 0x3: decode MODRM_REG {
+ 0x2: PSRLQ(VRo,Ib);
+ 0x3: WarnUnimpl::psrldq_VRo_Ib();
+ 0x6: PSLLQ(VRo,Ib);
+ 0x7: WarnUnimpl::pslldq_VRo_Ib();
+ default: UD2();
}
- 0x7: decode LEGACY_OP {
- 0x0: Inst::UD2();
- 0x1: pslldq_VRo_Ib();
- }
- default: Inst::UD2();
+ 0x4: PCMPEQB(Vo,Wo);
+ 0x5: PCMPEQW(Vo,Wo);
+ 0x6: PCMPEQD(Vo,Wo);
+ default: UD2();
}
- 0x4: pcmpeqb_Vo_Wo();
- 0x5: pcmpeqw_Vo_Wo();
- 0x6: pcmpeqd_Vo_Wo();
- default: Inst::UD2();
- }
- // repne (0xF2)
- 0x8: decode OPCODE_OP_BOTTOM3 {
- 0x0: pshuflw_Vo_Wo_Ib();
- default: Inst::UD2();
- }
- default: Inst::UD2();
- }
- 0x0F: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: vmread_Ed_or_Eq_Gd_or_Gq();
- 0x1: vmwrite_Gd_or_Gq_Ed_or_Eq();
- 0x6: mov_Ed_Pd();
- 0x7: mov_Qq_Pq();
- default: Inst::UD2();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x6: movq_Vo_Mq_or_Vq_Vq();
- 0x7: movdqu_Wo_Vo();
- default: Inst::UD2();
- }
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x4: haddpd_Vo_Wo();
- 0x5: hsubpd_Vo_Wo();
- 0x6: movd_Ed_Vd();
- 0x7: movdqa_Wo_Vo();
- default: Inst::UD2();
+ // repne (0xF2)
+ 0x8: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSHUFLW(Vo,Wo,Ib);
+ default: UD2();
+ }
+ default: UD2();
}
- // repne (0xF2)
- 0x8: decode OPCODE_OP_BOTTOM3 {
- 0x4: haddps_Vo_Wo();
- 0x5: hsubps_Vo_Wo();
- default: Inst::UD2();
+ 0x0F: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: WarnUnimpl::vmread_Edp_Gdp();
+ 0x1: WarnUnimpl::vmwrite_Gdp_Edp();
+ 0x6: MOVD(Edp,Pdp);
+ 0x7: MOVQ(Qq,Pq);
+ default: UD2();
+ }
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x6: MOVQ(Vq,Wq);
+ 0x7: WarnUnimpl::movdqu_Wo_Vo();
+ default: UD2();
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x4: WarnUnimpl::haddpd_Vo_Wo();
+ 0x5: WarnUnimpl::hsubpd_Vo_Wo();
+ 0x6: WarnUnimpl::movd_Ed_Vd();
+ 0x7: WarnUnimpl::movdqa_Wo_Vo();
+ default: UD2();
+ }
+ // repne (0xF2)
+ 0x8: decode OPCODE_OP_BOTTOM3 {
+ 0x4: WarnUnimpl::haddps_Vo_Wo();
+ 0x5: WarnUnimpl::hsubps_Vo_Wo();
+ default: UD2();
+ }
+ default: UD2();
}
- default: Inst::UD2();
- }
- format Inst {
0x10: decode OPCODE_OP_BOTTOM3 {
0x0: JO(Jz);
0x1: JNO(Jz);
@@ -822,8 +776,7 @@
0x3: Inst::BT(Ev,Gv);
0x4: Inst::SHLD(Ev,Gv,Ib);
0x5: Inst::SHLD(Ev,Gv);
- 0x6: xbts_and_cmpxchg();
- 0x7: ibts_and_cmpxchg();
+ default: Inst::UD2();
}
0x15: decode OPCODE_OP_BOTTOM3 {
0x0: push_gs();
@@ -832,48 +785,51 @@
0x3: Inst::BTS(Ev,Gv);
0x4: Inst::SHRD(Ev,Gv,Ib);
0x5: Inst::SHRD(Ev,Gv);
- //0x6: group16();
- 0x6: decode MODRM_REG {
- 0x0: fxsave();
- 0x1: fxrstor();
- 0x2: ldmxcsr();
- 0x3: stmxcsr();
- 0x4: Inst::UD2();
- 0x5: decode MODRM_MOD {
- 0x3: BasicOperate::LFENCE(
+ //0x6: group15();
+ 0x6: decode MODRM_MOD {
+ 0x3: decode MODRM_REG {
+ 0x5: BasicOperate::LFENCE(
{{/*Nothing*/}}, IsReadBarrier);
- default: Inst::UD2();
- }
- 0x6: decode MODRM_MOD {
- 0x3: BasicOperate::MFENCE(
+ 0x6: BasicOperate::MFENCE(
{{/*Nothing*/}}, IsMemBarrier);
- default: Inst::UD2();
- }
- 0x7: decode MODRM_MOD {
- 0x3: BasicOperate::SFENCE(
+ 0x7: BasicOperate::SFENCE(
{{/*Nothing*/}}, IsWriteBarrier);
default: Inst::UD2();
}
+ default: decode MODRM_REG {
+ 0x0: fxsave();
+ 0x1: fxrstor();
+ 0x2: Inst::LDMXCSR(Md);
+ 0x3: Inst::STMXCSR(Md);
+ 0x4: xsave();
+ 0x5: xrstor();
+ 0x6: Inst::UD2();
+ 0x7: clflush();
+ }
}
0x7: Inst::IMUL(Gv,Ev);
}
- 0x16: decode OPCODE_OP_BOTTOM3 {
- 0x0: Inst::CMPXCHG(Eb,Gb);
- 0x1: Inst::CMPXCHG(Ev,Gv);
- 0x2: lss_Gz_Mp();
- 0x3: Inst::BTR(Ev,Gv);
- 0x4: lfs_Gz_Mp();
- 0x5: lgs_Gz_Mp();
- //The size of the second operand in these instructions should
- //really be "b" or "w", but it's set to v in order to have a
- //consistent register size. This shouldn't affect behavior.
- 0x6: Inst::MOVZX_B(Gv,Ev);
- 0x7: Inst::MOVZX_W(Gv,Ev);
- }
- 0x17: decode OPCODE_OP_BOTTOM3 {
- 0x0: jmpe_Jz(); // IA-64?
- format Inst {
- //0x1: group11_UD2();
+ format Inst {
+ 0x16: decode OPCODE_OP_BOTTOM3 {
+ 0x0: CMPXCHG(Eb,Gb);
+ 0x1: CMPXCHG(Ev,Gv);
+ 0x2: WarnUnimpl::lss_Gz_Mp();
+ 0x3: BTR(Ev,Gv);
+ 0x4: WarnUnimpl::lfs_Gz_Mp();
+ 0x5: WarnUnimpl::lgs_Gz_Mp();
+ //The size of the second operand in these instructions
+ //should really be "b" or "w", but it's set to v in order
+ //to have a consistent register size. This shouldn't
+ //affect behavior.
+ 0x6: MOVZX_B(Gv,Ev);
+ 0x7: MOVZX_W(Gv,Ev);
+ }
+ 0x17: decode OPCODE_OP_BOTTOM3 {
+ 0x0: decode LEGACY_REP {
+ 0x0: WarnUnimpl::jmpe_Jz();
+ 0x1: WarnUnimpl::popcnt_Gv_Ev();
+ }
+ //0x1: group10_UD2();
0x1: UD2();
//0x2: group8_Ev_Ib();
0x2: decode MODRM_REG {
@@ -884,244 +840,242 @@
default: UD2();
}
0x3: BTC(Ev,Gv);
- }
- 0x4: Inst::BSF(Gv,Ev);
- 0x5: Inst::BSR(Gv,Ev);
- //The size of the second operand in these instructions should
- //really be "b" or "w", but it's set to v in order to have a
- //consistent register size. This shouldn't affect behavior.
- 0x6: Inst::MOVSX_B(Gv,Ev);
- 0x7: Inst::MOVSX_W(Gv,Ev);
- }
- 0x18: decode OPCODE_OP_BOTTOM3 {
- 0x0: Inst::XADD(Eb,Gb);
- 0x1: Inst::XADD(Ev,Gv);
- //0x7: group9();
- 0x7: decode MODRM_REG {
- //Also CMPXCHG16B
- 0x1: Inst::CMPXCHG8B(Mdp);
- 0x6: decode LEGACY_OP {
- 0x1: vmclear_Mq();
- default: decode LEGACY_REP {
- 0x1: vmxon_Mq();
- 0x0: vmptrld_Mq();
+ 0x4: BSF(Gv,Ev);
+ 0x5: BSR(Gv,Ev);
+ //The size of the second operand in these instructions
+ //should really be "b" or "w", but it's set to v in order
+ //to have a consistent register size. This shouldn't
+ //affect behavior.
+ 0x6: MOVSX_B(Gv,Ev);
+ 0x7: MOVSX_W(Gv,Ev);
+ }
+ 0x18: decode OPCODE_OP_BOTTOM3 {
+ 0x0: XADD(Eb,Gb);
+ 0x1: XADD(Ev,Gv);
+ //0x7: group9();
+ 0x7: decode MODRM_REG {
+ //Also CMPXCHG16B
+ 0x1: CMPXCHG8B(Mdp);
+ 0x6: decode LEGACY_OP {
+ 0x1: WarnUnimpl::vmclear_Mq();
+ default: decode LEGACY_REP {
+ 0x1: WarnUnimpl::vmxon_Mq();
+ 0x0: WarnUnimpl::vmptrld_Mq();
+ }
}
+ 0x7: WarnUnimpl::vmptrst_Mq();
+ default: UD2();
+ }
+ default: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x2: CMPPS(Vo,Wo,Ib);
+ 0x3: MOVNTI(Mdp,Gdp);
+ 0x4: PINSRW(Pq,Ew,Ib);
+ 0x5: PEXTRW(Gd,PRq,Ib);
+ 0x6: SHUFPS(Vps,Wps,Ib);
+ }
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x2: CMPSS(Vd,Wd,Ib);
+ default: UD2();
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x2: CMPPD(Vo,Wo,Ib);
+ 0x4: PINSRW(Vdw,Ew,Ib);
+ 0x5: PEXTRW(Gd,VRdq,Ib);
+ 0x6: SHUFPD(Vpd,Wpd,Ib);
+ default: UD2();
+ }
+ // repne (0xF2)
+ 0x8: decode OPCODE_OP_BOTTOM3 {
+ 0x2: CMPSD(Vq,Wq,Ib);
+ default: UD2();
+ }
+ default: UD2();
}
- 0x7: vmptrst_Mq();
- default: Inst::UD2();
}
- default: decode LEGACY_DECODEVAL {
+ 0x19: decode OPSIZE {
+ 4: BSWAP_D(Bd);
+ 8: BSWAP_Q(Bq);
+ default: UD2();
+ }
+ 0x1A: decode LEGACY_DECODEVAL {
// no prefix
0x0: decode OPCODE_OP_BOTTOM3 {
- 0x2: cmpccps_Vo_Wo_Ib();
- 0x3: cvtdq2ps_Vo_Wo();
- 0x4: subps_Vo_Wo();
- 0x5: minps_Vo_Wo();
- 0x6: divps_Vo_Wo();
+ 0x1: PSRLW(Pq,Qq);
+ 0x2: PSRLD(Pq,Qq);
+ 0x3: PSRLQ(Pq,Qq);
+ 0x4: PADDQ(Pq,Qq);
+ 0x5: PMULLW(Pq,Qq);
+ 0x7: PMOVMSKB(Gd,PRq);
+ default: UD2();
}
// repe (0xF3)
0x4: decode OPCODE_OP_BOTTOM3 {
- 0x2: cmpccss_Vd_Wd_Ib();
- default: Inst::UD2();
+ 0x6: MOVQ2DQ(Vo,PRq);
+ default: UD2();
}
// operand size (0x66)
0x1: decode OPCODE_OP_BOTTOM3 {
- 0x2: cmpccpd_Vo_Wo_Ib();
- 0x4: subpd_Vo_Wo();
- 0x5: minpd_Vo_Wo();
- 0x6: divpd_Vo_Wo();
- default: Inst::UD2();
+ 0x0: WarnUnimpl::addsubpd_Vo_Wo();
+ 0x1: PSRLW(Vo,Wo);
+ 0x2: PSRLD(Vo,Wo);
+ 0x3: PSRLQ(Vo,Wo);
+ 0x4: PADDQ(Vo,Wo);
+ 0x5: PMULLW(Vo,Wo);
+ 0x6: MOVQ(Wq,Vq);
+ 0x7: PMOVMSKB(Gd,VRo);
}
// repne (0xF2)
0x8: decode OPCODE_OP_BOTTOM3 {
- 0x2: cmpccsd_Vq_Wq_Ib();
- default: Inst::UD2();
+ 0x0: WarnUnimpl::addsubps_Vo_Wo();
+ 0x6: MOVDQ2Q(Pq,VRq);
+ default: UD2();
}
- default: Inst::UD2();
- }
- }
- 0x19: decode OPSIZE {
- 4: Inst::BSWAP_D(Bd);
- 8: Inst::BSWAP_Q(Bq);
- default: Inst::UD2();
- }
- 0x1A: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x1: psrlw_Pq_Qq();
- 0x2: psrld_Pq_Qq();
- 0x3: psrlq_Pq_Qq();
- 0x4: paddq_Pq_Qq();
- 0x5: pmullw_Pq_Qq();
- 0x7: pmovmskb_Gd_PRq();
- default: Inst::UD2();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x6: movq2dq_Vo_PRq();
- default: Inst::UD2();
+ default: UD2();
}
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: addsubpd_Vo_Wo();
- 0x1: psrlw_Vo_Wo();
- 0x2: psrld_Vo_Wo();
- 0x3: psrlq_Vo_Wo();
- 0x4: paddq_Vo_Wo();
- 0x5: pmullw_Vo_Wo();
- 0x6: decode MODRM_MOD {
- 0x3: movq_Vq_Vq();
- default: movq_Mq_Vq();
+ 0x1B: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSUBUSB(Pq,Qq);
+ 0x1: PSUBUSW(Pq,Qq);
+ 0x2: PMINUB(Pq,Qq);
+ 0x3: PAND(Pq,Qq);
+ 0x4: PADDUSB(Pq,Qq);
+ 0x5: PADDUSW(Pq,Qq);
+ 0x6: PMAXUB(Pq,Qq);
+ 0x7: PANDN(Pq,Qq);
}
- 0x7: pmovmskb_Gd_VRo();
- }
- // repne (0xF2)
- 0x8: decode OPCODE_OP_BOTTOM3 {
- 0x0: addsubps_Vo_Wo();
- 0x6: movdq2q_Pq_VRq();
- default: Inst::UD2();
- }
- default: Inst::UD2();
- }
- 0x1B: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: psubusb_Pq_Qq();
- 0x1: psubusw_Pq_Qq();
- 0x2: pminub_Pq_Qq();
- 0x3: pand_Pq_Qq();
- 0x4: paddusb_Pq_Qq();
- 0x5: paddusw_Pq_Qq();
- 0x6: pmaxub_Pq_Qq();
- 0x7: pandn_Pq_Qq();
- }
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: psubusb_Vo_Wo();
- 0x1: psubusw_Vo_Wo();
- 0x2: pminub_Vo_Wo();
- 0x3: pand_Vo_Wo();
- 0x4: paddusb_Vo_Wo();
- 0x5: paddusw_Vo_Wo();
- 0x6: pmaxub_Vo_Wo();
- 0x7: pandn_Vo_Wo();
- }
- default: Inst::UD2();
- }
- 0x1C: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: pavgb_Pq_Qq();
- 0x1: psraw_Pq_Qq();
- 0x2: psrad_Pq_Qq();
- 0x3: pavgw_Pq_Qq();
- 0x4: pmulhuw_Pq_Qq();
- 0x5: pmulhw_Pq_Qq();
- 0x7: movntq_Mq_Pq();
- default: Inst::UD2();
- }
- // repe (0xF3)
- 0x4: decode OPCODE_OP_BOTTOM3 {
- 0x6: cvtdq2pd_Vo_Wq();
- default: Inst::UD2();
- }
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: pavgb_Vo_Wo();
- 0x1: psraw_Vo_Wo();
- 0x2: psrad_Vo_Wo();
- 0x3: pavgw_Vo_Wo();
- 0x4: pmulhuw_Vo_Wo();
- 0x5: pmulhw_Vo_Wo();
- 0x6: cvttpd2dq_Vo_Wo();
- 0x7: movntdq_Mo_Vo();
- }
- // repne (0xF2)
- 0x8: decode OPCODE_OP_BOTTOM3 {
- 0x6: cvtpd2dq_Vo_Wo();
- default: Inst::UD2();
- }
- default: Inst::UD2();
- }
- 0x1D: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: psubsb_Pq_Qq();
- 0x1: psubsw_Pq_Qq();
- 0x2: pminsw_Pq_Qq();
- 0x3: por_Pq_Qq();
- 0x4: paddsb_Pq_Qq();
- 0x5: paddsw_Pq_Qq();
- 0x6: pmaxsw_Pq_Qq();
- 0x7: pxor_Pq_Qq();
- }
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: psubsb_Vo_Wo();
- 0x1: psubsw_Vo_Wo();
- 0x2: pminsw_Vo_Wo();
- 0x3: por_Vo_Wo();
- 0x4: paddsb_Vo_Wo();
- 0x5: paddsw_Vo_Wo();
- 0x6: pmaxsw_Vo_Wo();
- 0x7: pxor_Vo_Wo();
- }
- default: Inst::UD2();
- }
- 0x1E: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x1: psllw_Pq_Qq();
- 0x2: pslld_Pq_Qq();
- 0x3: psllq_Pq_Qq();
- 0x4: pmuludq_Pq_Qq();
- 0x5: pmaddwd_Pq_Qq();
- 0x6: psadbw_Pq_Qq();
- 0x7: maskmovq_Pq_PRq();
- default: Inst::UD2();
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSUBUSB(Vo,Wo);
+ 0x1: PSUBUSW(Vo,Wo);
+ 0x2: PMINUB(Vo,Wo);
+ 0x3: PAND(Vo,Wo);
+ 0x4: PADDUSB(Vo,Wo);
+ 0x5: PADDUSW(Vo,Wo);
+ 0x6: PMAXUB(Vo,Wo);
+ 0x7: PANDN(Vo,Wo);
+ }
+ default: UD2();
}
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x1: psllw_Vo_Wo();
- 0x2: pslld_Vo_Wo();
- 0x3: psllq_Vo_Wo();
- 0x4: pmuludq_Vo_Wo();
- 0x5: pmaddwd_Vo_Wo();
- 0x6: psadbw_Vo_Wo();
- 0x7: maskmovdqu_Vo_VRo();
- default: Inst::UD2();
+ 0x1C: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PAVGB(Pq,Qq);
+ 0x1: PSRAW(Pq,Qq);
+ 0x2: PSRAD(Pq,Qq);
+ 0x3: PAVGW(Pq,Qq);
+ 0x4: PMULHUW(Pq,Qq);
+ 0x5: PMULHW(Pq,Qq);
+ 0x7: WarnUnimpl::movntq_Mq_Pq();
+ default: UD2();
+ }
+ // repe (0xF3)
+ 0x4: decode OPCODE_OP_BOTTOM3 {
+ 0x6: CVTDQ2PD(Vo,Wq);
+ default: UD2();
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PAVGB(Vo,Wo);
+ 0x1: PSRAW(Vo,Wo);
+ 0x2: PSRAD(Vo,Wo);
+ 0x3: PAVGW(Vo,Wo);
+ 0x4: PMULHUW(Vo,Wo);
+ 0x5: PMULHW(Vo,Wo);
+ 0x6: CVTTPD2DQ(Vo,Wo);
+ 0x7: WarnUnimpl::movntdq_Mo_Vo();
+ }
+ // repne (0xF2)
+ 0x8: decode OPCODE_OP_BOTTOM3 {
+ 0x6: CVTPD2DQ(Vo,Wo);
+ default: UD2();
+ }
+ default: UD2();
}
- // repne (0xF2)
- 0x8: decode OPCODE_OP_BOTTOM3 {
- 0x0: lddqu_Vo_Mo();
- default: Inst::UD2();
+ 0x1D: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSUBSB(Pq,Qq);
+ 0x1: PSUBSW(Pq,Qq);
+ 0x2: PMINSW(Pq,Qq);
+ 0x3: POR(Pq,Qq);
+ 0x4: PADDSB(Pq,Qq);
+ 0x5: PADDSW(Pq,Qq);
+ 0x6: PMAXSW(Pq,Qq);
+ 0x7: PXOR(Pq,Qq);
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSUBSB(Vo,Wo);
+ 0x1: PSUBSW(Vo,Wo);
+ 0x2: PMINSW(Vo,Wo);
+ 0x3: POR(Vo,Wo);
+ 0x4: PADDSB(Vo,Wo);
+ 0x5: PADDSW(Vo,Wo);
+ 0x6: PMAXSW(Vo,Wo);
+ 0x7: PXOR(Vo,Wo);
+ }
+ default: UD2();
}
- default: Inst::UD2();
- }
- 0x1F: decode LEGACY_DECODEVAL {
- // no prefix
- 0x0: decode OPCODE_OP_BOTTOM3 {
- 0x0: psubb_Pq_Qq();
- 0x1: psubw_Pq_Qq();
- 0x2: psubd_Pq_Qq();
- 0x3: psubq_Pq_Qq();
- 0x4: paddb_Pq_Qq();
- 0x5: paddw_Pq_Qq();
- 0x6: paddd_Pq_Qq();
- 0x7: Inst::UD2();
+ 0x1E: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x1: PSLLW(Pq,Qq);
+ 0x2: PSLLD(Pq,Qq);
+ 0x3: PSLLQ(Pq,Qq);
+ 0x4: PMULUDQ(Pq,Qq);
+ 0x5: PMADDWD(Pq,Qq);
+ 0x6: PSADBW(Pq,Qq);
+ 0x7: MASKMOVQ(Pq,PRq);
+ default: UD2();
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x1: PSLLW(Vo,Wo);
+ 0x2: PSLLD(Vo,Wo);
+ 0x3: PSLLQ(Vo,Wo);
+ 0x4: PMULUDQ(Vo,Wo);
+ 0x5: PMADDWD(Vo,Wo);
+ 0x6: PSADBW(Vo,Wo);
+ 0x7: MASKMOVDQU(Vo,VRo);
+ default: UD2();
+ }
+ // repne (0xF2)
+ 0x8: decode OPCODE_OP_BOTTOM3 {
+ 0x0: WarnUnimpl::lddqu_Vo_Mo();
+ default: UD2();
+ }
+ default: UD2();
}
- // operand size (0x66)
- 0x1: decode OPCODE_OP_BOTTOM3 {
- 0x0: psubb_Vo_Wo();
- 0x1: psubw_Vo_Wo();
- 0x2: psubd_Vo_Wo();
- 0x3: psubq_Vo_Wo();
- 0x4: paddb_Vo_Wo();
- 0x5: paddw_Vo_Wo();
- 0x6: paddd_Vo_Wo();
- 0x7: Inst::UD2();
+ 0x1F: decode LEGACY_DECODEVAL {
+ // no prefix
+ 0x0: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSUBB(Pq,Qq);
+ 0x1: PSUBW(Pq,Qq);
+ 0x2: PSUBD(Pq,Qq);
+ 0x3: PSUBQ(Pq,Qq);
+ 0x4: PADDB(Pq,Qq);
+ 0x5: PADDW(Pq,Qq);
+ 0x6: PADDD(Pq,Qq);
+ 0x7: UD2();
+ }
+ // operand size (0x66)
+ 0x1: decode OPCODE_OP_BOTTOM3 {
+ 0x0: PSUBB(Vo,Wo);
+ 0x1: PSUBW(Vo,Wo);
+ 0x2: PSUBD(Vo,Wo);
+ 0x3: PSUBQ(Vo,Wo);
+ 0x4: PADDB(Vo,Wo);
+ 0x5: PADDW(Vo,Wo);
+ 0x6: PADDD(Vo,Wo);
+ 0x7: UD2();
+ }
+ default: UD2();
}
- default: Inst::UD2();
}
default: FailUnimpl::twoByteOps();
}
diff --git a/src/arch/x86/isa/decoder/x87.isa b/src/arch/x86/isa/decoder/x87.isa
index 9a6473141..cfd69b3ba 100644
--- a/src/arch/x86/isa/decoder/x87.isa
+++ b/src/arch/x86/isa/decoder/x87.isa
@@ -249,8 +249,8 @@ format WarnUnimpl {
0x3: Inst::UD2();
default: fisttp();
}
- 0x2: Inst::FST(Mq);
- 0x3: Inst::FSTP(Mq);
+ 0x2: Inst::FST(Eq);
+ 0x3: Inst::FSTP(Eq);
0x4: decode MODRM_MOD {
0x3: fucom();
default: frstor();
diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa
index 78046c0c8..6b1fda93f 100644
--- a/src/arch/x86/isa/includes.isa
+++ b/src/arch/x86/isa/includes.isa
@@ -100,6 +100,7 @@ output header {{
#include "arch/x86/insts/macroop.hh"
#include "arch/x86/insts/microfpop.hh"
#include "arch/x86/insts/microldstop.hh"
+#include "arch/x86/insts/micromediaop.hh"
#include "arch/x86/insts/microregop.hh"
#include "arch/x86/insts/static_inst.hh"
#include "arch/x86/isa_traits.hh"
@@ -155,6 +156,7 @@ output exec {{
#include "arch/x86/miscregs.hh"
#include "arch/x86/tlb.hh"
#include "base/bigint.hh"
+#include "base/condcodes.hh"
#include "cpu/base.hh"
#include "cpu/exetrace.hh"
#include "sim/sim_exit.hh"
diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
index 86f1946ba..800549359 100644
--- a/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
+++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
@@ -221,62 +221,26 @@ def macroop IMUL_R_P_I
mulel reg
muleh t0
};
+'''
+
+pcRel = '''
+ rdip t7
+ ld %s, seg, riprel, disp
+'''
+sibRel = '''
+ ld %s, seg, sib, disp
+'''
#
# One byte version of unsigned division
#
-def macroop DIV_B_R
-{
- # Do the initial part of the division
- div1 ah, reg, dataSize=1
-
- #These are split out so we can initialize the number of bits in the
- #second register
- div2i t1, rax, 8, dataSize=1
- div2 t1, rax, t1, dataSize=1
-
- #Loop until we're out of bits to shift in
-divLoopTop:
- div2 t1, rax, t1, dataSize=1
- div2 t1, rax, t1, flags=(EZF,), dataSize=1
- br label("divLoopTop"), flags=(nCEZF,)
-
- #Unload the answer
- divq rax, dataSize=1
- divr ah, dataSize=1
-};
-
-def macroop DIV_B_M
+divcode = '''
+def macroop DIV_B_%(suffix)s
{
- ld t2, seg, sib, disp
-
+ %(readOp1)s
# Do the initial part of the division
- div1 ah, t2, dataSize=1
-
- #These are split out so we can initialize the number of bits in the
- #second register
- div2i t1, rax, 8, dataSize=1
- div2 t1, rax, t1, dataSize=1
-
- #Loop until we're out of bits to shift in
-divLoopTop:
- div2 t1, rax, t1, dataSize=1
- div2 t1, rax, t1, flags=(EZF,), dataSize=1
- br label("divLoopTop"), flags=(nCEZF,)
-
- #Unload the answer
- divq rax, dataSize=1
- divr ah, dataSize=1
-};
-
-def macroop DIV_B_P
-{
- rdip t7
- ld t2, seg, riprel, disp
-
- # Do the initial part of the division
- div1 ah, t2, dataSize=1
+ div1 ah, %(op1)s, dataSize=1
#These are split out so we can initialize the number of bits in the
#second register
@@ -293,68 +257,18 @@ divLoopTop:
divq rax, dataSize=1
divr ah, dataSize=1
};
+'''
#
# Unsigned division
#
-def macroop DIV_R
-{
- # Do the initial part of the division
- div1 rdx, reg
-
- #These are split out so we can initialize the number of bits in the
- #second register
- div2i t1, rax, "env.dataSize * 8"
- div2 t1, rax, t1
-
- #Loop until we're out of bits to shift in
- #The amount of unrolling here could stand some tuning
-divLoopTop:
- div2 t1, rax, t1
- div2 t1, rax, t1
- div2 t1, rax, t1
- div2 t1, rax, t1, flags=(EZF,)
- br label("divLoopTop"), flags=(nCEZF,)
-
- #Unload the answer
- divq rax
- divr rdx
-};
-
-def macroop DIV_M
-{
- ld t2, seg, sib, disp
-
- # Do the initial part of the division
- div1 rdx, t2
-
- #These are split out so we can initialize the number of bits in the
- #second register
- div2i t1, rax, "env.dataSize * 8"
- div2 t1, rax, t1
-
- #Loop until we're out of bits to shift in
- #The amount of unrolling here could stand some tuning
-divLoopTop:
- div2 t1, rax, t1
- div2 t1, rax, t1
- div2 t1, rax, t1
- div2 t1, rax, t1, flags=(EZF,)
- br label("divLoopTop"), flags=(nCEZF,)
-
- #Unload the answer
- divq rax
- divr rdx
-};
-
-def macroop DIV_P
+divcode += '''
+def macroop DIV_%(suffix)s
{
- rdip t7
- ld t2, seg, riprel, disp
-
+ %(readOp1)s
# Do the initial part of the division
- div1 rdx, t2
+ div1 rdx, %(op1)s
#These are split out so we can initialize the number of bits in the
#second register
@@ -374,12 +288,14 @@ divLoopTop:
divq rax
divr rdx
};
+'''
#
# One byte version of signed division
#
-def macroop IDIV_B_R
+divcode += '''
+def macroop IDIV_B_%(suffix)s
{
# Negate dividend
sub t1, t0, rax, flags=(ECF,), dataSize=1
@@ -387,84 +303,15 @@ def macroop IDIV_B_R
sub t2, t0, ah, dataSize=1
sub t2, t2, t4
- #Find the sign of the divisor
- slli t0, reg, 1, flags=(ECF,), dataSize=1
-
- # Negate divisor
- sub t3, t0, reg, dataSize=1
- # Put the divisor's absolute value into t3
- mov t3, t3, reg, flags=(nCECF,), dataSize=1
-
- #Find the sign of the dividend
- slli t0, ah, 1, flags=(ECF,), dataSize=1
-
- # Put the dividend's absolute value into t1 and t2
- mov t1, t1, rax, flags=(nCECF,), dataSize=1
- mov t2, t2, ah, flags=(nCECF,), dataSize=1
-
- # Do the initial part of the division
- div1 t2, t3, dataSize=1
-
- #These are split out so we can initialize the number of bits in the
- #second register
- div2i t4, t1, 8, dataSize=1
- div2 t4, t1, t4, dataSize=1
-
- #Loop until we're out of bits to shift in
-divLoopTop:
- div2 t4, t1, t4, dataSize=1
- div2 t4, t1, t4, flags=(EZF,), dataSize=1
- br label("divLoopTop"), flags=(nCEZF,)
-
- #Unload the answer
- divq t5, dataSize=1
- divr t6, dataSize=1
-
- # Fix up signs. The sign of the dividend is still lying around in ECF.
- # The sign of the remainder, ah, is the same as the dividend. The sign
- # of the quotient is negated if the signs of the divisor and dividend
- # were different.
-
- # Negate the remainder
- sub t4, t0, t6, dataSize=1
- # If the dividend was negitive, put the negated remainder in ah.
- mov ah, ah, t4, (CECF,), dataSize=1
- # Otherwise put the regular remainder in ah.
- mov ah, ah, t6, (nCECF,), dataSize=1
-
- # Negate the quotient.
- sub t4, t0, t5, dataSize=1
- # If the dividend was negative, start using the negated quotient
- mov t5, t5, t4, (CECF,), dataSize=1
-
- # Check the sign of the divisor
- slli t0, reg, 1, flags=(ECF,), dataSize=1
-
- # Negate the (possibly already negated) quotient
- sub t4, t0, t5, dataSize=1
- # If the divisor was negative, put the negated quotient in rax.
- mov rax, rax, t4, (CECF,), dataSize=1
- # Otherwise put the one that wasn't negated (at least here) in rax.
- mov rax, rax, t5, (nCECF,), dataSize=1
-};
-
-def macroop IDIV_B_M
-{
- # Negate dividend
- sub t1, t0, rax, flags=(ECF,), dataSize=1
- ruflag t4, 3
- sub t2, t0, ah, dataSize=1
- sub t2, t2, t4
-
- ld t8, seg, sib, disp
+ %(readOp1)s
#Find the sign of the divisor
- slli t0, t8, 1, flags=(ECF,), dataSize=1
+ slli t0, %(op1)s, 1, flags=(ECF,), dataSize=1
# Negate divisor
- sub t3, t0, t8, dataSize=1
+ sub t3, t0, %(op1)s, dataSize=1
# Put the divisor's absolute value into t3
- mov t3, t3, t8, flags=(nCECF,), dataSize=1
+ mov t3, t3, %(op1)s, flags=(nCECF,), dataSize=1
#Find the sign of the dividend
slli t0, ah, 1, flags=(ECF,), dataSize=1
@@ -509,79 +356,7 @@ divLoopTop:
mov t5, t5, t4, (CECF,), dataSize=1
# Check the sign of the divisor
- slli t0, t8, 1, flags=(ECF,), dataSize=1
-
- # Negate the (possibly already negated) quotient
- sub t4, t0, t5, dataSize=1
- # If the divisor was negative, put the negated quotient in rax.
- mov rax, rax, t4, (CECF,), dataSize=1
- # Otherwise put the one that wasn't negated (at least here) in rax.
- mov rax, rax, t5, (nCECF,), dataSize=1
-};
-
-def macroop IDIV_B_P
-{
- # Negate dividend
- sub t1, t0, rax, flags=(ECF,), dataSize=1
- ruflag t4, 3
- sub t2, t0, ah, dataSize=1
- sub t2, t2, t4
-
- rdip t7
- ld t8, seg, riprel, disp
-
- #Find the sign of the divisor
- slli t0, t8, 1, flags=(ECF,), dataSize=1
-
- # Negate divisor
- sub t3, t0, t8, dataSize=1
- # Put the divisor's absolute value into t3
- mov t3, t3, t8, flags=(nCECF,), dataSize=1
-
- #Find the sign of the dividend
- slli t0, ah, 1, flags=(ECF,), dataSize=1
-
- # Put the dividend's absolute value into t1 and t2
- mov t1, t1, rax, flags=(nCECF,), dataSize=1
- mov t2, t2, ah, flags=(nCECF,), dataSize=1
-
- # Do the initial part of the division
- div1 t2, t3, dataSize=1
-
- #These are split out so we can initialize the number of bits in the
- #second register
- div2i t4, t1, 8, dataSize=1
- div2 t4, t1, t4, dataSize=1
-
- #Loop until we're out of bits to shift in
-divLoopTop:
- div2 t4, t1, t4, dataSize=1
- div2 t4, t1, t4, flags=(EZF,), dataSize=1
- br label("divLoopTop"), flags=(nCEZF,)
-
- #Unload the answer
- divq t5, dataSize=1
- divr t6, dataSize=1
-
- # Fix up signs. The sign of the dividend is still lying around in ECF.
- # The sign of the remainder, ah, is the same as the dividend. The sign
- # of the quotient is negated if the signs of the divisor and dividend
- # were different.
-
- # Negate the remainder
- sub t4, t0, t6, dataSize=1
- # If the dividend was negitive, put the negated remainder in ah.
- mov ah, ah, t4, (CECF,), dataSize=1
- # Otherwise put the regular remainder in ah.
- mov ah, ah, t6, (nCECF,), dataSize=1
-
- # Negate the quotient.
- sub t4, t0, t5, dataSize=1
- # If the dividend was negative, start using the negated quotient
- mov t5, t5, t4, (CECF,), dataSize=1
-
- # Check the sign of the divisor
- slli t0, t8, 1, flags=(ECF,), dataSize=1
+ slli t0, %(op1)s, 1, flags=(ECF,), dataSize=1
# Negate the (possibly already negated) quotient
sub t4, t0, t5, dataSize=1
@@ -590,12 +365,14 @@ divLoopTop:
# Otherwise put the one that wasn't negated (at least here) in rax.
mov rax, rax, t5, (nCECF,), dataSize=1
};
+'''
#
# Signed division
#
-def macroop IDIV_R
+divcode += '''
+def macroop IDIV_%(suffix)s
{
# Negate dividend
sub t1, t0, rax, flags=(ECF,)
@@ -603,166 +380,17 @@ def macroop IDIV_R
sub t2, t0, rdx
sub t2, t2, t4
- #Find the sign of the divisor
- slli t0, reg, 1, flags=(ECF,)
-
- # Negate divisor
- sub t3, t0, reg
- # Put the divisor's absolute value into t3
- mov t3, t3, reg, flags=(nCECF,)
-
- #Find the sign of the dividend
- slli t0, rdx, 1, flags=(ECF,)
-
- # Put the dividend's absolute value into t1 and t2
- mov t1, t1, rax, flags=(nCECF,)
- mov t2, t2, rdx, flags=(nCECF,)
-
- # Do the initial part of the division
- div1 t2, t3
-
- #These are split out so we can initialize the number of bits in the
- #second register
- div2i t4, t1, "env.dataSize * 8"
- div2 t4, t1, t4
-
- #Loop until we're out of bits to shift in
-divLoopTop:
- div2 t4, t1, t4
- div2 t4, t1, t4
- div2 t4, t1, t4
- div2 t4, t1, t4, flags=(EZF,)
- br label("divLoopTop"), flags=(nCEZF,)
-
- #Unload the answer
- divq t5
- divr t6
-
- # Fix up signs. The sign of the dividend is still lying around in ECF.
- # The sign of the remainder, ah, is the same as the dividend. The sign
- # of the quotient is negated if the signs of the divisor and dividend
- # were different.
-
- # Negate the remainder
- sub t4, t0, t6
- # If the dividend was negitive, put the negated remainder in rdx.
- mov rdx, rdx, t4, (CECF,)
- # Otherwise put the regular remainder in rdx.
- mov rdx, rdx, t6, (nCECF,)
-
- # Negate the quotient.
- sub t4, t0, t5
- # If the dividend was negative, start using the negated quotient
- mov t5, t5, t4, (CECF,)
-
- # Check the sign of the divisor
- slli t0, reg, 1, flags=(ECF,)
-
- # Negate the (possibly already negated) quotient
- sub t4, t0, t5
- # If the divisor was negative, put the negated quotient in rax.
- mov rax, rax, t4, (CECF,)
- # Otherwise put the one that wasn't negated (at least here) in rax.
- mov rax, rax, t5, (nCECF,)
-};
-
-def macroop IDIV_M
-{
- # Negate dividend
- sub t1, t0, rax, flags=(ECF,)
- ruflag t4, 3
- sub t2, t0, rdx
- sub t2, t2, t4
-
- ld t8, seg, sib, disp
-
- #Find the sign of the divisor
- #FIXME!!! This depends on shifts setting the carry flag correctly.
- slli t0, t8, 1, flags=(ECF,)
-
- # Negate divisor
- sub t3, t0, t8
- # Put the divisor's absolute value into t3
- mov t3, t3, t8, flags=(nCECF,)
-
- #Find the sign of the dividend
- #FIXME!!! This depends on shifts setting the carry flag correctly.
- slli t0, rdx, 1, flags=(ECF,)
-
- # Put the dividend's absolute value into t1 and t2
- mov t1, t1, rax, flags=(nCECF,)
- mov t2, t2, rdx, flags=(nCECF,)
-
- # Do the initial part of the division
- div1 t2, t3
-
- #These are split out so we can initialize the number of bits in the
- #second register
- div2i t4, t1, "env.dataSize * 8"
- div2 t4, t1, t4
-
- #Loop until we're out of bits to shift in
-divLoopTop:
- div2 t4, t1, t4
- div2 t4, t1, t4
- div2 t4, t1, t4
- div2 t4, t1, t4, flags=(EZF,)
- br label("divLoopTop"), flags=(nCEZF,)
-
- #Unload the answer
- divq t5
- divr t6
-
- # Fix up signs. The sign of the dividend is still lying around in ECF.
- # The sign of the remainder, ah, is the same as the dividend. The sign
- # of the quotient is negated if the signs of the divisor and dividend
- # were different.
-
- # Negate the remainder
- sub t4, t0, t6
- # If the dividend was negitive, put the negated remainder in rdx.
- mov rdx, rdx, t4, (CECF,)
- # Otherwise put the regular remainder in rdx.
- mov rdx, rdx, t6, (nCECF,)
-
- # Negate the quotient.
- sub t4, t0, t5
- # If the dividend was negative, start using the negated quotient
- mov t5, t5, t4, (CECF,)
-
- # Check the sign of the divisor
- slli t0, t8, 1, flags=(ECF,)
-
- # Negate the (possibly already negated) quotient
- sub t4, t0, t5
- # If the divisor was negative, put the negated quotient in rax.
- mov rax, rax, t4, (CECF,)
- # Otherwise put the one that wasn't negated (at least here) in rax.
- mov rax, rax, t5, (nCECF,)
-};
-
-def macroop IDIV_P
-{
- # Negate dividend
- sub t1, t0, rax, flags=(ECF,)
- ruflag t4, 3
- sub t2, t0, rdx
- sub t2, t2, t4
-
- rdip t7
- ld t8, seg, riprel, disp
+ %(readOp1)s
#Find the sign of the divisor
- #FIXME!!! This depends on shifts setting the carry flag correctly.
- slli t0, t8, 1, flags=(ECF,)
+ slli t0, %(op1)s, 1, flags=(ECF,)
# Negate divisor
- sub t3, t0, t8
+ sub t3, t0, %(op1)s
# Put the divisor's absolute value into t3
- mov t3, t3, t4, flags=(nCECF,)
+ mov t3, t3, %(op1)s, flags=(nCECF,)
#Find the sign of the dividend
- #FIXME!!! This depends on shifts setting the carry flag correctly.
slli t0, rdx, 1, flags=(ECF,)
# Put the dividend's absolute value into t1 and t2
@@ -807,7 +435,7 @@ divLoopTop:
mov t5, t5, t4, (CECF,)
# Check the sign of the divisor
- slli t0, t8, 1, flags=(ECF,)
+ slli t0, %(op1)s, 1, flags=(ECF,)
# Negate the (possibly already negated) quotient
sub t4, t0, t5
@@ -817,3 +445,10 @@ divLoopTop:
mov rax, rax, t5, (nCECF,)
};
'''
+
+# Expand divcode once per operand form: R uses `reg` directly as the divisor
+# with no read step; M and P expand the sibRel / pcRel read template with t2
+# and then use t2 as the divisor.
+# NOTE(review): the IDIV_%(suffix)s template computes the negated upper
+# dividend in t2 (sub t2, t0, rdx / sub t2, t2, t4) before %(readOp1)s is
+# expanded. If sibRel / pcRel load into the register they are given, the M
+# and P forms overwrite that value; the pre-refactor IDIV_M / IDIV_P loaded
+# the divisor into t8 instead. Confirm against the sibRel / pcRel definitions.
+microcode += divcode % {"suffix": "R",
+ "readOp1": "", "op1": "reg"}
+microcode += divcode % {"suffix": "M",
+ "readOp1": sibRel % "t2", "op1": "t2"}
+microcode += divcode % {"suffix": "P",
+ "readOp1": pcRel % "t2", "op1": "t2"}
diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py
index 560a86e64..7ccdca6c3 100644
--- a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py
+++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py
@@ -346,10 +346,17 @@ processDescriptor:
wrdl reg, t3, t1
wrsel reg, t1
};
+
+# MOVNTI with a sib-addressed memory destination: a single st of reg.
+# NOTE(review): nothing in this body expresses a non-temporal hint; the
+# macroop is just a plain store -- confirm that is intended.
+def macroop MOVNTI_M_R {
+ st reg, seg, sib, disp
+};
+
+# RIP-relative form: rdip t7 runs first, then the same st using riprel
+# addressing.
+def macroop MOVNTI_P_R {
+ rdip t7
+ st reg, seg, riprel, disp
+};
'''
#let {{
# class MOVD(Inst):
# "GenFault ${new UnimpInstFault}"
-# class MOVNTI(Inst):
-# "GenFault ${new UnimpInstFault}"
#}};
diff --git a/src/arch/x86/isa/insts/general_purpose/semaphores.py b/src/arch/x86/isa/insts/general_purpose/semaphores.py
index 2bdbd0ada..b3c0d21cb 100644
--- a/src/arch/x86/isa/insts/general_purpose/semaphores.py
+++ b/src/arch/x86/isa/insts/general_purpose/semaphores.py
@@ -98,100 +98,6 @@ def macroop CMPXCHG_LOCKED_P_R {
mov rax, rax, t1, flags=(nCZF,)
};
-def macroop CMPXCHG8B_M {
- lea t1, seg, sib, disp, dataSize=asz
- ldst t2, seg, [1, t0, t1], 0
- ldst t3, seg, [1, t0, t1], dsz
-
- sub t0, rax, t2, flags=(ZF,)
- br label("doneComparing"), flags=(nCZF,)
- sub t0, rdx, t3, flags=(ZF,)
-doneComparing:
-
- # If they're equal, set t3:t2 to rbx:rcx to write to memory
- mov t2, t2, rbx, flags=(CZF,)
- mov t3, t3, rcx, flags=(CZF,)
-
- # If they're not equal, set rdx:rax to the value from memory.
- mov rax, rax, t2, flags=(nCZF,)
- mov rdx, rdx, t3, flags=(nCZF,)
-
- # Write to memory
- st t3, seg, [1, t0, t1], dsz
- st t2, seg, [1, t0, t1], 0
-};
-
-def macroop CMPXCHG8B_P {
- rdip t7
- lea t1, seg, riprel, disp, dataSize=asz
- ldst t2, seg, [1, t0, t1], 0
- ldst t3, seg, [1, t0, t1], dsz
-
- sub t0, rax, t2, flags=(ZF,)
- br label("doneComparing"), flags=(nCZF,)
- sub t0, rdx, t3, flags=(ZF,)
-doneComparing:
-
- # If they're equal, set t3:t2 to rbx:rcx to write to memory
- mov t2, t2, rbx, flags=(CZF,)
- mov t3, t3, rcx, flags=(CZF,)
-
- # If they're not equal, set rdx:rax to the value from memory.
- mov rax, rax, t2, flags=(nCZF,)
- mov rdx, rdx, t3, flags=(nCZF,)
-
- # Write to memory
- st t3, seg, [1, t0, t1], dsz
- st t2, seg, [1, t0, t1], 0
-};
-
-def macroop CMPXCHG8B_LOCKED_M {
- lea t1, seg, sib, disp, dataSize=asz
- ldstl t2, seg, [1, t0, t1], 0
- ldstl t3, seg, [1, t0, t1], dsz
-
- sub t0, rax, t2, flags=(ZF,)
- br label("doneComparing"), flags=(nCZF,)
- sub t0, rdx, t3, flags=(ZF,)
-doneComparing:
-
- # If they're equal, set t3:t2 to rbx:rcx to write to memory
- mov t2, t2, rbx, flags=(CZF,)
- mov t3, t3, rcx, flags=(CZF,)
-
- # If they're not equal, set rdx:rax to the value from memory.
- mov rax, rax, t2, flags=(nCZF,)
- mov rdx, rdx, t3, flags=(nCZF,)
-
- # Write to memory
- stul t3, seg, [1, t0, t1], dsz
- stul t2, seg, [1, t0, t1], 0
-};
-
-def macroop CMPXCHG8B_LOCKED_P {
- rdip t7
- lea t1, seg, riprel, disp, dataSize=asz
- ldstl t2, seg, [1, t0, t1], 0
- ldstl t3, seg, [1, t0, t1], dsz
-
- sub t0, rax, t2, flags=(ZF,)
- br label("doneComparing"), flags=(nCZF,)
- sub t0, rdx, t3, flags=(ZF,)
-doneComparing:
-
- # If they're equal, set t3:t2 to rbx:rcx to write to memory
- mov t2, t2, rbx, flags=(CZF,)
- mov t3, t3, rcx, flags=(CZF,)
-
- # If they're not equal, set rdx:rax to the value from memory.
- mov rax, rax, t2, flags=(nCZF,)
- mov rdx, rdx, t3, flags=(nCZF,)
-
- # Write to memory
- stul t3, seg, [1, t0, t1], dsz
- stul t2, seg, [1, t0, t1], 0
-};
-
def macroop XADD_M_R {
ldst t1, seg, sib, disp
add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
@@ -229,6 +135,46 @@ def macroop XADD_R_R {
};
'''
+
+# Template for the four CMPXCHG8B macroops. Placeholders:
+#   suffix - macroop name suffix (M, P, LOCKED_M, LOCKED_P)
+#   rdip   - empty, or "rdip t7" for the riprel forms
+#   sib    - addressing token passed to lea ("sib" or "riprel")
+#   l / ul - suffixes appended to ldst / st; empty for the plain forms,
+#            "l" / "ul" (giving ldstl / stul) for the LOCKED forms
+# The effective address is computed once into t1; the low half is accessed
+# at offset 0 and the high half at offset dsz.
+cmpxchg8bCode = '''
+def macroop CMPXCHG8B_%(suffix)s {
+ %(rdip)s
+ lea t1, seg, %(sib)s, disp, dataSize=asz
+ ldst%(l)s t2, seg, [1, t0, t1], 0
+ ldst%(l)s t3, seg, [1, t0, t1], dsz
+
+ # Compare rax with the low half; the high half (rdx vs. t3) is only
+ # compared when the low halves matched.
+ sub t0, rax, t2, flags=(ZF,)
+ br label("doneComparing"), flags=(nCZF,)
+ sub t0, rdx, t3, flags=(ZF,)
+doneComparing:
+
+ # If they're equal, set t3:t2 to rcx:rbx to write to memory
+ mov t2, t2, rbx, flags=(CZF,)
+ mov t3, t3, rcx, flags=(CZF,)
+
+ # If they're not equal, set rdx:rax to the value from memory.
+ mov rax, rax, t2, flags=(nCZF,)
+ mov rdx, rdx, t3, flags=(nCZF,)
+
+ # Write to memory. The stores are unconditional: on a mismatch t3:t2
+ # still hold the values that were just loaded.
+ st%(ul)s t3, seg, [1, t0, t1], dsz
+ st%(ul)s t2, seg, [1, t0, t1], 0
+};
+'''
+
+# Plain and LOCKED variants, each in sib-addressed (M) and RIP-relative (P)
+# form.
+microcode += cmpxchg8bCode % {"rdip": "", "sib": "sib",
+ "l": "", "ul": "",
+ "suffix": "M"}
+microcode += cmpxchg8bCode % {"rdip": "rdip t7", "sib": "riprel",
+ "l": "", "ul": "",
+ "suffix": "P"}
+microcode += cmpxchg8bCode % {"rdip": "", "sib": "sib",
+ "l": "l", "ul": "ul",
+ "suffix": "LOCKED_M"}
+microcode += cmpxchg8bCode % {"rdip": "rdip t7", "sib": "riprel",
+ "l": "l", "ul": "ul",
+ "suffix": "LOCKED_P"}
+
#let {{
# class XCHG(Inst):
# "GenFault ${new UnimpInstFault}"
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py
index c1764ff12..083d8775d 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py
@@ -54,22 +54,73 @@
# Authors: Gabe Black
+# SSE floating-point add, built on the maddf media microop.
+# Scalar forms (ADDSS / ADDSD) issue one maddf on xmml with ext=1; packed
+# forms (ADDPS / ADDPD) issue maddf on both xmml and xmmh with ext=0.
+# size=4 is used for the single-precision mnemonics, size=8 for the
+# double-precision ones.
+# _XMM forms read the source from xmmlm / xmmhm. _M forms load it with sib
+# addressing; _P forms run rdip t7 and load with riprel addressing. Packed
+# memory forms load two quadwords, at DISPLACEMENT and DISPLACEMENT + 8.
+# NOTE(review): every ldfp uses dataSize=8, including the SS forms --
+# confirm an 8-byte read is acceptable for a 4-byte architectural operand.
microcode = '''
-# ADDPS
-# ADDPD
-# ADDSS
+def macroop ADDSS_XMM_XMM {
+ maddf xmml, xmml, xmmlm, size=4, ext=1
+};
+
+def macroop ADDSS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddf xmml, xmml, ufp1, size=4, ext=1
+};
+
+def macroop ADDSS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddf xmml, xmml, ufp1, size=4, ext=1
+};
+
+def macroop ADDSD_XMM_XMM {
+ maddf xmml, xmml, xmmlm, size=8, ext=1
+};
+
+def macroop ADDSD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddf xmml, xmml, ufp1, size=8, ext=1
+};
+
+def macroop ADDSD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddf xmml, xmml, ufp1, size=8, ext=1
+};
+
+def macroop ADDPS_XMM_XMM {
+ maddf xmml, xmml, xmmlm, size=4, ext=0
+ maddf xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop ADDPS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddf xmml, xmml, ufp1, size=4, ext=0
+ maddf xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop ADDPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddf xmml, xmml, ufp1, size=4, ext=0
+ maddf xmmh, xmmh, ufp2, size=4, ext=0
+};
-def macroop ADDSD_R_R {
- addfp xmml, xmml, xmmlm
+def macroop ADDPD_XMM_XMM {
+ maddf xmml, xmml, xmmlm, size=8, ext=0
+ maddf xmmh, xmmh, xmmhm, size=8, ext=0
};
-def macroop ADDSD_R_M {
- ldfp ufp1, seg, sib, disp
- addfp xmml, xmml, ufp1
+def macroop ADDPD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddf xmml, xmml, ufp1, size=8, ext=0
+ maddf xmmh, xmmh, ufp2, size=8, ext=0
};
-def macroop ADDSD_R_P {
+def macroop ADDPD_XMM_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
- addfp xmml, xmml, ufp1
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddf xmml, xmml, ufp1, size=8, ext=0
+ maddf xmmh, xmmh, ufp2, size=8, ext=0
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py
index 31f336696..3e565278c 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py
@@ -54,22 +54,73 @@
# Authors: Gabe Black
+# SSE floating-point divide, built on the mdivf media microop.
+# Scalar forms (DIVSS / DIVSD) issue one mdivf on xmml with ext=1; packed
+# forms (DIVPS / DIVPD) issue mdivf on both xmml and xmmh with ext=0.
+# size=4 is used for the single-precision mnemonics, size=8 for the
+# double-precision ones.
+# _XMM forms read the source from xmmlm / xmmhm. _M forms load it with sib
+# addressing; _P forms run rdip t7 and load with riprel addressing. Packed
+# memory forms load two quadwords, at DISPLACEMENT and DISPLACEMENT + 8.
+# NOTE(review): every ldfp uses dataSize=8, including the SS forms --
+# confirm an 8-byte read is acceptable for a 4-byte architectural operand.
microcode = '''
-# DIVPS
-# DIVPD
-# DIVSS
+def macroop DIVSS_XMM_XMM {
+ mdivf xmml, xmml, xmmlm, size=4, ext=1
+};
+
+def macroop DIVSS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mdivf xmml, xmml, ufp1, size=4, ext=1
+};
+
+def macroop DIVSS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mdivf xmml, xmml, ufp1, size=4, ext=1
+};
+
+def macroop DIVSD_XMM_XMM {
+ mdivf xmml, xmml, xmmlm, size=8, ext=1
+};
+
+def macroop DIVSD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mdivf xmml, xmml, ufp1, size=8, ext=1
+};
+
+def macroop DIVSD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mdivf xmml, xmml, ufp1, size=8, ext=1
+};
+
+def macroop DIVPS_XMM_XMM {
+ mdivf xmml, xmml, xmmlm, size=4, ext=0
+ mdivf xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop DIVPS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mdivf xmml, xmml, ufp1, size=4, ext=0
+ mdivf xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop DIVPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mdivf xmml, xmml, ufp1, size=4, ext=0
+ mdivf xmmh, xmmh, ufp2, size=4, ext=0
+};
-def macroop DIVSD_R_R {
- divfp xmml, xmml, xmmlm
+def macroop DIVPD_XMM_XMM {
+ mdivf xmml, xmml, xmmlm, size=8, ext=0
+ mdivf xmmh, xmmh, xmmhm, size=8, ext=0
};
-def macroop DIVSD_R_M {
- ldfp ufp1, seg, sib, disp
- divfp xmml, xmml, ufp1
+def macroop DIVPD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mdivf xmml, xmml, ufp1, size=8, ext=0
+ mdivf xmmh, xmmh, ufp2, size=8, ext=0
};
-def macroop DIVSD_R_P {
+def macroop DIVPD_XMM_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
- divfp xmml, xmml, ufp1
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mdivf xmml, xmml, ufp1, size=8, ext=0
+ mdivf xmmh, xmmh, ufp2, size=8, ext=0
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py
index 1a53eb27f..fc28fbda4 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py
@@ -54,22 +54,73 @@
# Authors: Gabe Black
+# SSE floating-point multiply, built on the mmulf media microop.
+# Scalar forms (MULSS / MULSD) issue one mmulf on xmml with ext=1; packed
+# forms (MULPS / MULPD) issue mmulf on both xmml and xmmh with ext=0.
+# size=4 is used for the single-precision mnemonics, size=8 for the
+# double-precision ones.
+# _XMM forms read the source from xmmlm / xmmhm. _M forms load it with sib
+# addressing; _P forms run rdip t7 and load with riprel addressing. Packed
+# memory forms load two quadwords, at DISPLACEMENT and DISPLACEMENT + 8.
+# NOTE(review): every ldfp uses dataSize=8, including the SS forms --
+# confirm an 8-byte read is acceptable for a 4-byte architectural operand.
microcode = '''
-# MULPS
-# MULPD
-# MULSS
+def macroop MULSS_XMM_XMM {
+ mmulf xmml, xmml, xmmlm, size=4, ext=1
+};
+
+def macroop MULSS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmulf xmml, xmml, ufp1, size=4, ext=1
+};
+
+def macroop MULSS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmulf xmml, xmml, ufp1, size=4, ext=1
+};
+
+def macroop MULSD_XMM_XMM {
+ mmulf xmml, xmml, xmmlm, size=8, ext=1
+};
+
+def macroop MULSD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmulf xmml, xmml, ufp1, size=8, ext=1
+};
+
+def macroop MULSD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmulf xmml, xmml, ufp1, size=8, ext=1
+};
+
+def macroop MULPS_XMM_XMM {
+ mmulf xmml, xmml, xmmlm, size=4, ext=0
+ mmulf xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop MULPS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmulf xmml, xmml, ufp1, size=4, ext=0
+ mmulf xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop MULPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmulf xmml, xmml, ufp1, size=4, ext=0
+ mmulf xmmh, xmmh, ufp2, size=4, ext=0
+};
-def macroop MULSD_R_R {
- mulfp xmml, xmml, xmmlm
+def macroop MULPD_XMM_XMM {
+ mmulf xmml, xmml, xmmlm, size=8, ext=0
+ mmulf xmmh, xmmh, xmmhm, size=8, ext=0
};
-def macroop MULSD_R_M {
- ldfp ufp1, seg, sib, disp
- mulfp xmml, xmml, ufp1
+def macroop MULPD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmulf xmml, xmml, ufp1, size=8, ext=0
+ mmulf xmmh, xmmh, ufp2, size=8, ext=0
};
-def macroop MULSD_R_P {
+def macroop MULPD_XMM_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
- mulfp xmml, xmml, ufp1
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmulf xmml, xmml, ufp1, size=8, ext=0
+ mmulf xmmh, xmmh, ufp2, size=8, ext=0
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py
index 4f67aee88..fdeb30ddc 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py
@@ -54,22 +54,73 @@
# Authors: Gabe Black
+# SSE floating-point square root, built on the msqrt media microop, which
+# is written here with a destination and a single source operand.
+# Scalar forms (SQRTSS / SQRTSD) issue one msqrt into xmml with ext=1;
+# packed forms (SQRTPS / SQRTPD) issue msqrt into both xmml and xmmh with
+# ext=0. size=4 is used for the single-precision mnemonics, size=8 for the
+# double-precision ones.
+# _XMM forms read the source from xmmlm / xmmhm. _M forms load it with sib
+# addressing; _P forms run rdip t7 and load with riprel addressing. Packed
+# memory forms load two quadwords, at DISPLACEMENT and DISPLACEMENT + 8.
+# NOTE(review): every ldfp uses dataSize=8, including the SS forms --
+# confirm an 8-byte read is acceptable for a 4-byte architectural operand.
microcode = '''
-# SQRTPS
-# SQRTPD
-# SQRTSS
+def macroop SQRTSS_XMM_XMM {
+ msqrt xmml, xmmlm, size=4, ext=1
+};
+
+def macroop SQRTSS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msqrt xmml, ufp1, size=4, ext=1
+};
+
+def macroop SQRTSS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msqrt xmml, ufp1, size=4, ext=1
+};
+
+def macroop SQRTPS_XMM_XMM {
+ msqrt xmml, xmmlm, size=4, ext=0
+ msqrt xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop SQRTPS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msqrt xmml, ufp1, size=4, ext=0
+ msqrt xmmh, ufp2, size=4, ext=0
+};
+
+def macroop SQRTPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msqrt xmml, ufp1, size=4, ext=0
+ msqrt xmmh, ufp2, size=4, ext=0
+};
+
+def macroop SQRTSD_XMM_XMM {
+ msqrt xmml, xmmlm, size=8, ext=1
+};
+
+def macroop SQRTSD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msqrt xmml, ufp1, size=8, ext=1
+};
+
+def macroop SQRTSD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msqrt xmml, ufp1, size=8, ext=1
+};
-def macroop SQRTSD_R_R {
- sqrtfp xmml, xmml, xmmlm
+def macroop SQRTPD_XMM_XMM {
+ msqrt xmml, xmmlm, size=8, ext=0
+ msqrt xmmh, xmmhm, size=8, ext=0
};
-def macroop SQRTSD_R_M {
- ldfp ufp1, seg, sib, disp
- sqrtfp xmml, xmml, ufp1
+def macroop SQRTPD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msqrt xmml, ufp1, size=8, ext=0
+ msqrt xmmh, ufp2, size=8, ext=0
};
-def macroop SQRTSD_R_P {
+def macroop SQRTPD_XMM_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
- sqrtfp xmml, xmml, ufp1
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msqrt xmml, ufp1, size=8, ext=0
+ msqrt xmmh, ufp2, size=8, ext=0
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py
index 4f73fa899..378abc070 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py
@@ -54,22 +54,73 @@
# Authors: Gabe Black
+# SSE floating-point subtract, built on the msubf media microop.
+# Scalar forms (SUBSS / SUBSD) issue one msubf on xmml with ext=1; packed
+# forms (SUBPS / SUBPD) issue msubf on both xmml and xmmh with ext=0.
+# size=4 is used for the single-precision mnemonics, size=8 for the
+# double-precision ones.
+# _XMM forms read the source from xmmlm / xmmhm. _M forms load it with sib
+# addressing; _P forms run rdip t7 and load with riprel addressing. Packed
+# memory forms load two quadwords, at DISPLACEMENT and DISPLACEMENT + 8.
+# NOTE(review): every ldfp uses dataSize=8, including the SS forms --
+# confirm an 8-byte read is acceptable for a 4-byte architectural operand.
microcode = '''
-# SUBPS
-# SUBPD
-# SUBSS
+def macroop SUBSS_XMM_XMM {
+ msubf xmml, xmml, xmmlm, size=4, ext=1
+};
+
+def macroop SUBSS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubf xmml, xmml, ufp1, size=4, ext=1
+};
+
+def macroop SUBSS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubf xmml, xmml, ufp1, size=4, ext=1
+};
+
+def macroop SUBSD_XMM_XMM {
+ msubf xmml, xmml, xmmlm, size=8, ext=1
+};
+
+def macroop SUBSD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubf xmml, xmml, ufp1, size=8, ext=1
+};
+
+def macroop SUBSD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubf xmml, xmml, ufp1, size=8, ext=1
+};
+
+def macroop SUBPS_XMM_XMM {
+ msubf xmml, xmml, xmmlm, size=4, ext=0
+ msubf xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop SUBPS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubf xmml, xmml, ufp1, size=4, ext=0
+ msubf xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop SUBPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubf xmml, xmml, ufp1, size=4, ext=0
+ msubf xmmh, xmmh, ufp2, size=4, ext=0
+};
-def macroop SUBSD_R_R {
- subfp xmml, xmml, xmmlm
+def macroop SUBPD_XMM_XMM {
+ msubf xmml, xmml, xmmlm, size=8, ext=0
+ msubf xmmh, xmmh, xmmhm, size=8, ext=0
};
-def macroop SUBSD_R_M {
- ldfp ufp1, seg, sib, disp
- subfp xmml, xmml, ufp1
+def macroop SUBPD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubf xmml, xmml, ufp1, size=8, ext=0
+ msubf xmmh, xmmh, ufp2, size=8, ext=0
};
-def macroop SUBSD_R_P {
+def macroop SUBPD_XMM_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
- subfp xmml, xmml, ufp1
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubf xmml, xmml, ufp1, size=8, ext=0
+ msubf xmmh, xmmh, ufp2, size=8, ext=0
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py
index 705f64093..09c34600b 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py
@@ -54,8 +54,73 @@
# Authors: Gabe Black
+# SSE compare-and-write-mask (CMPPS / CMPPD / CMPSS / CMPSD), built on the
+# mcmpf2r media microop. The instruction's immediate is passed through the
+# microop's ext field.
+# Packed forms compare both halves (xmml and xmmh) with
+# ext="IMMEDIATE & mask(3)"; scalar forms compare xmml only with
+# ext="IMMEDIATE | 0x8". size=4 is used for the PS / SS mnemonics and
+# size=8 for the PD / SD ones.
+# _M forms load with sib addressing; _P forms run rdip t7 and load with
+# riprel addressing. Packed memory forms load two quadwords, at
+# DISPLACEMENT and DISPLACEMENT + 8.
+# NOTE(review): the scalar forms OR in 0x8 without first masking IMMEDIATE
+# to three bits as the packed forms do -- confirm mcmpf2r ignores the
+# remaining immediate bits.
+# NOTE(review): every ldfp uses dataSize=8, including the SS forms --
+# confirm an 8-byte read is acceptable for a 4-byte architectural operand.
microcode = '''
-# CMPPS
-# CMPPD
-# CMPSS
-# CMPSD
+def macroop CMPPS_XMM_XMM_I {
+ mcmpf2r xmml, xmml, xmmlm, size=4, ext="IMMEDIATE & mask(3)"
+ mcmpf2r xmmh, xmmh, xmmhm, size=4, ext="IMMEDIATE & mask(3)"
+};
+
+def macroop CMPPS_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE & mask(3)"
+ mcmpf2r xmmh, xmmh, ufp2, size=4, ext="IMMEDIATE & mask(3)"
+};
+
+def macroop CMPPS_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE & mask(3)"
+ mcmpf2r xmmh, xmmh, ufp2, size=4, ext="IMMEDIATE & mask(3)"
+};
+
+def macroop CMPPD_XMM_XMM_I {
+ mcmpf2r xmml, xmml, xmmlm, size=8, ext="IMMEDIATE & mask(3)"
+ mcmpf2r xmmh, xmmh, xmmhm, size=8, ext="IMMEDIATE & mask(3)"
+};
+
+def macroop CMPPD_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE & mask(3)"
+ mcmpf2r xmmh, xmmh, ufp2, size=8, ext="IMMEDIATE & mask(3)"
+};
+
+def macroop CMPPD_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE & mask(3)"
+ mcmpf2r xmmh, xmmh, ufp2, size=8, ext="IMMEDIATE & mask(3)"
+};
+
+def macroop CMPSS_XMM_XMM_I {
+ mcmpf2r xmml, xmml, xmmlm, size=4, ext="IMMEDIATE | 0x8"
+};
+
+def macroop CMPSS_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE | 0x8"
+};
+
+def macroop CMPSS_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE | 0x8"
+};
+
+def macroop CMPSD_XMM_XMM_I {
+ mcmpf2r xmml, xmml, xmmlm, size=8, ext="IMMEDIATE | 0x8"
+};
+
+def macroop CMPSD_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE | 0x8"
+};
+
+def macroop CMPSD_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE | 0x8"
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
index 8ef363333..17c97662c 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
@@ -54,12 +54,143 @@
# Authors: Gabe Black
+# SSE minimum / maximum (MIN* / MAX* in PS, PD, SS and SD flavours), built
+# on the mminf and mmaxf media microops.
+# Packed forms operate on both xmml and xmmh with ext=0; scalar forms
+# operate on xmml only with ext=1. size=4 is used for the single-precision
+# mnemonics, size=8 for the double-precision ones.
+# _XMM forms read the source from xmmlm / xmmhm. _M forms load it with sib
+# addressing; _P forms run rdip t7 and load with riprel addressing. Packed
+# memory forms load two quadwords, at DISPLACEMENT and DISPLACEMENT + 8.
+# NOTE(review): every ldfp uses dataSize=8, including the SS forms --
+# confirm an 8-byte read is acceptable for a 4-byte architectural operand.
microcode = '''
-# MAXPS
-# MAXPD
-# MAXSS
-# MAXSD
-# MINPS
-# MINPD
-# MINSS
-# MINSD
+def macroop MINPS_XMM_XMM {
+ mminf xmml, xmml, xmmlm, ext=0, size=4
+ mminf xmmh, xmmh, xmmhm, ext=0, size=4
+};
+
+def macroop MINPS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mminf xmml, xmml, ufp1, ext=0, size=4
+ mminf xmmh, xmmh, ufp2, ext=0, size=4
+};
+
+def macroop MINPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mminf xmml, xmml, ufp1, ext=0, size=4
+ mminf xmmh, xmmh, ufp2, ext=0, size=4
+};
+
+def macroop MINPD_XMM_XMM {
+ mminf xmml, xmml, xmmlm, ext=0, size=8
+ mminf xmmh, xmmh, xmmhm, ext=0, size=8
+};
+
+def macroop MINPD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mminf xmml, xmml, ufp1, ext=0, size=8
+ mminf xmmh, xmmh, ufp2, ext=0, size=8
+};
+
+def macroop MINPD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mminf xmml, xmml, ufp1, ext=0, size=8
+ mminf xmmh, xmmh, ufp2, ext=0, size=8
+};
+
+def macroop MINSS_XMM_XMM {
+ mminf xmml, xmml, xmmlm, ext=1, size=4
+};
+
+def macroop MINSS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ mminf xmml, xmml, ufp1, ext=1, size=4
+};
+
+def macroop MINSS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ mminf xmml, xmml, ufp1, ext=1, size=4
+};
+
+def macroop MINSD_XMM_XMM {
+ mminf xmml, xmml, xmmlm, ext=1, size=8
+};
+
+def macroop MINSD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ mminf xmml, xmml, ufp1, ext=1, size=8
+};
+
+def macroop MINSD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ mminf xmml, xmml, ufp1, ext=1, size=8
+};
+
+def macroop MAXPS_XMM_XMM {
+ mmaxf xmml, xmml, xmmlm, ext=0, size=4
+ mmaxf xmmh, xmmh, xmmhm, ext=0, size=4
+};
+
+def macroop MAXPS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmaxf xmml, xmml, ufp1, ext=0, size=4
+ mmaxf xmmh, xmmh, ufp2, ext=0, size=4
+};
+
+def macroop MAXPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmaxf xmml, xmml, ufp1, ext=0, size=4
+ mmaxf xmmh, xmmh, ufp2, ext=0, size=4
+};
+
+def macroop MAXPD_XMM_XMM {
+ mmaxf xmml, xmml, xmmlm, ext=0, size=8
+ mmaxf xmmh, xmmh, xmmhm, ext=0, size=8
+};
+
+def macroop MAXPD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmaxf xmml, xmml, ufp1, ext=0, size=8
+ mmaxf xmmh, xmmh, ufp2, ext=0, size=8
+};
+
+def macroop MAXPD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmaxf xmml, xmml, ufp1, ext=0, size=8
+ mmaxf xmmh, xmmh, ufp2, ext=0, size=8
+};
+
+def macroop MAXSS_XMM_XMM {
+ mmaxf xmml, xmml, xmmlm, ext=1, size=4
+};
+
+def macroop MAXSS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ mmaxf xmml, xmml, ufp1, ext=1, size=4
+};
+
+def macroop MAXSS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ mmaxf xmml, xmml, ufp1, ext=1, size=4
+};
+
+def macroop MAXSD_XMM_XMM {
+ mmaxf xmml, xmml, xmmlm, ext=1, size=8
+};
+
+def macroop MAXSD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ mmaxf xmml, xmml, ufp1, ext=1, size=8
+};
+
+def macroop MAXSD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ mmaxf xmml, xmml, ufp1, ext=1, size=8
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py
index e05018495..50afddf7b 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py
@@ -54,22 +54,63 @@
# Authors: Gabe Black
+# SSE scalar compare-and-set-rflags (UCOMISS / UCOMISD / COMISS / COMISD),
+# built on the mcmpf2rf media microop, which is written here with two
+# source operands and no destination register. size=4 is used for the SS
+# mnemonics, size=8 for the SD ones; only xmml is compared.
+# _M forms load the second operand with sib addressing; _P forms run
+# rdip t7 and load with riprel addressing.
+# NOTE(review): the COMIS* macroops are emitted with exactly the same
+# microops as the UCOMIS* ones, so any architectural difference between
+# the ordered and unordered compares is not represented here -- confirm.
+# NOTE(review): every ldfp uses dataSize=8, including the SS forms --
+# confirm an 8-byte read is acceptable for a 4-byte architectural operand.
microcode = '''
-# COMISS
-# COMISD
-# UCOMISS
+def macroop UCOMISS_XMM_XMM {
+ mcmpf2rf xmml, xmmlm, size=4
+};
+
+def macroop UCOMISS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpf2rf xmml, ufp1, size=4
+};
+
+def macroop UCOMISS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpf2rf xmml, ufp1, size=4
+};
+
+def macroop UCOMISD_XMM_XMM {
+ mcmpf2rf xmml, xmmlm, size=8
+};
+
+def macroop UCOMISD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpf2rf xmml, ufp1, size=8
+};
+
+def macroop UCOMISD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpf2rf xmml, ufp1, size=8
+};
+
+def macroop COMISS_XMM_XMM {
+ mcmpf2rf xmml, xmmlm, size=4
+};
+
+def macroop COMISS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpf2rf xmml, ufp1, size=4
+};
+
+def macroop COMISS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpf2rf xmml, ufp1, size=4
+};
-def macroop UCOMISD_R_R {
- compfp xmml, xmmlm
+def macroop COMISD_XMM_XMM {
+ mcmpf2rf xmml, xmmlm, size=8
};
-def macroop UCOMISD_R_M {
- ldfp ufp1, seg, sib, disp
- compfp xmml, ufp1
+def macroop COMISD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpf2rf xmml, ufp1, size=8
};
-def macroop UCOMISD_R_P {
+def macroop COMISD_XMM_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
- compfp xmml, ufp1
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpf2rf xmml, ufp1, size=8
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py
index 2de33efa2..1c36f7e45 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py
@@ -54,8 +54,74 @@
# Authors: Gabe Black
microcode = '''
-# CVTPS2PD
-# CVTPD2PS
-# CVTSS2SD
-# CVTSD2SS
+def macroop CVTSS2SD_XMM_XMM {
+ cvtf2f xmml, xmmlm, destSize=8, srcSize=4, ext=1
+};
+
+def macroop CVTSS2SD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=1
+};
+
+def macroop CVTSS2SD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=1
+};
+
+def macroop CVTSD2SS_XMM_XMM {
+ cvtf2f xmml, xmmlm, destSize=4, srcSize=8, ext=1
+};
+
+def macroop CVTSD2SS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=1
+};
+
+def macroop CVTSD2SS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=1
+};
+
+def macroop CVTPS2PD_XMM_XMM {
+ cvtf2f xmmh, xmmlm, destSize=8, srcSize=4, ext=2
+ cvtf2f xmml, xmmlm, destSize=8, srcSize=4, ext=0
+};
+
+def macroop CVTPS2PD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2f xmmh, ufp1, destSize=8, srcSize=4, ext=2
+ cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=0
+};
+
+def macroop CVTPS2PD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2f xmmh, ufp1, destSize=8, srcSize=4, ext=2
+ cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=0
+};
+
+# CVTPD2PS, register source: two cvtf2f microops (srcSize=8, destSize=4) both
+# target xmml, the first reading xmmlm with ext=0 and the second reading
+# xmmhm with ext=2; xmmh is then loaded with the immediate 0.
+# NOTE(review): ext=2 presumably directs the result into the upper 4-byte
+# lane of xmml -- confirm against the cvtf2f microop definition.
+def macroop CVTPD2PS_XMM_XMM {
+ cvtf2f xmml, xmmlm, destSize=4, srcSize=8, ext=0
+ cvtf2f xmml, xmmhm, destSize=4, srcSize=8, ext=2
+ lfpimm xmmh, 0
+};
+
+def macroop CVTPD2PS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=0
+ cvtf2f xmml, ufp2, destSize=4, srcSize=8, ext=2
+ lfpimm xmmh, 0
+};
+
+def macroop CVTPD2PS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=0
+ cvtf2f xmml, ufp2, destSize=4, srcSize=8, ext=2
+ lfpimm xmmh, 0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py
index 8d5f4e659..16abd96f4 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py
@@ -54,22 +54,75 @@
# Authors: Gabe Black
microcode = '''
-# CVTSS2SI
-# CVTSD2SI
-# CVTTSS2SI
+# CVTSS2SI, register source: cvtf2i converts from xmmlm (srcSize=4) into the
+# temporary ufp1 at the operand size (dsz), then mov2int copies ufp1 into the
+# integer destination register.
+# NOTE(review): the only difference from CVTTSS2SI_R_XMM is ext=(1 | 4)
+# versus ext=1, so the 4 flag presumably selects rounding instead of
+# truncation -- confirm against the cvtf2i microop definition.
+def macroop CVTSS2SI_R_XMM {
+ cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=(1 | 4)
+ mov2int reg, ufp1, size=dsz
+};
+
+def macroop CVTSS2SI_R_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=(1 | 4)
+ mov2int reg, ufp1, size=dsz
+};
+
+def macroop CVTSS2SI_R_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=(1 | 4)
+ mov2int reg, ufp1, size=dsz
+};
+
+def macroop CVTSD2SI_R_XMM {
+ cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=(1 | 4)
+ mov2int reg, ufp1, size=dsz
+};
+
+def macroop CVTSD2SI_R_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=(1 | 4)
+ mov2int reg, ufp1, size=dsz
+};
+
+def macroop CVTSD2SI_R_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=(1 | 4)
+ mov2int reg, ufp1, size=dsz
+};
+
+def macroop CVTTSS2SI_R_XMM {
+ cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=1
+ mov2int reg, ufp1, size=dsz
+};
+
+def macroop CVTTSS2SI_R_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=1
+ mov2int reg, ufp1, size=dsz
+};
+
+def macroop CVTTSS2SI_R_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=1
+ mov2int reg, ufp1, size=dsz
+};
-def macroop CVTTSD2SI_R_R {
- cvtf_d2i reg, xmmlm
+def macroop CVTTSD2SI_R_XMM {
+ cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=1
+ mov2int reg, ufp1, size=dsz
};
def macroop CVTTSD2SI_R_M {
- ldfp ufp1, seg, sib, disp
- cvtf_d2i reg, ufp1
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=1
+ mov2int reg, ufp1, size=dsz
};
def macroop CVTTSD2SI_R_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
- cvtf_d2i reg, ufp1
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=1
+ mov2int reg, ufp1, size=dsz
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py
index af579a46f..900e91c99 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py
@@ -54,8 +54,73 @@
# Authors: Gabe Black
microcode = '''
-# CVTPS2PI
-# CVTPD2PI
-# CVTTPS2PI
-# CVTTPD2PI
+def macroop CVTPS2PI_MMX_XMM {
+ cvtf2i mmx, xmmlm, size=4, ext=4
+};
+
+def macroop CVTPS2PI_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2i mmx, ufp1, size=4, ext=4
+};
+
+def macroop CVTPS2PI_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2i mmx, ufp1, size=4, ext=4
+};
+
+def macroop CVTPD2PI_MMX_XMM {
+ cvtf2i mmx, xmmlm, srcSize=8, destSize=4, ext=4
+ cvtf2i mmx, xmmhm, srcSize=8, destSize=4, ext=(4 | 2)
+};
+
+def macroop CVTPD2PI_MMX_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i mmx, ufp1, srcSize=8, destSize=4, ext=4
+ cvtf2i mmx, ufp2, srcSize=8, destSize=4, ext=(4 | 2)
+};
+
+def macroop CVTPD2PI_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i mmx, ufp1, srcSize=8, destSize=4, ext=4
+ cvtf2i mmx, ufp2, srcSize=8, destSize=4, ext=(4 | 2)
+};
+
+def macroop CVTTPS2PI_MMX_XMM {
+ cvtf2i mmx, xmmlm, size=4, ext=0
+};
+
+def macroop CVTTPS2PI_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvtf2i mmx, ufp1, size=4, ext=0
+};
+
+def macroop CVTTPS2PI_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvtf2i mmx, ufp1, size=4, ext=0
+};
+
+def macroop CVTTPD2PI_MMX_XMM {
+ cvtf2i mmx, xmmlm, srcSize=8, destSize=4, ext=0
+ cvtf2i mmx, xmmhm, srcSize=8, destSize=4, ext=2
+};
+
+def macroop CVTTPD2PI_MMX_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i mmx, ufp1, srcSize=8, destSize=4, ext=0
+ cvtf2i mmx, ufp2, srcSize=8, destSize=4, ext=2
+};
+
+def macroop CVTTPD2PI_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i mmx, ufp1, srcSize=8, destSize=4, ext=0
+ cvtf2i mmx, ufp2, srcSize=8, destSize=4, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py
index 683e2808d..041f891ef 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py
@@ -54,8 +54,89 @@
# Authors: Gabe Black
microcode = '''
-# CVTPS2DQ
-# CVTPD2DQ
-# CVTTPS2DQ
-# CVTTPD2DQ
+def macroop CVTPS2DQ_XMM_XMM {
+ cvtf2i xmml, xmmlm, size=4, ext=4
+ cvtf2i xmmh, xmmhm, size=4, ext=4
+};
+
+def macroop CVTPS2DQ_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i xmml, ufp1, size=4, ext=4
+ cvtf2i xmmh, ufp2, size=4, ext=4
+};
+
+def macroop CVTPS2DQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i xmml, ufp1, size=4, ext=4
+ cvtf2i xmmh, ufp2, size=4, ext=4
+};
+
+def macroop CVTPD2DQ_XMM_XMM {
+ cvtf2i xmml, xmmlm, srcSize=8, destSize=4, ext=4
+ cvtf2i xmml, xmmhm, srcSize=8, destSize=4, ext=(4 | 2)
+ lfpimm xmmh, 0
+};
+
+def macroop CVTPD2DQ_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i xmml, ufp1, srcSize=8, destSize=4, ext=4
+ cvtf2i xmml, ufp2, srcSize=8, destSize=4, ext=(4 | 2)
+ lfpimm xmmh, 0
+};
+
+def macroop CVTPD2DQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i xmml, ufp1, srcSize=8, destSize=4, ext=4
+ cvtf2i xmml, ufp2, srcSize=8, destSize=4, ext=(4 | 2)
+ lfpimm xmmh, 0
+};
+
+def macroop CVTTPS2DQ_XMM_XMM {
+ cvtf2i xmml, xmmlm, size=4, ext=0
+ cvtf2i xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop CVTTPS2DQ_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i xmml, ufp1, size=4, ext=0
+ cvtf2i xmmh, ufp2, size=4, ext=0
+};
+
+def macroop CVTTPS2DQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i xmml, ufp1, size=4, ext=0
+ cvtf2i xmmh, ufp2, size=4, ext=0
+};
+
+def macroop CVTTPD2DQ_XMM_XMM {
+ cvtf2i xmml, xmmlm, srcSize=8, destSize=4, ext=0
+ cvtf2i xmml, xmmhm, srcSize=8, destSize=4, ext=2
+ lfpimm xmmh, 0
+};
+
+def macroop CVTTPD2DQ_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i xmml, ufp1, srcSize=8, destSize=4, ext=0
+ cvtf2i xmml, ufp2, srcSize=8, destSize=4, ext=2
+ lfpimm xmmh, 0
+};
+
+def macroop CVTTPD2DQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ cvtf2i xmml, ufp1, srcSize=8, destSize=4, ext=0
+ cvtf2i xmml, ufp2, srcSize=8, destSize=4, ext=2
+ lfpimm xmmh, 0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py
index 868d72b06..7b09e796a 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py
@@ -54,6 +54,45 @@
# Authors: Gabe Black
microcode = '''
-# SHUFPS
-# SHUFPD
+# SHUFPS, register source with immediate: the first shuffle selects from the
+# destination halves (xmml, xmmh) using IMMEDIATE and parks the result in the
+# temporary ufp1; the second selects from the source halves (xmmlm, xmmhm)
+# using IMMEDIATE >> 4 and writes xmmh; ufp1 is copied into xmml last.
+# NOTE(review): the memory forms write xmml directly, so the ufp1 detour is
+# presumably needed because source and destination may be the same register
+# and xmml must stay intact until the second shuffle has read xmmlm -- confirm.
+def macroop SHUFPS_XMM_XMM_I {
+ shuffle ufp1, xmml, xmmh, size=4, ext="IMMEDIATE"
+ shuffle xmmh, xmmlm, xmmhm, size=4, ext="IMMEDIATE >> 4"
+ movfp xmml, ufp1, dataSize=8
+};
+
+def macroop SHUFPS_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmml, xmml, xmmh, size=4, ext="IMMEDIATE"
+ shuffle xmmh, ufp1, ufp2, size=4, ext="IMMEDIATE >> 4"
+};
+
+def macroop SHUFPS_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmml, xmml, xmmh, size=4, ext="IMMEDIATE"
+ shuffle xmmh, ufp1, ufp2, size=4, ext="IMMEDIATE >> 4"
+};
+
+def macroop SHUFPD_XMM_XMM_I {
+ shuffle ufp1, xmml, xmmh, size=8, ext="IMMEDIATE"
+ shuffle xmmh, xmmlm, xmmhm, size=8, ext="IMMEDIATE >> 1"
+ movfp xmml, ufp1, dataSize=8
+};
+
+def macroop SHUFPD_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmml, xmml, xmmh, size=8, ext="IMMEDIATE"
+ shuffle xmmh, ufp1, ufp2, size=8, ext="IMMEDIATE >> 1"
+};
+
+def macroop SHUFPD_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmml, xmml, xmmh, size=8, ext="IMMEDIATE"
+ shuffle xmmh, ufp1, ufp2, size=8, ext="IMMEDIATE >> 1"
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py
index 93112f7d6..0bf654b15 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py
@@ -54,8 +54,74 @@
# Authors: Gabe Black
microcode = '''
-# UNPCKHPS
-# UNPCKHPD
-# UNPCKLPS
-# UNPCKLPD
+def macroop UNPCKLPS_XMM_XMM {
+ unpack xmmh, xmml, xmmlm, ext=1, size=4
+ unpack xmml, xmml, xmmlm, ext=0, size=4
+};
+
+def macroop UNPCKLPS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack xmmh, xmml, ufp1, ext=1, size=4
+ unpack xmml, xmml, ufp1, ext=0, size=4
+};
+
+def macroop UNPCKLPS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack xmmh, xmml, ufp1, ext=1, size=4
+ unpack xmml, xmml, ufp1, ext=0, size=4
+};
+
+# UNPCKLPD, register source: the low half of the destination stays in place
+# and the low half of the source (xmmlm) is copied into the high half.
+# dataSize=8 is given explicitly, as on the other sized movfp uses in these
+# files, so the copy width does not depend on the default operand size.
+def macroop UNPCKLPD_XMM_XMM {
+ movfp xmmh, xmmlm, dataSize=8
+};
+
+def macroop UNPCKLPD_XMM_M {
+ ldfp xmmh, seg, sib, disp, dataSize=8
+};
+
+def macroop UNPCKLPD_XMM_P {
+ rdip t7
+ ldfp xmmh, seg, riprel, disp, dataSize=8
+};
+
+def macroop UNPCKHPS_XMM_XMM {
+ unpack xmml, xmmh, xmmhm, ext=0, size=4
+ unpack xmmh, xmmh, xmmhm, ext=1, size=4
+};
+
+def macroop UNPCKHPS_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ unpack xmml, xmmh, ufp1, ext=0, size=4
+ unpack xmmh, xmmh, ufp1, ext=1, size=4
+};
+
+def macroop UNPCKHPS_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ unpack xmml, xmmh, ufp1, ext=0, size=4
+ unpack xmmh, xmmh, ufp1, ext=1, size=4
+};
+
+# UNPCKHPD, register source: the destination's high half moves down into its
+# low half, then the source's high half (xmmhm) becomes the new high half.
+# The order matters: xmmh must be read before it is overwritten.
+# dataSize=8 is given explicitly, as on the other sized movfp uses in these
+# files, so the copy width does not depend on the default operand size.
+def macroop UNPCKHPD_XMM_XMM {
+ movfp xmml, xmmh, dataSize=8
+ movfp xmmh, xmmhm, dataSize=8
+};
+
+# UNPCKHPD, memory source: lea puts the effective address in t1 and the load
+# fetches the 8 bytes at displacement 8 from it into ufp1. The destination's
+# high half then moves down into its low half and ufp1 becomes the new high
+# half. dataSize=8 is given explicitly on both movfp microops, as on the
+# other sized movfp uses in these files.
+def macroop UNPCKHPD_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ movfp xmml, xmmh, dataSize=8
+ movfp xmmh, ufp1, dataSize=8
+};
+
+# UNPCKHPD, RIP-relative memory source: rdip reads the instruction pointer
+# into t7, lea puts the effective address in t1, and the load fetches the
+# 8 bytes at displacement 8 from it into ufp1. The destination's high half
+# then moves down into its low half and ufp1 becomes the new high half.
+# dataSize=8 is given explicitly on both movfp microops, as on the other
+# sized movfp uses in these files.
+def macroop UNPCKHPD_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ movfp xmml, xmmh, dataSize=8
+ movfp xmmh, ufp1, dataSize=8
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py
index 76279fc70..1f4044bde 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py
@@ -54,92 +54,248 @@
# Authors: Gabe Black
microcode = '''
-def macroop MOVAPS_R_M {
+def macroop MOVAPS_XMM_M {
# Check low address.
ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
ldfp xmml, seg, sib, disp, dataSize=8
};
-def macroop MOVAPS_R_P {
+def macroop MOVAPS_XMM_P {
rdip t7
# Check low address.
ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
ldfp xmml, seg, riprel, disp, dataSize=8
};
-def macroop MOVAPS_M_R {
+def macroop MOVAPS_M_XMM {
# Check low address.
stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
stfp xmml, seg, sib, disp, dataSize=8
};
-def macroop MOVAPS_P_R {
+def macroop MOVAPS_P_XMM {
rdip t7
# Check low address.
stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
stfp xmml, seg, riprel, disp, dataSize=8
};
-def macroop MOVAPS_R_R {
+def macroop MOVAPS_XMM_XMM {
# Check low address.
movfp xmml, xmmlm, dataSize=8
movfp xmmh, xmmhm, dataSize=8
};
-# MOVAPD
-# MOVUPS
-# MOVUPD
-# MOVHPS
-# MOVHPD
-# MOVLPS
+def macroop MOVAPD_XMM_XMM {
+ movfp xmml, xmmlm, dataSize=8
+ movfp xmmh, xmmhm, dataSize=8
+};
+
+def macroop MOVAPD_XMM_M {
+ ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVAPD_XMM_P {
+ rdip t7
+ ldfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVAPD_M_XMM {
+ stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
+ stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVAPD_P_XMM {
+ rdip t7
+ stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8
+ stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVUPS_XMM_XMM {
+ movfp xmml, xmmlm, dataSize=8
+ movfp xmmh, xmmhm, dataSize=8
+};
+
+def macroop MOVUPS_XMM_M {
+ ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVUPS_XMM_P {
+ rdip t7
+ ldfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+};
-def macroop MOVLPD_R_M {
+def macroop MOVUPS_M_XMM {
+ stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
+ stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVUPS_P_XMM {
+ rdip t7
+ stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8
+ stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVUPD_XMM_XMM {
+ movfp xmml, xmmlm, dataSize=8
+ movfp xmmh, xmmhm, dataSize=8
+};
+
+def macroop MOVUPD_XMM_M {
+ ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVUPD_XMM_P {
+ rdip t7
+ ldfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVUPD_M_XMM {
+ stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
+ stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
+};
+
+def macroop MOVUPD_P_XMM {
+ rdip t7
+ stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8
+ stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+};
+
+# MOVHPS xmm, m64: load the 8 bytes at the effective address into the high
+# half of the destination. The memory operand is a single quadword, so it is
+# read at the plain displacement (no +8 offset); the low half is untouched.
+def macroop MOVHPS_XMM_M {
+ ldfp xmmh, seg, sib, disp, dataSize=8
+};
+
+# MOVHPS xmm, m64 (RIP-relative): load the 8 bytes at the effective address
+# into the high half of the destination. The memory operand is a single
+# quadword, so it is read at the plain displacement (no +8 offset).
+def macroop MOVHPS_XMM_P {
+ rdip t7
+ ldfp xmmh, seg, riprel, disp, dataSize=8
+};
+
+# MOVHPS m64, xmm: store the high half of the source register to the 8 bytes
+# at the effective address. The memory operand is a single quadword, so it
+# is written at the plain displacement (no +8 offset).
+def macroop MOVHPS_M_XMM {
+ stfp xmmh, seg, sib, disp, dataSize=8
+};
+
+# MOVHPS m64, xmm (RIP-relative): store only the high half of the source
+# register to the 8 bytes at the effective address. The low half is not
+# written to memory; a 16-byte store would clobber the neighbouring quadword.
+def macroop MOVHPS_P_XMM {
+ rdip t7
+ stfp xmmh, seg, riprel, disp, dataSize=8
+};
+
+# MOVHPD xmm, m64: load the 8 bytes at the effective address into the high
+# half of the destination. The memory operand is a single quadword, so it is
+# read at the plain displacement (no +8 offset); the low half is untouched.
+def macroop MOVHPD_XMM_M {
+ ldfp xmmh, seg, sib, disp, dataSize=8
+};
+
+# MOVHPD xmm, m64 (RIP-relative): load the 8 bytes at the effective address
+# into the high half of the destination. The memory operand is a single
+# quadword, so it is read at the plain displacement (no +8 offset).
+def macroop MOVHPD_XMM_P {
+ rdip t7
+ ldfp xmmh, seg, riprel, disp, dataSize=8
+};
+
+# MOVHPD m64, xmm: store the high half of the source register to the 8 bytes
+# at the effective address. The memory operand is a single quadword, so it
+# is written at the plain displacement (no +8 offset).
+def macroop MOVHPD_M_XMM {
+ stfp xmmh, seg, sib, disp, dataSize=8
+};
+
+# MOVHPD m64, xmm (RIP-relative): store only the high half of the source
+# register to the 8 bytes at the effective address. The low half is not
+# written to memory; a 16-byte store would clobber the neighbouring quadword.
+def macroop MOVHPD_P_XMM {
+ rdip t7
+ stfp xmmh, seg, riprel, disp, dataSize=8
+};
+
+def macroop MOVLPS_XMM_M {
ldfp xmml, seg, sib, disp, dataSize=8
};
-def macroop MOVLPD_R_P {
+def macroop MOVLPS_XMM_P {
rdip t7
ldfp xmml, seg, riprel, disp, dataSize=8
};
-def macroop MOVLPD_M_R {
+def macroop MOVLPS_M_XMM {
stfp xmml, seg, sib, disp, dataSize=8
};
-def macroop MOVLPD_P_R {
+def macroop MOVLPS_P_XMM {
rdip t7
stfp xmml, seg, riprel, disp, dataSize=8
};
-def macroop MOVLPD_R_R {
- movfp xmml, xmmlm, dataSize=8
+def macroop MOVLPD_XMM_M {
+ ldfp xmml, seg, sib, disp, dataSize=8
+};
+
+def macroop MOVLPD_XMM_P {
+ rdip t7
+ ldfp xmml, seg, riprel, disp, dataSize=8
+};
+
+def macroop MOVLPD_M_XMM {
+ stfp xmml, seg, sib, disp, dataSize=8
+};
+
+def macroop MOVLPD_P_XMM {
+ rdip t7
+ stfp xmml, seg, riprel, disp, dataSize=8
+};
+
+def macroop MOVHLPS_XMM_XMM {
+ movfp xmml, xmmhm, dataSize=8
};
-# MOVHLPS
-# MOVLHPS
-# MOVSS
+def macroop MOVLHPS_XMM_XMM {
+ movfp xmmh, xmmlm, dataSize=8
+};
+
+def macroop MOVSS_XMM_XMM {
+ movfp xmml, xmmlm, dataSize=4
+};
+
+def macroop MOVSS_XMM_M {
+ lfpimm xmml, 0
+ lfpimm xmmh, 0
+ ldfp xmml, seg, sib, disp, dataSize=4
+};
+
+def macroop MOVSS_XMM_P {
+ rdip t7
+ lfpimm xmml, 0
+ lfpimm xmmh, 0
+ ldfp xmml, seg, riprel, disp, dataSize=4
+};
+
+def macroop MOVSS_M_XMM {
+ stfp xmml, seg, sib, disp, dataSize=4
+};
+
+def macroop MOVSS_P_XMM {
+ rdip t7
+ stfp xmml, seg, riprel, disp, dataSize=4
+};
-def macroop MOVSD_R_M {
+# MOVSD xmm, m64: load the 8 bytes at the effective address into the low half
+# and clear the high half. The zeroing runs after the load so that a faulting
+# load leaves the destination's high half unmodified.
+def macroop MOVSD_XMM_M {
# Zero xmmh
ldfp xmml, seg, sib, disp, dataSize=8
+ lfpimm xmmh, 0
};
-def macroop MOVSD_R_P {
+# MOVSD xmm, m64 (RIP-relative): load the 8 bytes at the effective address
+# into the low half and clear the high half. The zeroing runs after the load
+# so that a faulting load leaves the destination's high half unmodified.
+def macroop MOVSD_XMM_P {
rdip t7
# Zero xmmh
ldfp xmml, seg, riprel, disp, dataSize=8
+ lfpimm xmmh, 0
};
-def macroop MOVSD_M_R {
+def macroop MOVSD_M_XMM {
stfp xmml, seg, sib, disp, dataSize=8
};
-def macroop MOVSD_P_R {
+def macroop MOVSD_P_XMM {
rdip t7
stfp xmml, seg, riprel, disp, dataSize=8
};
-def macroop MOVSD_R_R {
+def macroop MOVSD_XMM_XMM {
movfp xmml, xmmlm, dataSize=8
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py
index abd1c7327..5c5fb81d2 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py
@@ -54,6 +54,15 @@
# Authors: Gabe Black
microcode = '''
-# MOVMSKPS
-# MOVMSKPD
+# MOVMSKPS: clears the integer destination with limm, then runs movsign
+# (size=4) once on the low source half (xmmlm, ext=0) and once on the high
+# source half (xmmhm, ext=1).
+# NOTE(review): movsign presumably gathers each lane's sign bit into reg,
+# with ext choosing which group of result bits is filled -- confirm against
+# the microop definition.
+def macroop MOVMSKPS_R_XMM {
+ limm reg, 0
+ movsign reg, xmmlm, size=4, ext=0
+ movsign reg, xmmhm, size=4, ext=1
+};
+
+def macroop MOVMSKPD_R_XMM {
+ limm reg, 0
+ movsign reg, xmmlm, size=8, ext=0
+ movsign reg, xmmhm, size=8, ext=1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py
index 096708393..539edfd74 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py
@@ -54,7 +54,22 @@
# Authors: Gabe Black
microcode = '''
-# MOVDDUP
+def macroop MOVDDUP_XMM_XMM {
+ movfp xmmh, xmmlm, dataSize=8
+ movfp xmml, xmmlm, dataSize=8
+};
+
+def macroop MOVDDUP_XMM_M {
+ ldfp xmml, seg, sib, disp, dataSize=8
+ movfp xmmh, xmml, dataSize=8
+};
+
+def macroop MOVDDUP_XMM_P {
+ rdip t7
+ ldfp xmml, seg, riprel, disp, dataSize=8
+ movfp xmmh, xmml, dataSize=8
+};
+
# MOVSLDUP
# MOVSHDUP
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py b/src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py
index 8d7d3ba25..e3eaf16a0 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py
@@ -54,8 +54,91 @@
# Authors: Gabe Black
microcode = '''
-# ANDPS
-# ANDPD
-# ANDNPS
-# ANDNPD
+def macroop ANDPS_XMM_XMM {
+ mand xmml, xmml, xmmlm
+ mand xmmh, xmmh, xmmhm
+};
+
+# ANDPS, memory source: lea computes the effective address into t1; the two
+# 8-byte loads then address memory through [1, t0, t1], the first with no
+# displacement into ufp1 and the second with displacement 8 into ufp2. Each
+# mand combines one half of the destination with the matching loaded half.
+# NOTE(review): [1, t0, t1] is presumably scale=1, index=t0, base=t1 --
+# confirm against the microcode assembler's operand syntax.
+def macroop ANDPS_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mand xmml, xmml, ufp1
+ mand xmmh, xmmh, ufp2
+};
+
+def macroop ANDPS_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mand xmml, xmml, ufp1
+ mand xmmh, xmmh, ufp2
+};
+
+def macroop ANDPD_XMM_XMM {
+ mand xmml, xmml, xmmlm
+ mand xmmh, xmmh, xmmhm
+};
+
+def macroop ANDPD_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mand xmml, xmml, ufp1
+ mand xmmh, xmmh, ufp2
+};
+
+def macroop ANDPD_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mand xmml, xmml, ufp1
+ mand xmmh, xmmh, ufp2
+};
+
+def macroop ANDNPS_XMM_XMM {
+ mandn xmml, xmml, xmmlm
+ mandn xmmh, xmmh, xmmhm
+};
+
+def macroop ANDNPS_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mandn xmml, xmml, ufp1
+ mandn xmmh, xmmh, ufp2
+};
+
+def macroop ANDNPS_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mandn xmml, xmml, ufp1
+ mandn xmmh, xmmh, ufp2
+};
+
+def macroop ANDNPD_XMM_XMM {
+ mandn xmml, xmml, xmmlm
+ mandn xmmh, xmmh, xmmhm
+};
+
+def macroop ANDNPD_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mandn xmml, xmml, ufp1
+ mandn xmmh, xmmh, ufp2
+};
+
+def macroop ANDNPD_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mandn xmml, xmml, ufp1
+ mandn xmmh, xmmh, ufp2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py b/src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py
index 974771e98..5c20db204 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py
@@ -54,25 +54,43 @@
# Authors: Gabe Black
microcode = '''
-# XORPS
+def macroop XORPD_XMM_XMM {
+ mxor xmml, xmml, xmmlm
+ mxor xmmh, xmmh, xmmhm
+};
+
+def macroop XORPD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mxor xmml, xmml, ufp1
+ mxor xmmh, xmmh, ufp2
+};
+
+def macroop XORPD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mxor xmml, xmml, ufp1
+ mxor xmmh, xmmh, ufp2
+};
-def macroop XORPD_R_R {
- xorfp xmml, xmml, xmmlm
- xorfp xmmh, xmmh, xmmhm
+def macroop XORPS_XMM_XMM {
+ mxor xmml, xmml, xmmlm
+ mxor xmmh, xmmh, xmmhm
};
-def macroop XORPD_R_M {
- ldfp ufp1, seg, sib, disp
- ldfp ufp2, seg, sib, "DISPLACEMENT + 8"
- xorfp xmml, xmml, ufp1
- xorfp xmmh, xmmh, ufp2
+def macroop XORPS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mxor xmml, xmml, ufp1
+ mxor xmmh, xmmh, ufp2
};
-def macroop XORPD_R_P {
+def macroop XORPS_XMM_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
- ldfp ufp2, seg, riprel, "DISPLACEMENT + 8"
- xorfp xmml, xmml, ufp1
- xorfp xmmh, xmmh, ufp2
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mxor xmml, xmml, ufp1
+ mxor xmmh, xmmh, ufp2
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py b/src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py
index fba71c899..c99a713aa 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py
@@ -54,6 +54,47 @@
# Authors: Gabe Black
microcode = '''
-# ORPS
-# ORPD
+def macroop ORPS_XMM_XMM {
+ mor xmml, xmml, xmmlm
+ mor xmmh, xmmh, xmmhm
+};
+
+def macroop ORPS_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mor xmml, xmml, ufp1
+ mor xmmh, xmmh, ufp2
+};
+
+def macroop ORPS_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mor xmml, xmml, ufp1
+ mor xmmh, xmmh, ufp2
+};
+
+def macroop ORPD_XMM_XMM {
+ mor xmml, xmml, xmmlm
+ mor xmmh, xmmh, xmmhm
+};
+
+def macroop ORPD_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mor xmml, xmml, ufp1
+ mor xmmh, xmmh, ufp2
+};
+
+def macroop ORPD_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mor xmml, xmml, ufp1
+ mor xmmh, xmmh, ufp2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py
index 235a4fc7b..05e2b80d5 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py
@@ -54,12 +54,163 @@
# Authors: Gabe Black
microcode = '''
-# PADDB
-# PADDW
-# PADDD
-# PADDQ
-# PADDSB
-# PADDSW
-# PADDUSB
-# PADDUSW
+# PADDB, register source: one maddi per 8-byte half of the register (low,
+# then high), each with size=1 and ext=0.
+# NOTE(review): the PADD* macroops in this file differ only in size (1/2/4/8)
+# and ext (0 for PADDB/W/D/Q, 4 for PADDSB/SW, 2 for PADDUSB/USW), so size is
+# presumably the lane width in bytes and ext the saturation mode -- confirm
+# against the maddi microop definition.
+def macroop PADDB_XMM_XMM {
+ maddi xmml, xmml, xmmlm, size=1, ext=0
+ maddi xmmh, xmmh, xmmhm, size=1, ext=0
+};
+
+def macroop PADDB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=1, ext=0
+ maddi xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+def macroop PADDB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=1, ext=0
+ maddi xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+def macroop PADDW_XMM_XMM {
+ maddi xmml, xmml, xmmlm, size=2, ext=0
+ maddi xmmh, xmmh, xmmhm, size=2, ext=0
+};
+
+def macroop PADDW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=2, ext=0
+ maddi xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+def macroop PADDW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=2, ext=0
+ maddi xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+def macroop PADDD_XMM_XMM {
+ maddi xmml, xmml, xmmlm, size=4, ext=0
+ maddi xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop PADDD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=4, ext=0
+ maddi xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop PADDD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=4, ext=0
+ maddi xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop PADDQ_XMM_XMM {
+ maddi xmml, xmml, xmmlm, size=8, ext=0
+ maddi xmmh, xmmh, xmmhm, size=8, ext=0
+};
+
+def macroop PADDQ_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=8, ext=0
+ maddi xmmh, xmmh, ufp2, size=8, ext=0
+};
+
+def macroop PADDQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=8, ext=0
+ maddi xmmh, xmmh, ufp2, size=8, ext=0
+};
+
+def macroop PADDSB_XMM_XMM {
+ maddi xmml, xmml, xmmlm, size=1, ext=4
+ maddi xmmh, xmmh, xmmhm, size=1, ext=4
+};
+
+def macroop PADDSB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=1, ext=4
+ maddi xmmh, xmmh, ufp2, size=1, ext=4
+};
+
+def macroop PADDSB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=1, ext=4
+ maddi xmmh, xmmh, ufp2, size=1, ext=4
+};
+
+def macroop PADDSW_XMM_XMM {
+ maddi xmml, xmml, xmmlm, size=2, ext=4
+ maddi xmmh, xmmh, xmmhm, size=2, ext=4
+};
+
+def macroop PADDSW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=2, ext=4
+ maddi xmmh, xmmh, ufp2, size=2, ext=4
+};
+
+def macroop PADDSW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=2, ext=4
+ maddi xmmh, xmmh, ufp2, size=2, ext=4
+};
+
+def macroop PADDUSB_XMM_XMM {
+ maddi xmml, xmml, xmmlm, size=1, ext=2
+ maddi xmmh, xmmh, xmmhm, size=1, ext=2
+};
+
+def macroop PADDUSB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=1, ext=2
+ maddi xmmh, xmmh, ufp2, size=1, ext=2
+};
+
+def macroop PADDUSB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=1, ext=2
+ maddi xmmh, xmmh, ufp2, size=1, ext=2
+};
+
+def macroop PADDUSW_XMM_XMM {
+ maddi xmml, xmml, xmmlm, size=2, ext=2
+ maddi xmmh, xmmh, xmmhm, size=2, ext=2
+};
+
+def macroop PADDUSW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=2, ext=2
+ maddi xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+def macroop PADDUSW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ maddi xmml, xmml, ufp1, size=2, ext=2
+ maddi xmmh, xmmh, ufp2, size=2, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py
index 9bda3371f..017443e55 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py
@@ -54,6 +54,63 @@
# Authors: Gabe Black
microcode = '''
-# PAVGB
-# PAVGW
+# PAVGB, register source: mavg (size=1, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PAVGB_XMM_XMM {
+ mavg xmml, xmml, xmmlm, size=1, ext=0
+ mavg xmmh, xmmh, xmmhm, size=1, ext=0
+};
+
+# PAVGB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mavg (size=1, ext=0) per half.
+def macroop PAVGB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mavg xmml, xmml, ufp1, size=1, ext=0
+ mavg xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PAVGB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mavg (size=1, ext=0) per half.
+def macroop PAVGB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mavg xmml, xmml, ufp1, size=1, ext=0
+ mavg xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PAVGUSB, register source: same microops as PAVGB_XMM_XMM
+# (mavg size=1, ext=0 on each half).
+def macroop PAVGUSB_XMM_XMM {
+ mavg xmml, xmml, xmmlm, size=1, ext=0
+ mavg xmmh, xmmh, xmmhm, size=1, ext=0
+};
+
+# PAVGUSB, memory source: same microops as PAVGB_XMM_M
+# (two 8-byte loads into ufp1/ufp2, then mavg size=1, ext=0 per half).
+def macroop PAVGUSB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mavg xmml, xmml, ufp1, size=1, ext=0
+ mavg xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PAVGUSB, rip-relative source: same microops as PAVGB_XMM_P
+# (rdip into t7, riprel loads, then mavg size=1, ext=0 per half).
+def macroop PAVGUSB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mavg xmml, xmml, ufp1, size=1, ext=0
+ mavg xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PAVGW, register source: mavg (size=2, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PAVGW_XMM_XMM {
+ mavg xmml, xmml, xmmlm, size=2, ext=0
+ mavg xmmh, xmmh, xmmhm, size=2, ext=0
+};
+
+# PAVGW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mavg (size=2, ext=0) per half.
+def macroop PAVGW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mavg xmml, xmml, ufp1, size=2, ext=0
+ mavg xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+# PAVGW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mavg (size=2, ext=0) per half.
+def macroop PAVGW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mavg xmml, xmml, ufp1, size=2, ext=0
+ mavg xmmh, xmmh, ufp2, size=2, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
index 3e7345a9f..a5d90c6b2 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
@@ -54,8 +54,83 @@
# Authors: Gabe Black
microcode = '''
-# PMULHW
-# PMULLW
-# PMULHUW
-# PMULUDQ
+# PMULHW, register source: mmuli (size=2, ext=0x2 | 0x8) on xmml/xmmlm,
+# then on xmmh/xmmhm.
+def macroop PMULHW_XMM_XMM {
+ mmuli xmml, xmml, xmmlm, size=2, ext=(0x2 | 0x8)
+ mmuli xmmh, xmmh, xmmhm, size=2, ext=(0x2 | 0x8)
+};
+
+# PMULHW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mmuli (size=2, ext=0x2 | 0x8) per half.
+def macroop PMULHW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmuli xmml, xmml, ufp1, size=2, ext=(0x2 | 0x8)
+ mmuli xmmh, xmmh, ufp2, size=2, ext=(0x2 | 0x8)
+};
+
+# PMULHW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mmuli (size=2, ext=0x2 | 0x8) per half.
+def macroop PMULHW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmuli xmml, xmml, ufp1, size=2, ext=(0x2 | 0x8)
+ mmuli xmmh, xmmh, ufp2, size=2, ext=(0x2 | 0x8)
+};
+
+# PMULLW, register source: mmuli (size=2, ext=2) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PMULLW_XMM_XMM {
+ mmuli xmml, xmml, xmmlm, size=2, ext=2
+ mmuli xmmh, xmmh, xmmhm, size=2, ext=2
+};
+
+# PMULLW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mmuli (size=2, ext=2) per half.
+def macroop PMULLW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmuli xmml, xmml, ufp1, size=2, ext=2
+ mmuli xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+# PMULLW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mmuli (size=2, ext=2) per half.
+def macroop PMULLW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmuli xmml, xmml, ufp1, size=2, ext=2
+ mmuli xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+# PMULHUW, register source: mmuli (size=2, ext=8) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PMULHUW_XMM_XMM {
+ mmuli xmml, xmml, xmmlm, size=2, ext=8
+ mmuli xmmh, xmmh, xmmhm, size=2, ext=8
+};
+
+# PMULHUW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mmuli (size=2, ext=8) per half.
+def macroop PMULHUW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmuli xmml, xmml, ufp1, size=2, ext=8
+ mmuli xmmh, xmmh, ufp2, size=2, ext=8
+};
+
+# PMULHUW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mmuli (size=2, ext=8) per half.
+def macroop PMULHUW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmuli xmml, xmml, ufp1, size=2, ext=8
+ mmuli xmmh, xmmh, ufp2, size=2, ext=8
+};
+
+# PMULUDQ, register source: widening mmuli (srcSize=4, destSize=8, ext=1)
+# on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PMULUDQ_XMM_XMM {
+ mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=1
+ mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=1
+};
+
+# PMULUDQ, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mmuli (srcSize=4, destSize=8, ext=1) per half.
+def macroop PMULUDQ_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=1
+ mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=1
+};
+
+# PMULUDQ, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mmuli (srcSize=4, destSize=8, ext=1) per half.
+def macroop PMULUDQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=1
+ mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py
index 40b38867b..f157d165f 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py
@@ -54,5 +54,35 @@
# Authors: Gabe Black
microcode = '''
-# PMADDWD
+# PMADDWD, register source. For each half: two widening mmuli products
+# (srcSize=2, destSize=4) go to ufp3 and ufp4, differing only in the 0x20
+# ext bit, and maddi (size=4) sums them into the destination half.
+def macroop PMADDWD_XMM_XMM {
+ mmuli ufp3, xmml, xmmlm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, xmml, xmmlm, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi xmml, ufp3, ufp4, size=4, ext=0
+ mmuli ufp3, xmmh, xmmhm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, xmmh, xmmhm, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi xmmh, ufp3, ufp4, size=4, ext=0
+};
+
+# PMADDWD, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2. For each half, two mmuli products go to ufp3/ufp4 and
+# maddi (size=4) sums them into the destination half.
+def macroop PMADDWD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi xmml, ufp3, ufp4, size=4, ext=0
+ mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi xmmh, ufp3, ufp4, size=4, ext=0
+};
+
+# PMADDWD, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2.
+# For each half, two mmuli products go to ufp3/ufp4 and maddi (size=4)
+# sums them into the destination half.
+def macroop PMADDWD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi xmml, ufp3, ufp4, size=4, ext=0
+ mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi xmmh, ufp3, ufp4, size=4, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py
index 44781eb55..fdfb08667 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py
@@ -54,12 +54,163 @@
# Authors: Gabe Black
microcode = '''
-# PSUBB
-# PSUBW
-# PSUBD
-# PSUBQ
-# PSUBSB
-# PSUBSW
-# PSUBUSB
-# PSUBUSW
+# PSUBB, register source: msubi (size=1, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PSUBB_XMM_XMM {
+ msubi xmml, xmml, xmmlm, size=1, ext=0
+ msubi xmmh, xmmh, xmmhm, size=1, ext=0
+};
+
+# PSUBB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msubi (size=1, ext=0) per half.
+def macroop PSUBB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=1, ext=0
+ msubi xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PSUBB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msubi (size=1, ext=0) per half.
+def macroop PSUBB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=1, ext=0
+ msubi xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PSUBW, register source: msubi (size=2, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PSUBW_XMM_XMM {
+ msubi xmml, xmml, xmmlm, size=2, ext=0
+ msubi xmmh, xmmh, xmmhm, size=2, ext=0
+};
+
+# PSUBW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msubi (size=2, ext=0) per half.
+def macroop PSUBW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=2, ext=0
+ msubi xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+# PSUBW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msubi (size=2, ext=0) per half.
+def macroop PSUBW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=2, ext=0
+ msubi xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+# PSUBD, register source: msubi (size=4, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PSUBD_XMM_XMM {
+ msubi xmml, xmml, xmmlm, size=4, ext=0
+ msubi xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+# PSUBD, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msubi (size=4, ext=0) per half.
+def macroop PSUBD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=4, ext=0
+ msubi xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+# PSUBD, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msubi (size=4, ext=0) per half.
+def macroop PSUBD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=4, ext=0
+ msubi xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+# PSUBQ, register source: msubi (size=8, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PSUBQ_XMM_XMM {
+ msubi xmml, xmml, xmmlm, size=8, ext=0
+ msubi xmmh, xmmh, xmmhm, size=8, ext=0
+};
+
+# PSUBQ, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msubi (size=8, ext=0) per half.
+def macroop PSUBQ_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=8, ext=0
+ msubi xmmh, xmmh, ufp2, size=8, ext=0
+};
+
+# PSUBQ, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msubi (size=8, ext=0) per half.
+def macroop PSUBQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=8, ext=0
+ msubi xmmh, xmmh, ufp2, size=8, ext=0
+};
+
+# PSUBSB, register source: msubi (size=1, ext=4) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PSUBSB_XMM_XMM {
+ msubi xmml, xmml, xmmlm, size=1, ext=4
+ msubi xmmh, xmmh, xmmhm, size=1, ext=4
+};
+
+# PSUBSB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msubi (size=1, ext=4) per half.
+def macroop PSUBSB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=1, ext=4
+ msubi xmmh, xmmh, ufp2, size=1, ext=4
+};
+
+# PSUBSB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msubi (size=1, ext=4) per half.
+def macroop PSUBSB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=1, ext=4
+ msubi xmmh, xmmh, ufp2, size=1, ext=4
+};
+
+# PSUBSW, register source: msubi (size=2, ext=4) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PSUBSW_XMM_XMM {
+ msubi xmml, xmml, xmmlm, size=2, ext=4
+ msubi xmmh, xmmh, xmmhm, size=2, ext=4
+};
+
+# PSUBSW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msubi (size=2, ext=4) per half.
+def macroop PSUBSW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=2, ext=4
+ msubi xmmh, xmmh, ufp2, size=2, ext=4
+};
+
+# PSUBSW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msubi (size=2, ext=4) per half.
+def macroop PSUBSW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=2, ext=4
+ msubi xmmh, xmmh, ufp2, size=2, ext=4
+};
+
+# PSUBUSB, register source: msubi (size=1, ext=2) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PSUBUSB_XMM_XMM {
+ msubi xmml, xmml, xmmlm, size=1, ext=2
+ msubi xmmh, xmmh, xmmhm, size=1, ext=2
+};
+
+# PSUBUSB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msubi (size=1, ext=2) per half.
+def macroop PSUBUSB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=1, ext=2
+ msubi xmmh, xmmh, ufp2, size=1, ext=2
+};
+
+# PSUBUSB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msubi (size=1, ext=2) per half.
+def macroop PSUBUSB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=1, ext=2
+ msubi xmmh, xmmh, ufp2, size=1, ext=2
+};
+
+# PSUBUSW, register source: msubi (size=2, ext=2) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PSUBUSW_XMM_XMM {
+ msubi xmml, xmml, xmmlm, size=2, ext=2
+ msubi xmmh, xmmh, xmmhm, size=2, ext=2
+};
+
+# PSUBUSW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msubi (size=2, ext=2) per half.
+def macroop PSUBUSW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=2, ext=2
+ msubi xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+# PSUBUSW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msubi (size=2, ext=2) per half.
+def macroop PSUBUSW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msubi xmml, xmml, ufp1, size=2, ext=2
+ msubi xmmh, xmmh, ufp2, size=2, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py
index df1ca2301..ef3758e49 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py
@@ -54,5 +54,23 @@
# Authors: Gabe Black
microcode = '''
-# PSADBW
+# PSADBW, register source: msad (srcSize=1, destSize=2) on xmml/xmmlm,
+# then on xmmh/xmmhm.
+def macroop PSADBW_XMM_XMM {
+ msad xmml, xmml, xmmlm, srcSize=1, destSize=2
+ msad xmmh, xmmh, xmmhm, srcSize=1, destSize=2
+};
+
+# PSADBW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then msad (srcSize=1, destSize=2) per half.
+def macroop PSADBW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ msad xmml, xmml, ufp1, srcSize=1, destSize=2
+ msad xmmh, xmmh, ufp2, srcSize=1, destSize=2
+};
+
+# PSADBW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then msad (srcSize=1, destSize=2) per half.
+def macroop PSADBW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ msad xmml, xmml, ufp1, srcSize=1, destSize=2
+ msad xmmh, xmmh, ufp2, srcSize=1, destSize=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py
index 59380c6a7..2cb41861c 100644
--- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py
+++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py
@@ -54,10 +54,123 @@
# Authors: Gabe Black
microcode = '''
-# PCMPEQB
-# PCMPEQW
-# PCMPEQD
-# PCMPGTB
-# PCMPGTW
-# PCMPGTD
+# PCMPEQB, register source: mcmpi2r (size=1, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PCMPEQB_XMM_XMM {
+ mcmpi2r xmml, xmml, xmmlm, size=1, ext=0
+ mcmpi2r xmmh, xmmh, xmmhm, size=1, ext=0
+};
+
+# PCMPEQB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mcmpi2r (size=1, ext=0) per half.
+def macroop PCMPEQB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=1, ext=0
+ mcmpi2r xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PCMPEQB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mcmpi2r (size=1, ext=0) per half.
+def macroop PCMPEQB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=1, ext=0
+ mcmpi2r xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PCMPEQW, register source: mcmpi2r (size=2, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PCMPEQW_XMM_XMM {
+ mcmpi2r xmml, xmml, xmmlm, size=2, ext=0
+ mcmpi2r xmmh, xmmh, xmmhm, size=2, ext=0
+};
+
+# PCMPEQW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mcmpi2r (size=2, ext=0) per half.
+def macroop PCMPEQW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=2, ext=0
+ mcmpi2r xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+# PCMPEQW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mcmpi2r (size=2, ext=0) per half.
+def macroop PCMPEQW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=2, ext=0
+ mcmpi2r xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+# PCMPEQD, register source: mcmpi2r (size=4, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PCMPEQD_XMM_XMM {
+ mcmpi2r xmml, xmml, xmmlm, size=4, ext=0
+ mcmpi2r xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+# PCMPEQD, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mcmpi2r (size=4, ext=0) per half.
+def macroop PCMPEQD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=4, ext=0
+ mcmpi2r xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+# PCMPEQD, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mcmpi2r (size=4, ext=0) per half.
+def macroop PCMPEQD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=4, ext=0
+ mcmpi2r xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+# PCMPGTB, register source: mcmpi2r (size=1, ext=2) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PCMPGTB_XMM_XMM {
+ mcmpi2r xmml, xmml, xmmlm, size=1, ext=2
+ mcmpi2r xmmh, xmmh, xmmhm, size=1, ext=2
+};
+
+# PCMPGTB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mcmpi2r (size=1, ext=2) per half.
+def macroop PCMPGTB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=1, ext=2
+ mcmpi2r xmmh, xmmh, ufp2, size=1, ext=2
+};
+
+# PCMPGTB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mcmpi2r (size=1, ext=2) per half.
+def macroop PCMPGTB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=1, ext=2
+ mcmpi2r xmmh, xmmh, ufp2, size=1, ext=2
+};
+
+# PCMPGTW, register source: mcmpi2r (size=2, ext=2) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PCMPGTW_XMM_XMM {
+ mcmpi2r xmml, xmml, xmmlm, size=2, ext=2
+ mcmpi2r xmmh, xmmh, xmmhm, size=2, ext=2
+};
+
+# PCMPGTW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mcmpi2r (size=2, ext=2) per half.
+def macroop PCMPGTW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=2, ext=2
+ mcmpi2r xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+# PCMPGTW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mcmpi2r (size=2, ext=2) per half.
+def macroop PCMPGTW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=2, ext=2
+ mcmpi2r xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+# PCMPGTD, register source: mcmpi2r (size=4, ext=2) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PCMPGTD_XMM_XMM {
+ mcmpi2r xmml, xmml, xmmlm, size=4, ext=2
+ mcmpi2r xmmh, xmmh, xmmhm, size=4, ext=2
+};
+
+# PCMPGTD, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mcmpi2r (size=4, ext=2) per half.
+def macroop PCMPGTD_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=4, ext=2
+ mcmpi2r xmmh, xmmh, ufp2, size=4, ext=2
+};
+
+# PCMPGTD, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mcmpi2r (size=4, ext=2) per half.
+def macroop PCMPGTD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mcmpi2r xmml, xmml, ufp1, size=4, ext=2
+ mcmpi2r xmmh, xmmh, ufp2, size=4, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
index 79b1aca4c..d3bfbb529 100644
--- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
+++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
@@ -54,8 +54,83 @@
# Authors: Gabe Black
microcode = '''
-# PMAXUB
-# PMINUB
-# PMAXSW
-# PMINSW
+# PMINUB, register source: mmini (size=1, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PMINUB_XMM_XMM {
+ mmini xmml, xmml, xmmlm, size=1, ext=0
+ mmini xmmh, xmmh, xmmhm, size=1, ext=0
+};
+
+# PMINUB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mmini (size=1, ext=0) per half.
+def macroop PMINUB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmini xmml, xmml, ufp1, size=1, ext=0
+ mmini xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PMINUB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mmini (size=1, ext=0) per half.
+def macroop PMINUB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmini xmml, xmml, ufp1, size=1, ext=0
+ mmini xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PMINSW, register source: mmini (size=2, ext=2) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PMINSW_XMM_XMM {
+ mmini xmml, xmml, xmmlm, size=2, ext=2
+ mmini xmmh, xmmh, xmmhm, size=2, ext=2
+};
+
+# PMINSW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mmini (size=2, ext=2) per half.
+def macroop PMINSW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmini xmml, xmml, ufp1, size=2, ext=2
+ mmini xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+# PMINSW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mmini (size=2, ext=2) per half.
+def macroop PMINSW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmini xmml, xmml, ufp1, size=2, ext=2
+ mmini xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+# PMAXUB, register source: mmaxi (size=1, ext=0) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PMAXUB_XMM_XMM {
+ mmaxi xmml, xmml, xmmlm, size=1, ext=0
+ mmaxi xmmh, xmmh, xmmhm, size=1, ext=0
+};
+
+# PMAXUB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mmaxi (size=1, ext=0) per half.
+def macroop PMAXUB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmaxi xmml, xmml, ufp1, size=1, ext=0
+ mmaxi xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PMAXUB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mmaxi (size=1, ext=0) per half.
+def macroop PMAXUB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmaxi xmml, xmml, ufp1, size=1, ext=0
+ mmaxi xmmh, xmmh, ufp2, size=1, ext=0
+};
+
+# PMAXSW, register source: mmaxi (size=2, ext=2) on xmml/xmmlm, then on xmmh/xmmhm.
+def macroop PMAXSW_XMM_XMM {
+ mmaxi xmml, xmml, xmmlm, size=2, ext=2
+ mmaxi xmmh, xmmh, xmmhm, size=2, ext=2
+};
+
+# PMAXSW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then mmaxi (size=2, ext=2) per half.
+def macroop PMAXSW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ mmaxi xmml, xmml, ufp1, size=2, ext=2
+ mmaxi xmmh, xmmh, ufp2, size=2, ext=2
+};
+
+# PMAXSW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then mmaxi (size=2, ext=2) per half.
+def macroop PMAXSW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ mmaxi xmml, xmml, ufp1, size=2, ext=2
+ mmaxi xmmh, xmmh, ufp2, size=2, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py
index b667055dc..8d632a0ac 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py
@@ -54,20 +54,35 @@
# Authors: Gabe Black
microcode = '''
-# CVTSI2SS
+# CVTSI2SS, integer register source: mov2fp copies regm into ufp1 at operand
+# size dsz, then cvti2f (srcSize=dsz, destSize=4, ext=1) writes xmml only.
+def macroop CVTSI2SS_XMM_R {
+ mov2fp ufp1, regm, destSize=dsz, srcSize=dsz
+ cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1
+};
+
+# CVTSI2SS, memory source: loads 8 bytes at disp into ufp1, then cvti2f
+# (srcSize=dsz, destSize=4, ext=1) writes xmml only.
+# NOTE(review): the load is always 8 bytes although the convert consumes dsz
+# bytes - confirm the wider read is intended.
+def macroop CVTSI2SS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1
+};
+
+# CVTSI2SS, rip-relative source: rdip into t7, 8-byte riprel load into ufp1,
+# then cvti2f (srcSize=dsz, destSize=4, ext=1) writes xmml only.
+def macroop CVTSI2SS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1
+};
-def macroop CVTSI2SD_R_R {
- cvtf_i2d xmml, regm
+# CVTSI2SD, integer register source: mov2fp copies regm into ufp1 at operand
+# size dsz, then cvti2f (srcSize=dsz, destSize=8, ext=1) writes xmml only.
+def macroop CVTSI2SD_XMM_R {
+ mov2fp ufp1, regm, destSize=dsz, srcSize=dsz
+ cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1
};
-def macroop CVTSI2SD_R_M {
- ld t1, seg, sib, disp
- cvtf_i2d xmml, t1
+# CVTSI2SD, memory source: loads 8 bytes at disp into ufp1, then cvti2f
+# (srcSize=dsz, destSize=8, ext=1) writes xmml only.
+def macroop CVTSI2SD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1
};
-def macroop CVTSI2SD_R_P {
+# CVTSI2SD, rip-relative source: rdip into t7, 8-byte riprel load into ufp1,
+# then cvti2f (srcSize=dsz, destSize=8, ext=1) writes xmml only.
+def macroop CVTSI2SD_XMM_P {
rdip t7
- ld t1, seg, riprel, disp
- cvtf_i2d xmml, t1
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1
};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py
index 41e8549b5..21cfdcc68 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py
@@ -54,6 +54,41 @@
# Authors: Gabe Black
microcode = '''
-# CVTDQ2PS
-# CVTDQ2PD
+# CVTDQ2PS, register source: cvti2f (size=4, ext=0) from xmmlm into xmml,
+# then from xmmhm into xmmh.
+def macroop CVTDQ2PS_XMM_XMM {
+ cvti2f xmml, xmmlm, size=4, ext=0
+ cvti2f xmmh, xmmhm, size=4, ext=0
+};
+
+# CVTDQ2PS, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2, then cvti2f (size=4, ext=0) per half.
+def macroop CVTDQ2PS_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ cvti2f xmml, ufp1, size=4, ext=0
+ cvti2f xmmh, ufp2, size=4, ext=0
+};
+
+# CVTDQ2PS, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2,
+# then cvti2f (size=4, ext=0) per half.
+def macroop CVTDQ2PS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ cvti2f xmml, ufp1, size=4, ext=0
+ cvti2f xmmh, ufp2, size=4, ext=0
+};
+
+# CVTDQ2PD, register source: both widening cvti2f microops (srcSize=4,
+# destSize=8) read xmmlm; ext=2 produces xmmh and ext=0 produces xmml.
+# NOTE(review): xmmh is written first, presumably so xmml is not overwritten
+# before its second read when source and destination are the same register.
+def macroop CVTDQ2PD_XMM_XMM {
+ cvti2f xmmh, xmmlm, srcSize=4, destSize=8, ext=2
+ cvti2f xmml, xmmlm, srcSize=4, destSize=8, ext=0
+};
+
+# CVTDQ2PD, memory source: one 8-byte load at disp into ufp1; cvti2f
+# (srcSize=4, destSize=8) with ext=0 writes xmml and with ext=2 writes xmmh.
+def macroop CVTDQ2PD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvti2f xmml, ufp1, srcSize=4, destSize=8, ext=0
+ cvti2f xmmh, ufp1, srcSize=4, destSize=8, ext=2
+};
+
+# CVTDQ2PD, rip-relative source: rdip into t7, one 8-byte riprel load into
+# ufp1; cvti2f with ext=0 writes xmml and with ext=2 writes xmmh.
+def macroop CVTDQ2PD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvti2f xmml, ufp1, srcSize=4, destSize=8, ext=0
+ cvti2f xmmh, ufp1, srcSize=4, destSize=8, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py
index aeaea26cd..bc09d8b92 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py
@@ -54,6 +54,36 @@
# Authors: Gabe Black
microcode = '''
-# CVTPI2PS
-# CVTPI2PD
+# CVTPI2PS, MMX register source: cvti2f (size=4, ext=0) from mmxm into xmml;
+# xmmh is not written.
+def macroop CVTPI2PS_XMM_MMX {
+ cvti2f xmml, mmxm, size=4, ext=0
+};
+
+# CVTPI2PS, memory source: 8-byte load at disp into ufp1, then cvti2f
+# (size=4, ext=0) into xmml; xmmh is not written.
+def macroop CVTPI2PS_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvti2f xmml, ufp1, size=4, ext=0
+};
+
+# CVTPI2PS, rip-relative source: rdip into t7, 8-byte riprel load into ufp1,
+# then cvti2f (size=4, ext=0) into xmml; xmmh is not written.
+def macroop CVTPI2PS_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvti2f xmml, ufp1, size=4, ext=0
+};
+
+# CVTPI2PD, MMX register source: widening cvti2f (srcSize=4, destSize=8) from
+# mmxm; ext=0 writes xmml and ext=2 writes xmmh.
+def macroop CVTPI2PD_XMM_MMX {
+ cvti2f xmml, mmxm, srcSize=4, destSize=8, ext=0
+ cvti2f xmmh, mmxm, srcSize=4, destSize=8, ext=2
+};
+
+# CVTPI2PD, memory source: one 8-byte load at disp into ufp1; cvti2f
+# (srcSize=4, destSize=8) with ext=0 writes xmml and with ext=2 writes xmmh.
+def macroop CVTPI2PD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ cvti2f xmml, ufp1, srcSize=4, destSize=8, ext=0
+ cvti2f xmmh, ufp1, srcSize=4, destSize=8, ext=2
+};
+
+# CVTPI2PD, rip-relative source: rdip into t7, one 8-byte riprel load into
+# ufp1; cvti2f with ext=0 writes xmml and with ext=2 writes xmmh.
+def macroop CVTPI2PD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ cvti2f xmml, ufp1, srcSize=4, destSize=8, ext=0
+ cvti2f xmmh, ufp1, srcSize=4, destSize=8, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py
index 80f7a3e71..f4f06ca67 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py
@@ -54,6 +54,26 @@
# Authors: Gabe Black
microcode = '''
-# PEXTRW
-# PINSRW
+# PEXTRW: issues mov2int (size=2, ext=1) into reg from xmmlm and again from
+# xmmhm, both with the immediate masked to 3 bits.
+# NOTE(review): presumably ext=1 makes only the microop whose half contains
+# the selected word take effect - confirm against the mov2int definition.
+def macroop PEXTRW_R_XMM_I {
+ mov2int reg, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1
+ mov2int reg, xmmhm, "IMMEDIATE & mask(3)", size=2, ext=1
+};
+
+# PINSRW, register source: issues mov2fp (size=2, ext=1) from regm into xmml
+# and again into xmmh, both with the immediate masked to 3 bits.
+# NOTE(review): presumably ext=1 makes only the half containing the selected
+# word get updated - confirm against the mov2fp definition.
+def macroop PINSRW_XMM_R_I {
+ mov2fp xmml, regm, "IMMEDIATE & mask(3)", size=2, ext=1
+ mov2fp xmmh, regm, "IMMEDIATE & mask(3)", size=2, ext=1
+};
+
+# PINSRW, memory source: 2-byte integer load at disp into t1, then the same
+# pair of mov2fp microops (size=2, ext=1) as the register form.
+def macroop PINSRW_XMM_M_I {
+ ld t1, seg, sib, disp, dataSize=2
+ mov2fp xmml, t1, "IMMEDIATE & mask(3)", size=2, ext=1
+ mov2fp xmmh, t1, "IMMEDIATE & mask(3)", size=2, ext=1
+};
+
+# PINSRW, rip-relative source: rdip into t7, 2-byte riprel load into t1, then
+# the same pair of mov2fp microops (size=2, ext=1) as the register form.
+def macroop PINSRW_XMM_P_I {
+ rdip t7
+ ld t1, seg, riprel, disp, dataSize=2
+ mov2fp xmml, t1, "IMMEDIATE & mask(3)", size=2, ext=1
+ mov2fp xmmh, t1, "IMMEDIATE & mask(3)", size=2, ext=1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
index 859d2bc7e..9112a7382 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
@@ -54,7 +54,66 @@
# Authors: Gabe Black
microcode = '''
-# PACKSSDW
-# PACKSSWB
-# PACKUSWB
+# PACKSSDW, register source: packs the destination halves (xmml, xmmh) into
+# the temporary ufp1 and the source halves (xmmlm, xmmhm) into xmmh, both
+# with ext=1, srcSize=4, destSize=2, then copies ufp1 into xmml.
+def macroop PACKSSDW_XMM_XMM {
+ pack ufp1, xmml, xmmh, ext=1, srcSize=4, destSize=2
+ pack xmmh, xmmlm, xmmhm, ext=1, srcSize=4, destSize=2
+ movfp xmml, ufp1, dataSize=8
+};
+
+# PACKSSDW, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2; packs (xmml, xmmh) into xmml and (ufp1, ufp2) into xmmh,
+# both with ext=1, srcSize=4, destSize=2.
+def macroop PACKSSDW_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ pack xmml, xmml, xmmh, ext=1, srcSize=4, destSize=2
+ pack xmmh, ufp1, ufp2, ext=1, srcSize=4, destSize=2
+};
+
+# PACKSSDW, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2;
+# packs (xmml, xmmh) into xmml and (ufp1, ufp2) into xmmh, both with
+# ext=1, srcSize=4, destSize=2.
+def macroop PACKSSDW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ pack xmml, xmml, xmmh, ext=1, srcSize=4, destSize=2
+ pack xmmh, ufp1, ufp2, ext=1, srcSize=4, destSize=2
+};
+
+# PACKSSWB, register source: packs the destination halves (xmml, xmmh) into
+# the temporary ufp1 and the source halves (xmmlm, xmmhm) into xmmh, both
+# with ext=1, srcSize=2, destSize=1, then copies ufp1 into xmml.
+def macroop PACKSSWB_XMM_XMM {
+ pack ufp1, xmml, xmmh, ext=1, srcSize=2, destSize=1
+ pack xmmh, xmmlm, xmmhm, ext=1, srcSize=2, destSize=1
+ movfp xmml, ufp1, dataSize=8
+};
+
+# PACKSSWB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2; packs (xmml, xmmh) into xmml and (ufp1, ufp2) into xmmh,
+# both with ext=1, srcSize=2, destSize=1.
+def macroop PACKSSWB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ pack xmml, xmml, xmmh, ext=1, srcSize=2, destSize=1
+ pack xmmh, ufp1, ufp2, ext=1, srcSize=2, destSize=1
+};
+
+# PACKSSWB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2;
+# packs (xmml, xmmh) into xmml and (ufp1, ufp2) into xmmh, both with
+# ext=1, srcSize=2, destSize=1.
+def macroop PACKSSWB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ pack xmml, xmml, xmmh, ext=1, srcSize=2, destSize=1
+ pack xmmh, ufp1, ufp2, ext=1, srcSize=2, destSize=1
+};
+
+# PACKUSWB, register source: packs the destination halves (xmml, xmmh) into
+# the temporary ufp1 and the source halves (xmmlm, xmmhm) into xmmh, both
+# with ext=0, srcSize=2, destSize=1, then copies ufp1 into xmml.
+def macroop PACKUSWB_XMM_XMM {
+ pack ufp1, xmml, xmmh, ext=0, srcSize=2, destSize=1
+ pack xmmh, xmmlm, xmmhm, ext=0, srcSize=2, destSize=1
+ movfp xmml, ufp1, dataSize=8
+};
+
+# PACKUSWB, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2; packs (xmml, xmmh) into xmml and (ufp1, ufp2) into xmmh,
+# both with ext=0, srcSize=2, destSize=1.
+def macroop PACKUSWB_XMM_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1
+ pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1
+};
+
+# PACKUSWB, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2;
+# packs (xmml, xmmh) into xmml and (ufp1, ufp2) into xmmh, both with
+# ext=0, srcSize=2, destSize=1.
+def macroop PACKUSWB_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1
+ pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
index 9f66c3f05..46cfbce82 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
@@ -54,7 +54,54 @@
# Authors: Gabe Black
microcode = '''
-# PSHUFD
-# PSHUFHW
-# PSHUFLW
+# PSHUFD, register source: shuffle (size=4) of (xmmlm, xmmhm) selected by
+# IMMEDIATE goes to the temporary ufp1, the one selected by IMMEDIATE >> 4
+# goes to xmmh, then ufp1 is copied to xmml. Both shuffles read the source
+# before xmml is written.
+def macroop PSHUFD_XMM_XMM_I {
+ shuffle ufp1, xmmlm, xmmhm, size=4, ext="IMMEDIATE"
+ shuffle xmmh, xmmlm, xmmhm, size=4, ext="IMMEDIATE >> 4"
+ movfp xmml, ufp1, dataSize=8
+};
+
+# PSHUFD, memory source: loads 8 bytes at DISPLACEMENT and at DISPLACEMENT + 8
+# into ufp1/ufp2; shuffle (size=4) selected by IMMEDIATE writes xmml and
+# the one selected by IMMEDIATE >> 4 writes xmmh.
+def macroop PSHUFD_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmml, ufp1, ufp2, size=4, ext="IMMEDIATE"
+ shuffle xmmh, ufp1, ufp2, size=4, ext="IMMEDIATE >> 4"
+};
+
+# PSHUFD, rip-relative source: rdip into t7, riprel loads into ufp1/ufp2;
+# shuffle (size=4) selected by IMMEDIATE writes xmml and the one selected
+# by IMMEDIATE >> 4 writes xmmh.
+def macroop PSHUFD_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmml, ufp1, ufp2, size=4, ext="IMMEDIATE"
+ shuffle xmmh, ufp1, ufp2, size=4, ext="IMMEDIATE >> 4"
+};
+
+# PSHUFHW, register source: shuffles the words of the source high quadword
+# (size=2, selected by imm) into xmmh, and copies the source low quadword
+# unchanged into xmml. Without the copy the destination kept its old low
+# quadword whenever source and destination registers differed.
+def macroop PSHUFHW_XMM_XMM_I {
+ shuffle xmmh, xmmhm, xmmhm, size=2, ext=imm
+ movfp xmml, xmmlm, dataSize=8
+};
+
+# PSHUFHW, memory source: loads both quadwords of the operand into
+# ufp1/ufp2 before any architectural register is written, shuffles the
+# high quadword (size=2, selected by imm) into xmmh, and copies the low
+# quadword unchanged into xmml. Previously the low quadword was never
+# loaded, so xmml kept its old contents.
+def macroop PSHUFHW_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmmh, ufp2, ufp2, size=2, ext=imm
+ movfp xmml, ufp1, dataSize=8
+};
+
+# PSHUFHW, rip-relative source: rdip into t7, riprel loads of both
+# quadwords into ufp1/ufp2, shuffle of the high quadword (size=2, selected
+# by imm) into xmmh, and an unchanged copy of the low quadword into xmml.
+# Previously the low quadword was never loaded, so xmml kept its old
+# contents.
+def macroop PSHUFHW_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmmh, ufp2, ufp2, size=2, ext=imm
+ movfp xmml, ufp1, dataSize=8
+};
+
+# PSHUFLW, register source: shuffles the words of the source low quadword
+# (size=2, selected by imm) into xmml, and copies the source high quadword
+# unchanged into xmmh. Without the copy the destination kept its old high
+# quadword whenever source and destination registers differed.
+def macroop PSHUFLW_XMM_XMM_I {
+ shuffle xmml, xmmlm, xmmlm, size=2, ext=imm
+ movfp xmmh, xmmhm, dataSize=8
+};
+
+# PSHUFLW, memory source: loads both quadwords of the operand into
+# ufp1/ufp2 before any architectural register is written, shuffles the
+# low quadword (size=2, selected by imm) into xmml, and copies the high
+# quadword unchanged into xmmh. Previously the high quadword was never
+# loaded, so xmmh kept its old contents.
+def macroop PSHUFLW_XMM_M_I {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmml, ufp1, ufp1, size=2, ext=imm
+ movfp xmmh, ufp2, dataSize=8
+};
+
+# PSHUFLW, rip-relative source: rdip into t7, riprel loads of both
+# quadwords into ufp1/ufp2, shuffle of the low quadword (size=2, selected
+# by imm) into xmml, and an unchanged copy of the high quadword into xmmh.
+# Previously the high quadword was never loaded, so xmmh kept its old
+# contents.
+def macroop PSHUFLW_XMM_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+ shuffle xmml, ufp1, ufp1, size=2, ext=imm
+ movfp xmmh, ufp2, dataSize=8
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py
index e8c22ea9f..b4dfb4607 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py
@@ -54,12 +54,150 @@
# Authors: Gabe Black
microcode = '''
-# PUNPCKHBW
-# PUNPCKHWD
-# PUNPCKHDQ
-# PUNPCKHQDQ
-# PUNPCKLBW
-# PUNPCKLWD
-# PUNPCKLDQ
-# PUNPCKLQDQ
+# PUNPCKLBW, register source: unpack (size=1) of xmml with xmmlm; ext=1
+# writes xmmh and ext=0 writes xmml. xmmh is written first because both
+# microops read xmml.
+def macroop PUNPCKLBW_XMM_XMM {
+ unpack xmmh, xmml, xmmlm, ext=1, size=1
+ unpack xmml, xmml, xmmlm, ext=0, size=1
+};
+
+# PUNPCKLBW, memory source: one 8-byte load at disp into ufp1, then unpack
+# (size=1) of xmml with ufp1; ext=1 writes xmmh first, ext=0 writes xmml.
+def macroop PUNPCKLBW_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack xmmh, xmml, ufp1, ext=1, size=1
+ unpack xmml, xmml, ufp1, ext=0, size=1
+};
+
+# PUNPCKLBW, rip-relative source: rdip into t7, one 8-byte riprel load into
+# ufp1, then unpack (size=1); ext=1 writes xmmh first, ext=0 writes xmml.
+def macroop PUNPCKLBW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack xmmh, xmml, ufp1, ext=1, size=1
+ unpack xmml, xmml, ufp1, ext=0, size=1
+};
+
+# PUNPCKLWD, register source: unpack (size=2) of xmml with xmmlm; ext=1
+# writes xmmh and ext=0 writes xmml. xmmh is written first because both
+# microops read xmml.
+def macroop PUNPCKLWD_XMM_XMM {
+ unpack xmmh, xmml, xmmlm, ext=1, size=2
+ unpack xmml, xmml, xmmlm, ext=0, size=2
+};
+
+# PUNPCKLWD, memory source: one 8-byte load at disp into ufp1, then unpack
+# (size=2) of xmml with ufp1; ext=1 writes xmmh first, ext=0 writes xmml.
+def macroop PUNPCKLWD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack xmmh, xmml, ufp1, ext=1, size=2
+ unpack xmml, xmml, ufp1, ext=0, size=2
+};
+
+# PUNPCKLWD, rip-relative source: rdip into t7, one 8-byte riprel load into
+# ufp1, then unpack (size=2); ext=1 writes xmmh first, ext=0 writes xmml.
+def macroop PUNPCKLWD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack xmmh, xmml, ufp1, ext=1, size=2
+ unpack xmml, xmml, ufp1, ext=0, size=2
+};
+
+# PUNPCKLDQ, register source: unpack (size=4) of xmml with xmmlm; ext=1
+# writes xmmh and ext=0 writes xmml. xmmh is written first because both
+# microops read xmml.
+def macroop PUNPCKLDQ_XMM_XMM {
+ unpack xmmh, xmml, xmmlm, ext=1, size=4
+ unpack xmml, xmml, xmmlm, ext=0, size=4
+};
+
+# PUNPCKLDQ, memory source: one 8-byte load at disp into ufp1, then unpack
+# (size=4) of xmml with ufp1; ext=1 writes xmmh first, ext=0 writes xmml.
+def macroop PUNPCKLDQ_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack xmmh, xmml, ufp1, ext=1, size=4
+ unpack xmml, xmml, ufp1, ext=0, size=4
+};
+
+# PUNPCKLDQ, rip-relative source: rdip into t7, one 8-byte riprel load into
+# ufp1, then unpack (size=4); ext=1 writes xmmh first, ext=0 writes xmml.
+def macroop PUNPCKLDQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack xmmh, xmml, ufp1, ext=1, size=4
+ unpack xmml, xmml, ufp1, ext=0, size=4
+};
+
+# PUNPCKHBW, register source: unpack (size=1) of xmmh with xmmhm; ext=0
+# writes xmml and ext=1 writes xmmh. xmml is written first because both
+# microops read xmmh.
+def macroop PUNPCKHBW_XMM_XMM {
+ unpack xmml, xmmh, xmmhm, ext=0, size=1
+ unpack xmmh, xmmh, xmmhm, ext=1, size=1
+};
+
+# PUNPCKHBW, memory source: lea puts the operand address in t1, then 8 bytes
+# are loaded from t1 + 8 into ufp1; unpack (size=1) of xmmh with ufp1
+# writes xmml (ext=0) first and then xmmh (ext=1).
+def macroop PUNPCKHBW_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ unpack xmml, xmmh, ufp1, ext=0, size=1
+ unpack xmmh, xmmh, ufp1, ext=1, size=1
+};
+
+# PUNPCKHBW, rip-relative source: rdip into t7, lea puts the riprel operand
+# address in t1, then 8 bytes are loaded from t1 + 8 into ufp1; unpack
+# (size=1) writes xmml (ext=0) first and then xmmh (ext=1).
+def macroop PUNPCKHBW_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ unpack xmml, xmmh, ufp1, ext=0, size=1
+ unpack xmmh, xmmh, ufp1, ext=1, size=1
+};
+
+# PUNPCKHWD, register source: unpack (size=2) of xmmh with xmmhm; ext=0
+# writes xmml and ext=1 writes xmmh. xmml is written first because both
+# microops read xmmh.
+def macroop PUNPCKHWD_XMM_XMM {
+ unpack xmml, xmmh, xmmhm, ext=0, size=2
+ unpack xmmh, xmmh, xmmhm, ext=1, size=2
+};
+
+# PUNPCKHWD, memory source: lea puts the operand address in t1, then 8 bytes
+# are loaded from t1 + 8 into ufp1; unpack (size=2) of xmmh with ufp1
+# writes xmml (ext=0) first and then xmmh (ext=1).
+def macroop PUNPCKHWD_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ unpack xmml, xmmh, ufp1, ext=0, size=2
+ unpack xmmh, xmmh, ufp1, ext=1, size=2
+};
+
+# PUNPCKHWD, rip-relative source: rdip into t7, lea puts the riprel operand
+# address in t1, then 8 bytes are loaded from t1 + 8 into ufp1; unpack
+# (size=2) writes xmml (ext=0) first and then xmmh (ext=1).
+def macroop PUNPCKHWD_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ unpack xmml, xmmh, ufp1, ext=0, size=2
+ unpack xmmh, xmmh, ufp1, ext=1, size=2
+};
+
+# PUNPCKHDQ, register source: unpack (size=4) of xmmh with xmmhm; ext=0
+# writes xmml and ext=1 writes xmmh. xmml is written first because both
+# microops read xmmh.
+def macroop PUNPCKHDQ_XMM_XMM {
+ unpack xmml, xmmh, xmmhm, ext=0, size=4
+ unpack xmmh, xmmh, xmmhm, ext=1, size=4
+};
+
+# PUNPCKHDQ, memory source: lea puts the operand address in t1, then 8 bytes
+# are loaded from t1 + 8 into ufp1; unpack (size=4) of xmmh with ufp1
+# writes xmml (ext=0) first and then xmmh (ext=1).
+def macroop PUNPCKHDQ_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ unpack xmml, xmmh, ufp1, ext=0, size=4
+ unpack xmmh, xmmh, ufp1, ext=1, size=4
+};
+
+# PUNPCKHDQ, rip-relative source: rdip into t7, lea puts the riprel operand
+# address in t1, then 8 bytes are loaded from t1 + 8 into ufp1; unpack
+# (size=4) writes xmml (ext=0) first and then xmmh (ext=1).
+def macroop PUNPCKHDQ_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ unpack xmml, xmmh, ufp1, ext=0, size=4
+ unpack xmmh, xmmh, ufp1, ext=1, size=4
+};
+
+# PUNPCKHQDQ, register source: copies xmmh into xmml, then xmmhm into xmmh.
+# The destination's xmmh is read before it is overwritten.
+def macroop PUNPCKHQDQ_XMM_XMM {
+ movfp xmml, xmmh
+ movfp xmmh, xmmhm
+};
+
+# PUNPCKHQDQ, memory source: lea puts the operand address in t1, then 8 bytes
+# are loaded from t1 + 8 into ufp1; xmmh is copied to xmml and ufp1 to xmmh.
+def macroop PUNPCKHQDQ_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ movfp xmml, xmmh
+ movfp xmmh, ufp1
+};
+
+# PUNPCKHQDQ, rip-relative source: rdip into t7, lea puts the riprel operand
+# address in t1, then 8 bytes are loaded from t1 + 8 into ufp1; xmmh is
+# copied to xmml and ufp1 to xmmh.
+# The load goes through t1, matching the other PUNPCKH*_XMM_P macroops.
+# Addressing it as "riprel, 8" read from rip + 8, dropped the instruction's
+# displacement, and left the lea result unused.
+def macroop PUNPCKHQDQ_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8
+ movfp xmml, xmmh
+ movfp xmmh, ufp1
+};
+
+# PUNPCKLQDQ, register source: copies xmmlm into xmmh; xmml is left untouched.
+def macroop PUNPCKLQDQ_XMM_XMM {
+ movfp xmmh, xmmlm
+};
+
+# PUNPCKLQDQ, memory source: loads 8 bytes at disp directly into xmmh;
+# xmml is left untouched.
+def macroop PUNPCKLQDQ_XMM_M {
+ ldfp xmmh, seg, sib, disp, dataSize=8
+};
+
+# PUNPCKLQDQ, rip-relative source: rdip into t7, then loads 8 bytes via
+# riprel directly into xmmh; xmml is left untouched.
+def macroop PUNPCKLQDQ_XMM_P {
+ rdip t7
+ ldfp xmmh, seg, riprel, disp, dataSize=8
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py
index 4bee18c19..c34bd42bb 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py
@@ -54,11 +54,40 @@
# Authors: Gabe Black
microcode = '''
-# MOVD
-# MOVQ
+def macroop MOVQ_XMM_XMM {
+ movfp xmml, xmmlm
+ lfpimm xmmh, 0
+};
+
+def macroop MOVQ_XMM_M {
+ ldfp xmml, seg, sib, disp, dataSize=8
+ lfpimm xmmh, 0
+};
+
+def macroop MOVQ_XMM_P {
+ rdip t7
+ ldfp xmml, seg, riprel, disp, dataSize=8
+ lfpimm xmmh, 0
+};
+
+def macroop MOVQ_M_XMM {
+ stfp xmml, seg, sib, disp, dataSize=8
+};
+
+def macroop MOVQ_P_XMM {
+ rdip t7
+ stfp xmml, seg, riprel, disp, dataSize=8
+};
+
+def macroop MOVDQ2Q_MMX_XMM {
+ movfp mmx, xmmlm, dataSize=8
+};
+
+def macroop MOVQ2DQ_XMM_MMX {
+ movfp xmml, mmxm, dataSize=8
+ lfpimm xmmh, 0
+};
+'''
# MOVDQA
# MOVDQU
-# MOVDQ2Q
-# MOVQ2DQ
# LDDQU
-'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py
index 558391c6a..0be1229b4 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py
@@ -54,5 +54,9 @@
# Authors: Gabe Black
microcode = '''
-# PMOVMSKB
+def macroop PMOVMSKB_R_XMM {
+ limm reg, 0
+ movsign reg, xmmlm, size=1, ext=0
+ movsign reg, xmmhm, size=1, ext=1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py
index 413dddb84..c8df3b403 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py
@@ -55,5 +55,13 @@
microcode = '''
# MOVNTDQ
-# MASKMOVDQU
+
+def macroop MASKMOVDQU_XMM_XMM {
+ ldfp ufp1, ds, [1, t0, rdi], dataSize=8
+ ldfp ufp2, ds, [1, t0, rdi], 8, dataSize=8
+ maskmov ufp1, xmml, xmmlm, size=1
+ maskmov ufp2, xmmh, xmmhm, size=1
+ stfp ufp1, ds, [1, t0, rdi], dataSize=8
+ stfp ufp2, ds, [1, t0, rdi], 8, dataSize=8
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py b/src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py
index 336796e23..e72deae0d 100644
--- a/src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py
+++ b/src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py
@@ -54,5 +54,25 @@
# Authors: Gabe Black
microcode = '''
-# PXOR
+def macroop PXOR_XMM_XMM {
+ mxor xmml, xmml, xmmlm
+ mxor xmmh, xmmh, xmmhm
+};
+
+def macroop PXOR_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mxor xmml, xmml, ufp1
+ mxor xmmh, xmmh, ufp2
+};
+
+def macroop PXOR_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mxor xmml, xmml, ufp1
+ mxor xmmh, xmmh, ufp2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/logical/pand.py b/src/arch/x86/isa/insts/simd128/integer/logical/pand.py
index 055b7c5f6..69699454d 100644
--- a/src/arch/x86/isa/insts/simd128/integer/logical/pand.py
+++ b/src/arch/x86/isa/insts/simd128/integer/logical/pand.py
@@ -54,6 +54,47 @@
# Authors: Gabe Black
microcode = '''
-# PAND
-# PANDN
+def macroop PAND_XMM_XMM {
+ mand xmml, xmml, xmmlm
+ mand xmmh, xmmh, xmmhm
+};
+
+def macroop PAND_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mand xmml, xmml, ufp1
+ mand xmmh, xmmh, ufp2
+};
+
+def macroop PAND_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mand xmml, xmml, ufp1
+ mand xmmh, xmmh, ufp2
+};
+
+def macroop PANDN_XMM_XMM {
+ mandn xmml, xmml, xmmlm
+ mandn xmmh, xmmh, xmmhm
+};
+
+def macroop PANDN_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mandn xmml, xmml, ufp1
+ mandn xmmh, xmmh, ufp2
+};
+
+def macroop PANDN_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mandn xmml, xmml, ufp1
+ mandn xmmh, xmmh, ufp2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/logical/por.py b/src/arch/x86/isa/insts/simd128/integer/logical/por.py
index addb28a60..dc0d7575b 100644
--- a/src/arch/x86/isa/insts/simd128/integer/logical/por.py
+++ b/src/arch/x86/isa/insts/simd128/integer/logical/por.py
@@ -54,5 +54,25 @@
# Authors: Gabe Black
microcode = '''
-# POR
+def macroop POR_XMM_XMM {
+ mor xmml, xmml, xmmlm
+ mor xmmh, xmmh, xmmhm
+};
+
+def macroop POR_XMM_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mor xmml, xmml, ufp1
+ mor xmmh, xmmh, ufp2
+};
+
+def macroop POR_XMM_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldfp ufp1, seg, [1, t0, t1], dataSize=8
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8
+ mor xmml, xmml, ufp1
+ mor xmmh, xmmh, ufp2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py b/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py
index 687391b47..2481b744f 100644
--- a/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py
+++ b/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py
@@ -54,6 +54,25 @@
# Authors: Gabe Black
microcode = '''
-# STMXCSR
-# LDMXCSR
+def macroop STMXCSR_M {
+ rdval t1, "InstRegIndex(MISCREG_MXCSR)"
+ st t1, seg, sib, disp
+};
+
+def macroop STMXCSR_P {
+ rdval t1, "InstRegIndex(MISCREG_MXCSR)"
+ rdip t7
+ st t1, seg, riprel, disp
+};
+
+def macroop LDMXCSR_M {
+ ld t1, seg, sib, disp
+ wrval "InstRegIndex(MISCREG_MXCSR)", t1
+};
+
+def macroop LDMXCSR_P {
+ rdip t7
+ ld t1, seg, riprel, disp
+ wrval "InstRegIndex(MISCREG_MXCSR)", t1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py b/src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py
index 18d6feb24..617033bc0 100644
--- a/src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py
+++ b/src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py
@@ -54,8 +54,73 @@
# Authors: Gabe Black
microcode = '''
-# PSLLW
-# PSLLD
-# PSLLQ
-# PSLLDQ
+def macroop PSLLW_XMM_XMM {
+ msll xmmh, xmmh, xmmlm, size=2, ext=0
+ msll xmml, xmml, xmmlm, size=2, ext=0
+};
+
+def macroop PSLLW_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msll xmml, xmml, ufp1, size=2, ext=0
+ msll xmmh, xmmh, ufp1, size=2, ext=0
+};
+
+def macroop PSLLW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msll xmml, xmml, ufp1, size=2, ext=0
+ msll xmmh, xmmh, ufp1, size=2, ext=0
+};
+
+def macroop PSLLW_XMM_I {
+ mslli xmml, xmml, imm, size=2, ext=0
+ mslli xmmh, xmmh, imm, size=2, ext=0
+};
+
+def macroop PSLLD_XMM_XMM {
+ msll xmmh, xmmh, xmmlm, size=4, ext=0
+ msll xmml, xmml, xmmlm, size=4, ext=0
+};
+
+def macroop PSLLD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msll xmml, xmml, ufp1, size=4, ext=0
+ msll xmmh, xmmh, ufp1, size=4, ext=0
+};
+
+def macroop PSLLD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msll xmml, xmml, ufp1, size=4, ext=0
+ msll xmmh, xmmh, ufp1, size=4, ext=0
+};
+
+def macroop PSLLD_XMM_I {
+ mslli xmml, xmml, imm, size=4, ext=0
+ mslli xmmh, xmmh, imm, size=4, ext=0
+};
+
+def macroop PSLLQ_XMM_XMM {
+ msll xmmh, xmmh, xmmlm, size=8, ext=0
+ msll xmml, xmml, xmmlm, size=8, ext=0
+};
+
+def macroop PSLLQ_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msll xmml, xmml, ufp1, size=8, ext=0
+ msll xmmh, xmmh, ufp1, size=8, ext=0
+};
+
+def macroop PSLLQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msll xmml, xmml, ufp1, size=8, ext=0
+ msll xmmh, xmmh, ufp1, size=8, ext=0
+};
+
+def macroop PSLLQ_XMM_I {
+ mslli xmml, xmml, imm, size=8, ext=0
+ mslli xmmh, xmmh, imm, size=8, ext=0
+};
'''
+# PSLLDQ
diff --git a/src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py b/src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py
index 63750e292..b88457a02 100644
--- a/src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py
+++ b/src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py
@@ -54,6 +54,49 @@
# Authors: Gabe Black
microcode = '''
-# PSRAW
-# PSRAD
+def macroop PSRAW_XMM_XMM {
+ msra xmmh, xmmh, xmmlm, size=2, ext=0
+ msra xmml, xmml, xmmlm, size=2, ext=0
+};
+
+def macroop PSRAW_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msra xmml, xmml, ufp1, size=2, ext=0
+ msra xmmh, xmmh, ufp1, size=2, ext=0
+};
+
+def macroop PSRAW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msra xmml, xmml, ufp1, size=2, ext=0
+ msra xmmh, xmmh, ufp1, size=2, ext=0
+};
+
+def macroop PSRAW_XMM_I {
+ msrai xmml, xmml, imm, size=2, ext=0
+ msrai xmmh, xmmh, imm, size=2, ext=0
+};
+
+def macroop PSRAD_XMM_XMM {
+ msra xmmh, xmmh, xmmlm, size=4, ext=0
+ msra xmml, xmml, xmmlm, size=4, ext=0
+};
+
+def macroop PSRAD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msra xmml, xmml, ufp1, size=4, ext=0
+ msra xmmh, xmmh, ufp1, size=4, ext=0
+};
+
+def macroop PSRAD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msra xmml, xmml, ufp1, size=4, ext=0
+ msra xmmh, xmmh, ufp1, size=4, ext=0
+};
+
+def macroop PSRAD_XMM_I {
+ msrai xmml, xmml, imm, size=4, ext=0
+ msrai xmmh, xmmh, imm, size=4, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py b/src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py
index fc6fb180b..c904eaf50 100644
--- a/src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py
+++ b/src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py
@@ -54,8 +54,73 @@
# Authors: Gabe Black
microcode = '''
-# PSRLW
-# PSRLD
-# PSRLQ
-# PSRLDQ
+def macroop PSRLW_XMM_XMM {
+ msrl xmmh, xmmh, xmmlm, size=2, ext=0
+ msrl xmml, xmml, xmmlm, size=2, ext=0
+};
+
+def macroop PSRLW_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msrl xmml, xmml, ufp1, size=2, ext=0
+ msrl xmmh, xmmh, ufp1, size=2, ext=0
+};
+
+def macroop PSRLW_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msrl xmml, xmml, ufp1, size=2, ext=0
+ msrl xmmh, xmmh, ufp1, size=2, ext=0
+};
+
+def macroop PSRLW_XMM_I {
+ msrli xmml, xmml, imm, size=2, ext=0
+ msrli xmmh, xmmh, imm, size=2, ext=0
+};
+
+def macroop PSRLD_XMM_XMM {
+ msrl xmmh, xmmh, xmmlm, size=4, ext=0
+ msrl xmml, xmml, xmmlm, size=4, ext=0
+};
+
+def macroop PSRLD_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msrl xmml, xmml, ufp1, size=4, ext=0
+ msrl xmmh, xmmh, ufp1, size=4, ext=0
+};
+
+def macroop PSRLD_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msrl xmml, xmml, ufp1, size=4, ext=0
+ msrl xmmh, xmmh, ufp1, size=4, ext=0
+};
+
+def macroop PSRLD_XMM_I {
+ msrli xmml, xmml, imm, size=4, ext=0
+ msrli xmmh, xmmh, imm, size=4, ext=0
+};
+
+def macroop PSRLQ_XMM_XMM {
+ msrl xmmh, xmmh, xmmlm, size=8, ext=0
+ msrl xmml, xmml, xmmlm, size=8, ext=0
+};
+
+def macroop PSRLQ_XMM_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msrl xmml, xmml, ufp1, size=8, ext=0
+ msrl xmmh, xmmh, ufp1, size=8, ext=0
+};
+
+def macroop PSRLQ_XMM_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msrl xmml, xmml, ufp1, size=8, ext=0
+ msrl xmmh, xmmh, ufp1, size=8, ext=0
+};
+
+def macroop PSRLQ_XMM_I {
+ msrli xmml, xmml, imm, size=8, ext=0
+ msrli xmmh, xmmh, imm, size=8, ext=0
+};
'''
+# PSRLDQ
diff --git a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py
index 54047ec7c..f970018ac 100644
--- a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py
+++ b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py
@@ -54,5 +54,18 @@
# Authors: Gabe Black
microcode = '''
-# PFMUL
+def macroop PFMUL_MMX_MMX {
+ mmulf mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PFMUL_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmulf mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PFMUL_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmulf mmx, mmx, ufp1, size=4, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py
index 6e4c1804d..363794411 100644
--- a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py
+++ b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py
@@ -54,6 +54,33 @@
# Authors: Gabe Black
microcode = '''
-# PFSUB
-# PFSUBR
+def macroop PFSUB_MMX_MMX {
+ msubf mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PFSUB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubf mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PFSUB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubf mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PFSUBR_MMX_MMX {
+ msubf mmx, mmxm, mmx, size=4, ext=0
+};
+
+def macroop PFSUBR_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubf mmx, ufp1, mmx, size=4, ext=0
+};
+
+def macroop PFSUBR_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubf mmx, ufp1, mmx, size=4, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py b/src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py
index 6c942065d..1ade48152 100644
--- a/src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py
+++ b/src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py
@@ -54,10 +54,7 @@
# Authors: Gabe Black
microcode = '''
-# CVTPS2PI
-# CVTTPS2PI
-# CVTPD2PI
-# CVTTPD2PI
+# CVTPS2PI, CVTTPS2PI, CVTPD2PI, and CVTTPD2PI are implemented in simd128
# PF2IW
# PF2ID
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py
index 235a4fc7b..b663d15b7 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py
@@ -54,12 +54,123 @@
# Authors: Gabe Black
microcode = '''
-# PADDB
-# PADDW
-# PADDD
-# PADDQ
-# PADDSB
-# PADDSW
-# PADDUSB
-# PADDUSW
+def macroop PADDB_MMX_MMX {
+ maddi mmx, mmx, mmxm, size=1, ext=0
+};
+
+def macroop PADDB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PADDB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PADDW_MMX_MMX {
+ maddi mmx, mmx, mmxm, size=2, ext=0
+};
+
+def macroop PADDW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PADDW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PADDD_MMX_MMX {
+ maddi mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PADDD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PADDD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PADDQ_MMX_MMX {
+ maddi mmx, mmx, mmxm, size=8, ext=0
+};
+
+def macroop PADDQ_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PADDQ_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PADDSB_MMX_MMX {
+ maddi mmx, mmx, mmxm, size=1, ext=4
+};
+
+def macroop PADDSB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=1, ext=4
+};
+
+def macroop PADDSB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=1, ext=4
+};
+
+def macroop PADDSW_MMX_MMX {
+ maddi mmx, mmx, mmxm, size=2, ext=4
+};
+
+def macroop PADDSW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=2, ext=4
+};
+
+def macroop PADDSW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=2, ext=4
+};
+
+def macroop PADDUSB_MMX_MMX {
+ maddi mmx, mmx, mmxm, size=1, ext=2
+};
+
+def macroop PADDUSB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=1, ext=2
+};
+
+def macroop PADDUSB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=1, ext=2
+};
+
+def macroop PADDUSW_MMX_MMX {
+ maddi mmx, mmx, mmxm, size=2, ext=2
+};
+
+def macroop PADDUSW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PADDUSW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ maddi mmx, mmx, ufp1, size=2, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py
index af1b39097..54bc9e53c 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py
@@ -54,7 +54,34 @@
# Authors: Gabe Black
microcode = '''
-# PAVGB
-# PAVGW
+def macroop PAVGB_MMX_MMX {
+ mavg mmx, mmx, mmxm, size=1, ext=0
+};
+
+def macroop PAVGB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mavg mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PAVGB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mavg mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PAVGW_MMX_MMX {
+ mavg mmx, mmx, mmxm, size=2, ext=0
+};
+
+def macroop PAVGW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mavg mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PAVGW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mavg mmx, mmx, ufp1, size=2, ext=0
+};
# PAVGUSB
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py
index 8382bc439..7383a744f 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py
@@ -54,9 +54,78 @@
# Authors: Gabe Black
microcode = '''
-# PMULHW
-# PMULLW
-# PMULHRW
-# PMULHUW
-# PMULUDQ
+def macroop PMULHW_MMX_MMX {
+ mmuli mmx, mmx, mmxm, size=2, ext=(0x2 | 0x8)
+};
+
+def macroop PMULHW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x8)
+};
+
+def macroop PMULHW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x8)
+};
+
+def macroop PMULLW_MMX_MMX {
+ mmuli mmx, mmx, mmxm, size=2, ext=2
+};
+
+def macroop PMULLW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PMULLW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PMULHRW_MMX_MMX {
+ mmuli mmx, mmx, mmxm, size=2, ext=(0x2 | 0x4 | 0x8)
+};
+
+def macroop PMULHRW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x4 | 0x8)
+};
+
+def macroop PMULHRW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x4 | 0x8)
+};
+
+def macroop PMULHUW_MMX_MMX {
+ mmuli mmx, mmx, mmxm, size=2, ext=8
+};
+
+def macroop PMULHUW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, size=2, ext=8
+};
+
+def macroop PMULHUW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, size=2, ext=8
+};
+
+def macroop PMULUDQ_MMX_MMX {
+ mmuli mmx, mmx, mmxm, srcSize=4, destSize=8, ext=1
+};
+
+def macroop PMULUDQ_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=1
+};
+
+def macroop PMULUDQ_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py
index 40b38867b..f6940d159 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py
@@ -54,5 +54,24 @@
# Authors: Gabe Black
microcode = '''
-# PMADDWD
+def macroop PMADDWD_MMX_MMX {
+ mmuli ufp3, mmx, mmxm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, mmx, mmxm, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi mmx, ufp3, ufp4, size=4, ext=0
+};
+
+def macroop PMADDWD_MMX_M {
+ ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+ mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi mmx, ufp3, ufp4, size=4, ext=0
+};
+
+def macroop PMADDWD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+ mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20)
+ mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10)
+ maddi mmx, ufp3, ufp4, size=4, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py
index 44781eb55..a60c0b1a8 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py
@@ -54,12 +54,123 @@
# Authors: Gabe Black
microcode = '''
-# PSUBB
-# PSUBW
-# PSUBD
-# PSUBQ
-# PSUBSB
-# PSUBSW
-# PSUBUSB
-# PSUBUSW
+def macroop PSUBB_MMX_MMX {
+ msubi mmx, mmx, mmxm, size=1, ext=0
+};
+
+def macroop PSUBB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PSUBB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PSUBW_MMX_MMX {
+ msubi mmx, mmx, mmxm, size=2, ext=0
+};
+
+def macroop PSUBW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSUBW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSUBD_MMX_MMX {
+ msubi mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PSUBD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSUBD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSUBQ_MMX_MMX {
+ msubi mmx, mmx, mmxm, size=8, ext=0
+};
+
+def macroop PSUBQ_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PSUBQ_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PSUBSB_MMX_MMX {
+ msubi mmx, mmx, mmxm, size=1, ext=4
+};
+
+def macroop PSUBSB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=1, ext=4
+};
+
+def macroop PSUBSB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=1, ext=4
+};
+
+def macroop PSUBSW_MMX_MMX {
+ msubi mmx, mmx, mmxm, size=2, ext=4
+};
+
+def macroop PSUBSW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=2, ext=4
+};
+
+def macroop PSUBSW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=2, ext=4
+};
+
+def macroop PSUBUSB_MMX_MMX {
+ msubi mmx, mmx, mmxm, size=1, ext=2
+};
+
+def macroop PSUBUSB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=1, ext=2
+};
+
+def macroop PSUBUSB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=1, ext=2
+};
+
+def macroop PSUBUSW_MMX_MMX {
+ msubi mmx, mmx, mmxm, size=2, ext=2
+};
+
+def macroop PSUBUSW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PSUBUSW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msubi mmx, mmx, ufp1, size=2, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py
index df1ca2301..8c3c239bd 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py
@@ -54,5 +54,18 @@
# Authors: Gabe Black
microcode = '''
-# PSADBW
+def macroop PSADBW_MMX_MMX {
+ msad mmx, mmx, mmxm, srcSize=1, destSize=2
+};
+
+def macroop PSADBW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msad mmx, mmx, ufp1, srcSize=1, destSize=2
+};
+
+def macroop PSADBW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msad mmx, mmx, ufp1, srcSize=1, destSize=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py
index 59380c6a7..60640f45a 100644
--- a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py
+++ b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py
@@ -54,10 +54,93 @@
# Authors: Gabe Black
microcode = '''
-# PCMPEQB
-# PCMPEQW
-# PCMPEQD
-# PCMPGTB
-# PCMPGTW
-# PCMPGTD
+def macroop PCMPEQB_MMX_MMX {
+ mcmpi2r mmx, mmx, mmxm, size=1, ext=0
+};
+
+def macroop PCMPEQB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PCMPEQB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PCMPEQW_MMX_MMX {
+ mcmpi2r mmx, mmx, mmxm, size=2, ext=0
+};
+
+def macroop PCMPEQW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PCMPEQW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PCMPEQD_MMX_MMX {
+ mcmpi2r mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PCMPEQD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PCMPEQD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PCMPGTB_MMX_MMX {
+ mcmpi2r mmx, mmx, mmxm, size=1, ext=2
+};
+
+def macroop PCMPGTB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=1, ext=2
+};
+
+def macroop PCMPGTB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=1, ext=2
+};
+
+def macroop PCMPGTW_MMX_MMX {
+ mcmpi2r mmx, mmx, mmxm, size=2, ext=2
+};
+
+def macroop PCMPGTW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PCMPGTW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PCMPGTD_MMX_MMX {
+ mcmpi2r mmx, mmx, mmxm, size=4, ext=2
+};
+
+def macroop PCMPGTD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=4, ext=2
+};
+
+def macroop PCMPGTD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mcmpi2r mmx, mmx, ufp1, size=4, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py
index 79b1aca4c..8d8247300 100644
--- a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py
+++ b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py
@@ -54,8 +54,63 @@
# Authors: Gabe Black
microcode = '''
-# PMAXUB
-# PMINUB
-# PMAXSW
-# PMINSW
+def macroop PMINUB_MMX_MMX {
+ mmini mmx, mmx, mmxm, size=1, ext=0
+};
+
+def macroop PMINUB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmini mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PMINUB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmini mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PMINSW_MMX_MMX {
+ mmini mmx, mmx, mmxm, size=2, ext=2
+};
+
+def macroop PMINSW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmini mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PMINSW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmini mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PMAXUB_MMX_MMX {
+ mmaxi mmx, mmx, mmxm, size=1, ext=0
+};
+
+def macroop PMAXUB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmaxi mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PMAXUB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmaxi mmx, mmx, ufp1, size=1, ext=0
+};
+
+def macroop PMAXSW_MMX_MMX {
+ mmaxi mmx, mmx, mmxm, size=2, ext=2
+};
+
+def macroop PMAXSW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mmaxi mmx, mmx, ufp1, size=2, ext=2
+};
+
+def macroop PMAXSW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mmaxi mmx, mmx, ufp1, size=2, ext=2
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_conversion.py b/src/arch/x86/isa/insts/simd64/integer/data_conversion.py
index f41049171..5b1c8703a 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_conversion.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_conversion.py
@@ -54,8 +54,7 @@
# Authors: Gabe Black
microcode = '''
-# CVTPI2PS
-# CVTPI2PD
+# CVTPI2PS and CVTPI2PD are implemented in simd128
# PI2FW
# PI2FD
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py
index 80f7a3e71..c9ebbcf14 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py
@@ -54,6 +54,22 @@
# Authors: Gabe Black
microcode = '''
-# PEXTRW
-# PINSRW
+def macroop PEXTRW_R_MMX_I {
+ mov2int reg, mmxm, "IMMEDIATE & mask(2)", size=2, ext=0
+};
+
+def macroop PINSRW_MMX_R_I {
+ mov2fp mmx, regm, "IMMEDIATE & mask(2)", size=2, ext=0
+};
+
+def macroop PINSRW_MMX_M_I {
+ ld t1, seg, sib, disp, dataSize=2
+ mov2fp mmx, t1, "IMMEDIATE & mask(2)", size=2, ext=0
+};
+
+def macroop PINSRW_MMX_P_I {
+ rdip t7
+ ld t1, seg, riprel, disp, dataSize=2
+ mov2fp mmx, t1, "IMMEDIATE & mask(2)", size=2, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py
index 859d2bc7e..4235d7f26 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py
@@ -54,7 +54,48 @@
# Authors: Gabe Black
microcode = '''
-# PACKSSDW
-# PACKSSWB
-# PACKUSWB
+def macroop PACKSSDW_MMX_MMX {
+ pack mmx, mmx, mmxm, ext=1, srcSize=4, destSize=2
+};
+
+def macroop PACKSSDW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ pack mmx, mmx, ufp1, ext=1, srcSize=4, destSize=2
+};
+
+def macroop PACKSSDW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ pack mmx, mmx, ufp1, ext=1, srcSize=4, destSize=2
+};
+
+def macroop PACKSSWB_MMX_MMX {
+ pack mmx, mmx, mmxm, ext=1, srcSize=2, destSize=1
+};
+
+def macroop PACKSSWB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ pack mmx, mmx, ufp1, ext=1, srcSize=2, destSize=1
+};
+
+def macroop PACKSSWB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ pack mmx, mmx, ufp1, ext=1, srcSize=2, destSize=1
+};
+
+def macroop PACKUSWB_MMX_MMX {
+ pack mmx, mmx, mmxm, ext=0, srcSize=2, destSize=1
+};
+
+def macroop PACKUSWB_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ pack mmx, mmx, ufp1, ext=0, srcSize=2, destSize=1
+};
+
+def macroop PACKUSWB_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ pack mmx, mmx, ufp1, ext=0, srcSize=2, destSize=1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py
index 8f95f0f48..a6dd8748a 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py
@@ -54,6 +54,20 @@
# Authors: Gabe Black
microcode = '''
-# PSHUFW
-# PSWAPD
+def macroop PSHUFW_MMX_MMX_I {
+ shuffle mmx, mmxm, mmxm, size=2, ext=imm
+};
+
+def macroop PSHUFW_MMX_M_I {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ shuffle mmx, ufp1, ufp1, size=2, ext=imm
+};
+
+def macroop PSHUFW_MMX_P_I {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ shuffle mmx, ufp1, ufp1, size=2, ext=imm
+};
+
'''
+# PSWAPD
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py
index 1f4c617dd..5a58beca3 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py
@@ -54,10 +54,93 @@
# Authors: Gabe Black
microcode = '''
-# PUNPCKHBW
-# PUNPCKHWD
-# PUNPCKHDQ
-# PUNPCKLBW
-# PUNPCKLWD
-# PUNPCKLDQ
+def macroop PUNPCKLBW_MMX_MMX {
+ unpack mmx, mmx, mmxm, ext=0, size=1
+};
+
+def macroop PUNPCKLBW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=0, size=1
+};
+
+def macroop PUNPCKLBW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=0, size=1
+};
+
+def macroop PUNPCKLWD_MMX_MMX {
+ unpack mmx, mmx, mmxm, ext=0, size=2
+};
+
+def macroop PUNPCKLWD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=0, size=2
+};
+
+def macroop PUNPCKLWD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=0, size=2
+};
+
+def macroop PUNPCKLDQ_MMX_MMX {
+ unpack mmx, mmx, mmxm, ext=0, size=4
+};
+
+def macroop PUNPCKLDQ_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=0, size=4
+};
+
+def macroop PUNPCKLDQ_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=0, size=4
+};
+
+def macroop PUNPCKHBW_MMX_MMX {
+ unpack mmx, mmx, mmxm, ext=1, size=1
+};
+
+def macroop PUNPCKHBW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=1, size=1
+};
+
+def macroop PUNPCKHBW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=1, size=1
+};
+
+def macroop PUNPCKHWD_MMX_MMX {
+ unpack mmx, mmx, mmxm, ext=1, size=2
+};
+
+def macroop PUNPCKHWD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=1, size=2
+};
+
+def macroop PUNPCKHWD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=1, size=2
+};
+
+def macroop PUNPCKHDQ_MMX_MMX {
+ unpack mmx, mmx, mmxm, ext=1, size=4
+};
+
+def macroop PUNPCKHDQ_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=1, size=4
+};
+
+def macroop PUNPCKHDQ_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ unpack mmx, mmx, ufp1, ext=1, size=4
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py
index e60273022..f35a1e4c8 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py
@@ -54,8 +54,53 @@
# Authors: Gabe Black
microcode = '''
-# MOVD
-# MOVQ
+def macroop MOVD_MMX_R {
+ mov2fp mmx, regm, srcSize=dsz, destSize=8
+};
+
+def macroop MOVD_MMX_M {
+    # MOVD transfers dsz bytes, as the register forms of MOVD in this file
+    # already do. A fixed dataSize=8 would read past a 32 bit memory operand.
+    ldfp mmx, seg, sib, disp, dataSize=dsz
+};
+
+def macroop MOVD_MMX_P {
+    rdip t7
+    # MOVD transfers dsz bytes, as the register forms of MOVD in this file
+    # already do. A fixed dataSize=8 would read past a 32 bit memory operand.
+    ldfp mmx, seg, riprel, disp, dataSize=dsz
+};
+
+def macroop MOVD_R_MMX {
+ mov2int reg, mmxm, size=dsz
+};
+
+def macroop MOVD_M_MMX {
+    # MOVD transfers dsz bytes, as the register forms of MOVD in this file
+    # already do. A fixed dataSize=8 would overwrite the 4 bytes following a
+    # 32 bit memory operand.
+    stfp mmx, seg, sib, disp, dataSize=dsz
+};
+
+def macroop MOVD_P_MMX {
+    rdip t7
+    # MOVD transfers dsz bytes, as the register forms of MOVD in this file
+    # already do. A fixed dataSize=8 would overwrite the 4 bytes following a
+    # 32 bit memory operand.
+    stfp mmx, seg, riprel, disp, dataSize=dsz
+};
+
+def macroop MOVQ_MMX_MMX {
+ movfp mmx, mmxm
+};
+
+def macroop MOVQ_MMX_M {
+ ldfp mmx, seg, sib, disp, dataSize=8
+};
+
+def macroop MOVQ_MMX_P {
+ rdip t7
+ ldfp mmx, seg, riprel, disp, dataSize=8
+};
+
+def macroop MOVQ_M_MMX {
+ stfp mmx, seg, sib, disp, dataSize=8
+};
+
+def macroop MOVQ_P_MMX {
+ rdip t7
+ stfp mmx, seg, riprel, disp, dataSize=8
+};
+'''
# MOVDQ2Q
# MOVQ2DQ
-'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py
index 558391c6a..11e670b40 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py
@@ -54,5 +54,8 @@
# Authors: Gabe Black
microcode = '''
-# PMOVMSKB
+def macroop PMOVMSKB_R_MMX {
+ limm reg, 0
+ movsign reg, mmxm, size=1, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py
index edd55d35a..f43d75e68 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py
@@ -55,5 +55,10 @@
microcode = '''
# MOVNTQ
-# MASKMOVQ
+
+def macroop MASKMOVQ_MMX_MMX {
+ ldfp ufp1, ds, [1, t0, rdi], dataSize=8
+ maskmov ufp1, mmx, mmxm, size=1
+ stfp ufp1, ds, [1, t0, rdi], dataSize=8
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py b/src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py
index 336796e23..3e70093e0 100644
--- a/src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py
+++ b/src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py
@@ -54,5 +54,18 @@
# Authors: Gabe Black
microcode = '''
-# PXOR
+def macroop PXOR_MMX_MMX {
+ mxor mmx, mmx, mmxm
+};
+
+def macroop PXOR_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mxor mmx, mmx, ufp1
+};
+
+def macroop PXOR_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mxor mmx, mmx, ufp1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/logical/pand.py b/src/arch/x86/isa/insts/simd64/integer/logical/pand.py
index 055b7c5f6..181c26a62 100644
--- a/src/arch/x86/isa/insts/simd64/integer/logical/pand.py
+++ b/src/arch/x86/isa/insts/simd64/integer/logical/pand.py
@@ -54,6 +54,33 @@
# Authors: Gabe Black
microcode = '''
-# PAND
-# PANDN
+def macroop PAND_MMX_MMX {
+ mand mmx, mmx, mmxm
+};
+
+def macroop PAND_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mand mmx, mmx, ufp1
+};
+
+def macroop PAND_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mand mmx, mmx, ufp1
+};
+
+def macroop PANDN_MMX_MMX {
+ mandn mmx, mmx, mmxm
+};
+
+def macroop PANDN_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mandn mmx, mmx, ufp1
+};
+
+def macroop PANDN_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mandn mmx, mmx, ufp1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/logical/por.py b/src/arch/x86/isa/insts/simd64/integer/logical/por.py
index addb28a60..701ac95b5 100644
--- a/src/arch/x86/isa/insts/simd64/integer/logical/por.py
+++ b/src/arch/x86/isa/insts/simd64/integer/logical/por.py
@@ -54,5 +54,18 @@
# Authors: Gabe Black
microcode = '''
-# POR
+def macroop POR_MMX_MMX {
+ mor mmx, mmx, mmxm
+};
+
+def macroop POR_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ mor mmx, mmx, ufp1
+};
+
+def macroop POR_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ mor mmx, mmx, ufp1
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py b/src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py
index 4687cab8d..011337ef7 100644
--- a/src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py
+++ b/src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py
@@ -54,7 +54,60 @@
# Authors: Gabe Black
microcode = '''
-# PSLLW
-# PSLLD
-# PSLLQ
+def macroop PSLLW_MMX_MMX {
+ msll mmx, mmx, mmxm, size=2, ext=0
+};
+
+def macroop PSLLW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msll mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSLLW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msll mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSLLW_MMX_I {
+ mslli mmx, mmx, imm, size=2, ext=0
+};
+
+def macroop PSLLD_MMX_MMX {
+ msll mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PSLLD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msll mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSLLD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msll mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSLLD_MMX_I {
+ mslli mmx, mmx, imm, size=4, ext=0
+};
+
+def macroop PSLLQ_MMX_MMX {
+ msll mmx, mmx, mmxm, size=8, ext=0
+};
+
+def macroop PSLLQ_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msll mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PSLLQ_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msll mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PSLLQ_MMX_I {
+ mslli mmx, mmx, imm, size=8, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py b/src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py
index 63750e292..951b3ea9f 100644
--- a/src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py
+++ b/src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py
@@ -54,6 +54,41 @@
# Authors: Gabe Black
microcode = '''
-# PSRAW
-# PSRAD
+def macroop PSRAW_MMX_MMX {
+ msra mmx, mmx, mmxm, size=2, ext=0
+};
+
+def macroop PSRAW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msra mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSRAW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msra mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSRAW_MMX_I {
+ msrai mmx, mmx, imm, size=2, ext=0
+};
+
+def macroop PSRAD_MMX_MMX {
+ msra mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PSRAD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msra mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSRAD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msra mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSRAD_MMX_I {
+ msrai mmx, mmx, imm, size=4, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py b/src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py
index 1f870dc32..dc6182de7 100644
--- a/src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py
+++ b/src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py
@@ -54,7 +54,60 @@
# Authors: Gabe Black
microcode = '''
-# PSRLW
-# PSRLD
-# PSRLQ
+def macroop PSRLW_MMX_MMX {
+ msrl mmx, mmx, mmxm, size=2, ext=0
+};
+
+def macroop PSRLW_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msrl mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSRLW_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msrl mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSRLW_MMX_I {
+ msrli mmx, mmx, imm, size=2, ext=0
+};
+
+def macroop PSRLD_MMX_MMX {
+ msrl mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PSRLD_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msrl mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSRLD_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msrl mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSRLD_MMX_I {
+ msrli mmx, mmx, imm, size=4, ext=0
+};
+
+def macroop PSRLQ_MMX_MMX {
+ msrl mmx, mmx, mmxm, size=8, ext=0
+};
+
+def macroop PSRLQ_MMX_M {
+ ldfp ufp1, seg, sib, disp, dataSize=8
+ msrl mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PSRLQ_MMX_P {
+ rdip t7
+ ldfp ufp1, seg, riprel, disp, dataSize=8
+ msrl mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PSRLQ_MMX_I {
+ msrli mmx, mmx, imm, size=8, ext=0
+};
'''
diff --git a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
index 2a4c3f0ed..4837e1b45 100644
--- a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
+++ b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
@@ -65,6 +65,10 @@ def macroop FLD_P {
movfp st(-1), ufp1, spm=-1
};
+def macroop FST_R {
+ movfp sti, st(0)
+};
+
def macroop FST_M {
stfp st(0), seg, sib, disp
};
@@ -74,6 +78,10 @@ def macroop FST_P {
stfp st(0), seg, riprel, disp
};
+def macroop FSTP_R {
+ movfp sti, st(0), spm=1
+};
+
def macroop FSTP_M {
movfp ufp1, st(0), spm=1
stfp ufp1, seg, sib, disp
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index c6f5e9cdd..25b58dfb7 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -112,6 +112,8 @@ let {{
"regm" : regIdx("env.regm"),
"xmmlm" : regIdx("FLOATREG_XMM_LOW(env.regm)"),
"xmmhm" : regIdx("FLOATREG_XMM_HIGH(env.regm)"),
+ "mmx" : regIdx("FLOATREG_MMX(env.reg)"),
+ "mmxm" : regIdx("FLOATREG_MMX(env.regm)"),
"imm" : "adjustedImm",
"disp" : "adjustedDisp",
"seg" : regIdx("env.seg"),
@@ -217,6 +219,8 @@ let {{
return regIdx("NUM_FLOATREGS + (((%s) + 8) %% 8)" % index)
assembler.symbols["st"] = stack_index
+ assembler.symbols["sti"] = stack_index("env.reg")
+ assembler.symbols["stim"] = stack_index("env.regm")
macroopDict = assembler.assemble(microcode)
diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa
index 94c707f73..912aa3511 100644
--- a/src/arch/x86/isa/microops/ldstop.isa
+++ b/src/arch/x86/isa/microops/ldstop.isa
@@ -529,7 +529,8 @@ let {{
dataSize="env.dataSize",
addressSize="env.addressSize", atCPL0=False):
super(CdaOp, self).__init__("InstRegIndex(NUM_INTREGS)", segment,
- addr, disp, dataSize, addressSize, "0", atCPL0, False)
+ addr, disp, dataSize, addressSize, "Request::NO_ACCESS",
+ atCPL0, False)
self.className = "Cda"
self.mnemonic = "cda"
diff --git a/src/arch/x86/isa/microops/limmop.isa b/src/arch/x86/isa/microops/limmop.isa
index f7e7728ab..91ddb1465 100644
--- a/src/arch/x86/isa/microops/limmop.isa
+++ b/src/arch/x86/isa/microops/limmop.isa
@@ -164,6 +164,30 @@ let {{
return allocator
microopClasses["limm"] = LimmOp
+
+    class LfpimmOp(X86Microop):
+        # Microop wrapper for "lfpimm": load an immediate bit pattern into a
+        # floating point register.
+        #
+        # dest     -- destination register index expression.
+        # imm      -- an integer (used verbatim as the 64 bit pattern), a
+        #             float (converted to the bit pattern of the equivalent
+        #             IEEE-754 double), or a string holding a C++ expression
+        #             which is passed through untouched.
+        # dataSize -- C++ expression for the data size of the microop.
+        def __init__(self, dest, imm, dataSize="env.dataSize"):
+            self.className = "Lfpimm"
+            self.mnemonic = "lfpimm"
+            self.dest = dest
+            if isinstance(imm, (int, long)):
+                imm = "ULL(%d)" % imm
+            elif isinstance(imm, float):
+                # Work out the raw bits of the double here instead of
+                # emitting a C++ cast: the value is known when the ISA
+                # description is processed, and an integer literal needs no
+                # type punning in the generated code.
+                import struct
+                doubleBits = struct.unpack("<Q", struct.pack("<d", imm))[0]
+                imm = "ULL(%d)" % doubleBits
+            self.imm = imm
+            self.dataSize = dataSize
+
+        def getAllocator(self, *microFlags):
+            # Build the C++ "new" expression which constructs this microop.
+            allocator = '''new %(class_name)s(machInst, macrocodeBlock
+                    %(flags)s, %(dest)s, %(imm)s, %(dataSize)s)''' % {
+                "class_name" : self.className,
+                "mnemonic" : self.mnemonic,
+                "flags" : self.microFlagsText(microFlags),
+                "dest" : self.dest, "imm" : self.imm,
+                "dataSize" : self.dataSize}
+            return allocator
+
+ microopClasses["lfpimm"] = LfpimmOp
}};
let {{
@@ -174,4 +198,11 @@ let {{
decoder_output += MicroLimmOpConstructor.subst(iop)
decoder_output += MicroLimmOpDisassembly.subst(iop)
exec_output += MicroLimmOpExecute.subst(iop)
+
+ iop = InstObjParams("lfpimm", "Lfpimm", 'X86MicroopBase',
+ {"code" : "FpDestReg.uqw = imm"})
+ header_output += MicroLimmOpDeclare.subst(iop)
+ decoder_output += MicroLimmOpConstructor.subst(iop)
+ decoder_output += MicroLimmOpDisassembly.subst(iop)
+ exec_output += MicroLimmOpExecute.subst(iop)
}};
diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa
new file mode 100644
index 000000000..9c53fa0fb
--- /dev/null
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -0,0 +1,1554 @@
+/// Copyright (c) 2009 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+def template MediaOpExecute {{
+ Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+
+ %(op_decl)s;
+ %(op_rd)s;
+
+ %(code)s;
+
+ //Write the resulting state to the execution context
+ if(fault == NoFault)
+ {
+ %(op_wb)s;
+ }
+ return fault;
+ }
+}};
+
+def template MediaOpRegDeclare {{
+ class %(class_name)s : public %(base_class)s
+ {
+ protected:
+ void buildMe();
+
+ public:
+ %(class_name)s(ExtMachInst _machInst,
+ const char * instMnem,
+ bool isMicro, bool isDelayed, bool isFirst, bool isLast,
+ InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
+
+ %(class_name)s(ExtMachInst _machInst,
+ const char * instMnem,
+ InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
+
+ %(BasicExecDeclare)s
+ };
+}};
+
+def template MediaOpImmDeclare {{
+
+ class %(class_name)s : public %(base_class)s
+ {
+ protected:
+ void buildMe();
+
+ public:
+ %(class_name)s(ExtMachInst _machInst,
+ const char * instMnem,
+ bool isMicro, bool isDelayed, bool isFirst, bool isLast,
+ InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
+
+ %(class_name)s(ExtMachInst _machInst,
+ const char * instMnem,
+ InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
+
+ %(BasicExecDeclare)s
+ };
+}};
+
+def template MediaOpRegConstructor {{
+
+ inline void %(class_name)s::buildMe()
+ {
+ %(constructor)s;
+ }
+
+ inline %(class_name)s::%(class_name)s(
+ ExtMachInst machInst, const char * instMnem,
+ InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
+ %(base_class)s(machInst, "%(mnemonic)s", instMnem,
+ false, false, false, false,
+ _src1, _src2, _dest, _srcSize, _destSize, _ext,
+ %(op_class)s)
+ {
+ buildMe();
+ }
+
+ inline %(class_name)s::%(class_name)s(
+ ExtMachInst machInst, const char * instMnem,
+ bool isMicro, bool isDelayed, bool isFirst, bool isLast,
+ InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
+ %(base_class)s(machInst, "%(mnemonic)s", instMnem,
+ isMicro, isDelayed, isFirst, isLast,
+ _src1, _src2, _dest, _srcSize, _destSize, _ext,
+ %(op_class)s)
+ {
+ buildMe();
+ }
+}};
+
+def template MediaOpImmConstructor {{
+
+ inline void %(class_name)s::buildMe()
+ {
+ %(constructor)s;
+ }
+
+ inline %(class_name)s::%(class_name)s(
+ ExtMachInst machInst, const char * instMnem,
+ InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
+ %(base_class)s(machInst, "%(mnemonic)s", instMnem,
+ false, false, false, false,
+ _src1, _imm8, _dest, _srcSize, _destSize, _ext,
+ %(op_class)s)
+ {
+ buildMe();
+ }
+
+ inline %(class_name)s::%(class_name)s(
+ ExtMachInst machInst, const char * instMnem,
+ bool isMicro, bool isDelayed, bool isFirst, bool isLast,
+ InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
+ uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
+ %(base_class)s(machInst, "%(mnemonic)s", instMnem,
+ isMicro, isDelayed, isFirst, isLast,
+ _src1, _imm8, _dest, _srcSize, _destSize, _ext,
+ %(op_class)s)
+ {
+ buildMe();
+ }
+}};
+
+let {{
+ # Make these empty strings so that concatenating onto
+ # them will always work.
+ header_output = ""
+ decoder_output = ""
+ exec_output = ""
+
+ immTemplates = (
+ MediaOpImmDeclare,
+ MediaOpImmConstructor,
+ MediaOpExecute)
+
+ regTemplates = (
+ MediaOpRegDeclare,
+ MediaOpRegConstructor,
+ MediaOpExecute)
+
+ class MediaOpMeta(type):
+ def buildCppClasses(self, name, Name, suffix, code):
+
+ # Globals to stick the output in
+ global header_output
+ global decoder_output
+ global exec_output
+
+ # If op2 is used anywhere, make register and immediate versions
+ # of this code.
+ matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
+ match = matcher.search(code)
+ if match:
+ typeQual = ""
+ if match.group("typeQual"):
+ typeQual = match.group("typeQual")
+ src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
+ self.buildCppClasses(name, Name, suffix,
+ matcher.sub(src2_name, code))
+ self.buildCppClasses(name + "i", Name, suffix + "Imm",
+ matcher.sub("imm8", code))
+ return
+
+ base = "X86ISA::MediaOp"
+
+ # If imm8 shows up in the code, use the immediate templates, if
+ # not, hopefully the register ones will be correct.
+ matcher = re.compile("(?<!\w)imm8(?!\w)")
+ if matcher.search(code):
+ base += "Imm"
+ templates = immTemplates
+ else:
+ base += "Reg"
+ templates = regTemplates
+
+ # Get everything ready for the substitution
+ iop = InstObjParams(name, Name + suffix, base, {"code" : code})
+
+ # Generate the actual code (finally!)
+ header_output += templates[0].subst(iop)
+ decoder_output += templates[1].subst(iop)
+ exec_output += templates[2].subst(iop)
+
+
+ def __new__(mcls, Name, bases, dict):
+ abstract = False
+ name = Name.lower()
+ if "abstract" in dict:
+ abstract = dict['abstract']
+ del dict['abstract']
+
+ cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
+ if not abstract:
+ cls.className = Name
+ cls.base_mnemonic = name
+ code = cls.code
+
+ # Set up the C++ classes
+ mcls.buildCppClasses(cls, name, Name, "", code)
+
+ # Hook into the microassembler dict
+ global microopClasses
+ microopClasses[name] = cls
+
+ # If op2 is used anywhere, make register and immediate versions
+ # of this code.
+ matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
+ if matcher.search(code):
+ microopClasses[name + 'i'] = cls
+ return cls
+
+
+    class MediaOp(X86Microop):
+        # Common Python side base class for the media microops defined
+        # below. MediaOpMeta generates the C++ classes from each subclass's
+        # "code" string and registers the subclass with the microassembler.
+        __metaclass__ = MediaOpMeta
+        # This class itself doesn't act as a microop
+        abstract = True
+
+        # dest, src1, op2 -- operand expressions handed to the C++
+        #                    constructor (op2 is a register or an immediate).
+        # size            -- shorthand which sets both srcSize and destSize.
+        # srcSize/destSize - per operand item sizes; override "size".
+        # ext             -- extra flags for the microop, 0 if not given.
+        def __init__(self, dest, src1, op2,
+                size = None, destSize = None, srcSize = None, ext = None):
+            self.dest = dest
+            self.src1 = src1
+            self.op2 = op2
+            if size is not None:
+                self.srcSize = size
+                self.destSize = size
+            if srcSize is not None:
+                self.srcSize = srcSize
+            if destSize is not None:
+                self.destSize = destSize
+            # Use getattr so that a size which was never assigned produces
+            # the intended error message rather than an AttributeError.
+            if getattr(self, "srcSize", None) is None:
+                raise Exception("Source size not set.")
+            if getattr(self, "destSize", None) is None:
+                raise Exception("Dest size not set.")
+            if ext is None:
+                self.ext = 0
+            else:
+                self.ext = ext
+
+        def getAllocator(self, *microFlags):
+            # Build the C++ "new" expression for this microop. The
+            # immediate flavor (mnemonic ending in 'i') lives in the
+            # C++ class with the "Imm" suffix.
+            className = self.className
+            if self.mnemonic == self.base_mnemonic + 'i':
+                className += "Imm"
+            allocator = '''new %(class_name)s(machInst, macrocodeBlock
+                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
+                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
+                "class_name" : className,
+                "flags" : self.microFlagsText(microFlags),
+                "src1" : self.src1, "op2" : self.op2,
+                "dest" : self.dest,
+                "srcSize" : self.srcSize,
+                "destSize" : self.destSize,
+                "ext" : self.ext}
+            return allocator
+
+ class Mov2int(MediaOp):
+ def __init__(self, dest, src1, src2 = 0, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Mov2int, self).__init__(dest, src1,\
+ src2, size, destSize, srcSize, ext)
+ code = '''
+ int items = sizeof(FloatRegBits) / srcSize;
+ int offset = imm8;
+ if (bits(src1, 0) && (ext & 0x1))
+ offset -= items;
+ if (offset >= 0 && offset < items) {
+ uint64_t fpSrcReg1 =
+ bits(FpSrcReg1.uqw,
+ (offset + 1) * srcSize * 8 - 1,
+ (offset + 0) * srcSize * 8);
+ DestReg = merge(0, fpSrcReg1, destSize);
+ } else {
+ DestReg = DestReg;
+ }
+ '''
+
+ class Mov2fp(MediaOp):
+ def __init__(self, dest, src1, src2 = 0, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Mov2fp, self).__init__(dest, src1,\
+ src2, size, destSize, srcSize, ext)
+ code = '''
+ int items = sizeof(FloatRegBits) / destSize;
+ int offset = imm8;
+ if (bits(dest, 0) && (ext & 0x1))
+ offset -= items;
+ if (offset >= 0 && offset < items) {
+ uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
+ FpDestReg.uqw =
+ insertBits(FpDestReg.uqw,
+ (offset + 1) * destSize * 8 - 1,
+ (offset + 0) * destSize * 8, srcReg1);
+ } else {
+ FpDestReg.uqw = FpDestReg.uqw;
+ }
+ '''
+
+ # Collect the top bit of every srcSize byte item of the first source into
+ # consecutive bits of an integer register. The second operand is unused and
+ # is passed to the base class as a dummy register index.
+ class Movsign(MediaOp):
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Movsign, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ int items = sizeof(FloatRegBits) / srcSize;
+ uint64_t result = 0;
+ // When bit 0 of ext is set the collected bits are placed above the
+ // first "items" bits of the result instead of starting at bit 0.
+ int offset = (ext & 0x1) ? items : 0;
+ for (int i = 0; i < items; i++) {
+ // The single index form of bits() picks out just the top bit of item i.
+ uint64_t picked =
+ bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
+ result = insertBits(result, i + offset, i + offset, picked);
+ }
+ // The bits are ORed into the existing destination value, so whatever
+ // DestReg already holds is preserved.
+ DestReg = DestReg | result;
+ '''
+
+ # Conditionally copy items from the first source into the destination. An
+ # item is copied when the top bit of the matching item in the second source
+ # is set, otherwise the destination item keeps its old value.
+ class Maskmov(MediaOp):
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ // Bit 0 of ext restricts the operation to the lowest item only.
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ // Start from the current destination so unselected items are unchanged.
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ // The top bit of the second source's item acts as the select bit.
+ if (bits(FpSrcReg2.uqw, hiIndex))
+ result = insertBits(result, hiIndex, loIndex, arg1Bits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class shuffle(MediaOp):
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = sizeof(FloatRegBits) / size;
+ int options;
+ int optionBits;
+ if (size == 8) {
+ options = 2;
+ optionBits = 1;
+ } else {
+ options = 4;
+ optionBits = 2;
+ }
+
+ uint64_t result = 0;
+ uint8_t sel = ext;
+
+ for (int i = 0; i < items; i++) {
+ uint64_t resBits;
+ uint8_t lsel = sel & mask(optionBits);
+ if (lsel * size >= sizeof(FloatRegBits)) {
+ lsel -= options / 2;
+ resBits = bits(FpSrcReg2.uqw,
+ (lsel + 1) * sizeBits - 1,
+ (lsel + 0) * sizeBits);
+ } else {
+ resBits = bits(FpSrcReg1.uqw,
+ (lsel + 1) * sizeBits - 1,
+ (lsel + 0) * sizeBits);
+ }
+
+ sel >>= optionBits;
+
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ # Interleave items from half of the first source with items from the same
+ # half of the second source. Items from the first source land in the even
+ # result positions and items from the second source in the odd ones.
+ class Unpack(MediaOp):
+ code = '''
+ assert(srcSize == destSize);
+ int size = destSize;
+ // Only half of each source register contributes to the result.
+ int items = (sizeof(FloatRegBits) / size) / 2;
+ // A non-zero ext selects the high half of the sources, zero the low half.
+ int offset = ext ? items : 0;
+ uint64_t result = 0;
+ for (int i = 0; i < items; i++) {
+ // Item from the first source goes to result position 2 * i.
+ uint64_t pickedLow =
+ bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
+ (i + offset) * 8 * size);
+ result = insertBits(result,
+ (2 * i + 1) * 8 * size - 1,
+ (2 * i + 0) * 8 * size,
+ pickedLow);
+ // Item from the second source goes to result position 2 * i + 1.
+ uint64_t pickedHigh =
+ bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
+ (i + offset) * 8 * size);
+ result = insertBits(result,
+ (2 * i + 2) * 8 * size - 1,
+ (2 * i + 1) * 8 * size,
+ pickedHigh);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+    class Pack(MediaOp):
+        # Narrow srcSize byte signed items to destSize byte items with
+        # saturation. The low half of the result is packed from the first
+        # source and the high half from the second source. Bit 0 of ext
+        # selects signed saturation, otherwise the result saturates as an
+        # unsigned value.
+        code = '''
+        assert(srcSize == destSize * 2);
+        int items = (sizeof(FloatRegBits) / destSize);
+        int itemsPerSrc = items / 2;
+        int destBits = destSize * 8;
+        int srcBits = srcSize * 8;
+        uint64_t result = 0;
+        for (int i = 0; i < items; i++) {
+            // Each source register is indexed from its own item zero, so
+            // the index into the second source is rebased by the number of
+            // items taken from the first one.
+            bool fromFirst = (i < itemsPerSrc);
+            int srcIdx = fromFirst ? i : (i - itemsPerSrc);
+            uint64_t srcBitsVal = fromFirst ? FpSrcReg1.uqw : FpSrcReg2.uqw;
+            uint64_t picked =
+                bits(srcBitsVal, (srcIdx + 1) * srcBits - 1,
+                                 (srcIdx + 0) * srcBits);
+            unsigned signBit = bits(picked, srcBits - 1);
+
+            // Handle saturation.
+            if (ext & 0x1) {
+                // Signed result: the value fits only if every bit from the
+                // destination's sign bit upwards is a copy of the sign bit.
+                uint64_t overflow =
+                    bits(picked, srcBits - 1, destBits - 1);
+                if (signBit) {
+                    if (overflow != mask(srcBits - destBits + 1))
+                        picked = (ULL(1) << (destBits - 1));
+                } else {
+                    if (overflow != 0)
+                        picked = mask(destBits - 1);
+                }
+            } else {
+                // Unsigned result: negative inputs clamp to zero, and
+                // inputs needing more than destBits bits clamp to all ones.
+                if (signBit)
+                    picked = 0;
+                else if (bits(picked, srcBits - 1, destBits) != 0)
+                    picked = mask(destBits);
+            }
+            // insertBits only takes the low destBits bits of picked.
+            result = insertBits(result,
+                                (i + 1) * destBits - 1,
+                                (i + 0) * destBits,
+                                picked);
+        }
+        FpDestReg.uqw = result;
+        '''
+
+ class Mxor(MediaOp):
+ def __init__(self, dest, src1, src2):
+ super(Mxor, self).__init__(dest, src1, src2, 1)
+ code = '''
+ FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
+ '''
+
+ class Mor(MediaOp):
+ def __init__(self, dest, src1, src2):
+ super(Mor, self).__init__(dest, src1, src2, 1)
+ code = '''
+ FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
+ '''
+
+ class Mand(MediaOp):
+ def __init__(self, dest, src1, src2):
+ super(Mand, self).__init__(dest, src1, src2, 1)
+ code = '''
+ FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
+ '''
+
+ class Mandn(MediaOp):
+ def __init__(self, dest, src1, src2):
+ super(Mandn, self).__init__(dest, src1, src2, 1)
+ code = '''
+ FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
+ '''
+
+ class Mminf(MediaOp):
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ double arg1, arg2;
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+
+ if (size == 4) {
+ floatInt fi;
+ fi.i = arg1Bits;
+ arg1 = fi.f;
+ fi.i = arg2Bits;
+ arg2 = fi.f;
+ } else {
+ doubleInt di;
+ di.i = arg1Bits;
+ arg1 = di.d;
+ di.i = arg2Bits;
+ arg2 = di.d;
+ }
+
+ if (arg1 < arg2) {
+ result = insertBits(result, hiIndex, loIndex, arg1Bits);
+ } else {
+ result = insertBits(result, hiIndex, loIndex, arg2Bits);
+ }
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mmaxf(MediaOp):
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ double arg1, arg2;
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+
+ if (size == 4) {
+ floatInt fi;
+ fi.i = arg1Bits;
+ arg1 = fi.f;
+ fi.i = arg2Bits;
+ arg2 = fi.f;
+ } else {
+ doubleInt di;
+ di.i = arg1Bits;
+ arg1 = di.d;
+ di.i = arg2Bits;
+ arg2 = di.d;
+ }
+
+ if (arg1 > arg2) {
+ result = insertBits(result, hiIndex, loIndex, arg1Bits);
+ } else {
+ result = insertBits(result, hiIndex, loIndex, arg2Bits);
+ }
+ }
+ FpDestReg.uqw = result;
+ '''
+
+    class Mmini(MediaOp):
+        # Packed integer minimum of the two sources. Bit 0 of ext limits the
+        # operation to the lowest item, with the other destination items
+        # left unchanged. Bit 1 of ext selects a signed comparison,
+        # otherwise items are compared as unsigned values.
+        code = '''
+
+        assert(srcSize == destSize);
+        int size = srcSize;
+        int sizeBits = size * 8;
+        int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+        uint64_t result = FpDestReg.uqw;
+
+        for (int i = 0; i < items; i++) {
+            int hiIndex = (i + 1) * sizeBits - 1;
+            int loIndex = (i + 0) * sizeBits;
+            // Sign extend each item to 64 bits for the signed compare. The
+            // sign mask is built from a 64 bit one so the shift stays well
+            // defined for 4 and 8 byte items.
+            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+            int64_t arg1 = arg1Bits |
+                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
+            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+            int64_t arg2 = arg2Bits |
+                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
+            uint64_t resBits;
+
+            if (ext & 0x2) {
+                if (arg1 < arg2) {
+                    resBits = arg1Bits;
+                } else {
+                    resBits = arg2Bits;
+                }
+            } else {
+                if (arg1Bits < arg2Bits) {
+                    resBits = arg1Bits;
+                } else {
+                    resBits = arg2Bits;
+                }
+            }
+            result = insertBits(result, hiIndex, loIndex, resBits);
+        }
+        FpDestReg.uqw = result;
+        '''
+
+ class Mmaxi(MediaOp):
+ # Lane-wise integer maximum of FpSrcReg1 and FpSrcReg2.
+ # srcSize must equal destSize. ext bit 0 restricts the operation to
+ # lane 0; ext bit 1 selects a signed comparison (unsigned otherwise).
+ # Lanes that are not processed keep FpDestReg's old bits.
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ // The sign masks are built from a 64 bit one: shifting a plain
+ // int by sizeBits - 1 is not valid for lanes wider than 31 bits.
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ int64_t arg1 = arg1Bits |
+ (0 - (arg1Bits & ((uint64_t)1 << (sizeBits - 1))));
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ int64_t arg2 = arg2Bits |
+ (0 - (arg2Bits & ((uint64_t)1 << (sizeBits - 1))));
+ uint64_t resBits;
+
+ if (ext & 0x2) {
+ if (arg1 > arg2) {
+ resBits = arg1Bits;
+ } else {
+ resBits = arg2Bits;
+ }
+ } else {
+ if (arg1Bits > arg2Bits) {
+ resBits = arg1Bits;
+ } else {
+ resBits = arg2Bits;
+ }
+ }
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msqrt(MediaOp):
+ # Lane-wise floating point square root of FpSrcReg1.
+ # This op takes a single source: the constructor forwards
+ # "InstRegIndex(0)" as the third positional argument of
+ # MediaOp.__init__ (presumably the unused second source -- confirm
+ # against MediaOp). srcSize must equal destSize and be 4 (float) or
+ # 8 (double). ext bit 0 restricts the operation to lane 0; lanes that
+ # are not processed keep FpDestReg's old bits.
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Msqrt, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+
+ if (size == 4) {
+ floatInt fi;
+ fi.i = argBits;
+ fi.f = sqrt(fi.f);
+ argBits = fi.i;
+ } else {
+ doubleInt di;
+ di.i = argBits;
+ di.d = sqrt(di.d);
+ argBits = di.i;
+ }
+ result = insertBits(result, hiIndex, loIndex, argBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Maddf(MediaOp):
+ # Lane-wise floating point addition: FpSrcReg1 + FpSrcReg2.
+ # srcSize must equal destSize and be 4 (lanes treated as float) or 8
+ # (lanes treated as double). ext bit 0 restricts the operation to
+ # lane 0; lanes that are not processed keep FpDestReg's old bits.
+ # The arithmetic is done with the host's float/double operators.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+
+ if (size == 4) {
+ floatInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.f = arg1.f + arg2.f;
+ resBits = res.i;
+ } else {
+ doubleInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.d = arg1.d + arg2.d;
+ resBits = res.i;
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msubf(MediaOp):
+ # Lane-wise floating point subtraction: FpSrcReg1 - FpSrcReg2.
+ # srcSize must equal destSize and be 4 (lanes treated as float) or 8
+ # (lanes treated as double). ext bit 0 restricts the operation to
+ # lane 0; lanes that are not processed keep FpDestReg's old bits.
+ # The arithmetic is done with the host's float/double operators.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+
+ if (size == 4) {
+ floatInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.f = arg1.f - arg2.f;
+ resBits = res.i;
+ } else {
+ doubleInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.d = arg1.d - arg2.d;
+ resBits = res.i;
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mmulf(MediaOp):
+ # Lane-wise floating point multiplication: FpSrcReg1 * FpSrcReg2.
+ # srcSize must equal destSize and be 4 (lanes treated as float) or 8
+ # (lanes treated as double). ext bit 0 restricts the operation to
+ # lane 0; lanes that are not processed keep FpDestReg's old bits.
+ # The arithmetic is done with the host's float/double operators.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+
+ if (size == 4) {
+ floatInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.f = arg1.f * arg2.f;
+ resBits = res.i;
+ } else {
+ doubleInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.d = arg1.d * arg2.d;
+ resBits = res.i;
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mdivf(MediaOp):
+ # Lane-wise floating point division: FpSrcReg1 / FpSrcReg2.
+ # srcSize must equal destSize and be 4 (lanes treated as float) or 8
+ # (lanes treated as double). ext bit 0 restricts the operation to
+ # lane 0; lanes that are not processed keep FpDestReg's old bits.
+ # The arithmetic is done with the host's float/double operators; no
+ # explicit check for a zero divisor is made here.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+
+ if (size == 4) {
+ floatInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.f = arg1.f / arg2.f;
+ resBits = res.i;
+ } else {
+ doubleInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.d = arg1.d / arg2.d;
+ resBits = res.i;
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Maddi(MediaOp):
+ # Lane-wise integer addition: FpSrcReg1 + FpSrcReg2.
+ # srcSize must equal destSize. ext bit 0 restricts the operation to
+ # lane 0. ext bit 1 selects unsigned saturation (a carry out of the
+ # lane yields all ones); otherwise ext bit 2 selects signed
+ # saturation; with neither bit set the sum wraps. Lanes that are not
+ # processed keep FpDestReg's old bits.
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits = arg1Bits + arg2Bits;
+
+ if (ext & 0x2) {
+ if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
+ resBits = mask(sizeBits);
+ } else if (ext & 0x4) {
+ int arg1Sign = bits(arg1Bits, sizeBits - 1);
+ int arg2Sign = bits(arg2Bits, sizeBits - 1);
+ int resSign = bits(resBits, sizeBits - 1);
+ // Overflow: both operands share a sign the result lacks.
+ if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
+ if (resSign == 0) {
+ // Most negative value. Built from a 64 bit one, since
+ // shifting a plain int is invalid for wide lanes.
+ resBits = ((uint64_t)1 << (sizeBits - 1));
+ } else {
+ resBits = mask(sizeBits - 1);
+ }
+ }
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msubi(MediaOp):
+ # Lane-wise integer subtraction: FpSrcReg1 - FpSrcReg2.
+ # srcSize must equal destSize. ext bit 0 restricts the operation to
+ # lane 0. ext bit 1 selects unsigned saturation (a borrow clamps the
+ # lane to zero); otherwise ext bit 2 selects signed saturation; with
+ # neither bit set the difference wraps. Lanes that are not processed
+ # keep FpDestReg's old bits.
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits = arg1Bits - arg2Bits;
+
+ if (ext & 0x2) {
+ if (arg2Bits > arg1Bits) {
+ resBits = 0;
+ } else if (!findCarry(sizeBits, resBits,
+ arg1Bits, ~arg2Bits)) {
+ resBits = mask(sizeBits);
+ }
+ } else if (ext & 0x4) {
+ int arg1Sign = bits(arg1Bits, sizeBits - 1);
+ // The subtrahend's sign is inverted so the addition style
+ // overflow test below can be reused.
+ int arg2Sign = !bits(arg2Bits, sizeBits - 1);
+ int resSign = bits(resBits, sizeBits - 1);
+ if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
+ if (resSign == 0) {
+ // Most negative value. Built from a 64 bit one, since
+ // shifting a plain int is invalid for wide lanes.
+ resBits = ((uint64_t)1 << (sizeBits - 1));
+ } else {
+ resBits = mask(sizeBits - 1);
+ }
+ }
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mmuli(MediaOp):
+ # Lane-wise integer multiplication: FpSrcReg1 * FpSrcReg2.
+ # Source lanes are srcSize bytes wide and destination lanes destSize
+ # bytes wide (destSize >= srcSize, at most 8 bytes).
+ # ext bits, as tested by the code below:
+ # 0x1 - only lane 0 is processed
+ # 0x2 - operands are sign extended before multiplying
+ # 0x4 - 1 << (destBits - 1) is added to the product (rounding)
+ # 0x8 - the product is shifted right by destBits (high part)
+ # 16 - source lane i is read starting at bit i * destBits when
+ # bit 32 is also set, else at bit i * destBits + srcBits;
+ # without bit 16 source lanes are packed back to back
+ # Lanes that are not processed keep FpDestReg's old bits.
+ code = '''
+ int srcBits = srcSize * 8;
+ int destBits = destSize * 8;
+ assert(destBits <= 64);
+ assert(destSize >= srcSize);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int offset = 0;
+ if (ext & 16) {
+ if (ext & 32)
+ offset = i * (destBits - srcBits);
+ else
+ offset = i * (destBits - srcBits) + srcBits;
+ }
+ int srcHiIndex = (i + 1) * srcBits - 1 + offset;
+ int srcLoIndex = (i + 0) * srcBits + offset;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
+ uint64_t resBits;
+
+ if (ext & 0x2) {
+ // Sign masks are built from a 64 bit one: shifting a plain
+ // int by srcBits - 1 is invalid for lanes wider than 31 bits.
+ int64_t arg1 = arg1Bits |
+ (0 - (arg1Bits & ((uint64_t)1 << (srcBits - 1))));
+ int64_t arg2 = arg2Bits |
+ (0 - (arg2Bits & ((uint64_t)1 << (srcBits - 1))));
+ resBits = (uint64_t)(arg1 * arg2);
+ } else {
+ resBits = arg1Bits * arg2Bits;
+ }
+
+ if (ext & 0x4)
+ resBits += ((uint64_t)1 << (destBits - 1));
+
+ if (ext & 0x8)
+ resBits >>= destBits;
+
+ int destHiIndex = (i + 1) * destBits - 1;
+ int destLoIndex = (i + 0) * destBits;
+ result = insertBits(result, destHiIndex, destLoIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mavg(MediaOp):
+ # Lane-wise rounded average of FpSrcReg1 and FpSrcReg2, computed as
+ # (arg1 + arg2 + 1) / 2 on the unsigned lane values.
+ # srcSize must equal destSize. ext bit 0 restricts the operation to
+ # lane 0; lanes that are not processed keep FpDestReg's old bits.
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msad(MediaOp):
+ # Sum of absolute differences: for every srcSize byte lane, the
+ # absolute value of (FpSrcReg1 lane - FpSrcReg2 lane) is accumulated.
+ # The total, masked with mask(destSize * 8), replaces the whole of
+ # FpDestReg. ext is not consulted, so all lanes are always summed.
+ code = '''
+ int srcBits = srcSize * 8;
+ int items = sizeof(FloatRegBits) / srcSize;
+
+ uint64_t sum = 0;
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * srcBits - 1;
+ int loIndex = (i + 0) * srcBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ int64_t resBits = arg1Bits - arg2Bits;
+ if (resBits < 0)
+ resBits = -resBits;
+ sum += resBits;
+ }
+ FpDestReg.uqw = sum & mask(destSize * 8);
+ '''
+
+ class Msrl(MediaOp):
+ # Lane-wise logical shift right of FpSrcReg1.
+ # The same shift amount, read from op2.uqw, is applied to every lane
+ # (op2 is not defined in this snippet; presumably the template
+ # substitutes the second operand -- confirm against MediaOp).
+ # A shift amount of sizeBits or more clears the lane. srcSize must
+ # equal destSize. ext bit 0 restricts the operation to lane 0; lanes
+ # that are not processed keep FpDestReg's old bits.
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t shiftAmt = op2.uqw;
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+ if (shiftAmt >= sizeBits) {
+ resBits = 0;
+ } else {
+ resBits = (arg1Bits >> shiftAmt) &
+ mask(sizeBits - shiftAmt);
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msra(MediaOp):
+ # Lane-wise arithmetic shift right of FpSrcReg1.
+ # The same shift amount, read from op2.uqw, is applied to every lane
+ # (op2 is not defined in this snippet; presumably the template
+ # substitutes the second operand -- confirm against MediaOp).
+ # A shift amount of sizeBits or more fills the lane with copies of
+ # its sign bit. srcSize must equal destSize. ext bit 0 restricts the
+ # operation to lane 0; lanes that are not processed keep FpDestReg's
+ # old bits.
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t shiftAmt = op2.uqw;
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+ if (shiftAmt >= sizeBits) {
+ if (bits(arg1Bits, sizeBits - 1))
+ resBits = mask(sizeBits);
+ else
+ resBits = 0;
+ } else {
+ resBits = (arg1Bits >> shiftAmt);
+ // After the shift the lane's sign bit sits at position
+ // sizeBits - 1 - shiftAmt; replicate it upwards. The mask is
+ // built from a 64 bit one because shifting a plain int is
+ // invalid once that position exceeds bit 30.
+ resBits = resBits |
+ (0 - (resBits &
+ ((uint64_t)1 << (sizeBits - 1 - shiftAmt))));
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msll(MediaOp):
+ # Lane-wise logical shift left of FpSrcReg1.
+ # The same shift amount, read from op2.uqw, is applied to every lane
+ # (op2 is not defined in this snippet; presumably the template
+ # substitutes the second operand -- confirm against MediaOp).
+ # A shift amount of sizeBits or more clears the lane. srcSize must
+ # equal destSize. ext bit 0 restricts the operation to lane 0; lanes
+ # that are not processed keep FpDestReg's old bits.
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t shiftAmt = op2.uqw;
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+ if (shiftAmt >= sizeBits) {
+ resBits = 0;
+ } else {
+ resBits = (arg1Bits << shiftAmt);
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Cvtf2i(MediaOp):
+ # Lane-wise conversion from floating point to signed integer.
+ # This op takes a single source: the constructor forwards
+ # "InstRegIndex(0)" as the third positional argument of
+ # MediaOp.__init__ (presumably the unused second source -- confirm
+ # against MediaOp). srcSize and destSize must each be 4 or 8.
+ # ext bits, as tested by the code below:
+ # 0x1 - only one item is converted
+ # 0x2 - when narrowing, results are written after the first
+ # "items" destination lanes; when widening, sources are read
+ # after the first "items" source lanes
+ # 0x4 - add 0.5 away from zero before the truncating conversion
+ # Destination lanes that are not written keep FpDestReg's old bits.
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Cvtf2i, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(destSize == 4 || destSize == 8);
+ assert(srcSize == 4 || srcSize == 8);
+ int srcSizeBits = srcSize * 8;
+ int destSizeBits = destSize * 8;
+ int items;
+ int srcStart = 0;
+ int destStart = 0;
+ if (srcSize == 2 * destSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+ if (ext & 0x2)
+ destStart = destSizeBits * items;
+ } else if (destSize == 2 * srcSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ if (ext & 0x2)
+ srcStart = srcSizeBits * items;
+ } else {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ }
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
+ int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
+ uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
+ double arg;
+
+ if (srcSize == 4) {
+ floatInt fi;
+ fi.i = argBits;
+ arg = fi.f;
+ } else {
+ doubleInt di;
+ di.i = argBits;
+ arg = di.d;
+ }
+
+ if (ext & 0x4) {
+ if (arg >= 0)
+ arg += 0.5;
+ else
+ arg -= 0.5;
+ }
+
+ // Convert through the signed type of the destination width:
+ // converting a negative floating point value straight to an
+ // unsigned type is undefined. The value is also no longer
+ // narrowed to float first, which dropped low order bits of
+ // double sources before the conversion.
+ if (destSize == 4) {
+ argBits = (uint32_t)(int32_t)arg;
+ } else {
+ argBits = (uint64_t)(int64_t)arg;
+ }
+ int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
+ int destLoIndex = destStart + (i + 0) * destSizeBits;
+ result = insertBits(result, destHiIndex, destLoIndex, argBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Cvti2f(MediaOp):
+ # Lane-wise conversion from signed integer to floating point.
+ # This op takes a single source: the constructor forwards
+ # "InstRegIndex(0)" as the third positional argument of
+ # MediaOp.__init__ (presumably the unused second source -- confirm
+ # against MediaOp). srcSize and destSize must each be 4 or 8.
+ # ext bits, as tested by the code below:
+ # 0x1 - only one item is converted
+ # 0x2 - when narrowing, results are written after the first
+ # "items" destination lanes; when widening, sources are read
+ # after the first "items" source lanes
+ # Destination lanes that are not written keep FpDestReg's old bits.
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Cvti2f, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(destSize == 4 || destSize == 8);
+ assert(srcSize == 4 || srcSize == 8);
+ int srcSizeBits = srcSize * 8;
+ int destSizeBits = destSize * 8;
+ int items;
+ int srcStart = 0;
+ int destStart = 0;
+ if (srcSize == 2 * destSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+ if (ext & 0x2)
+ destStart = destSizeBits * items;
+ } else if (destSize == 2 * srcSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ if (ext & 0x2)
+ srcStart = srcSizeBits * items;
+ } else {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ }
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
+ int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
+ uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
+ // argBits holds the lane shifted down to bit 0, so its sign bit
+ // is bit srcSizeBits - 1 regardless of where the lane lives in
+ // the register. The mask is built from a 64 bit one so that 32
+ // and 64 bit lanes are handled.
+ int64_t sArg = argBits |
+ (0 - (argBits & ((uint64_t)1 << (srcSizeBits - 1))));
+ double arg = sArg;
+
+ if (destSize == 4) {
+ floatInt fi;
+ fi.f = arg;
+ argBits = fi.i;
+ } else {
+ doubleInt di;
+ di.d = arg;
+ argBits = di.i;
+ }
+ int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
+ int destLoIndex = destStart + (i + 0) * destSizeBits;
+ result = insertBits(result, destHiIndex, destLoIndex, argBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Cvtf2f(MediaOp):
+ # Lane-wise conversion between float (4 byte) and double (8 byte)
+ # lanes; equal source and destination sizes copy the value through a
+ # double. This op takes a single source: the constructor forwards
+ # "InstRegIndex(0)" as the third positional argument of
+ # MediaOp.__init__ (presumably the unused second source -- confirm
+ # against MediaOp).
+ # ext bits, as tested by the code below:
+ # 0x1 - only one item is converted
+ # 0x2 - when narrowing, results are written after the first
+ # "items" destination lanes; when widening, sources are read
+ # after the first "items" source lanes
+ # Destination lanes that are not written keep FpDestReg's old bits.
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Cvtf2f, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(destSize == 4 || destSize == 8);
+ assert(srcSize == 4 || srcSize == 8);
+ int srcSizeBits = srcSize * 8;
+ int destSizeBits = destSize * 8;
+ int items;
+ int srcStart = 0;
+ int destStart = 0;
+ if (srcSize == 2 * destSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+ if (ext & 0x2)
+ destStart = destSizeBits * items;
+ } else if (destSize == 2 * srcSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ if (ext & 0x2)
+ srcStart = srcSizeBits * items;
+ } else {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ }
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
+ int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
+ uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
+ double arg;
+
+ if (srcSize == 4) {
+ floatInt fi;
+ fi.i = argBits;
+ arg = fi.f;
+ } else {
+ doubleInt di;
+ di.i = argBits;
+ arg = di.d;
+ }
+ if (destSize == 4) {
+ floatInt fi;
+ fi.f = arg;
+ argBits = fi.i;
+ } else {
+ doubleInt di;
+ di.d = arg;
+ argBits = di.i;
+ }
+ int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
+ int destLoIndex = destStart + (i + 0) * destSizeBits;
+ result = insertBits(result, destHiIndex, destLoIndex, argBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mcmpi2r(MediaOp):
+ # Lane-wise signed integer compare of FpSrcReg1 against FpSrcReg2,
+ # writing a mask to FpDestReg: a lane becomes all ones when the
+ # predicate holds and zero otherwise. With ext bit 1 clear the
+ # predicate is "equal"; with it set the predicate is "greater than".
+ # srcSize must equal destSize. ext bit 0 restricts the operation to
+ # lane 0; lanes that are not processed keep FpDestReg's old bits.
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ // The sign masks are built from a 64 bit one: shifting a plain
+ // int by sizeBits - 1 is not valid for lanes wider than 31 bits.
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ int64_t arg1 = arg1Bits |
+ (0 - (arg1Bits & ((uint64_t)1 << (sizeBits - 1))));
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ int64_t arg2 = arg2Bits |
+ (0 - (arg2Bits & ((uint64_t)1 << (sizeBits - 1))));
+
+ uint64_t resBits = 0;
+ if (((ext & 0x2) == 0 && arg1 == arg2) ||
+ ((ext & 0x2) == 0x2 && arg1 > arg2))
+ resBits = mask(sizeBits);
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mcmpf2r(MediaOp):
+ # Lane-wise floating point compare of FpSrcReg1 against FpSrcReg2,
+ # writing a mask to FpDestReg: a lane becomes all ones when the
+ # selected predicate holds and zero otherwise.
+ # The predicate is chosen by ext & mask(3), as coded in the switch:
+ # 0 equal 4 not equal (true if either is NaN)
+ # 1 less than 5 not less than (true if either is NaN)
+ # 2 less or equal 6 not less or equal (true if either is NaN)
+ # 3 either is NaN 7 neither is NaN
+ # Unlike the other media ops in this file, the "single lane" flag is
+ # ext bit 3 (0x8) here rather than bit 0. Lanes that are not
+ # processed keep FpDestReg's old bits. Lanes are read as float when
+ # size is 4 and as double otherwise.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x8) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ double arg1, arg2;
+
+ if (size == 4) {
+ floatInt fi;
+ fi.i = arg1Bits;
+ arg1 = fi.f;
+ fi.i = arg2Bits;
+ arg2 = fi.f;
+ } else {
+ doubleInt di;
+ di.i = arg1Bits;
+ arg1 = di.d;
+ di.i = arg2Bits;
+ arg2 = di.d;
+ }
+
+ uint64_t resBits = 0;
+ bool nanop = isnan(arg1) || isnan(arg2);
+ switch (ext & mask(3)) {
+ case 0:
+ if (arg1 == arg2 && !nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 1:
+ if (arg1 < arg2 && !nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 2:
+ if (arg1 <= arg2 && !nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 3:
+ if (nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 4:
+ if (arg1 != arg2 || nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 5:
+ if (!(arg1 < arg2) || nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 6:
+ if (!(arg1 <= arg2) || nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 7:
+ if (!nanop)
+ resBits = mask(sizeBits);
+ break;
+ };
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mcmpf2rf(MediaOp):
+ # Floating point compare of the lowest lane of FpSrcReg1 and
+ # FpSrcReg2 that reports its result in ccFlagBits instead of a
+ # register. The constructor takes only the two sources and passes
+ # "InstRegIndex(0)" as the first (destination) argument of
+ # MediaOp.__init__. srcSize must equal destSize and be 4 (float) or
+ # 8 (double). OF, SF, AF, ZF, PF and CF are first cleared, then set
+ # according to the table in the code below; "greater than" leaves
+ # all six clear. ext is not consulted.
+ def __init__(self, src1, src2,\
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
+ src2, size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ assert(srcSize == 4 || srcSize == 8);
+ int size = srcSize;
+ int sizeBits = size * 8;
+
+ double arg1, arg2;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
+ if (size == 4) {
+ floatInt fi;
+ fi.i = arg1Bits;
+ arg1 = fi.f;
+ fi.i = arg2Bits;
+ arg2 = fi.f;
+ } else {
+ doubleInt di;
+ di.i = arg1Bits;
+ arg1 = di.d;
+ di.i = arg2Bits;
+ arg2 = di.d;
+ }
+
+ // ZF PF CF
+ // Unordered 1 1 1
+ // Greater than 0 0 0
+ // Less than 0 0 1
+ // Equal 1 0 0
+ // OF = SF = AF = 0
+ ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
+ ZFBit | PFBit | CFBit);
+ if (isnan(arg1) || isnan(arg2))
+ ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
+ else if(arg1 < arg2)
+ ccFlagBits = ccFlagBits | CFBit;
+ else if(arg1 == arg2)
+ ccFlagBits = ccFlagBits | ZFBit;
+ '''
+}};
diff --git a/src/arch/x86/isa/microops/microops.isa b/src/arch/x86/isa/microops/microops.isa
index 19266f6d6..a9cdffe0a 100644
--- a/src/arch/x86/isa/microops/microops.isa
+++ b/src/arch/x86/isa/microops/microops.isa
@@ -68,6 +68,9 @@
//Load/store microop definitions
##include "ldstop.isa"
+//Media microop definitions
+##include "mediaop.isa"
+
//Control flow microop definitions
##include "seqop.isa"
diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa
index b74363470..8d65111b0 100644
--- a/src/arch/x86/isa/specialize.isa
+++ b/src/arch/x86/isa/specialize.isa
@@ -139,7 +139,7 @@ let {{
opType = OpType(opTypes[0])
opTypes.pop(0)
- if opType.tag not in ("I", "J"):
+ if opType.tag not in ("I", "J", "P", "PR", "Q", "V", "VR", "W"):
if opType.size:
env.setSize(opType.size)
@@ -190,7 +190,12 @@ let {{
env.addReg(ModRMRegIndex)
env.addToDisassembly(
"printReg(out, %s, regSize);\n" % ModRMRegIndex)
- Name += "_R"
+ if opType.tag == "P":
+ Name += "_MMX"
+ elif opType.tag == "V":
+ Name += "_XMM"
+ else:
+ Name += "_R"
elif opType.tag in ("E", "Q", "W"):
# This might refer to memory or to a register. We need to
# divide it up farther.
@@ -202,9 +207,16 @@ let {{
# modrm addressing.
memEnv = copy.copy(env)
memEnv.doModRM = True
+ regSuffix = "_R"
+ if opType.tag == "Q":
+ regSuffix = "_MMX"
+ elif opType.tag == "W":
+ regSuffix = "_XMM"
return doSplitDecode("MODRM_MOD",
- {"3" : (specializeInst, Name + "_R", copy.copy(opTypes), regEnv)},
- (doRipRelativeDecode, Name, copy.copy(opTypes), memEnv))
+ {"3" : (specializeInst, Name + regSuffix,
+ copy.copy(opTypes), regEnv)},
+ (doRipRelativeDecode, Name,
+ copy.copy(opTypes), memEnv))
elif opType.tag in ("I", "J"):
# Immediates
env.addToDisassembly(
@@ -218,7 +230,12 @@ let {{
env.addReg(ModRMRMIndex)
env.addToDisassembly(
"printReg(out, %s, regSize);\n" % ModRMRMIndex)
- Name += "_R"
+ if opType.tag == "PR":
+ Name += "_MMX"
+ elif opType.tag == "VR":
+ Name += "_XMM"
+ else:
+ Name += "_R"
elif opType.tag in ("X", "Y"):
# This type of memory addressing is for string instructions.
# They'll use the right index and segment internally.
diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc
index f537f92af..026b733bd 100644
--- a/src/arch/x86/predecoder.cc
+++ b/src/arch/x86/predecoder.cc
@@ -195,7 +195,7 @@ namespace X86ISA
State nextState = ErrorState;
emi.opcode.num++;
//We can't handle 3+ byte opcodes right now
- assert(emi.opcode.num < 3);
+ assert(emi.opcode.num < 4);
consumeByte();
if(emi.opcode.num == 1 && nextByte == 0x0f)
{
@@ -203,11 +203,8 @@ namespace X86ISA
DPRINTF(Predecoder, "Found two byte opcode.\n");
emi.opcode.prefixA = nextByte;
}
- else if(emi.opcode.num == 2 &&
- (nextByte == 0x0f ||
- (nextByte & 0xf8) == 0x38))
+ else if(emi.opcode.num == 2 && (nextByte == 0x38 || nextByte == 0x3F))
{
- panic("Three byte opcodes aren't yet supported!\n");
nextState = OpcodeState;
DPRINTF(Predecoder, "Found three byte opcode.\n");
emi.opcode.prefixB = nextByte;
diff --git a/src/arch/x86/process.cc b/src/arch/x86/process.cc
index 3434ebd73..4082e568c 100644
--- a/src/arch/x86/process.cc
+++ b/src/arch/x86/process.cc
@@ -270,6 +270,8 @@ X86_64LiveProcess::startup()
// setting it to one.
cr0.pe = 1; // We're definitely in protected mode.
tc->setMiscReg(MISCREG_CR0, cr0);
+
+ tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
}
}
@@ -390,6 +392,8 @@ I386LiveProcess::startup()
// setting it to one.
cr0.pe = 1; // We're definitely in protected mode.
tc->setMiscReg(MISCREG_CR0, cr0);
+
+ tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
}
}
diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh
index 956ec3216..6fd36b487 100644
--- a/src/arch/x86/types.hh
+++ b/src/arch/x86/types.hh
@@ -156,8 +156,10 @@ namespace X86ISA
uint8_t num;
//The first byte detected in a 2+ byte opcode. Should be 0xF0.
uint8_t prefixA;
- //The second byte detected in a 3+ byte opcode. Could be 0xF0 for
- //3dnow instructions, or 0x38-0x3F for some SSE instructions.
+ //The second byte detected in a 3+ byte opcode. Could be 0x38-0x3F
+ //for some SSE instructions. 3dNow! instructions are handled as
+ //two byte opcodes and then split out further by the immediate
+ //byte.
uint8_t prefixB;
//The main opcode byte. The highest addressed byte in the opcode.
Opcode op;