diff options
Diffstat (limited to 'src/arch')
93 files changed, 6463 insertions, 1547 deletions
diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript index 3d1f6f8e3..97868986f 100644 --- a/src/arch/x86/SConscript +++ b/src/arch/x86/SConscript @@ -91,6 +91,7 @@ if env['TARGET_ISA'] == 'x86': Source('faults.cc') Source('insts/microfpop.cc') Source('insts/microldstop.cc') + Source('insts/micromediaop.cc') Source('insts/microop.cc') Source('insts/microregop.cc') Source('insts/static_inst.cc') diff --git a/src/arch/x86/insts/microldstop.hh b/src/arch/x86/insts/microldstop.hh index 309a2e6b7..048535a27 100644 --- a/src/arch/x86/insts/microldstop.hh +++ b/src/arch/x86/insts/microldstop.hh @@ -64,8 +64,8 @@ namespace X86ISA { - static const Request::FlagsType SegmentFlagMask = mask(4); - static const int FlagShift = 4; + const Request::FlagsType SegmentFlagMask = mask(4); + const int FlagShift = 4; enum FlagBit { CPL0FlagBit = 1, AddrSizeFlagBit = 2, diff --git a/src/arch/x86/insts/micromediaop.cc b/src/arch/x86/insts/micromediaop.cc new file mode 100644 index 000000000..07ae360ee --- /dev/null +++ b/src/arch/x86/insts/micromediaop.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2009 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/x86/insts/micromediaop.hh" +#include "arch/x86/miscregs.hh" +#include <string> + +namespace X86ISA +{ + std::string MediaOpReg::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, instMnem, mnemonic); + printDestReg(response, 0, destSize); + response << ", "; + printSrcReg(response, 0, srcSize); + response << ", "; + printSrcReg(response, 1, srcSize); + return response.str(); + } + + std::string MediaOpImm::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, instMnem, mnemonic); + printDestReg(response, 0, destSize); + response << ", "; + printSrcReg(response, 0, srcSize); + ccprintf(response, ", %#x", imm8); + return response.str(); + } +} diff --git a/src/arch/x86/insts/micromediaop.hh b/src/arch/x86/insts/micromediaop.hh new file mode 100644 index 000000000..508ef4e26 --- /dev/null +++ b/src/arch/x86/insts/micromediaop.hh @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2009 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#ifndef __ARCH_X86_INSTS_MICROMEDIAOP_HH__ +#define __ARCH_X86_INSTS_MICROMEDIAOP_HH__ + +#include "arch/x86/insts/microop.hh" + +namespace X86ISA +{ + class MediaOpBase : public X86MicroopBase + { + protected: + const RegIndex src1; + const RegIndex dest; + const uint8_t srcSize; + const uint8_t destSize; + const uint8_t ext; + static const RegIndex foldOBit = 0; + + // Constructor + MediaOpBase(ExtMachInst _machInst, + const char *mnem, const char *_instMnem, + bool isMicro, bool isDelayed, + bool isFirst, bool isLast, + InstRegIndex _src1, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint8_t _ext, + OpClass __opClass) : + X86MicroopBase(_machInst, mnem, _instMnem, + isMicro, isDelayed, isFirst, isLast, + __opClass), + src1(_src1.idx), dest(_dest.idx), + srcSize(_srcSize), destSize(_destSize), ext(_ext) + {} + }; + + class MediaOpReg : public MediaOpBase + { + protected: + const RegIndex src2; + + // Constructor + MediaOpReg(ExtMachInst _machInst, + const char *mnem, const char *_instMnem, + bool isMicro, bool isDelayed, + bool isFirst, bool isLast, + InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint8_t _ext, + OpClass __opClass) : + MediaOpBase(_machInst, mnem, _instMnem, + isMicro, isDelayed, isFirst, isLast, + _src1, _dest, _srcSize, _destSize, _ext, + __opClass), + src2(_src2.idx) + {} + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + }; + + class MediaOpImm : public MediaOpBase + { + protected: + uint8_t imm8; + + // Constructor + MediaOpImm(ExtMachInst _machInst, + const char *mnem, const char *_instMnem, + bool isMicro, bool isDelayed, + bool isFirst, bool isLast, + InstRegIndex _src1, uint8_t _imm8, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint8_t _ext, + OpClass __opClass) : + MediaOpBase(_machInst, mnem, _instMnem, + isMicro, isDelayed, isFirst, isLast, + _src1, _dest, _srcSize, _destSize, _ext, + 
__opClass), + imm8(_imm8) + {} + + std::string generateDisassembly(Addr pc, + const SymbolTable *symtab) const; + }; +} + +#endif //__ARCH_X86_INSTS_MICROMEDIAOP_HH__ diff --git a/src/arch/x86/isa/decoder/decoder.isa b/src/arch/x86/isa/decoder/decoder.isa index dcf6ce089..f757abef9 100644 --- a/src/arch/x86/isa/decoder/decoder.isa +++ b/src/arch/x86/isa/decoder/decoder.isa @@ -70,25 +70,7 @@ decode LEGACY_LOCK default Unknown::unknown() //2 byte opcodes ##include "two_byte_opcodes.isa" //3 byte opcodes - 0x3: decode OPCODE_PREFIXA { - 0xF0: decode OPCODE_PREFIXB { - //We don't handle these properly in the predecoder yet, so - //there's no reason to implement them for now. - 0x38: decode OPCODE_OP { - default: FailUnimpl::sseThreeEight(); - } - 0x3A: decode OPCODE_OP { - default: FailUnimpl::sseThreeA(); - } - 0xF0: decode OPCODE_OP { - default: FailUnimpl::threednow(); - } - default: M5InternalError::error( - {{"Unexpected second opcode byte in three byte opcode!"}}); - } - default: M5InternalError::error( - {{"Unexpected first opcode byte in three byte opcode!"}}); - } + ##include "three_byte_opcodes.isa" } //Lock prefix ##include "locked_opcodes.isa" diff --git a/src/arch/x86/isa/decoder/locked_opcodes.isa b/src/arch/x86/isa/decoder/locked_opcodes.isa index 14d5e58a3..e776d1320 100644 --- a/src/arch/x86/isa/decoder/locked_opcodes.isa +++ b/src/arch/x86/isa/decoder/locked_opcodes.isa @@ -139,6 +139,10 @@ } 0x2: decode OPCODE_PREFIXA { 0x0F: decode OPCODE_OP_TOP5 { + 0x04: decode OPCODE_OP_BOTTOM3 { + 0x0: WarnUnimpl::mov_Rd_CR8D(); + 0x2: WarnUnimpl::mov_CR8D_Rd(); + } 0x15: decode OPCODE_OP_BOTTOM3 { 0x3: BTS_LOCKED(Mv,Gv); } diff --git a/src/arch/x86/isa/decoder/three_byte_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_opcodes.isa new file mode 100644 index 000000000..7587e3dad --- /dev/null +++ b/src/arch/x86/isa/decoder/three_byte_opcodes.isa @@ -0,0 +1,151 @@ +// Copyright (c) 2008 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// Decode the three byte opcodes +// +0x3: decode OPCODE_PREFIXA { + 0x0F: decode OPCODE_PREFIXB { + 0x38: decode LEGACY_OP { + format WarnUnimpl { + 1: decode OPCODE_OP { + 0x00: pshufb_Vdq_Wdq(); + 0x01: phaddw_Vdq_Wdq(); + 0x02: phaddd_Vdq_Wdq(); + 0x03: phaddsw_Vdq_Wdq(); + 0x04: pmaddubsw_Vdq_Wdq(); + 0x05: phsubw_Vdq_Wdq(); + 0x06: phsubd_Vdq_Wdq(); + 0x07: phsubsw_Vdq_Wdq(); + 0x08: psignb_Vdq_Wdq(); + 0x09: psignw_Vdq_Wdq(); + 0x0A: psignd_Vdq_Wdq(); + 0x0B: pmulhrsw_Vdq_Wdq(); + 0x10: pblendvb_Vdq_Wdq(); + 0x14: blendvps_Vdq_Wdq(); + 0x15: blendvpd_Vdq_Wdq(); + 0x17: ptest_Vdq_Wdq(); + 0x1C: pabsb_Vdq_Wdq(); + 0x1D: pabsw_Vdq_Wdq(); + 0x1E: pabsd_Vdq_Wdq(); + 0x20: pmovsxbw_Vdq_Udq_or_Mq(); + 0x21: pmovsxbd_Vdq_Udq_or_Md(); + 0x22: pmovsxbq_Vdq_Udq_or_Mw(); + 0x23: pmovsxwd_Vdq_Udq_or_Mq(); + 0x24: pmovsxwq_Vdq_Udq_or_Md(); + 0x25: pmovsxdq_Vdq_Udq_or_Mq(); + 0x28: pmuldq_Vdq_Wdq(); + 0x29: pcmpeqq_Vdq_Wdq(); + 0x2A: movntdqa_Vdq_Mdq(); + 0x2B: packusdw_Vdq_Wdq(); + 0x30: pmovzxbw_Vdq_Udq_or_Mq(); + 0x31: pmovzxbd_Vdq_Udq_or_Md(); + 0x32: pmovzxbq_Vdq_Udq_or_Mw(); + 0x33: pmovzxwd_Vdq_Udq_or_Mq(); + 0x34: pmovzxwq_Vdq_Udq_or_Md(); + 0x35: pmovzxdq_Vdq_Udq_or_Mq(); + 0x37: pcmpgtq_Vdq_Wdq(); + 0x38: pminsb_Vdq_Wdq(); + 0x39: pminsd_Vdq_Wdq(); + 0x3A: pminuw_Vdq_Wdq(); + 0x3B: pminud_Vdq_Wdq(); + 0x3C: pmaxsb_Vdq_Wdq(); + 0x3D: pmaxsd_Vdq_Wdq(); + 0x3E: pmaxuw_Vdq_Wdq(); + 0x3F: pmaxud_Vdq_Wdq(); + 0x40: pmulld_Vdq_Wdq(); + 0x41: phminposuw_Vdq_Wdq(); + default: Inst::UD2(); + } + default: decode LEGACY_REPNE { + 1: decode OPCODE_OP { + 0xF0: crc32_Gd_Eb(); + 0xF1: crc32_Gd_Ev(); + default: Inst::UD2(); + } + default: decode OPCODE_OP { + 0x00: pshufb_Pq_Qq(); + 0x01: phaddw_Pq_Qq(); + 0x02: phaddd_Pq_Qq(); + 0x03: phaddsw_Pq_Qq(); + 0x04: pmaddubsw_Pq_Qq(); + 0x05: phsubw_Pq_Qq(); + 0x06: phsubd_Pq_Qq(); + 0x07: phsubsw_Pq_Qq(); + 0x08: psignb_Pq_Qq(); + 
0x09: psignw_Pq_Qq(); + 0x0A: psignd_Pq_Qq(); + 0x0B: pmulhrsw_Pq_Qq(); + 0x1C: pabsb_Pq_Qq(); + 0x1D: pabsw_Pq_Qq(); + 0x1E: pabsd_Pq_Qq(); + default: Inst::UD2(); + } + } + } + } + 0x3A: decode LEGACY_OP { + format WarnUnimpl { + 1: decode OPCODE_OP { + 0x08: roundps_Vdq_Wdq_Ib(); + 0x09: roundpd_Vdq_Wdq_Ib(); + 0x0A: roundss_Vss_Wss_Ib(); + 0x0B: roundsd_Vsd_Wsd_Ib(); + 0x0C: blendps_Vdq_Wdq_Ib(); + 0x0D: blendpd_Vdq_Wdq_Ib(); + 0x0E: pblendw_Vdq_Wdq_Ib(); + 0x0F: palignr_Vdq_Wdq_Ib(); + 0x14: pextrb_Rd_or_Mb_Vdq_Ib(); + 0x15: decode MODRM_MOD { + 0x3: Inst::PEXTRW(Rd,Vdq,Ib); + default: pextrw_Mw_Vdq_Ib(); + } + 0x16: pextrd_pextrq_Ed_or_Eq_Vdq_Ib(); + 0x17: extractps_Ed_Vdq_Ib(); + 0x20: pinsrb_Vdq_Rd_or_Rq_or_Mb_Ib(); + 0x21: insertps_Vdq_Udq_or_Md_Ib(); + 0x22: pinsrd_pinsrq_Vdq_Ed_or_Eq_Ib(); + 0x40: dpps_Vdq_Wdq_Ib(); + 0x41: dppd_Vdq_Wdq_Ib(); + 0x42: pcmpistrm_Vdq_Wdq_Ib(); + 0x43: pcmpistri_Vdq_Wdq_Ib(); + default: Inst::UD2(); + } + default: decode OPCODE_OP { + 0x0F: palignr_Pq_Qq_Ib(); + default: Inst::UD2(); + } + } + } + default: M5InternalError::error( + {{"Unexpected second opcode byte in three byte opcode!"}}); + } + default: M5InternalError::error( + {{"Unexpected first opcode byte in three byte opcode!"}}); +} diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa index 55056da81..c23eeccab 100644 --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -101,43 +101,31 @@ default: Inst::UD2(); } //0x01: group7(); // Ugly, ugly, ugly... 
- 0x01: decode MODRM_MOD { - 0x3: decode MODRM_REG { - 0x0: decode MODRM_RM { + 0x01: decode MODRM_REG { + 0x0: decode MODRM_MOD { + 0x3: decode MODRM_RM { 0x1: vmcall(); 0x2: vmlaunch(); 0x3: vmresume(); 0x4: vmxoff(); default: Inst::UD2(); } - 0x1: decode MODRM_RM { + default: sgdt_Ms(); + } + 0x1: decode MODRM_MOD { + 0x3: decode MODRM_RM { 0x0: monitor(); 0x1: mwait(); default: Inst::UD2(); } + default: sidt_Ms(); + } + 0x2: decode MODRM_MOD { 0x3: decode MODRM_RM { - 0x0: vmrun(); - 0x1: vmmcall(); - 0x2: vmload(); - 0x3: vmsave(); - 0x4: stgi(); - 0x5: clgi(); - 0x6: skinit(); - 0x7: invlpga(); - } - 0x4: Inst::SMSW(Rv); - 0x6: Inst::LMSW(Rv); - 0x7: decode MODRM_RM { - 0x0: Inst::SWAPGS(); - 0x1: rdtscp(); - default: Inst::UD2(); + 0x0: xgetbv(); + 0x1: xsetbv(); } - default: Inst::UD2(); - } - default: decode MODRM_REG { - 0x0: sgdt_Ms(); - 0x1: sidt_Ms(); - 0x2: decode MODE_SUBMODE { + default: decode MODE_SUBMODE { 0x0: Inst::LGDT(M); default: decode OPSIZE { // 16 bit operand sizes are special, but only @@ -146,7 +134,19 @@ default: Inst::LGDT(M); } } - 0x3: decode MODE_SUBMODE { + } + 0x3: decode MODRM_MOD { + 0x3: decode MODRM_RM { + 0x0: vmrun(); + 0x1: vmmcall(); + 0x2: vmload(); + 0x3: vmsave(); + 0x4: stgi(); + 0x5: clgi(); + 0x6: skinit(); + 0x7: invlpga(); + } + default: decode MODE_SUBMODE { 0x0: Inst::LIDT(M); default: decode OPSIZE { // 16 bit operand sizes are special, but only @@ -155,10 +155,19 @@ default: Inst::LIDT(M); } } - 0x4: Inst::SMSW(Mw); - 0x6: Inst::LMSW(Mw); - 0x7: Inst::INVLPG(M); - default: Inst::UD2(); + } + 0x4: decode MODRM_MOD { + 0x3: Inst::SMSW(Rv); + default: Inst::SMSW(Mw); + } + 0x6: Inst::LMSW(Ew); + 0x7: decode MODRM_MOD { + 0x3: decode MODRM_RM { + 0x0: Inst::SWAPGS(); + 0x1: rdtscp(); + default: Inst::UD2(); + } + default: Inst::INVLPG(M); } } 0x02: lar_Gv_Ew(); @@ -283,137 +292,152 @@ 0x4: Inst::UD2(); 0x5: Inst::PREFETCH(Mb); 0x6: FailUnimpl::femms(); - 0x7: FailUnimpl::threednow(); - } - 0x02: decode 
LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: movups_Vo_Wo(); - 0x1: movups_Wo_Vo(); - 0x2: decode MODRM_MOD { - 0x3: movhlps_Vq_VRq(); - default: movlps_Vq_Mq(); - } - 0x3: movlps_Mq_Vq(); - 0x4: unpcklps(); - 0x5: unpckhps(); - 0x6: decode MODRM_MOD { - 0x3: movlhps_Vq_VRq(); - default: movhps_Vq_Mq(); - } - 0x7: movhps_Mq_Vq(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x0: movss_Vd_Wd(); - 0x1: movss_Wd_Vd(); - 0x2: movsldup_Vo_Wo(); - 0x6: movshdup_Vo_Wo(); - default: Inst::UD2(); - } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: movupd_Vo_Wo(); - 0x1: movupd_Wo_Vo(); - 0x2: Inst::MOVLPD(Vq,Mq); - 0x3: Inst::MOVLPD(Mq,Vq); - 0x4: unpcklpd_Vo_Wq(); - 0x5: unpckhpd_Vo_Wo(); - 0x6: movhpd_Vq_Mq(); - 0x7: movhpd_Mq_Vq(); - } - // repne (0xF2) - 0x8: decode OPCODE_OP_BOTTOM3 { - 0x0: Inst::MOVSD(Vq,Wq); - 0x1: Inst::MOVSD(Wq,Vq); - 0x2: movddup_Vo_Wq(); - default: Inst::UD2(); - } - default: Inst::UD2(); - } - 0x03: decode OPCODE_OP_BOTTOM3 { - //group17(); - 0x0: decode MODRM_REG { - 0x0: prefetch_nta(); - 0x1: Inst::PREFETCH_T0(Mb); - 0x2: prefetch_t1(); - 0x3: prefetch_t2(); - default: Inst::HINT_NOP(); - } - 0x1: Inst::HINT_NOP(); - 0x2: Inst::HINT_NOP(); - 0x3: Inst::HINT_NOP(); - 0x4: Inst::HINT_NOP(); - 0x5: Inst::HINT_NOP(); - 0x6: Inst::HINT_NOP(); - 0x7: Inst::HINT_NOP(); - } - 0x04: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: Inst::MOV(Rd,Cd); - 0x1: Inst::MOV(Rd,Dd); - 0x2: Inst::MOV(Cd,Rd); - 0x3: Inst::MOV(Dd,Rd); - 0x4: mov_Rd_Td(); - 0x6: mov_Td_Rd(); + 0x7: decode IMMEDIATE { + 0x0C: pi2fw_Pq_Qq(); + 0x0D: pi2fd_Pq_Qq(); + 0x1C: pf2iw_Pq_Qq(); + 0x1D: pf2id_Pq_Qq(); + 0x8A: pfnacc_Pq_Qq(); + 0x8E: pfpnacc_Pq_Qq(); + 0x90: pfcmpge_Pq_Qq(); + 0x94: pfmin_Pq_Qq(); + 0x96: pfrcp_Pq_Qq(); + 0x97: pfrsqrt_Pq_Qq(); + 0x9A: Inst::PFSUB(Pq,Qq); + 0x9E: pfadd_Pq_Qq(); + 0xA0: pfcmpgt_Pq_Qq(); + 0xA4: pfmax_Pq_Qq(); + 0xA6: pfrcpit1_Pq_Qq(); + 0xA7: 
pfrsqit1_Pq_Qq(); + 0xAA: Inst::PFSUBR(Pq,Qq); + 0xAE: pfacc_Pq_Qq(); + 0xB0: pfcmpeq_Pq_Qq(); + 0xB4: Inst::PFMUL(Pq,Qq); + 0xB6: pfrcpit2_Pq_Qq(); + 0xB7: Inst::PMULHRW(Pq,Qq); + 0xBB: pswapd_Pq_Qq(); + 0xBF: pavgusb_Pq_Qq(); default: Inst::UD2(); } - // lock prefix (0xF0) - 0x2: decode OPCODE_OP_BOTTOM3 { - 0x0: mov_Rd_CR8D(); - 0x2: mov_CR8D_Rd(); - } - default: Inst::UD2(); } - 0x05: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - //These moves should really use size o (octword), but - //because they are split in two, they use q (quadword). - 0x0: Inst::MOVAPS(Vq,Wq); - 0x1: Inst::MOVAPS(Wq,Vq); - 0x2: decode MODRM_MOD { - 0x3: cvtpi2pS_Vq_Pq(); - default: cvtpi2ps_Vq_Mq(); + format Inst{ + 0x02: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: MOVUPS(Vo,Wo); + 0x1: MOVUPS(Wo,Vo); + 0x2: decode MODRM_MOD { + 0x3: MOVHLPS(Vps,VRq); + default: MOVLPS(Vps,Mq); + } + 0x3: MOVLPS(Mq,Vps); + 0x4: UNPCKLPS(Vps,Wq); + 0x5: UNPCKHPS(Vps,Wq); + 0x6: decode MODRM_MOD { + 0x3: MOVLHPS(Vps,VRq); + default: MOVHPS(Vps,Mq); + } + 0x7: MOVHPS(Mq,Vq); } - 0x3: movntps_Mo_Vo(); - 0x4: cvttps2pi_Pq_Wq(); - 0x5: cvtpS2pi_Pq_Wq(); - 0x6: ucomiss_Vd_Wd(); - 0x7: comiss_Vd_Wd(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x2: cvtsi2ss_Vd_Ed(); - 0x4: cvttss2si_Gd_Wd(); - 0x5: cvtss2si_Gd_Wd(); - default: Inst::UD2(); + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x0: MOVSS(Vd,Wd); + 0x1: MOVSS(Wd,Vd); + 0x2: WarnUnimpl::movsldup_Vo_Wo(); + 0x6: WarnUnimpl::movshdup_Vo_Wo(); + default: UD2(); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: MOVUPD(Vo,Wo); + 0x1: MOVUPD(Wo,Vo); + 0x2: MOVLPD(Vq,Mq); + 0x3: MOVLPD(Mq,Vq); + 0x4: UNPCKLPD(Vo,Wq); + 0x5: UNPCKHPD(Vo,Wo); + 0x6: MOVHPD(Vq,Mq); + 0x7: MOVHPD(Mq,Vq); + } + // repne (0xF2) + 0x8: decode OPCODE_OP_BOTTOM3 { + 0x0: MOVSD(Vq,Wq); + 0x1: MOVSD(Wq,Vq); + 0x2: MOVDDUP(Vo,Wq); + default: UD2(); + } + default: UD2(); } - // 
operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: movapd_Vo_Wo(); - 0x1: movapd_Wo_Vo(); - 0x2: decode MODRM_MOD { - 0x3: cvtpi2pd_Vo_Pq(); - default: cvtpi2pd_Vo_Mq(); + 0x03: decode OPCODE_OP_BOTTOM3 { + //group16(); + 0x0: decode MODRM_REG { + 0x0: WarnUnimpl::prefetch_nta(); + 0x1: PREFETCH_T0(Mb); + 0x2: WarnUnimpl::prefetch_t1(); + 0x3: WarnUnimpl::prefetch_t2(); + default: HINT_NOP(); + } + 0x1: HINT_NOP(); + 0x2: HINT_NOP(); + 0x3: HINT_NOP(); + 0x4: HINT_NOP(); + 0x5: HINT_NOP(); + 0x6: HINT_NOP(); + 0x7: HINT_NOP(); + } + 0x04: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: MOV(Rd,Cd); + 0x1: MOV(Rd,Dd); + 0x2: MOV(Cd,Rd); + 0x3: MOV(Dd,Rd); + default: UD2(); } - 0x3: movntpd_Mo_Vo(); - 0x4: cvttpd2pi_Pq_Wo(); - 0x5: cvtpd2pi_Pq_Wo(); - 0x6: Inst::UCOMISD(Vq,Wq); - 0x7: comisd_Vq_Wq(); + default: UD2(); } - // repne (0xF2) - 0x8: decode OPCODE_OP_BOTTOM3 { - // The size of the V operand should be q, not dp - 0x2: Inst::CVTSI2SD(Vdp,Edp); - // The size of the W operand should be q, not dp - 0x4: Inst::CVTTSD2SI(Gdp,Wdp); - 0x5: cvtsd2si_Gd_Wq(); - default: Inst::UD2(); + 0x05: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + //These moves should really use size o (octword), but + //because they are split in two, they use q (quadword). 
+ 0x0: MOVAPS(Vq,Wq); + 0x1: MOVAPS(Wq,Vq); + 0x2: CVTPI2PS(Vq,Qq); + 0x3: WarnUnimpl::movntps_Mo_Vo(); + 0x4: CVTTPS2PI(Pq,Wq); + 0x5: CVTPS2PI(Pq,Wq); + 0x6: UCOMISS(Vd,Wd); + 0x7: COMISS(Vd,Wd); + } + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x2: CVTSI2SS(Vd,Ed); + 0x4: CVTTSS2SI(Gd,Wd); + 0x5: CVTSS2SI(Gd,Wd); + default: UD2(); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: MOVAPD(Vo,Wo); + 0x1: MOVAPD(Wo,Vo); + 0x2: CVTPI2PD(Vo,Qq); + 0x3: WarnUnimpl::movntpd_Mo_Vo(); + 0x4: CVTTPD2PI(Pq,Wo); + 0x5: CVTPD2PI(Pq,Wo); + 0x6: UCOMISD(Vq,Wq); + 0x7: COMISD(Vq,Wq); + } + // repne (0xF2) + 0x8: decode OPCODE_OP_BOTTOM3 { + // The size of the V operand should be q, not dp + 0x2: CVTSI2SD(Vdp,Edp); + // The size of the W operand should be q, not dp + 0x4: CVTTSD2SI(Gdp,Wdp); + 0x5: CVTSD2SI(Gd,Wq); + default: UD2(); + } + default: UD2(); } - default: Inst::UD2(); } 0x06: decode OPCODE_OP_BOTTOM3 { 0x0: Inst::WRMSR(); @@ -430,14 +454,13 @@ 0x7: getsec(); } 0x07: decode OPCODE_OP_BOTTOM3 { - 0x0: three_byte_opcode(); - 0x1: three_byte_opcode(); - 0x2: three_byte_opcode(); - 0x3: three_byte_opcode(); - 0x4: three_byte_opcode(); - 0x5: three_byte_opcode(); - 0x6: three_byte_opcode(); - 0x7: three_byte_opcode(); + 0x0: M5InternalError::error( + {{"Three byte opcode shouldn't be handled by " + "two_byte_opcodes.isa!"}}); + 0x2: M5InternalError::error( + {{"Three byte opcode shouldn't be handled by " + "two_byte_opcodes.isa!"}}); + default: UD2(); } format Inst { 0x08: decode OPCODE_OP_BOTTOM3 { @@ -460,39 +483,35 @@ 0x6: CMOVLE(Gv,Ev); 0x7: CMOVNLE(Gv,Ev); } - } - 0x0A: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: movmskps_Gd_VRo(); - 0x1: sqrtps_Vo_Wo(); - 0x2: rqsrtps_Vo_Wo(); - 0x3: rcpps_Vo_Wo(); - 0x4: andps_Vo_Wo(); - 0x5: andnps_Vo_Wo(); - 0x6: orps_Vo_Wo(); - 0x7: xorps_Vo_Wo(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x1: sqrtss_Vd_Wd(); - 0x2: rsqrtss_Vd_Wd(); - 0x3: 
rcpss_Vd_Wd(); - default: Inst::UD2(); - } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: movmskpd_Gd_VRo(); - 0x1: sqrtpd_Vo_Wo(); - 0x4: andpd_Vo_Wo(); - 0x5: andnpd_Vo_Wo(); - 0x6: orpd_Vo_Wo(); - //This really should be type o, but it works on q sized - //chunks at a time. - 0x7: Inst::XORPD(Vq,Wq); - default: Inst::UD2(); - } - format Inst { + 0x0A: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: MOVMSKPS(Gd,VRo); + 0x1: SQRTPS(Vo,Wo); + 0x2: WarnUnimpl::rqsrtps_Vo_Wo(); + 0x3: WarnUnimpl::rcpps_Vo_Wo(); + 0x4: ANDPS(Vo,Wo); + 0x5: ANDNPS(Vo,Wo); + 0x6: ORPS(Vo,Wo); + 0x7: XORPS(Vo,Wo); + } + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x1: SQRTSS(Vd,Wd); + 0x2: WarnUnimpl::rsqrtss_Vd_Wd(); + 0x3: WarnUnimpl::rcpss_Vd_Wd(); + default: UD2(); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: MOVMSKPD(Gd,VRo); + 0x1: SQRTPD(Vo,Wo); + 0x4: ANDPD(Vo,Wo); + 0x5: ANDNPD(Vo,Wo); + 0x6: ORPD(Vo,Wo); + 0x7: XORPD(Vo,Wo); + default: UD2(); + } // repne (0xF2) 0x8: decode OPCODE_OP_BOTTOM3 { 0x1: SQRTSD(Vq,Wq); @@ -500,273 +519,208 @@ } default: UD2(); } - } - 0x0B: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: addps_Vo_Wo(); - 0x1: mulps_Vo_Wo(); - 0x2: cvtps2pd_Vo_Wq(); - 0x3: cvtdq2ps_Vo_Wo(); - 0x4: subps_Vo_Wo(); - 0x5: minps_Vo_Wo(); - 0x6: divps_Vo_Wo(); - 0x7: maxps_Vo_Wo(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x0: addss_Vd_Wd(); - 0x1: mulss_Vd_Wd(); - 0x2: cvtss2sd_Vq_Wd(); - 0x3: cvttps2dq_Vo_Wo(); - 0x4: subss_Vd_Wd(); - 0x5: minss_Vd_Wd(); - 0x6: divss_Vd_Wd(); - 0x7: maxss_Vd_Wd(); - } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: addpd_Vo_Wo(); - 0x1: mulpd_Vo_Wo(); - 0x2: cvtpd2ps_Vo_Wo(); - 0x3: cvtps2dq_Vo_Wo(); - 0x4: subpd_Vo_Wo(); - 0x5: minpd_Vo_Wo(); - 0x6: divpd_Vo_Wo(); - 0x7: maxpd_Vo_Wo(); - } - // repne (0xF2) - 0x8: decode OPCODE_OP_BOTTOM3 { - 0x0: Inst::ADDSD(Vq,Wq); - 0x1: 
Inst::MULSD(Vq,Wq); - 0x2: cvtsd2ss_Vd_Wq(); - 0x4: Inst::SUBSD(Vq,Wq); - 0x5: minsd_Vq_Wq(); - 0x6: Inst::DIVSD(Vq,Wq); - 0x7: maxsd_Vq_Wq(); - default: Inst::UD2(); - } - default: Inst::UD2(); - } - 0x0C: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: punpcklbw_Pq_Qd(); - 0x1: punpcklwd_Pq_Qd(); - 0x2: punpckldq_Pq_Qd(); - 0x3: packsswb_Pq_Qq(); - 0x4: pcmpgtb_Pq_Qq(); - 0x5: pcmpgtw_Pq_Qq(); - 0x6: pcmpgtd_Pq_Qq(); - 0x7: packuswb_Pq_Qq(); - } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: punpcklbw_Vo_Wq(); - 0x1: punpcklwd_Vo_Wq(); - 0x2: punpckldq_Vo_Wq(); - 0x3: packsswb_Vo_Wo(); - 0x4: pcmpgtb_Vo_Wo(); - 0x5: pcmpgtw_Vo_Wo(); - 0x6: pcmpgtd_Vo_Wo(); - 0x7: packuswb_Vo_Wo(); - } - default: Inst::UD2(); - } - 0x0D: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: punpckhbw_Pq_Qq(); - 0x1: punpckhwd_Pq_Qq(); - 0x2: punpckhdq_Pq_Qq(); - 0x3: packssdw_Pq_Qq(); - 0x6: movd_Pq_Ed(); - 0x7: movq_Pq_Qq(); - default: Inst::UD2(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x7: movdqu_Vo_Wo(); - default: Inst::UD2(); + 0x0B: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: ADDPS(Vo,Wo); + 0x1: MULPS(Vo,Wo); + 0x2: CVTPS2PD(Vo,Wq); + 0x3: CVTDQ2PS(Vo,Wo); + 0x4: SUBPS(Vo,Wo); + 0x5: MINPS(Vo,Wo); + 0x6: DIVPS(Vo,Wo); + 0x7: MAXPS(Vo,Wo); + } + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x0: ADDSS(Vd,Wd); + 0x1: MULSS(Vd,Wd); + 0x2: CVTSS2SD(Vq,Wd); + 0x3: CVTTPS2DQ(Vo,Wo); + 0x4: SUBSS(Vd,Wd); + 0x5: MINSS(Vd,Wd); + 0x6: DIVSS(Vd,Wd); + 0x7: MAXSS(Vd,Wd); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: ADDPD(Vo,Wo); + 0x1: MULPD(Vo,Wo); + 0x2: CVTPD2PS(Vo,Wo); + 0x3: CVTPS2DQ(Vo,Wo); + 0x4: SUBPD(Vo,Wo); + 0x5: MINPD(Vo,Wo); + 0x6: DIVPD(Vo,Wo); + 0x7: MAXPD(Vo,Wo); + } + // repne (0xF2) + 0x8: decode OPCODE_OP_BOTTOM3 { + 0x0: ADDSD(Vq,Wq); + 0x1: MULSD(Vq,Wq); + 0x2: CVTSD2SS(Vd,Wq); + 0x4: SUBSD(Vq,Wq); + 
0x5: MINSD(Vq,Wq); + 0x6: DIVSD(Vq,Wq); + 0x7: MAXSD(Vq,Wq); + default: UD2(); + } + default: UD2(); } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: punpckhbw_Vo_Wo(); - 0x1: punpckhwd_Vo_Wo(); - 0x2: punpckhdq_Vo_Wo(); - 0x3: packssdw_Vo_Wo(); - 0x4: punpcklqdq_Vo_Wq(); - 0x5: punpcklqdq_Vo_Wq(); - 0x6: movd_Vo_Ed(); - 0x7: movdqa_Vo_Wo(); + 0x0C: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: PUNPCKLBW(Pq,Qd); + 0x1: PUNPCKLWD(Pq,Qd); + 0x2: PUNPCKLDQ(Pq,Qd); + 0x3: PACKSSWB(Pq,Qq); + 0x4: PCMPGTB(Pq,Qq); + 0x5: PCMPGTW(Pq,Qq); + 0x6: PCMPGTD(Pq,Qq); + 0x7: PACKUSWB(Pq,Qq); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: PUNPCKLBW(Vo,Wq); + 0x1: PUNPCKLWD(Vo,Wq); + 0x2: PUNPCKLDQ(Vo,Wq); + 0x3: PACKSSWB(Vo,Wo); + 0x4: PCMPGTB(Vo,Wo); + 0x5: PCMPGTW(Vo,Wo); + 0x6: PCMPGTD(Vo,Wo); + 0x7: PACKUSWB(Vo,Wo); + } + default: UD2(); } - default: Inst::UD2(); - } - 0x0E: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: pshufw_Pq_Qq_Ib(); - //0x1: group13_pshimw(); - 0x1: decode MODRM_REG { - 0x2: decode LEGACY_OP { - 0x0: psrlw_PRq_Ib(); - 0x1: psrlw_VRo_Ib(); - } - 0x4: decode LEGACY_OP { - 0x0: psraw_PRq_Ib(); - 0x1: psraw_VRo_Ib(); - } - 0x6: decode LEGACY_OP { - 0x0: psllw_PRq_Ib(); - 0x1: psllw_VRo_Ib(); - } - default: Inst::UD2(); + 0x0D: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: PUNPCKHBW(Pq,Qq); + 0x1: PUNPCKHWD(Pq,Qq); + 0x2: PUNPCKHDQ(Pq,Qq); + 0x3: PACKSSDW(Pq,Qq); + 0x6: MOVD(Pq,Edp); + 0x7: MOVQ(Pq,Qq); + default: UD2(); } - //0x2: group14_pshimd(); - 0x2: decode MODRM_REG { - 0x2: decode LEGACY_OP { - 0x0: psrld_PRq_Ib(); - 0x1: psrld_VRo_Ib(); - } - 0x4: decode LEGACY_OP { - 0x0: psrad_PRq_Ib(); - 0x1: psrad_VRo_Ib(); - } - 0x6: decode LEGACY_OP { - 0x0: pslld_PRq_Ib(); - 0x1: pslld_VRo_Ib(); - } - default: Inst::UD2(); + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x7: WarnUnimpl::movdqu_Vo_Wo(); + 
default: UD2(); } - //0x3: group15_pshimq(); - 0x3: decode MODRM_REG { - 0x2: decode LEGACY_OP { - 0x0: psrlq_PRq_Ib(); - 0x1: psrlq_VRo_Ib(); - } - 0x3: decode LEGACY_OP { - 0x0: Inst::UD2(); - 0x1: psrldq_VRo_Ib(); - } - 0x6: decode LEGACY_OP { - 0x0: psllq_PRq_Ib(); - 0x1: psllq_VRo_Ib(); - } - 0x7: decode LEGACY_OP { - 0x0: Inst::UD2(); - 0x1: pslldq_VRo_Ib(); - } - default: Inst::UD2(); + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: PUNPCKHBW(Vo,Wo); + 0x1: PUNPCKHWD(Vo,Wo); + 0x2: PUNPCKHDQ(Vo,Wo); + 0x3: PACKSSDW(Vo,Wo); + 0x4: PUNPCKLQDQ(Vo,Wq); + 0x5: PUNPCKHQDQ(Vo,Wq); + 0x6: WarnUnimpl::movd_Vo_Ed(); + 0x7: WarnUnimpl::movdqa_Vo_Wo(); } - 0x4: pcmpeqb_Pq_Qq(); - 0x5: pcmpeqw_Pq_Qq(); - 0x6: pcmpeqd_Pq_Qq(); - 0x7: emms(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x0: pshufhw_Vo_Wo_Ib(); - default: Inst::UD2(); + default: UD2(); } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: pshufd_Vo_Wo_Ib(); - //0x1: group13_pshimw(); - 0x1: decode MODRM_REG { - 0x2: decode LEGACY_OP { - 0x0: psrlw_PRq_Ib(); - 0x1: psrlw_VRo_Ib(); + 0x0E: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: PSHUFW(Pq,Qq,Ib); + //0x1: group12_pshimw(); + 0x1: decode MODRM_REG { + 0x2: PSRLW(PRq,Ib); + 0x4: PSRAW(PRq,Ib); + 0x6: PSLLW(PRq,Ib); + default: UD2(); } - 0x4: decode LEGACY_OP { - 0x0: psraw_PRq_Ib(); - 0x1: psraw_VRo_Ib(); + //0x2: group13_pshimd(); + 0x2: decode MODRM_REG { + 0x2: PSRLD(PRq,Ib); + 0x4: PSRAD(PRq,Ib); + 0x6: PSLLD(PRq,Ib); + default: UD2(); } - 0x6: decode LEGACY_OP { - 0x0: psllw_PRq_Ib(); - 0x1: psllw_VRo_Ib(); + //0x3: group14_pshimq(); + 0x3: decode MODRM_REG { + 0x2: PSRLQ(PRq,Ib); + 0x6: PSLLQ(PRq,Ib); + default: Inst::UD2(); } - default: Inst::UD2(); + 0x4: Inst::PCMPEQB(Pq,Qq); + 0x5: Inst::PCMPEQW(Pq,Qq); + 0x6: Inst::PCMPEQD(Pq,Qq); + 0x7: WarnUnimpl::emms(); } - //0x2: group14_pshimd(); - 0x2: decode MODRM_REG { - 0x2: decode LEGACY_OP { - 0x0: psrld_PRq_Ib(); - 0x1: 
psrld_VRo_Ib(); - } - 0x4: decode LEGACY_OP { - 0x0: psrad_PRq_Ib(); - 0x1: psrad_VRo_Ib(); - } - 0x6: decode LEGACY_OP { - 0x0: pslld_PRq_Ib(); - 0x1: pslld_VRo_Ib(); - } - default: Inst::UD2(); + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x0: PSHUFHW(Vo,Wo,Ib); + default: UD2(); } - //0x3: group15_pshimq(); - 0x3: decode MODRM_REG { - 0x2: decode LEGACY_OP { - 0x0: psrlq_PRq_Ib(); - 0x1: psrlq_VRo_Ib(); + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: PSHUFD(Vo,Wo,Ib); + //0x1: group12_pshimw(); + 0x1: decode MODRM_REG { + 0x2: PSRLW(VRo,Ib); + 0x4: PSRAW(VRo,Ib); + 0x6: PSLLW(VRo,Ib); } - 0x3: decode LEGACY_OP { - 0x0: Inst::UD2(); - 0x1: psrldq_VRo_Ib(); + //0x2: group13_pshimd(); + 0x2: decode MODRM_REG { + 0x2: PSRLD(VRo,Ib); + 0x4: PSRAD(VRo,Ib); + 0x6: PSLLD(VRo,Ib); + default: UD2(); } - 0x6: decode LEGACY_OP { - 0x0: psllq_PRq_Ib(); - 0x1: psllq_VRo_Ib(); + //0x3: group14_pshimq(); + 0x3: decode MODRM_REG { + 0x2: PSRLQ(VRo,Ib); + 0x3: WarnUnimpl::psrldq_VRo_Ib(); + 0x6: PSLLQ(VRo,Ib); + 0x7: WarnUnimpl::pslldq_VRo_Ib(); + default: UD2(); } - 0x7: decode LEGACY_OP { - 0x0: Inst::UD2(); - 0x1: pslldq_VRo_Ib(); - } - default: Inst::UD2(); + 0x4: PCMPEQB(Vo,Wo); + 0x5: PCMPEQW(Vo,Wo); + 0x6: PCMPEQD(Vo,Wo); + default: UD2(); } - 0x4: pcmpeqb_Vo_Wo(); - 0x5: pcmpeqw_Vo_Wo(); - 0x6: pcmpeqd_Vo_Wo(); - default: Inst::UD2(); - } - // repne (0xF2) - 0x8: decode OPCODE_OP_BOTTOM3 { - 0x0: pshuflw_Vo_Wo_Ib(); - default: Inst::UD2(); - } - default: Inst::UD2(); - } - 0x0F: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: vmread_Ed_or_Eq_Gd_or_Gq(); - 0x1: vmwrite_Gd_or_Gq_Ed_or_Eq(); - 0x6: mov_Ed_Pd(); - 0x7: mov_Qq_Pq(); - default: Inst::UD2(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x6: movq_Vo_Mq_or_Vq_Vq(); - 0x7: movdqu_Wo_Vo(); - default: Inst::UD2(); - } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x4: haddpd_Vo_Wo(); - 0x5: hsubpd_Vo_Wo(); - 0x6: movd_Ed_Vd(); - 
0x7: movdqa_Wo_Vo(); - default: Inst::UD2(); + // repne (0xF2) + 0x8: decode OPCODE_OP_BOTTOM3 { + 0x0: PSHUFLW(Vo,Wo,Ib); + default: UD2(); + } + default: UD2(); } - // repne (0xF2) - 0x8: decode OPCODE_OP_BOTTOM3 { - 0x4: haddps_Vo_Wo(); - 0x5: hsubps_Vo_Wo(); - default: Inst::UD2(); + 0x0F: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: WarnUnimpl::vmread_Edp_Gdp(); + 0x1: WarnUnimpl::vmwrite_Gdp_Edp(); + 0x6: MOVD(Edp,Pdp); + 0x7: MOVQ(Qq,Pq); + default: UD2(); + } + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x6: MOVQ(Vq,Wq); + 0x7: WarnUnimpl::movdqu_Wo_Vo(); + default: UD2(); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x4: WarnUnimpl::haddpd_Vo_Wo(); + 0x5: WarnUnimpl::hsubpd_Vo_Wo(); + 0x6: WarnUnimpl::movd_Ed_Vd(); + 0x7: WarnUnimpl::movdqa_Wo_Vo(); + default: UD2(); + } + // repne (0xF2) + 0x8: decode OPCODE_OP_BOTTOM3 { + 0x4: WarnUnimpl::haddps_Vo_Wo(); + 0x5: WarnUnimpl::hsubps_Vo_Wo(); + default: UD2(); + } + default: UD2(); } - default: Inst::UD2(); - } - format Inst { 0x10: decode OPCODE_OP_BOTTOM3 { 0x0: JO(Jz); 0x1: JNO(Jz); @@ -822,8 +776,7 @@ 0x3: Inst::BT(Ev,Gv); 0x4: Inst::SHLD(Ev,Gv,Ib); 0x5: Inst::SHLD(Ev,Gv); - 0x6: xbts_and_cmpxchg(); - 0x7: ibts_and_cmpxchg(); + default: Inst::UD2(); } 0x15: decode OPCODE_OP_BOTTOM3 { 0x0: push_gs(); @@ -832,48 +785,51 @@ 0x3: Inst::BTS(Ev,Gv); 0x4: Inst::SHRD(Ev,Gv,Ib); 0x5: Inst::SHRD(Ev,Gv); - //0x6: group16(); - 0x6: decode MODRM_REG { - 0x0: fxsave(); - 0x1: fxrstor(); - 0x2: ldmxcsr(); - 0x3: stmxcsr(); - 0x4: Inst::UD2(); - 0x5: decode MODRM_MOD { - 0x3: BasicOperate::LFENCE( + //0x6: group15(); + 0x6: decode MODRM_MOD { + 0x3: decode MODRM_REG { + 0x5: BasicOperate::LFENCE( {{/*Nothing*/}}, IsReadBarrier); - default: Inst::UD2(); - } - 0x6: decode MODRM_MOD { - 0x3: BasicOperate::MFENCE( + 0x6: BasicOperate::MFENCE( {{/*Nothing*/}}, IsMemBarrier); - default: Inst::UD2(); - } - 0x7: decode MODRM_MOD { - 0x3: BasicOperate::SFENCE( + 
0x7: BasicOperate::SFENCE( {{/*Nothing*/}}, IsWriteBarrier); default: Inst::UD2(); } + default: decode MODRM_REG { + 0x0: fxsave(); + 0x1: fxrstor(); + 0x2: Inst::LDMXCSR(Md); + 0x3: Inst::STMXCSR(Md); + 0x4: xsave(); + 0x5: xrstor(); + 0x6: Inst::UD2(); + 0x7: clflush(); + } } 0x7: Inst::IMUL(Gv,Ev); } - 0x16: decode OPCODE_OP_BOTTOM3 { - 0x0: Inst::CMPXCHG(Eb,Gb); - 0x1: Inst::CMPXCHG(Ev,Gv); - 0x2: lss_Gz_Mp(); - 0x3: Inst::BTR(Ev,Gv); - 0x4: lfs_Gz_Mp(); - 0x5: lgs_Gz_Mp(); - //The size of the second operand in these instructions should - //really be "b" or "w", but it's set to v in order to have a - //consistent register size. This shouldn't affect behavior. - 0x6: Inst::MOVZX_B(Gv,Ev); - 0x7: Inst::MOVZX_W(Gv,Ev); - } - 0x17: decode OPCODE_OP_BOTTOM3 { - 0x0: jmpe_Jz(); // IA-64? - format Inst { - //0x1: group11_UD2(); + format Inst { + 0x16: decode OPCODE_OP_BOTTOM3 { + 0x0: CMPXCHG(Eb,Gb); + 0x1: CMPXCHG(Ev,Gv); + 0x2: WarnUnimpl::lss_Gz_Mp(); + 0x3: BTR(Ev,Gv); + 0x4: WarnUnimpl::lfs_Gz_Mp(); + 0x5: WarnUnimpl::lgs_Gz_Mp(); + //The size of the second operand in these instructions + //should really be "b" or "w", but it's set to v in order + //to have a consistent register size. This shouldn't + //affect behavior. + 0x6: MOVZX_B(Gv,Ev); + 0x7: MOVZX_W(Gv,Ev); + } + 0x17: decode OPCODE_OP_BOTTOM3 { + 0x0: decode LEGACY_REP { + 0x0: WarnUnimpl::jmpe_Jz(); + 0x1: WarnUnimpl::popcnt_Gv_Ev(); + } + //0x1: group10_UD2(); 0x1: UD2(); //0x2: group8_Ev_Ib(); 0x2: decode MODRM_REG { @@ -884,244 +840,242 @@ default: UD2(); } 0x3: BTC(Ev,Gv); - } - 0x4: Inst::BSF(Gv,Ev); - 0x5: Inst::BSR(Gv,Ev); - //The size of the second operand in these instructions should - //really be "b" or "w", but it's set to v in order to have a - //consistent register size. This shouldn't affect behavior. 
- 0x6: Inst::MOVSX_B(Gv,Ev); - 0x7: Inst::MOVSX_W(Gv,Ev); - } - 0x18: decode OPCODE_OP_BOTTOM3 { - 0x0: Inst::XADD(Eb,Gb); - 0x1: Inst::XADD(Ev,Gv); - //0x7: group9(); - 0x7: decode MODRM_REG { - //Also CMPXCHG16B - 0x1: Inst::CMPXCHG8B(Mdp); - 0x6: decode LEGACY_OP { - 0x1: vmclear_Mq(); - default: decode LEGACY_REP { - 0x1: vmxon_Mq(); - 0x0: vmptrld_Mq(); + 0x4: BSF(Gv,Ev); + 0x5: BSR(Gv,Ev); + //The size of the second operand in these instructions + //should really be "b" or "w", but it's set to v in order + //to have a consistent register size. This shouldn't + //affect behavior. + 0x6: MOVSX_B(Gv,Ev); + 0x7: MOVSX_W(Gv,Ev); + } + 0x18: decode OPCODE_OP_BOTTOM3 { + 0x0: XADD(Eb,Gb); + 0x1: XADD(Ev,Gv); + //0x7: group9(); + 0x7: decode MODRM_REG { + //Also CMPXCHG16B + 0x1: CMPXCHG8B(Mdp); + 0x6: decode LEGACY_OP { + 0x1: WarnUnimpl::vmclear_Mq(); + default: decode LEGACY_REP { + 0x1: WarnUnimpl::vmxon_Mq(); + 0x0: WarnUnimpl::vmptrld_Mq(); + } } + 0x7: WarnUnimpl::vmptrst_Mq(); + default: UD2(); + } + default: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x2: CMPPS(Vo,Wo,Ib); + 0x3: MOVNTI(Mdp,Gdp); + 0x4: PINSRW(Pq,Ew,Ib); + 0x5: PEXTRW(Gd,PRq,Ib); + 0x6: SHUFPS(Vps,Wps,Ib); + } + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x2: CMPSS(Vd,Wd,Ib); + default: UD2(); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x2: CMPPD(Vo,Wo,Ib); + 0x4: PINSRW(Vdw,Ew,Ib); + 0x5: PEXTRW(Gd,VRdq,Ib); + 0x6: SHUFPD(Vpd,Wpd,Ib); + default: UD2(); + } + // repne (0xF2) + 0x8: decode OPCODE_OP_BOTTOM3 { + 0x2: CMPSD(Vq,Wq,Ib); + default: UD2(); + } + default: UD2(); } - 0x7: vmptrst_Mq(); - default: Inst::UD2(); } - default: decode LEGACY_DECODEVAL { + 0x19: decode OPSIZE { + 4: BSWAP_D(Bd); + 8: BSWAP_Q(Bq); + default: UD2(); + } + 0x1A: decode LEGACY_DECODEVAL { // no prefix 0x0: decode OPCODE_OP_BOTTOM3 { - 0x2: cmpccps_Vo_Wo_Ib(); - 0x3: cvtdq2ps_Vo_Wo(); - 0x4: subps_Vo_Wo(); - 0x5: minps_Vo_Wo(); - 0x6: divps_Vo_Wo(); 
+ 0x1: PSRLW(Pq,Qq); + 0x2: PSRLD(Pq,Qq); + 0x3: PSRLQ(Pq,Qq); + 0x4: PADDQ(Pq,Qq); + 0x5: PMULLW(Pq,Qq); + 0x7: PMOVMSKB(Gd,PRq); + default: UD2(); } // repe (0xF3) 0x4: decode OPCODE_OP_BOTTOM3 { - 0x2: cmpccss_Vd_Wd_Ib(); - default: Inst::UD2(); + 0x6: MOVQ2DQ(Vo,PRq); + default: UD2(); } // operand size (0x66) 0x1: decode OPCODE_OP_BOTTOM3 { - 0x2: cmpccpd_Vo_Wo_Ib(); - 0x4: subpd_Vo_Wo(); - 0x5: minpd_Vo_Wo(); - 0x6: divpd_Vo_Wo(); - default: Inst::UD2(); + 0x0: WarnUnimpl::addsubpd_Vo_Wo(); + 0x1: PSRLW(Vo,Wo); + 0x2: PSRLD(Vo,Wo); + 0x3: PSRLQ(Vo,Wo); + 0x4: PADDQ(Vo,Wo); + 0x5: PMULLW(Vo,Wo); + 0x6: MOVQ(Wq,Vq); + 0x7: PMOVMSKB(Gd,VRo); } // repne (0xF2) 0x8: decode OPCODE_OP_BOTTOM3 { - 0x2: cmpccsd_Vq_Wq_Ib(); - default: Inst::UD2(); + 0x0: WarnUnimpl::addsubps_Vo_Wo(); + 0x6: MOVDQ2Q(Pq,VRq); + default: UD2(); } - default: Inst::UD2(); - } - } - 0x19: decode OPSIZE { - 4: Inst::BSWAP_D(Bd); - 8: Inst::BSWAP_Q(Bq); - default: Inst::UD2(); - } - 0x1A: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x1: psrlw_Pq_Qq(); - 0x2: psrld_Pq_Qq(); - 0x3: psrlq_Pq_Qq(); - 0x4: paddq_Pq_Qq(); - 0x5: pmullw_Pq_Qq(); - 0x7: pmovmskb_Gd_PRq(); - default: Inst::UD2(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x6: movq2dq_Vo_PRq(); - default: Inst::UD2(); + default: UD2(); } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: addsubpd_Vo_Wo(); - 0x1: psrlw_Vo_Wo(); - 0x2: psrld_Vo_Wo(); - 0x3: psrlq_Vo_Wo(); - 0x4: paddq_Vo_Wo(); - 0x5: pmullw_Vo_Wo(); - 0x6: decode MODRM_MOD { - 0x3: movq_Vq_Vq(); - default: movq_Mq_Vq(); + 0x1B: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: PSUBUSB(Pq,Qq); + 0x1: PSUBUSW(Pq,Qq); + 0x2: PMINUB(Pq,Qq); + 0x3: PAND(Pq,Qq); + 0x4: PADDUSB(Pq,Qq); + 0x5: PADDUSW(Pq,Qq); + 0x6: PMAXUB(Pq,Qq); + 0x7: PANDN(Pq,Qq); } - 0x7: pmovmskb_Gd_VRo(); - } - // repne (0xF2) - 0x8: decode OPCODE_OP_BOTTOM3 { - 0x0: addsubps_Vo_Wo(); - 0x6: movdq2q_Pq_VRq(); - 
default: Inst::UD2(); - } - default: Inst::UD2(); - } - 0x1B: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: psubusb_Pq_Qq(); - 0x1: psubusw_Pq_Qq(); - 0x2: pminub_Pq_Qq(); - 0x3: pand_Pq_Qq(); - 0x4: paddusb_Pq_Qq(); - 0x5: paddusw_Pq_Qq(); - 0x6: pmaxub_Pq_Qq(); - 0x7: pandn_Pq_Qq(); - } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: psubusb_Vo_Wo(); - 0x1: psubusw_Vo_Wo(); - 0x2: pminub_Vo_Wo(); - 0x3: pand_Vo_Wo(); - 0x4: paddusb_Vo_Wo(); - 0x5: paddusw_Vo_Wo(); - 0x6: pmaxub_Vo_Wo(); - 0x7: pandn_Vo_Wo(); - } - default: Inst::UD2(); - } - 0x1C: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: pavgb_Pq_Qq(); - 0x1: psraw_Pq_Qq(); - 0x2: psrad_Pq_Qq(); - 0x3: pavgw_Pq_Qq(); - 0x4: pmulhuw_Pq_Qq(); - 0x5: pmulhw_Pq_Qq(); - 0x7: movntq_Mq_Pq(); - default: Inst::UD2(); - } - // repe (0xF3) - 0x4: decode OPCODE_OP_BOTTOM3 { - 0x6: cvtdq2pd_Vo_Wq(); - default: Inst::UD2(); - } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: pavgb_Vo_Wo(); - 0x1: psraw_Vo_Wo(); - 0x2: psrad_Vo_Wo(); - 0x3: pavgw_Vo_Wo(); - 0x4: pmulhuw_Vo_Wo(); - 0x5: pmulhw_Vo_Wo(); - 0x6: cvttpd2dq_Vo_Wo(); - 0x7: movntdq_Mo_Vo(); - } - // repne (0xF2) - 0x8: decode OPCODE_OP_BOTTOM3 { - 0x6: cvtpd2dq_Vo_Wo(); - default: Inst::UD2(); - } - default: Inst::UD2(); - } - 0x1D: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: psubsb_Pq_Qq(); - 0x1: psubsw_Pq_Qq(); - 0x2: pminsw_Pq_Qq(); - 0x3: por_Pq_Qq(); - 0x4: paddsb_Pq_Qq(); - 0x5: paddsw_Pq_Qq(); - 0x6: pmaxsw_Pq_Qq(); - 0x7: pxor_Pq_Qq(); - } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: psubsb_Vo_Wo(); - 0x1: psubsw_Vo_Wo(); - 0x2: pminsw_Vo_Wo(); - 0x3: por_Vo_Wo(); - 0x4: paddsb_Vo_Wo(); - 0x5: paddsw_Vo_Wo(); - 0x6: pmaxsw_Vo_Wo(); - 0x7: pxor_Vo_Wo(); - } - default: Inst::UD2(); - } - 0x1E: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x1: psllw_Pq_Qq(); - 
0x2: pslld_Pq_Qq(); - 0x3: psllq_Pq_Qq(); - 0x4: pmuludq_Pq_Qq(); - 0x5: pmaddwd_Pq_Qq(); - 0x6: psadbw_Pq_Qq(); - 0x7: maskmovq_Pq_PRq(); - default: Inst::UD2(); + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: PSUBUSB(Vo,Wo); + 0x1: PSUBUSW(Vo,Wo); + 0x2: PMINUB(Vo,Wo); + 0x3: PAND(Vo,Wo); + 0x4: PADDUSB(Vo,Wo); + 0x5: PADDUSW(Vo,Wo); + 0x6: PMAXUB(Vo,Wo); + 0x7: PANDN(Vo,Wo); + } + default: UD2(); } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x1: psllw_Vo_Wo(); - 0x2: pslld_Vo_Wo(); - 0x3: psllq_Vo_Wo(); - 0x4: pmuludq_Vo_Wo(); - 0x5: pmaddwd_Vo_Wo(); - 0x6: psadbw_Vo_Wo(); - 0x7: maskmovdqu_Vo_VRo(); - default: Inst::UD2(); + 0x1C: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: PAVGB(Pq,Qq); + 0x1: PSRAW(Pq,Qq); + 0x2: PSRAD(Pq,Qq); + 0x3: PAVGW(Pq,Qq); + 0x4: PMULHUW(Pq,Qq); + 0x5: PMULHW(Pq,Qq); + 0x7: WarnUnimpl::movntq_Mq_Pq(); + default: UD2(); + } + // repe (0xF3) + 0x4: decode OPCODE_OP_BOTTOM3 { + 0x6: CVTDQ2PD(Vo,Wq); + default: UD2(); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: PAVGB(Vo,Wo); + 0x1: PSRAW(Vo,Wo); + 0x2: PSRAD(Vo,Wo); + 0x3: PAVGW(Vo,Wo); + 0x4: PMULHUW(Vo,Wo); + 0x5: PMULHW(Vo,Wo); + 0x6: CVTTPD2DQ(Vo,Wo); + 0x7: WarnUnimpl::movntdq_Mo_Vo(); + } + // repne (0xF2) + 0x8: decode OPCODE_OP_BOTTOM3 { + 0x6: CVTPD2DQ(Vo,Wo); + default: UD2(); + } + default: UD2(); } - // repne (0xF2) - 0x8: decode OPCODE_OP_BOTTOM3 { - 0x0: lddqu_Vo_Mo(); - default: Inst::UD2(); + 0x1D: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: PSUBSB(Pq,Qq); + 0x1: PSUBSW(Pq,Qq); + 0x2: PMINSW(Pq,Qq); + 0x3: POR(Pq,Qq); + 0x4: PADDSB(Pq,Qq); + 0x5: PADDSW(Pq,Qq); + 0x6: PMAXSW(Pq,Qq); + 0x7: PXOR(Pq,Qq); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: PSUBSB(Vo,Wo); + 0x1: PSUBSW(Vo,Wo); + 0x2: PMINSW(Vo,Wo); + 0x3: POR(Vo,Wo); + 0x4: PADDSB(Vo,Wo); + 0x5: PADDSW(Vo,Wo); + 0x6: PMAXSW(Vo,Wo); + 0x7: PXOR(Vo,Wo); + } + 
default: UD2(); } - default: Inst::UD2(); - } - 0x1F: decode LEGACY_DECODEVAL { - // no prefix - 0x0: decode OPCODE_OP_BOTTOM3 { - 0x0: psubb_Pq_Qq(); - 0x1: psubw_Pq_Qq(); - 0x2: psubd_Pq_Qq(); - 0x3: psubq_Pq_Qq(); - 0x4: paddb_Pq_Qq(); - 0x5: paddw_Pq_Qq(); - 0x6: paddd_Pq_Qq(); - 0x7: Inst::UD2(); + 0x1E: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x1: PSLLW(Pq,Qq); + 0x2: PSLLD(Pq,Qq); + 0x3: PSLLQ(Pq,Qq); + 0x4: PMULUDQ(Pq,Qq); + 0x5: PMADDWD(Pq,Qq); + 0x6: PSADBW(Pq,Qq); + 0x7: MASKMOVQ(Pq,PRq); + default: UD2(); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x1: PSLLW(Vo,Wo); + 0x2: PSLLD(Vo,Wo); + 0x3: PSLLQ(Vo,Wo); + 0x4: PMULUDQ(Vo,Wo); + 0x5: PMADDWD(Vo,Wo); + 0x6: PSADBW(Vo,Wo); + 0x7: MASKMOVDQU(Vo,VRo); + default: UD2(); + } + // repne (0xF2) + 0x8: decode OPCODE_OP_BOTTOM3 { + 0x0: WarnUnimpl::lddqu_Vo_Mo(); + default: UD2(); + } + default: UD2(); } - // operand size (0x66) - 0x1: decode OPCODE_OP_BOTTOM3 { - 0x0: psubb_Vo_Wo(); - 0x1: psubw_Vo_Wo(); - 0x2: psubd_Vo_Wo(); - 0x3: psubq_Vo_Wo(); - 0x4: paddb_Vo_Wo(); - 0x5: paddw_Vo_Wo(); - 0x6: paddd_Vo_Wo(); - 0x7: Inst::UD2(); + 0x1F: decode LEGACY_DECODEVAL { + // no prefix + 0x0: decode OPCODE_OP_BOTTOM3 { + 0x0: PSUBB(Pq,Qq); + 0x1: PSUBW(Pq,Qq); + 0x2: PSUBD(Pq,Qq); + 0x3: PSUBQ(Pq,Qq); + 0x4: PADDB(Pq,Qq); + 0x5: PADDW(Pq,Qq); + 0x6: PADDD(Pq,Qq); + 0x7: UD2(); + } + // operand size (0x66) + 0x1: decode OPCODE_OP_BOTTOM3 { + 0x0: PSUBB(Vo,Wo); + 0x1: PSUBW(Vo,Wo); + 0x2: PSUBD(Vo,Wo); + 0x3: PSUBQ(Vo,Wo); + 0x4: PADDB(Vo,Wo); + 0x5: PADDW(Vo,Wo); + 0x6: PADDD(Vo,Wo); + 0x7: UD2(); + } + default: UD2(); } - default: Inst::UD2(); } default: FailUnimpl::twoByteOps(); } diff --git a/src/arch/x86/isa/decoder/x87.isa b/src/arch/x86/isa/decoder/x87.isa index 9a6473141..cfd69b3ba 100644 --- a/src/arch/x86/isa/decoder/x87.isa +++ b/src/arch/x86/isa/decoder/x87.isa @@ -249,8 +249,8 @@ format WarnUnimpl { 0x3: Inst::UD2(); default: fisttp(); } - 0x2: 
Inst::FST(Mq); - 0x3: Inst::FSTP(Mq); + 0x2: Inst::FST(Eq); + 0x3: Inst::FSTP(Eq); 0x4: decode MODRM_MOD { 0x3: fucom(); default: frstor(); diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa index 78046c0c8..6b1fda93f 100644 --- a/src/arch/x86/isa/includes.isa +++ b/src/arch/x86/isa/includes.isa @@ -100,6 +100,7 @@ output header {{ #include "arch/x86/insts/macroop.hh" #include "arch/x86/insts/microfpop.hh" #include "arch/x86/insts/microldstop.hh" +#include "arch/x86/insts/micromediaop.hh" #include "arch/x86/insts/microregop.hh" #include "arch/x86/insts/static_inst.hh" #include "arch/x86/isa_traits.hh" @@ -155,6 +156,7 @@ output exec {{ #include "arch/x86/miscregs.hh" #include "arch/x86/tlb.hh" #include "base/bigint.hh" +#include "base/condcodes.hh" #include "cpu/base.hh" #include "cpu/exetrace.hh" #include "sim/sim_exit.hh" diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py index 86f1946ba..800549359 100644 --- a/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py +++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py @@ -221,62 +221,26 @@ def macroop IMUL_R_P_I mulel reg muleh t0 }; +''' + +pcRel = ''' + rdip t7 + ld %s, seg, riprel, disp +''' +sibRel = ''' + ld %s, seg, sib, disp +''' # # One byte version of unsigned division # -def macroop DIV_B_R -{ - # Do the initial part of the division - div1 ah, reg, dataSize=1 - - #These are split out so we can initialize the number of bits in the - #second register - div2i t1, rax, 8, dataSize=1 - div2 t1, rax, t1, dataSize=1 - - #Loop until we're out of bits to shift in -divLoopTop: - div2 t1, rax, t1, dataSize=1 - div2 t1, rax, t1, flags=(EZF,), dataSize=1 - br label("divLoopTop"), flags=(nCEZF,) - - #Unload the answer - divq rax, dataSize=1 - divr ah, dataSize=1 -}; - -def macroop DIV_B_M +divcode = ''' +def macroop DIV_B_%(suffix)s { - ld t2, 
seg, sib, disp - + %(readOp1)s # Do the initial part of the division - div1 ah, t2, dataSize=1 - - #These are split out so we can initialize the number of bits in the - #second register - div2i t1, rax, 8, dataSize=1 - div2 t1, rax, t1, dataSize=1 - - #Loop until we're out of bits to shift in -divLoopTop: - div2 t1, rax, t1, dataSize=1 - div2 t1, rax, t1, flags=(EZF,), dataSize=1 - br label("divLoopTop"), flags=(nCEZF,) - - #Unload the answer - divq rax, dataSize=1 - divr ah, dataSize=1 -}; - -def macroop DIV_B_P -{ - rdip t7 - ld t2, seg, riprel, disp - - # Do the initial part of the division - div1 ah, t2, dataSize=1 + div1 ah, %(op1)s, dataSize=1 #These are split out so we can initialize the number of bits in the #second register @@ -293,68 +257,18 @@ divLoopTop: divq rax, dataSize=1 divr ah, dataSize=1 }; +''' # # Unsigned division # -def macroop DIV_R -{ - # Do the initial part of the division - div1 rdx, reg - - #These are split out so we can initialize the number of bits in the - #second register - div2i t1, rax, "env.dataSize * 8" - div2 t1, rax, t1 - - #Loop until we're out of bits to shift in - #The amount of unrolling here could stand some tuning -divLoopTop: - div2 t1, rax, t1 - div2 t1, rax, t1 - div2 t1, rax, t1 - div2 t1, rax, t1, flags=(EZF,) - br label("divLoopTop"), flags=(nCEZF,) - - #Unload the answer - divq rax - divr rdx -}; - -def macroop DIV_M -{ - ld t2, seg, sib, disp - - # Do the initial part of the division - div1 rdx, t2 - - #These are split out so we can initialize the number of bits in the - #second register - div2i t1, rax, "env.dataSize * 8" - div2 t1, rax, t1 - - #Loop until we're out of bits to shift in - #The amount of unrolling here could stand some tuning -divLoopTop: - div2 t1, rax, t1 - div2 t1, rax, t1 - div2 t1, rax, t1 - div2 t1, rax, t1, flags=(EZF,) - br label("divLoopTop"), flags=(nCEZF,) - - #Unload the answer - divq rax - divr rdx -}; - -def macroop DIV_P +divcode += ''' +def macroop DIV_%(suffix)s { - rdip t7 - ld 
t2, seg, riprel, disp - + %(readOp1)s # Do the initial part of the division - div1 rdx, t2 + div1 rdx, %(op1)s #These are split out so we can initialize the number of bits in the #second register @@ -374,12 +288,14 @@ divLoopTop: divq rax divr rdx }; +''' # # One byte version of signed division # -def macroop IDIV_B_R +divcode += ''' +def macroop IDIV_B_%(suffix)s { # Negate dividend sub t1, t0, rax, flags=(ECF,), dataSize=1 @@ -387,84 +303,15 @@ def macroop IDIV_B_R sub t2, t0, ah, dataSize=1 sub t2, t2, t4 - #Find the sign of the divisor - slli t0, reg, 1, flags=(ECF,), dataSize=1 - - # Negate divisor - sub t3, t0, reg, dataSize=1 - # Put the divisor's absolute value into t3 - mov t3, t3, reg, flags=(nCECF,), dataSize=1 - - #Find the sign of the dividend - slli t0, ah, 1, flags=(ECF,), dataSize=1 - - # Put the dividend's absolute value into t1 and t2 - mov t1, t1, rax, flags=(nCECF,), dataSize=1 - mov t2, t2, ah, flags=(nCECF,), dataSize=1 - - # Do the initial part of the division - div1 t2, t3, dataSize=1 - - #These are split out so we can initialize the number of bits in the - #second register - div2i t4, t1, 8, dataSize=1 - div2 t4, t1, t4, dataSize=1 - - #Loop until we're out of bits to shift in -divLoopTop: - div2 t4, t1, t4, dataSize=1 - div2 t4, t1, t4, flags=(EZF,), dataSize=1 - br label("divLoopTop"), flags=(nCEZF,) - - #Unload the answer - divq t5, dataSize=1 - divr t6, dataSize=1 - - # Fix up signs. The sign of the dividend is still lying around in ECF. - # The sign of the remainder, ah, is the same as the dividend. The sign - # of the quotient is negated if the signs of the divisor and dividend - # were different. - - # Negate the remainder - sub t4, t0, t6, dataSize=1 - # If the dividend was negitive, put the negated remainder in ah. - mov ah, ah, t4, (CECF,), dataSize=1 - # Otherwise put the regular remainder in ah. - mov ah, ah, t6, (nCECF,), dataSize=1 - - # Negate the quotient. 
- sub t4, t0, t5, dataSize=1 - # If the dividend was negative, start using the negated quotient - mov t5, t5, t4, (CECF,), dataSize=1 - - # Check the sign of the divisor - slli t0, reg, 1, flags=(ECF,), dataSize=1 - - # Negate the (possibly already negated) quotient - sub t4, t0, t5, dataSize=1 - # If the divisor was negative, put the negated quotient in rax. - mov rax, rax, t4, (CECF,), dataSize=1 - # Otherwise put the one that wasn't negated (at least here) in rax. - mov rax, rax, t5, (nCECF,), dataSize=1 -}; - -def macroop IDIV_B_M -{ - # Negate dividend - sub t1, t0, rax, flags=(ECF,), dataSize=1 - ruflag t4, 3 - sub t2, t0, ah, dataSize=1 - sub t2, t2, t4 - - ld t8, seg, sib, disp + %(readOp1)s #Find the sign of the divisor - slli t0, t8, 1, flags=(ECF,), dataSize=1 + slli t0, %(op1)s, 1, flags=(ECF,), dataSize=1 # Negate divisor - sub t3, t0, t8, dataSize=1 + sub t3, t0, %(op1)s, dataSize=1 # Put the divisor's absolute value into t3 - mov t3, t3, t8, flags=(nCECF,), dataSize=1 + mov t3, t3, %(op1)s, flags=(nCECF,), dataSize=1 #Find the sign of the dividend slli t0, ah, 1, flags=(ECF,), dataSize=1 @@ -509,79 +356,7 @@ divLoopTop: mov t5, t5, t4, (CECF,), dataSize=1 # Check the sign of the divisor - slli t0, t8, 1, flags=(ECF,), dataSize=1 - - # Negate the (possibly already negated) quotient - sub t4, t0, t5, dataSize=1 - # If the divisor was negative, put the negated quotient in rax. - mov rax, rax, t4, (CECF,), dataSize=1 - # Otherwise put the one that wasn't negated (at least here) in rax. 
- mov rax, rax, t5, (nCECF,), dataSize=1 -}; - -def macroop IDIV_B_P -{ - # Negate dividend - sub t1, t0, rax, flags=(ECF,), dataSize=1 - ruflag t4, 3 - sub t2, t0, ah, dataSize=1 - sub t2, t2, t4 - - rdip t7 - ld t8, seg, riprel, disp - - #Find the sign of the divisor - slli t0, t8, 1, flags=(ECF,), dataSize=1 - - # Negate divisor - sub t3, t0, t8, dataSize=1 - # Put the divisor's absolute value into t3 - mov t3, t3, t8, flags=(nCECF,), dataSize=1 - - #Find the sign of the dividend - slli t0, ah, 1, flags=(ECF,), dataSize=1 - - # Put the dividend's absolute value into t1 and t2 - mov t1, t1, rax, flags=(nCECF,), dataSize=1 - mov t2, t2, ah, flags=(nCECF,), dataSize=1 - - # Do the initial part of the division - div1 t2, t3, dataSize=1 - - #These are split out so we can initialize the number of bits in the - #second register - div2i t4, t1, 8, dataSize=1 - div2 t4, t1, t4, dataSize=1 - - #Loop until we're out of bits to shift in -divLoopTop: - div2 t4, t1, t4, dataSize=1 - div2 t4, t1, t4, flags=(EZF,), dataSize=1 - br label("divLoopTop"), flags=(nCEZF,) - - #Unload the answer - divq t5, dataSize=1 - divr t6, dataSize=1 - - # Fix up signs. The sign of the dividend is still lying around in ECF. - # The sign of the remainder, ah, is the same as the dividend. The sign - # of the quotient is negated if the signs of the divisor and dividend - # were different. - - # Negate the remainder - sub t4, t0, t6, dataSize=1 - # If the dividend was negitive, put the negated remainder in ah. - mov ah, ah, t4, (CECF,), dataSize=1 - # Otherwise put the regular remainder in ah. - mov ah, ah, t6, (nCECF,), dataSize=1 - - # Negate the quotient. 
- sub t4, t0, t5, dataSize=1 - # If the dividend was negative, start using the negated quotient - mov t5, t5, t4, (CECF,), dataSize=1 - - # Check the sign of the divisor - slli t0, t8, 1, flags=(ECF,), dataSize=1 + slli t0, %(op1)s, 1, flags=(ECF,), dataSize=1 # Negate the (possibly already negated) quotient sub t4, t0, t5, dataSize=1 @@ -590,12 +365,14 @@ divLoopTop: # Otherwise put the one that wasn't negated (at least here) in rax. mov rax, rax, t5, (nCECF,), dataSize=1 }; +''' # # Signed division # -def macroop IDIV_R +divcode += ''' +def macroop IDIV_%(suffix)s { # Negate dividend sub t1, t0, rax, flags=(ECF,) @@ -603,166 +380,17 @@ def macroop IDIV_R sub t2, t0, rdx sub t2, t2, t4 - #Find the sign of the divisor - slli t0, reg, 1, flags=(ECF,) - - # Negate divisor - sub t3, t0, reg - # Put the divisor's absolute value into t3 - mov t3, t3, reg, flags=(nCECF,) - - #Find the sign of the dividend - slli t0, rdx, 1, flags=(ECF,) - - # Put the dividend's absolute value into t1 and t2 - mov t1, t1, rax, flags=(nCECF,) - mov t2, t2, rdx, flags=(nCECF,) - - # Do the initial part of the division - div1 t2, t3 - - #These are split out so we can initialize the number of bits in the - #second register - div2i t4, t1, "env.dataSize * 8" - div2 t4, t1, t4 - - #Loop until we're out of bits to shift in -divLoopTop: - div2 t4, t1, t4 - div2 t4, t1, t4 - div2 t4, t1, t4 - div2 t4, t1, t4, flags=(EZF,) - br label("divLoopTop"), flags=(nCEZF,) - - #Unload the answer - divq t5 - divr t6 - - # Fix up signs. The sign of the dividend is still lying around in ECF. - # The sign of the remainder, ah, is the same as the dividend. The sign - # of the quotient is negated if the signs of the divisor and dividend - # were different. - - # Negate the remainder - sub t4, t0, t6 - # If the dividend was negitive, put the negated remainder in rdx. - mov rdx, rdx, t4, (CECF,) - # Otherwise put the regular remainder in rdx. - mov rdx, rdx, t6, (nCECF,) - - # Negate the quotient. 
- sub t4, t0, t5 - # If the dividend was negative, start using the negated quotient - mov t5, t5, t4, (CECF,) - - # Check the sign of the divisor - slli t0, reg, 1, flags=(ECF,) - - # Negate the (possibly already negated) quotient - sub t4, t0, t5 - # If the divisor was negative, put the negated quotient in rax. - mov rax, rax, t4, (CECF,) - # Otherwise put the one that wasn't negated (at least here) in rax. - mov rax, rax, t5, (nCECF,) -}; - -def macroop IDIV_M -{ - # Negate dividend - sub t1, t0, rax, flags=(ECF,) - ruflag t4, 3 - sub t2, t0, rdx - sub t2, t2, t4 - - ld t8, seg, sib, disp - - #Find the sign of the divisor - #FIXME!!! This depends on shifts setting the carry flag correctly. - slli t0, t8, 1, flags=(ECF,) - - # Negate divisor - sub t3, t0, t8 - # Put the divisor's absolute value into t3 - mov t3, t3, t8, flags=(nCECF,) - - #Find the sign of the dividend - #FIXME!!! This depends on shifts setting the carry flag correctly. - slli t0, rdx, 1, flags=(ECF,) - - # Put the dividend's absolute value into t1 and t2 - mov t1, t1, rax, flags=(nCECF,) - mov t2, t2, rdx, flags=(nCECF,) - - # Do the initial part of the division - div1 t2, t3 - - #These are split out so we can initialize the number of bits in the - #second register - div2i t4, t1, "env.dataSize * 8" - div2 t4, t1, t4 - - #Loop until we're out of bits to shift in -divLoopTop: - div2 t4, t1, t4 - div2 t4, t1, t4 - div2 t4, t1, t4 - div2 t4, t1, t4, flags=(EZF,) - br label("divLoopTop"), flags=(nCEZF,) - - #Unload the answer - divq t5 - divr t6 - - # Fix up signs. The sign of the dividend is still lying around in ECF. - # The sign of the remainder, ah, is the same as the dividend. The sign - # of the quotient is negated if the signs of the divisor and dividend - # were different. - - # Negate the remainder - sub t4, t0, t6 - # If the dividend was negitive, put the negated remainder in rdx. - mov rdx, rdx, t4, (CECF,) - # Otherwise put the regular remainder in rdx. 
- mov rdx, rdx, t6, (nCECF,) - - # Negate the quotient. - sub t4, t0, t5 - # If the dividend was negative, start using the negated quotient - mov t5, t5, t4, (CECF,) - - # Check the sign of the divisor - slli t0, t8, 1, flags=(ECF,) - - # Negate the (possibly already negated) quotient - sub t4, t0, t5 - # If the divisor was negative, put the negated quotient in rax. - mov rax, rax, t4, (CECF,) - # Otherwise put the one that wasn't negated (at least here) in rax. - mov rax, rax, t5, (nCECF,) -}; - -def macroop IDIV_P -{ - # Negate dividend - sub t1, t0, rax, flags=(ECF,) - ruflag t4, 3 - sub t2, t0, rdx - sub t2, t2, t4 - - rdip t7 - ld t8, seg, riprel, disp + %(readOp1)s #Find the sign of the divisor - #FIXME!!! This depends on shifts setting the carry flag correctly. - slli t0, t8, 1, flags=(ECF,) + slli t0, %(op1)s, 1, flags=(ECF,) # Negate divisor - sub t3, t0, t8 + sub t3, t0, %(op1)s # Put the divisor's absolute value into t3 - mov t3, t3, t4, flags=(nCECF,) + mov t3, t3, %(op1)s, flags=(nCECF,) #Find the sign of the dividend - #FIXME!!! This depends on shifts setting the carry flag correctly. 
slli t0, rdx, 1, flags=(ECF,) # Put the dividend's absolute value into t1 and t2 @@ -807,7 +435,7 @@ divLoopTop: mov t5, t5, t4, (CECF,) # Check the sign of the divisor - slli t0, t8, 1, flags=(ECF,) + slli t0, %(op1)s, 1, flags=(ECF,) # Negate the (possibly already negated) quotient sub t4, t0, t5 @@ -817,3 +445,10 @@ divLoopTop: mov rax, rax, t5, (nCECF,) }; ''' + +microcode += divcode % {"suffix": "R", + "readOp1": "", "op1": "reg"} +microcode += divcode % {"suffix": "M", + "readOp1": sibRel % "t2", "op1": "t2"} +microcode += divcode % {"suffix": "P", + "readOp1": pcRel % "t2", "op1": "t2"} diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py index 560a86e64..7ccdca6c3 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/move.py @@ -346,10 +346,17 @@ processDescriptor: wrdl reg, t3, t1 wrsel reg, t1 }; + +def macroop MOVNTI_M_R { + st reg, seg, sib, disp +}; + +def macroop MOVNTI_P_R { + rdip t7 + st reg, seg, riprel, disp +}; ''' #let {{ # class MOVD(Inst): # "GenFault ${new UnimpInstFault}" -# class MOVNTI(Inst): -# "GenFault ${new UnimpInstFault}" #}}; diff --git a/src/arch/x86/isa/insts/general_purpose/semaphores.py b/src/arch/x86/isa/insts/general_purpose/semaphores.py index 2bdbd0ada..b3c0d21cb 100644 --- a/src/arch/x86/isa/insts/general_purpose/semaphores.py +++ b/src/arch/x86/isa/insts/general_purpose/semaphores.py @@ -98,100 +98,6 @@ def macroop CMPXCHG_LOCKED_P_R { mov rax, rax, t1, flags=(nCZF,) }; -def macroop CMPXCHG8B_M { - lea t1, seg, sib, disp, dataSize=asz - ldst t2, seg, [1, t0, t1], 0 - ldst t3, seg, [1, t0, t1], dsz - - sub t0, rax, t2, flags=(ZF,) - br label("doneComparing"), flags=(nCZF,) - sub t0, rdx, t3, flags=(ZF,) -doneComparing: - - # If they're equal, set t3:t2 to rbx:rcx to write to memory - mov t2, t2, rbx, flags=(CZF,) - mov t3, t3, rcx, flags=(CZF,) - - # If they're 
not equal, set rdx:rax to the value from memory. - mov rax, rax, t2, flags=(nCZF,) - mov rdx, rdx, t3, flags=(nCZF,) - - # Write to memory - st t3, seg, [1, t0, t1], dsz - st t2, seg, [1, t0, t1], 0 -}; - -def macroop CMPXCHG8B_P { - rdip t7 - lea t1, seg, riprel, disp, dataSize=asz - ldst t2, seg, [1, t0, t1], 0 - ldst t3, seg, [1, t0, t1], dsz - - sub t0, rax, t2, flags=(ZF,) - br label("doneComparing"), flags=(nCZF,) - sub t0, rdx, t3, flags=(ZF,) -doneComparing: - - # If they're equal, set t3:t2 to rbx:rcx to write to memory - mov t2, t2, rbx, flags=(CZF,) - mov t3, t3, rcx, flags=(CZF,) - - # If they're not equal, set rdx:rax to the value from memory. - mov rax, rax, t2, flags=(nCZF,) - mov rdx, rdx, t3, flags=(nCZF,) - - # Write to memory - st t3, seg, [1, t0, t1], dsz - st t2, seg, [1, t0, t1], 0 -}; - -def macroop CMPXCHG8B_LOCKED_M { - lea t1, seg, sib, disp, dataSize=asz - ldstl t2, seg, [1, t0, t1], 0 - ldstl t3, seg, [1, t0, t1], dsz - - sub t0, rax, t2, flags=(ZF,) - br label("doneComparing"), flags=(nCZF,) - sub t0, rdx, t3, flags=(ZF,) -doneComparing: - - # If they're equal, set t3:t2 to rbx:rcx to write to memory - mov t2, t2, rbx, flags=(CZF,) - mov t3, t3, rcx, flags=(CZF,) - - # If they're not equal, set rdx:rax to the value from memory. - mov rax, rax, t2, flags=(nCZF,) - mov rdx, rdx, t3, flags=(nCZF,) - - # Write to memory - stul t3, seg, [1, t0, t1], dsz - stul t2, seg, [1, t0, t1], 0 -}; - -def macroop CMPXCHG8B_LOCKED_P { - rdip t7 - lea t1, seg, riprel, disp, dataSize=asz - ldstl t2, seg, [1, t0, t1], 0 - ldstl t3, seg, [1, t0, t1], dsz - - sub t0, rax, t2, flags=(ZF,) - br label("doneComparing"), flags=(nCZF,) - sub t0, rdx, t3, flags=(ZF,) -doneComparing: - - # If they're equal, set t3:t2 to rbx:rcx to write to memory - mov t2, t2, rbx, flags=(CZF,) - mov t3, t3, rcx, flags=(CZF,) - - # If they're not equal, set rdx:rax to the value from memory. 
- mov rax, rax, t2, flags=(nCZF,) - mov rdx, rdx, t3, flags=(nCZF,) - - # Write to memory - stul t3, seg, [1, t0, t1], dsz - stul t2, seg, [1, t0, t1], 0 -}; - def macroop XADD_M_R { ldst t1, seg, sib, disp add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) @@ -229,6 +135,46 @@ def macroop XADD_R_R { }; ''' + +cmpxchg8bCode = ''' +def macroop CMPXCHG8B_%(suffix)s { + %(rdip)s + lea t1, seg, %(sib)s, disp, dataSize=asz + ldst%(l)s t2, seg, [1, t0, t1], 0 + ldst%(l)s t3, seg, [1, t0, t1], dsz + + sub t0, rax, t2, flags=(ZF,) + br label("doneComparing"), flags=(nCZF,) + sub t0, rdx, t3, flags=(ZF,) +doneComparing: + + # If they're equal, set t3:t2 to rbx:rcx to write to memory + mov t2, t2, rbx, flags=(CZF,) + mov t3, t3, rcx, flags=(CZF,) + + # If they're not equal, set rdx:rax to the value from memory. + mov rax, rax, t2, flags=(nCZF,) + mov rdx, rdx, t3, flags=(nCZF,) + + # Write to memory + st%(ul)s t3, seg, [1, t0, t1], dsz + st%(ul)s t2, seg, [1, t0, t1], 0 +}; +''' + +microcode += cmpxchg8bCode % {"rdip": "", "sib": "sib", + "l": "", "ul": "", + "suffix": "M"} +microcode += cmpxchg8bCode % {"rdip": "rdip t7", "sib": "riprel", + "l": "", "ul": "", + "suffix": "P"} +microcode += cmpxchg8bCode % {"rdip": "", "sib": "sib", + "l": "l", "ul": "ul", + "suffix": "LOCKED_M"} +microcode += cmpxchg8bCode % {"rdip": "rdip t7", "sib": "riprel", + "l": "l", "ul": "ul", + "suffix": "LOCKED_P"} + #let {{ # class XCHG(Inst): # "GenFault ${new UnimpInstFault}" diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py index c1764ff12..083d8775d 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/addition.py @@ -54,22 +54,73 @@ # Authors: Gabe Black microcode = ''' -# ADDPS -# ADDPD -# ADDSS +def macroop ADDSS_XMM_XMM { + maddf xmml, xmml, xmmlm, size=4, ext=1 +}; + +def macroop ADDSS_XMM_M { + ldfp 
ufp1, seg, sib, disp, dataSize=8 + maddf xmml, xmml, ufp1, size=4, ext=1 +}; + +def macroop ADDSS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddf xmml, xmml, ufp1, size=4, ext=1 +}; + +def macroop ADDSD_XMM_XMM { + maddf xmml, xmml, xmmlm, size=8, ext=1 +}; + +def macroop ADDSD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddf xmml, xmml, ufp1, size=8, ext=1 +}; + +def macroop ADDSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddf xmml, xmml, ufp1, size=8, ext=1 +}; + +def macroop ADDPS_XMM_XMM { + maddf xmml, xmml, xmmlm, size=4, ext=0 + maddf xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop ADDPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddf xmml, xmml, ufp1, size=4, ext=0 + maddf xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop ADDPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddf xmml, xmml, ufp1, size=4, ext=0 + maddf xmmh, xmmh, ufp2, size=4, ext=0 +}; -def macroop ADDSD_R_R { - addfp xmml, xmml, xmmlm +def macroop ADDPD_XMM_XMM { + maddf xmml, xmml, xmmlm, size=8, ext=0 + maddf xmmh, xmmh, xmmhm, size=8, ext=0 }; -def macroop ADDSD_R_M { - ldfp ufp1, seg, sib, disp - addfp xmml, xmml, ufp1 +def macroop ADDPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddf xmml, xmml, ufp1, size=8, ext=0 + maddf xmmh, xmmh, ufp2, size=8, ext=0 }; -def macroop ADDSD_R_P { +def macroop ADDPD_XMM_P { rdip t7 - ldfp ufp1, seg, riprel, disp - addfp xmml, xmml, ufp1 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddf xmml, xmml, ufp1, size=8, ext=0 + maddf xmmh, xmmh, ufp2, size=8, ext=0 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py 
b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py index 31f336696..3e565278c 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/division.py @@ -54,22 +54,73 @@ # Authors: Gabe Black microcode = ''' -# DIVPS -# DIVPD -# DIVSS +def macroop DIVSS_XMM_XMM { + mdivf xmml, xmml, xmmlm, size=4, ext=1 +}; + +def macroop DIVSS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mdivf xmml, xmml, ufp1, size=4, ext=1 +}; + +def macroop DIVSS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mdivf xmml, xmml, ufp1, size=4, ext=1 +}; + +def macroop DIVSD_XMM_XMM { + mdivf xmml, xmml, xmmlm, size=8, ext=1 +}; + +def macroop DIVSD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mdivf xmml, xmml, ufp1, size=8, ext=1 +}; + +def macroop DIVSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mdivf xmml, xmml, ufp1, size=8, ext=1 +}; + +def macroop DIVPS_XMM_XMM { + mdivf xmml, xmml, xmmlm, size=4, ext=0 + mdivf xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop DIVPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mdivf xmml, xmml, ufp1, size=4, ext=0 + mdivf xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop DIVPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mdivf xmml, xmml, ufp1, size=4, ext=0 + mdivf xmmh, xmmh, ufp2, size=4, ext=0 +}; -def macroop DIVSD_R_R { - divfp xmml, xmml, xmmlm +def macroop DIVPD_XMM_XMM { + mdivf xmml, xmml, xmmlm, size=8, ext=0 + mdivf xmmh, xmmh, xmmhm, size=8, ext=0 }; -def macroop DIVSD_R_M { - ldfp ufp1, seg, sib, disp - divfp xmml, xmml, ufp1 +def macroop DIVPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mdivf xmml, xmml, ufp1, size=8, ext=0 + mdivf xmmh, xmmh, ufp2, size=8, ext=0 }; -def 
macroop DIVSD_R_P { +def macroop DIVPD_XMM_P { rdip t7 - ldfp ufp1, seg, riprel, disp - divfp xmml, xmml, ufp1 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mdivf xmml, xmml, ufp1, size=8, ext=0 + mdivf xmmh, xmmh, ufp2, size=8, ext=0 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py index 1a53eb27f..fc28fbda4 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/multiplication.py @@ -54,22 +54,73 @@ # Authors: Gabe Black microcode = ''' -# MULPS -# MULPD -# MULSS +def macroop MULSS_XMM_XMM { + mmulf xmml, xmml, xmmlm, size=4, ext=1 +}; + +def macroop MULSS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmulf xmml, xmml, ufp1, size=4, ext=1 +}; + +def macroop MULSS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmulf xmml, xmml, ufp1, size=4, ext=1 +}; + +def macroop MULSD_XMM_XMM { + mmulf xmml, xmml, xmmlm, size=8, ext=1 +}; + +def macroop MULSD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmulf xmml, xmml, ufp1, size=8, ext=1 +}; + +def macroop MULSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmulf xmml, xmml, ufp1, size=8, ext=1 +}; + +def macroop MULPS_XMM_XMM { + mmulf xmml, xmml, xmmlm, size=4, ext=0 + mmulf xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop MULPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmulf xmml, xmml, ufp1, size=4, ext=0 + mmulf xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop MULPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmulf xmml, xmml, ufp1, size=4, ext=0 + mmulf xmmh, xmmh, ufp2, size=4, ext=0 +}; -def macroop MULSD_R_R { - mulfp xmml, xmml, xmmlm 
+def macroop MULPD_XMM_XMM { + mmulf xmml, xmml, xmmlm, size=8, ext=0 + mmulf xmmh, xmmh, xmmhm, size=8, ext=0 }; -def macroop MULSD_R_M { - ldfp ufp1, seg, sib, disp - mulfp xmml, xmml, ufp1 +def macroop MULPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmulf xmml, xmml, ufp1, size=8, ext=0 + mmulf xmmh, xmmh, ufp2, size=8, ext=0 }; -def macroop MULSD_R_P { +def macroop MULPD_XMM_P { rdip t7 - ldfp ufp1, seg, riprel, disp - mulfp xmml, xmml, ufp1 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmulf xmml, xmml, ufp1, size=8, ext=0 + mmulf xmmh, xmmh, ufp2, size=8, ext=0 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py index 4f67aee88..fdeb30ddc 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/square_root.py @@ -54,22 +54,73 @@ # Authors: Gabe Black microcode = ''' -# SQRTPS -# SQRTPD -# SQRTSS +def macroop SQRTSS_XMM_XMM { + msqrt xmml, xmmlm, size=4, ext=1 +}; + +def macroop SQRTSS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msqrt xmml, ufp1, size=4, ext=1 +}; + +def macroop SQRTSS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msqrt xmml, ufp1, size=4, ext=1 +}; + +def macroop SQRTPS_XMM_XMM { + msqrt xmml, xmmlm, size=4, ext=0 + msqrt xmmh, xmmhm, size=4, ext=0 +}; + +def macroop SQRTPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msqrt xmml, ufp1, size=4, ext=0 + msqrt xmmh, ufp2, size=4, ext=0 +}; + +def macroop SQRTPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msqrt xmml, ufp1, size=4, ext=0 + msqrt xmmh, ufp2, size=4, ext=0 +}; + +def 
macroop SQRTSD_XMM_XMM { + msqrt xmml, xmmlm, size=8, ext=1 +}; + +def macroop SQRTSD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msqrt xmml, ufp1, size=8, ext=1 +}; + +def macroop SQRTSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msqrt xmml, ufp1, size=8, ext=1 +}; -def macroop SQRTSD_R_R { - sqrtfp xmml, xmml, xmmlm +def macroop SQRTPD_XMM_XMM { + msqrt xmml, xmmlm, size=8, ext=0 + msqrt xmmh, xmmhm, size=8, ext=0 }; -def macroop SQRTSD_R_M { - ldfp ufp1, seg, sib, disp - sqrtfp xmml, xmml, ufp1 +def macroop SQRTPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msqrt xmml, ufp1, size=8, ext=0 + msqrt xmmh, ufp2, size=8, ext=0 }; -def macroop SQRTSD_R_P { +def macroop SQRTPD_XMM_P { rdip t7 - ldfp ufp1, seg, riprel, disp - sqrtfp xmml, xmml, ufp1 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msqrt xmml, ufp1, size=8, ext=0 + msqrt xmmh, ufp2, size=8, ext=0 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py index 4f73fa899..378abc070 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/subtraction.py @@ -54,22 +54,73 @@ # Authors: Gabe Black microcode = ''' -# SUBPS -# SUBPD -# SUBSS +def macroop SUBSS_XMM_XMM { + msubf xmml, xmml, xmmlm, size=4, ext=1 +}; + +def macroop SUBSS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubf xmml, xmml, ufp1, size=4, ext=1 +}; + +def macroop SUBSS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubf xmml, xmml, ufp1, size=4, ext=1 +}; + +def macroop SUBSD_XMM_XMM { + msubf xmml, xmml, xmmlm, size=8, ext=1 +}; + +def macroop SUBSD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubf xmml, xmml, ufp1, size=8, ext=1 +}; + +def macroop SUBSD_XMM_P 
{ + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubf xmml, xmml, ufp1, size=8, ext=1 +}; + +def macroop SUBPS_XMM_XMM { + msubf xmml, xmml, xmmlm, size=4, ext=0 + msubf xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop SUBPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubf xmml, xmml, ufp1, size=4, ext=0 + msubf xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop SUBPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubf xmml, xmml, ufp1, size=4, ext=0 + msubf xmmh, xmmh, ufp2, size=4, ext=0 +}; -def macroop SUBSD_R_R { - subfp xmml, xmml, xmmlm +def macroop SUBPD_XMM_XMM { + msubf xmml, xmml, xmmlm, size=8, ext=0 + msubf xmmh, xmmh, xmmhm, size=8, ext=0 }; -def macroop SUBSD_R_M { - ldfp ufp1, seg, sib, disp - subfp xmml, xmml, ufp1 +def macroop SUBPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubf xmml, xmml, ufp1, size=8, ext=0 + msubf xmmh, xmmh, ufp2, size=8, ext=0 }; -def macroop SUBSD_R_P { +def macroop SUBPD_XMM_P { rdip t7 - ldfp ufp1, seg, riprel, disp - subfp xmml, xmml, ufp1 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubf xmml, xmml, ufp1, size=8, ext=0 + msubf xmmh, xmmh, ufp2, size=8, ext=0 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py index 705f64093..09c34600b 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_mask.py @@ -54,8 +54,73 @@ # Authors: Gabe Black microcode = ''' -# CMPPS -# CMPPD -# CMPSS -# CMPSD +def macroop CMPPS_XMM_XMM_I { + mcmpf2r xmml, xmml, xmmlm, size=4, ext="IMMEDIATE & 
mask(3)" + mcmpf2r xmmh, xmmh, xmmhm, size=4, ext="IMMEDIATE & mask(3)" +}; + +def macroop CMPPS_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE & mask(3)" + mcmpf2r xmmh, xmmh, ufp2, size=4, ext="IMMEDIATE & mask(3)" +}; + +def macroop CMPPS_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE & mask(3)" + mcmpf2r xmmh, xmmh, ufp2, size=4, ext="IMMEDIATE & mask(3)" +}; + +def macroop CMPPD_XMM_XMM_I { + mcmpf2r xmml, xmml, xmmlm, size=8, ext="IMMEDIATE & mask(3)" + mcmpf2r xmmh, xmmh, xmmhm, size=8, ext="IMMEDIATE & mask(3)" +}; + +def macroop CMPPD_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE & mask(3)" + mcmpf2r xmmh, xmmh, ufp2, size=8, ext="IMMEDIATE & mask(3)" +}; + +def macroop CMPPD_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE & mask(3)" + mcmpf2r xmmh, xmmh, ufp2, size=8, ext="IMMEDIATE & mask(3)" +}; + +def macroop CMPSS_XMM_XMM_I { + mcmpf2r xmml, xmml, xmmlm, size=4, ext="IMMEDIATE | 0x8" +}; + +def macroop CMPSS_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE | 0x8" +}; + +def macroop CMPSS_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + mcmpf2r xmml, xmml, ufp1, size=4, ext="IMMEDIATE | 0x8" +}; + +def macroop CMPSD_XMM_XMM_I { + mcmpf2r xmml, xmml, xmmlm, size=8, ext="IMMEDIATE | 0x8" +}; + +def macroop CMPSD_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE | 0x8" +}; + +def macroop CMPSD_XMM_P_I { + rdip t7 + 
ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + mcmpf2r xmml, xmml, ufp1, size=8, ext="IMMEDIATE | 0x8" +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py index 8ef363333..17c97662c 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py @@ -54,12 +54,143 @@ # Authors: Gabe Black microcode = ''' -# MAXPS -# MAXPD -# MAXSS -# MAXSD -# MINPS -# MINPD -# MINSS -# MINSD +def macroop MINPS_XMM_XMM { + mminf xmml, xmml, xmmlm, ext=0, size=4 + mminf xmmh, xmmh, xmmhm, ext=0, size=4 +}; + +def macroop MINPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mminf xmml, xmml, ufp1, ext=0, size=4 + mminf xmmh, xmmh, ufp2, ext=0, size=4 +}; + +def macroop MINPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mminf xmml, xmml, ufp1, ext=0, size=4 + mminf xmmh, xmmh, ufp2, ext=0, size=4 +}; + +def macroop MINPD_XMM_XMM { + mminf xmml, xmml, xmmlm, ext=0, size=8 + mminf xmmh, xmmh, xmmhm, ext=0, size=8 +}; + +def macroop MINPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mminf xmml, xmml, ufp1, ext=0, size=8 + mminf xmmh, xmmh, ufp2, ext=0, size=8 +}; + +def macroop MINPD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mminf xmml, xmml, ufp1, ext=0, size=8 + mminf xmmh, xmmh, ufp2, ext=0, size=8 +}; + +def macroop MINSS_XMM_XMM { + mminf xmml, xmml, xmmlm, ext=1, size=4 +}; + +def macroop MINSS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + mminf xmml, xmml, ufp1, 
ext=1, size=4 +}; + +def macroop MINSS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + mminf xmml, xmml, ufp1, ext=1, size=4 +}; + +def macroop MINSD_XMM_XMM { + mminf xmml, xmml, xmmlm, ext=1, size=8 +}; + +def macroop MINSD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + mminf xmml, xmml, ufp1, ext=1, size=8 +}; + +def macroop MINSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + mminf xmml, xmml, ufp1, ext=1, size=8 +}; + +def macroop MAXPS_XMM_XMM { + mmaxf xmml, xmml, xmmlm, ext=0, size=4 + mmaxf xmmh, xmmh, xmmhm, ext=0, size=4 +}; + +def macroop MAXPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxf xmml, xmml, ufp1, ext=0, size=4 + mmaxf xmmh, xmmh, ufp2, ext=0, size=4 +}; + +def macroop MAXPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxf xmml, xmml, ufp1, ext=0, size=4 + mmaxf xmmh, xmmh, ufp2, ext=0, size=4 +}; + +def macroop MAXPD_XMM_XMM { + mmaxf xmml, xmml, xmmlm, ext=0, size=8 + mmaxf xmmh, xmmh, xmmhm, ext=0, size=8 +}; + +def macroop MAXPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxf xmml, xmml, ufp1, ext=0, size=8 + mmaxf xmmh, xmmh, ufp2, ext=0, size=8 +}; + +def macroop MAXPD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxf xmml, xmml, ufp1, ext=0, size=8 + mmaxf xmmh, xmmh, ufp2, ext=0, size=8 +}; + +def macroop MAXSS_XMM_XMM { + mmaxf xmml, xmml, xmmlm, ext=1, size=4 +}; + +def macroop MAXSS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + mmaxf xmml, xmml, ufp1, ext=1, size=4 +}; + +def macroop MAXSS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + mmaxf xmml, xmml, ufp1, ext=1, size=4 +}; + +def macroop MAXSD_XMM_XMM { + mmaxf 
xmml, xmml, xmmlm, ext=1, size=8 +}; + +def macroop MAXSD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + mmaxf xmml, xmml, ufp1, ext=1, size=8 +}; + +def macroop MAXSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + mmaxf xmml, xmml, ufp1, ext=1, size=8 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py index e05018495..50afddf7b 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_rflags.py @@ -54,22 +54,63 @@ # Authors: Gabe Black microcode = ''' -# COMISS -# COMISD -# UCOMISS +def macroop UCOMISS_XMM_XMM { + mcmpf2rf xmml, xmmlm, size=4 +}; + +def macroop UCOMISS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpf2rf xmml, ufp1, size=4 +}; + +def macroop UCOMISS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpf2rf xmml, ufp1, size=4 +}; + +def macroop UCOMISD_XMM_XMM { + mcmpf2rf xmml, xmmlm, size=8 +}; + +def macroop UCOMISD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpf2rf xmml, ufp1, size=8 +}; + +def macroop UCOMISD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpf2rf xmml, ufp1, size=8 +}; + +def macroop COMISS_XMM_XMM { + mcmpf2rf xmml, xmmlm, size=4 +}; + +def macroop COMISS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpf2rf xmml, ufp1, size=4 +}; + +def macroop COMISS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpf2rf xmml, ufp1, size=4 +}; -def macroop UCOMISD_R_R { - compfp xmml, xmmlm +def macroop COMISD_XMM_XMM { + mcmpf2rf xmml, xmmlm, size=8 }; -def macroop UCOMISD_R_M { - ldfp ufp1, seg, sib, disp - compfp xmml, ufp1 +def macroop COMISD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpf2rf xmml, ufp1, size=8 }; -def macroop UCOMISD_R_P { +def macroop COMISD_XMM_P { rdip t7 - 
ldfp ufp1, seg, riprel, disp - compfp xmml, ufp1 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpf2rf xmml, ufp1, size=8 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py index 2de33efa2..1c36f7e45 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_floating_point.py @@ -54,8 +54,74 @@ # Authors: Gabe Black microcode = ''' -# CVTPS2PD -# CVTPD2PS -# CVTSS2SD -# CVTSD2SS +def macroop CVTSS2SD_XMM_XMM { + cvtf2f xmml, xmmlm, destSize=8, srcSize=4, ext=1 +}; + +def macroop CVTSS2SD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=1 +}; + +def macroop CVTSS2SD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=1 +}; + +def macroop CVTSD2SS_XMM_XMM { + cvtf2f xmml, xmmlm, destSize=4, srcSize=8, ext=1 +}; + +def macroop CVTSD2SS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=1 +}; + +def macroop CVTSD2SS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=1 +}; + +def macroop CVTPS2PD_XMM_XMM { + cvtf2f xmmh, xmmlm, destSize=8, srcSize=4, ext=2 + cvtf2f xmml, xmmlm, destSize=8, srcSize=4, ext=0 +}; + +def macroop CVTPS2PD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2f xmmh, ufp1, destSize=8, srcSize=4, ext=2 + cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=0 +}; + +def macroop CVTPS2PD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvtf2f xmmh, ufp1, destSize=8, srcSize=4, ext=2 + cvtf2f xmml, ufp1, destSize=8, srcSize=4, ext=0 +}; + +def macroop CVTPD2PS_XMM_XMM { + cvtf2f xmml, xmmlm, destSize=4, 
srcSize=8, ext=0 + cvtf2f xmml, xmmhm, destSize=4, srcSize=8, ext=2 + lfpimm xmmh, 0 +}; + +def macroop CVTPD2PS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=0 + cvtf2f xmml, ufp2, destSize=4, srcSize=8, ext=2 + lfpimm xmmh, 0 +}; + +def macroop CVTPD2PS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + cvtf2f xmml, ufp1, destSize=4, srcSize=8, ext=0 + cvtf2f xmml, ufp2, destSize=4, srcSize=8, ext=2 + lfpimm xmmh, 0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py index 8d5f4e659..16abd96f4 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_gpr_integer.py @@ -54,22 +54,75 @@ # Authors: Gabe Black microcode = ''' -# CVTSS2SI -# CVTSD2SI -# CVTTSS2SI +def macroop CVTSS2SI_R_XMM { + cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=(1 | 4) + mov2int reg, ufp1, size=dsz +}; + +def macroop CVTSS2SI_R_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=(1 | 4) + mov2int reg, ufp1, size=dsz +}; + +def macroop CVTSS2SI_R_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=(1 | 4) + mov2int reg, ufp1, size=dsz +}; + +def macroop CVTSD2SI_R_XMM { + cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=(1 | 4) + mov2int reg, ufp1, size=dsz +}; + +def macroop CVTSD2SI_R_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=(1 | 4) + mov2int reg, ufp1, size=dsz +}; + +def macroop CVTSD2SI_R_P { + rdip t7 + ldfp ufp1, seg, 
riprel, disp, dataSize=8 + cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=(1 | 4) + mov2int reg, ufp1, size=dsz +}; + +def macroop CVTTSS2SI_R_XMM { + cvtf2i ufp1, xmmlm, srcSize=4, destSize=dsz, ext=1 + mov2int reg, ufp1, size=dsz +}; + +def macroop CVTTSS2SI_R_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=1 + mov2int reg, ufp1, size=dsz +}; + +def macroop CVTTSS2SI_R_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvtf2i ufp1, ufp1, srcSize=4, destSize=dsz, ext=1 + mov2int reg, ufp1, size=dsz +}; -def macroop CVTTSD2SI_R_R { - cvtf_d2i reg, xmmlm +def macroop CVTTSD2SI_R_XMM { + cvtf2i ufp1, xmmlm, srcSize=8, destSize=dsz, ext=1 + mov2int reg, ufp1, size=dsz }; def macroop CVTTSD2SI_R_M { - ldfp ufp1, seg, sib, disp - cvtf_d2i reg, ufp1 + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=1 + mov2int reg, ufp1, size=dsz }; def macroop CVTTSD2SI_R_P { rdip t7 - ldfp ufp1, seg, riprel, disp - cvtf_d2i reg, ufp1 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvtf2i ufp1, ufp1, srcSize=8, destSize=dsz, ext=1 + mov2int reg, ufp1, size=dsz }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py index af579a46f..900e91c99 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_mmx_integer.py @@ -54,8 +54,73 @@ # Authors: Gabe Black microcode = ''' -# CVTPS2PI -# CVTPD2PI -# CVTTPS2PI -# CVTTPD2PI +def macroop CVTPS2PI_MMX_XMM { + cvtf2i mmx, xmmlm, size=4, ext=4 +}; + +def macroop CVTPS2PI_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2i mmx, ufp1, size=4, ext=4 +}; + +def macroop CVTPS2PI_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 
+ cvtf2i mmx, ufp1, size=4, ext=4 +}; + +def macroop CVTPD2PI_MMX_XMM { + cvtf2i mmx, xmmlm, srcSize=8, destSize=4, ext=4 + cvtf2i mmx, xmmhm, srcSize=8, destSize=4, ext=(4 | 2) +}; + +def macroop CVTPD2PI_MMX_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + cvtf2i mmx, ufp1, srcSize=8, destSize=4, ext=4 + cvtf2i mmx, ufp2, srcSize=8, destSize=4, ext=(4 | 2) +}; + +def macroop CVTPD2PI_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + cvtf2i mmx, ufp1, srcSize=8, destSize=4, ext=4 + cvtf2i mmx, ufp2, srcSize=8, destSize=4, ext=(4 | 2) +}; + +def macroop CVTTPS2PI_MMX_XMM { + cvtf2i mmx, xmmlm, size=4, ext=0 +}; + +def macroop CVTTPS2PI_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvtf2i mmx, ufp1, size=4, ext=0 +}; + +def macroop CVTTPS2PI_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvtf2i mmx, ufp1, size=4, ext=0 +}; + +def macroop CVTTPD2PI_MMX_XMM { + cvtf2i mmx, xmmlm, srcSize=8, destSize=4, ext=0 + cvtf2i mmx, xmmhm, srcSize=8, destSize=4, ext=2 +}; + +def macroop CVTTPD2PI_MMX_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + cvtf2i mmx, ufp1, srcSize=8, destSize=4, ext=0 + cvtf2i mmx, ufp2, srcSize=8, destSize=4, ext=2 +}; + +def macroop CVTTPD2PI_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + cvtf2i mmx, ufp1, srcSize=8, destSize=4, ext=0 + cvtf2i mmx, ufp2, srcSize=8, destSize=4, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py index 683e2808d..041f891ef 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py +++ 
b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/convert_floating_point_to_xmm_integer.py @@ -54,8 +54,89 @@ # Authors: Gabe Black microcode = ''' -# CVTPS2DQ -# CVTPD2DQ -# CVTTPS2DQ -# CVTTPD2DQ +def macroop CVTPS2DQ_XMM_XMM { + cvtf2i xmml, xmmlm, size=4, ext=4 + cvtf2i xmmh, xmmhm, size=4, ext=4 +}; + +def macroop CVTPS2DQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + cvtf2i xmml, ufp1, size=4, ext=4 + cvtf2i xmmh, ufp2, size=4, ext=4 +}; + +def macroop CVTPS2DQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + cvtf2i xmml, ufp1, size=4, ext=4 + cvtf2i xmmh, ufp2, size=4, ext=4 +}; + +def macroop CVTPD2DQ_XMM_XMM { + cvtf2i xmml, xmmlm, srcSize=8, destSize=4, ext=4 + cvtf2i xmml, xmmhm, srcSize=8, destSize=4, ext=(4 | 2) + lfpimm xmmh, 0 +}; + +def macroop CVTPD2DQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + cvtf2i xmml, ufp1, srcSize=8, destSize=4, ext=4 + cvtf2i xmml, ufp2, srcSize=8, destSize=4, ext=(4 | 2) + lfpimm xmmh, 0 +}; + +def macroop CVTPD2DQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + cvtf2i xmml, ufp1, srcSize=8, destSize=4, ext=4 + cvtf2i xmml, ufp2, srcSize=8, destSize=4, ext=(4 | 2) + lfpimm xmmh, 0 +}; + +def macroop CVTTPS2DQ_XMM_XMM { + cvtf2i xmml, xmmlm, size=4, ext=0 + cvtf2i xmmh, xmmhm, size=4, ext=0 +}; + +def macroop CVTTPS2DQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + cvtf2i xmml, ufp1, size=4, ext=0 + cvtf2i xmmh, ufp2, size=4, ext=0 +}; + +def macroop CVTTPS2DQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + cvtf2i xmml, ufp1, size=4, ext=0 + cvtf2i xmmh, ufp2, size=4, 
ext=0 +}; + +def macroop CVTTPD2DQ_XMM_XMM { + cvtf2i xmml, xmmlm, srcSize=8, destSize=4, ext=0 + cvtf2i xmml, xmmhm, srcSize=8, destSize=4, ext=2 + lfpimm xmmh, 0 +}; + +def macroop CVTTPD2DQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + cvtf2i xmml, ufp1, srcSize=8, destSize=4, ext=0 + cvtf2i xmml, ufp2, srcSize=8, destSize=4, ext=2 + lfpimm xmmh, 0 +}; + +def macroop CVTTPD2DQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + cvtf2i xmml, ufp1, srcSize=8, destSize=4, ext=0 + cvtf2i xmml, ufp2, srcSize=8, destSize=4, ext=2 + lfpimm xmmh, 0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py index 868d72b06..7b09e796a 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/shuffle.py @@ -54,6 +54,45 @@ # Authors: Gabe Black microcode = ''' -# SHUFPS -# SHUFPD +def macroop SHUFPS_XMM_XMM_I { + shuffle ufp1, xmml, xmmh, size=4, ext="IMMEDIATE" + shuffle xmmh, xmmlm, xmmhm, size=4, ext="IMMEDIATE >> 4" + movfp xmml, ufp1, dataSize=8 +}; + +def macroop SHUFPS_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + shuffle xmml, xmml, xmmh, size=4, ext="IMMEDIATE" + shuffle xmmh, ufp1, ufp2, size=4, ext="IMMEDIATE >> 4" +}; + +def macroop SHUFPS_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + shuffle xmml, xmml, xmmh, size=4, ext="IMMEDIATE" + shuffle xmmh, ufp1, ufp2, size=4, ext="IMMEDIATE >> 4" +}; + +def macroop SHUFPD_XMM_XMM_I { + shuffle ufp1, xmml, xmmh, size=8, ext="IMMEDIATE" + shuffle xmmh, xmmlm, xmmhm, size=8, ext="IMMEDIATE >> 1" + movfp xmml, ufp1, 
dataSize=8 +}; + +def macroop SHUFPD_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + shuffle xmml, xmml, xmmh, size=8, ext="IMMEDIATE" + shuffle xmmh, ufp1, ufp2, size=8, ext="IMMEDIATE >> 1" +}; + +def macroop SHUFPD_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + shuffle xmml, xmml, xmmh, size=8, ext="IMMEDIATE" + shuffle xmmh, ufp1, ufp2, size=8, ext="IMMEDIATE >> 1" +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py index 93112f7d6..0bf654b15 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/unpack_and_interleave.py @@ -54,8 +54,74 @@ # Authors: Gabe Black microcode = ''' -# UNPCKHPS -# UNPCKHPD -# UNPCKLPS -# UNPCKLPD +def macroop UNPCKLPS_XMM_XMM { + unpack xmmh, xmml, xmmlm, ext=1, size=4 + unpack xmml, xmml, xmmlm, ext=0, size=4 +}; + +def macroop UNPCKLPS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack xmmh, xmml, ufp1, ext=1, size=4 + unpack xmml, xmml, ufp1, ext=0, size=4 +}; + +def macroop UNPCKLPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack xmmh, xmml, ufp1, ext=1, size=4 + unpack xmml, xmml, ufp1, ext=0, size=4 +}; + +def macroop UNPCKLPD_XMM_XMM { + movfp xmmh, xmmlm +}; + +def macroop UNPCKLPD_XMM_M { + ldfp xmmh, seg, sib, disp, dataSize=8 +}; + +def macroop UNPCKLPD_XMM_P { + rdip t7 + ldfp xmmh, seg, riprel, disp, dataSize=8 +}; + +def macroop UNPCKHPS_XMM_XMM { + unpack xmml, xmmh, xmmhm, ext=0, size=4 + unpack xmmh, xmmh, xmmhm, ext=1, size=4 +}; + +def macroop UNPCKHPS_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + unpack xmml, xmmh, ufp1, ext=0, size=4 + 
unpack xmmh, xmmh, ufp1, ext=1, size=4 +}; + +def macroop UNPCKHPS_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + unpack xmml, xmmh, ufp1, ext=0, size=4 + unpack xmmh, xmmh, ufp1, ext=1, size=4 +}; + +def macroop UNPCKHPD_XMM_XMM { + movfp xmml, xmmh + movfp xmmh, xmmhm +}; + +def macroop UNPCKHPD_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + movfp xmml, xmmh + movfp xmmh, ufp1 +}; + +def macroop UNPCKHPD_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + movfp xmml, xmmh + movfp xmmh, ufp1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py index 76279fc70..1f4044bde 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py @@ -54,92 +54,248 @@ # Authors: Gabe Black microcode = ''' -def macroop MOVAPS_R_M { +def macroop MOVAPS_XMM_M { # Check low address. ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 ldfp xmml, seg, sib, disp, dataSize=8 }; -def macroop MOVAPS_R_P { +def macroop MOVAPS_XMM_P { rdip t7 # Check low address. ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 ldfp xmml, seg, riprel, disp, dataSize=8 }; -def macroop MOVAPS_M_R { +def macroop MOVAPS_M_XMM { # Check low address. stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 stfp xmml, seg, sib, disp, dataSize=8 }; -def macroop MOVAPS_P_R { +def macroop MOVAPS_P_XMM { rdip t7 # Check low address. stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 stfp xmml, seg, riprel, disp, dataSize=8 }; -def macroop MOVAPS_R_R { +def macroop MOVAPS_XMM_XMM { # Check low address. 
movfp xmml, xmmlm, dataSize=8 movfp xmmh, xmmhm, dataSize=8 }; -# MOVAPD -# MOVUPS -# MOVUPD -# MOVHPS -# MOVHPD -# MOVLPS +def macroop MOVAPD_XMM_XMM { + movfp xmml, xmmlm, dataSize=8 + movfp xmmh, xmmhm, dataSize=8 +}; + +def macroop MOVAPD_XMM_M { + ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVAPD_XMM_P { + rdip t7 + ldfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVAPD_M_XMM { + stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVAPD_P_XMM { + rdip t7 + stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVUPS_XMM_XMM { + movfp xmml, xmmlm, dataSize=8 + movfp xmmh, xmmhm, dataSize=8 +}; + +def macroop MOVUPS_XMM_M { + ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVUPS_XMM_P { + rdip t7 + ldfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; -def macroop MOVLPD_R_M { +def macroop MOVUPS_M_XMM { + stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVUPS_P_XMM { + rdip t7 + stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVUPD_XMM_XMM { + movfp xmml, xmmlm, dataSize=8 + movfp xmmh, xmmhm, dataSize=8 +}; + +def macroop MOVUPD_XMM_M { + ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVUPD_XMM_P { + rdip t7 + ldfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVUPD_M_XMM { + stfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 + stfp xmmh, 
seg, sib, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVUPD_P_XMM { + rdip t7 + stfp xmml, seg, riprel, "DISPLACEMENT", dataSize=8 + stfp xmmh, seg, riprel, "DISPLACEMENT + 8", dataSize=8 +}; + +def macroop MOVHPS_XMM_M { + ldfp xmmh, seg, sib, disp, dataSize=8 +}; + +def macroop MOVHPS_XMM_P { + rdip t7 + ldfp xmmh, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVHPS_M_XMM { + stfp xmmh, seg, sib, disp, dataSize=8 +}; + +def macroop MOVHPS_P_XMM { + rdip t7 + # MOVHPS moves 64 bits between xmmh and the operand address itself (no +8); xmml is untouched. + stfp xmmh, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVHPD_XMM_M { + ldfp xmmh, seg, sib, disp, dataSize=8 +}; + +def macroop MOVHPD_XMM_P { + rdip t7 + ldfp xmmh, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVHPD_M_XMM { + stfp xmmh, seg, sib, disp, dataSize=8 +}; + +def macroop MOVHPD_P_XMM { + rdip t7 + # MOVHPD moves 64 bits between xmmh and the operand address itself (no +8); xmml is untouched. + stfp xmmh, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVLPS_XMM_M { ldfp xmml, seg, sib, disp, dataSize=8 }; -def macroop MOVLPD_R_P { +def macroop MOVLPS_XMM_P { rdip t7 ldfp xmml, seg, riprel, disp, dataSize=8 }; -def macroop MOVLPD_M_R { +def macroop MOVLPS_M_XMM { stfp xmml, seg, sib, disp, dataSize=8 }; -def macroop MOVLPD_P_R { +def macroop MOVLPS_P_XMM { rdip t7 stfp xmml, seg, riprel, disp, dataSize=8 }; -def macroop MOVLPD_R_R { - movfp xmml, xmmlm, dataSize=8 +def macroop MOVLPD_XMM_M { + ldfp xmml, seg, sib, disp, dataSize=8 +}; + +def macroop MOVLPD_XMM_P { + rdip t7 + ldfp xmml, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVLPD_M_XMM { + stfp xmml, seg, sib, disp, dataSize=8 +}; + +def macroop MOVLPD_P_XMM { + rdip t7 + stfp xmml, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVHLPS_XMM_XMM { + movfp xmml, xmmhm, dataSize=8 }; -# MOVHLPS -# MOVLHPS -# MOVSS +def macroop MOVLHPS_XMM_XMM { + movfp xmmh, xmmlm, dataSize=8 +}; + +def macroop
MOVSS_XMM_XMM { + movfp xmml, xmmlm, dataSize=4 +}; + +def macroop MOVSS_XMM_M { + lfpimm xmml, 0 + lfpimm xmmh, 0 + ldfp xmml, seg, sib, disp, dataSize=4 +}; + +def macroop MOVSS_XMM_P { + rdip t7 + lfpimm xmml, 0 + lfpimm xmmh, 0 + ldfp xmml, seg, riprel, disp, dataSize=4 +}; + +def macroop MOVSS_M_XMM { + stfp xmml, seg, sib, disp, dataSize=4 +}; + +def macroop MOVSS_P_XMM { + rdip t7 + stfp xmml, seg, riprel, disp, dataSize=4 +}; -def macroop MOVSD_R_M { +def macroop MOVSD_XMM_M { - # Zero xmmh + lfpimm xmmh, 0 ldfp xmml, seg, sib, disp, dataSize=8 }; -def macroop MOVSD_R_P { +def macroop MOVSD_XMM_P { rdip t7 - # Zero xmmh + lfpimm xmmh, 0 ldfp xmml, seg, riprel, disp, dataSize=8 }; -def macroop MOVSD_M_R { +def macroop MOVSD_M_XMM { stfp xmml, seg, sib, disp, dataSize=8 }; -def macroop MOVSD_P_R { +def macroop MOVSD_P_XMM { rdip t7 stfp xmml, seg, riprel, disp, dataSize=8 }; -def macroop MOVSD_R_R { +def macroop MOVSD_XMM_XMM { movfp xmml, xmmlm, dataSize=8 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py index abd1c7327..5c5fb81d2 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_mask.py @@ -54,6 +54,15 @@ # Authors: Gabe Black microcode = ''' -# MOVMSKPS -# MOVMSKPD +def macroop MOVMSKPS_R_XMM { + limm reg, 0 + movsign reg, xmmlm, size=4, ext=0 + movsign reg, xmmhm, size=4, ext=1 +}; + +def macroop MOVMSKPD_R_XMM { + limm reg, 0 + movsign reg, xmmlm, size=8, ext=0 + movsign reg, xmmhm, size=8, ext=1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py index 096708393..539edfd74 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py +++
b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move_with_duplication.py @@ -54,7 +54,22 @@ # Authors: Gabe Black microcode = ''' -# MOVDDUP +def macroop MOVDDUP_XMM_XMM { + movfp xmmh, xmmlm, dataSize=8 + movfp xmml, xmmlm, dataSize=8 +}; + +def macroop MOVDDUP_XMM_M { + ldfp xmml, seg, sib, disp, dataSize=8 + movfp xmmh, xmml, dataSize=8 +}; + +def macroop MOVDDUP_XMM_P { + rdip t7 + ldfp xmml, seg, riprel, disp, dataSize=8 + movfp xmmh, xmml, dataSize=8 +}; + # MOVSLDUP # MOVSHDUP ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py b/src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py index 8d7d3ba25..e3eaf16a0 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/logical/andp.py @@ -54,8 +54,91 @@ # Authors: Gabe Black microcode = ''' -# ANDPS -# ANDPD -# ANDNPS -# ANDNPD +def macroop ANDPS_XMM_XMM { + mand xmml, xmml, xmmlm + mand xmmh, xmmh, xmmhm +}; + +def macroop ANDPS_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mand xmml, xmml, ufp1 + mand xmmh, xmmh, ufp2 +}; + +def macroop ANDPS_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mand xmml, xmml, ufp1 + mand xmmh, xmmh, ufp2 +}; + +def macroop ANDPD_XMM_XMM { + mand xmml, xmml, xmmlm + mand xmmh, xmmh, xmmhm +}; + +def macroop ANDPD_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mand xmml, xmml, ufp1 + mand xmmh, xmmh, ufp2 +}; + +def macroop ANDPD_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mand xmml, xmml, ufp1 + mand xmmh, xmmh, ufp2 +}; + +def macroop ANDNPS_XMM_XMM { + mandn xmml, xmml, xmmlm + mandn 
xmmh, xmmh, xmmhm +}; + +def macroop ANDNPS_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mandn xmml, xmml, ufp1 + mandn xmmh, xmmh, ufp2 +}; + +def macroop ANDNPS_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mandn xmml, xmml, ufp1 + mandn xmmh, xmmh, ufp2 +}; + +def macroop ANDNPD_XMM_XMM { + mandn xmml, xmml, xmmlm + mandn xmmh, xmmh, xmmhm +}; + +def macroop ANDNPD_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mandn xmml, xmml, ufp1 + mandn xmmh, xmmh, ufp2 +}; + +def macroop ANDNPD_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mandn xmml, xmml, ufp1 + mandn xmmh, xmmh, ufp2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py b/src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py index 974771e98..5c20db204 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/logical/exclusive_or.py @@ -54,25 +54,43 @@ # Authors: Gabe Black microcode = ''' -# XORPS +def macroop XORPD_XMM_XMM { + mxor xmml, xmml, xmmlm + mxor xmmh, xmmh, xmmhm +}; + +def macroop XORPD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mxor xmml, xmml, ufp1 + mxor xmmh, xmmh, ufp2 +}; + +def macroop XORPD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mxor xmml, xmml, ufp1 + mxor xmmh, xmmh, ufp2 +}; -def macroop XORPD_R_R { - xorfp xmml, xmml, xmmlm - xorfp xmmh, xmmh, xmmhm +def macroop XORPS_XMM_XMM { + mxor xmml, xmml, xmmlm + mxor xmmh, xmmh, xmmhm }; 
-def macroop XORPD_R_M { - ldfp ufp1, seg, sib, disp - ldfp ufp2, seg, sib, "DISPLACEMENT + 8" - xorfp xmml, xmml, ufp1 - xorfp xmmh, xmmh, ufp2 +def macroop XORPS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mxor xmml, xmml, ufp1 + mxor xmmh, xmmh, ufp2 }; -def macroop XORPD_R_P { +def macroop XORPS_XMM_P { rdip t7 - ldfp ufp1, seg, riprel, disp - ldfp ufp2, seg, riprel, "DISPLACEMENT + 8" - xorfp xmml, xmml, ufp1 - xorfp xmmh, xmmh, ufp2 + ldfp ufp1, seg, riprel, disp, dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mxor xmml, xmml, ufp1 + mxor xmmh, xmmh, ufp2 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py b/src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py index fba71c899..c99a713aa 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/logical/orp.py @@ -54,6 +54,47 @@ # Authors: Gabe Black microcode = ''' -# ORPS -# ORPD +def macroop ORPS_XMM_XMM { + mor xmml, xmml, xmmlm + mor xmmh, xmmh, xmmhm +}; + +def macroop ORPS_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mor xmml, xmml, ufp1 + mor xmmh, xmmh, ufp2 +}; + +def macroop ORPS_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mor xmml, xmml, ufp1 + mor xmmh, xmmh, ufp2 +}; + +def macroop ORPD_XMM_XMM { + mor xmml, xmml, xmmlm + mor xmmh, xmmh, xmmhm +}; + +def macroop ORPD_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mor xmml, xmml, ufp1 + mor xmmh, xmmh, ufp2 +}; + +def macroop ORPD_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mor 
xmml, xmml, ufp1 + mor xmmh, xmmh, ufp2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py index 235a4fc7b..05e2b80d5 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/addition.py @@ -54,12 +54,163 @@ # Authors: Gabe Black microcode = ''' -# PADDB -# PADDW -# PADDD -# PADDQ -# PADDSB -# PADDSW -# PADDUSB -# PADDUSW +def macroop PADDB_XMM_XMM { + maddi xmml, xmml, xmmlm, size=1, ext=0 + maddi xmmh, xmmh, xmmhm, size=1, ext=0 +}; + +def macroop PADDB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=1, ext=0 + maddi xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PADDB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=1, ext=0 + maddi xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PADDW_XMM_XMM { + maddi xmml, xmml, xmmlm, size=2, ext=0 + maddi xmmh, xmmh, xmmhm, size=2, ext=0 +}; + +def macroop PADDW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=2, ext=0 + maddi xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PADDW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=2, ext=0 + maddi xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PADDD_XMM_XMM { + maddi xmml, xmml, xmmlm, size=4, ext=0 + maddi xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop PADDD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=4, ext=0 + maddi xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PADDD_XMM_P { + rdip t7 
+ ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=4, ext=0 + maddi xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PADDQ_XMM_XMM { + maddi xmml, xmml, xmmlm, size=8, ext=0 + maddi xmmh, xmmh, xmmhm, size=8, ext=0 +}; + +def macroop PADDQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=8, ext=0 + maddi xmmh, xmmh, ufp2, size=8, ext=0 +}; + +def macroop PADDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=8, ext=0 + maddi xmmh, xmmh, ufp2, size=8, ext=0 +}; + +def macroop PADDSB_XMM_XMM { + maddi xmml, xmml, xmmlm, size=1, ext=4 + maddi xmmh, xmmh, xmmhm, size=1, ext=4 +}; + +def macroop PADDSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=1, ext=4 + maddi xmmh, xmmh, ufp2, size=1, ext=4 +}; + +def macroop PADDSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=1, ext=4 + maddi xmmh, xmmh, ufp2, size=1, ext=4 +}; + +def macroop PADDSW_XMM_XMM { + maddi xmml, xmml, xmmlm, size=2, ext=4 + maddi xmmh, xmmh, xmmhm, size=2, ext=4 +}; + +def macroop PADDSW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=2, ext=4 + maddi xmmh, xmmh, ufp2, size=2, ext=4 +}; + +def macroop PADDSW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=2, ext=4 + maddi xmmh, xmmh, ufp2, size=2, ext=4 +}; + +def macroop PADDUSB_XMM_XMM { + maddi xmml, xmml, xmmlm, size=1, ext=2 + maddi xmmh, xmmh, 
xmmhm, size=1, ext=2 +}; + +def macroop PADDUSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=1, ext=2 + maddi xmmh, xmmh, ufp2, size=1, ext=2 +}; + +def macroop PADDUSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=1, ext=2 + maddi xmmh, xmmh, ufp2, size=1, ext=2 +}; + +def macroop PADDUSW_XMM_XMM { + maddi xmml, xmml, xmmlm, size=2, ext=2 + maddi xmmh, xmmh, xmmhm, size=2, ext=2 +}; + +def macroop PADDUSW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=2, ext=2 + maddi xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PADDUSW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + maddi xmml, xmml, ufp1, size=2, ext=2 + maddi xmmh, xmmh, ufp2, size=2, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py index 9bda3371f..017443e55 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/average.py @@ -54,6 +54,63 @@ # Authors: Gabe Black microcode = ''' -# PAVGB -# PAVGW +def macroop PAVGB_XMM_XMM { + mavg xmml, xmml, xmmlm, size=1, ext=0 + mavg xmmh, xmmh, xmmhm, size=1, ext=0 +}; + +def macroop PAVGB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mavg xmml, xmml, ufp1, size=1, ext=0 + mavg xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PAVGB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mavg xmml, xmml, ufp1, size=1, ext=0 + mavg xmmh, xmmh, ufp2, size=1, ext=0 +}; + 
+def macroop PAVGUSB_XMM_XMM { + mavg xmml, xmml, xmmlm, size=1, ext=0 + mavg xmmh, xmmh, xmmhm, size=1, ext=0 +}; + +def macroop PAVGUSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mavg xmml, xmml, ufp1, size=1, ext=0 + mavg xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PAVGUSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mavg xmml, xmml, ufp1, size=1, ext=0 + mavg xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PAVGW_XMM_XMM { + mavg xmml, xmml, xmmlm, size=2, ext=0 + mavg xmmh, xmmh, xmmhm, size=2, ext=0 +}; + +def macroop PAVGW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mavg xmml, xmml, ufp1, size=2, ext=0 + mavg xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PAVGW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mavg xmml, xmml, ufp1, size=2, ext=0 + mavg xmmh, xmmh, ufp2, size=2, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py index 3e7345a9f..a5d90c6b2 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py @@ -54,8 +54,83 @@ # Authors: Gabe Black microcode = ''' -# PMULHW -# PMULLW -# PMULHUW -# PMULUDQ +def macroop PMULHW_XMM_XMM { + mmuli xmml, xmml, xmmlm, size=2, ext=(0x2 | 0x8) + mmuli xmmh, xmmh, xmmhm, size=2, ext=(0x2 | 0x8) +}; + +def macroop PMULHW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=2, ext=(0x2 | 0x8) + mmuli xmmh, xmmh, ufp2, size=2, ext=(0x2 | 0x8) +}; + +def macroop PMULHW_XMM_P { + rdip t7 + ldfp ufp1, seg, 
riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=2, ext=(0x2 | 0x8) + mmuli xmmh, xmmh, ufp2, size=2, ext=(0x2 | 0x8) +}; + +def macroop PMULLW_XMM_XMM { + mmuli xmml, xmml, xmmlm, size=2, ext=2 + mmuli xmmh, xmmh, xmmhm, size=2, ext=2 +}; + +def macroop PMULLW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=2, ext=2 + mmuli xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PMULLW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=2, ext=2 + mmuli xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PMULHUW_XMM_XMM { + mmuli xmml, xmml, xmmlm, size=2, ext=8 + mmuli xmmh, xmmh, xmmhm, size=2, ext=8 +}; + +def macroop PMULHUW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=2, ext=8 + mmuli xmmh, xmmh, ufp2, size=2, ext=8 +}; + +def macroop PMULHUW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=2, ext=8 + mmuli xmmh, xmmh, ufp2, size=2, ext=8 +}; + +def macroop PMULUDQ_XMM_XMM { + mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=1 + mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=1 +}; + +def macroop PMULUDQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=1 + mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=1 +}; + +def macroop PMULUDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=1 + mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=1 
+}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py index 40b38867b..f157d165f 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiply_add.py @@ -54,5 +54,35 @@ # Authors: Gabe Black microcode = ''' -# PMADDWD +def macroop PMADDWD_XMM_XMM { + mmuli ufp3, xmml, xmmlm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, xmml, xmmlm, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi xmml, ufp3, ufp4, size=4, ext=0 + mmuli ufp3, xmmh, xmmhm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, xmmh, xmmhm, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi xmmh, ufp3, ufp4, size=4, ext=0 +}; + +def macroop PMADDWD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi xmml, ufp3, ufp4, size=4, ext=0 + mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi xmmh, ufp3, ufp4, size=4, ext=0 +}; + +def macroop PMADDWD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli ufp3, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, xmml, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi xmml, ufp3, ufp4, size=4, ext=0 + mmuli ufp3, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, xmmh, ufp2, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi xmmh, ufp3, ufp4, size=4, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py index 44781eb55..fdfb08667 100644 --- 
a/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/subtraction.py @@ -54,12 +54,163 @@ # Authors: Gabe Black microcode = ''' -# PSUBB -# PSUBW -# PSUBD -# PSUBQ -# PSUBSB -# PSUBSW -# PSUBUSB -# PSUBUSW +def macroop PSUBB_XMM_XMM { + msubi xmml, xmml, xmmlm, size=1, ext=0 + msubi xmmh, xmmh, xmmhm, size=1, ext=0 +}; + +def macroop PSUBB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=1, ext=0 + msubi xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PSUBB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=1, ext=0 + msubi xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PSUBW_XMM_XMM { + msubi xmml, xmml, xmmlm, size=2, ext=0 + msubi xmmh, xmmh, xmmhm, size=2, ext=0 +}; + +def macroop PSUBW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=2, ext=0 + msubi xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PSUBW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=2, ext=0 + msubi xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PSUBD_XMM_XMM { + msubi xmml, xmml, xmmlm, size=4, ext=0 + msubi xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop PSUBD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=4, ext=0 + msubi xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PSUBD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=4, ext=0 + msubi xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop 
PSUBQ_XMM_XMM { + msubi xmml, xmml, xmmlm, size=8, ext=0 + msubi xmmh, xmmh, xmmhm, size=8, ext=0 +}; + +def macroop PSUBQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=8, ext=0 + msubi xmmh, xmmh, ufp2, size=8, ext=0 +}; + +def macroop PSUBQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=8, ext=0 + msubi xmmh, xmmh, ufp2, size=8, ext=0 +}; + +def macroop PSUBSB_XMM_XMM { + msubi xmml, xmml, xmmlm, size=1, ext=4 + msubi xmmh, xmmh, xmmhm, size=1, ext=4 +}; + +def macroop PSUBSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=1, ext=4 + msubi xmmh, xmmh, ufp2, size=1, ext=4 +}; + +def macroop PSUBSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=1, ext=4 + msubi xmmh, xmmh, ufp2, size=1, ext=4 +}; + +def macroop PSUBSW_XMM_XMM { + msubi xmml, xmml, xmmlm, size=2, ext=4 + msubi xmmh, xmmh, xmmhm, size=2, ext=4 +}; + +def macroop PSUBSW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=2, ext=4 + msubi xmmh, xmmh, ufp2, size=2, ext=4 +}; + +def macroop PSUBSW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=2, ext=4 + msubi xmmh, xmmh, ufp2, size=2, ext=4 +}; + +def macroop PSUBUSB_XMM_XMM { + msubi xmml, xmml, xmmlm, size=1, ext=2 + msubi xmmh, xmmh, xmmhm, size=1, ext=2 +}; + +def macroop PSUBUSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=1, ext=2 + msubi xmmh, 
xmmh, ufp2, size=1, ext=2 +}; + +def macroop PSUBUSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=1, ext=2 + msubi xmmh, xmmh, ufp2, size=1, ext=2 +}; + +def macroop PSUBUSW_XMM_XMM { + msubi xmml, xmml, xmmlm, size=2, ext=2 + msubi xmmh, xmmh, xmmhm, size=2, ext=2 +}; + +def macroop PSUBUSW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=2, ext=2 + msubi xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PSUBUSW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msubi xmml, xmml, ufp1, size=2, ext=2 + msubi xmmh, xmmh, ufp2, size=2, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py index df1ca2301..ef3758e49 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/sum_of_absolute_differences.py @@ -54,5 +54,23 @@ # Authors: Gabe Black microcode = ''' -# PSADBW +def macroop PSADBW_XMM_XMM { + msad xmml, xmml, xmmlm, srcSize=1, destSize=2 + msad xmmh, xmmh, xmmhm, srcSize=1, destSize=2 +}; + +def macroop PSADBW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + msad xmml, xmml, ufp1, srcSize=1, destSize=2 + msad xmmh, xmmh, ufp2, srcSize=1, destSize=2 +}; + +def macroop PSADBW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + msad xmml, xmml, ufp1, srcSize=1, destSize=2 + msad xmmh, xmmh, ufp2, srcSize=1, destSize=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py 
b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py index 59380c6a7..2cb41861c 100644 --- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py +++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py @@ -54,10 +54,123 @@ # Authors: Gabe Black microcode = ''' -# PCMPEQB -# PCMPEQW -# PCMPEQD -# PCMPGTB -# PCMPGTW -# PCMPGTD +def macroop PCMPEQB_XMM_XMM { + mcmpi2r xmml, xmml, xmmlm, size=1, ext=0 + mcmpi2r xmmh, xmmh, xmmhm, size=1, ext=0 +}; + +def macroop PCMPEQB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=1, ext=0 + mcmpi2r xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PCMPEQB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=1, ext=0 + mcmpi2r xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PCMPEQW_XMM_XMM { + mcmpi2r xmml, xmml, xmmlm, size=2, ext=0 + mcmpi2r xmmh, xmmh, xmmhm, size=2, ext=0 +}; + +def macroop PCMPEQW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=2, ext=0 + mcmpi2r xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PCMPEQW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=2, ext=0 + mcmpi2r xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PCMPEQD_XMM_XMM { + mcmpi2r xmml, xmml, xmmlm, size=4, ext=0 + mcmpi2r xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop PCMPEQD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=4, ext=0 + mcmpi2r xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PCMPEQD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 
+ ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=4, ext=0 + mcmpi2r xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PCMPGTB_XMM_XMM { + mcmpi2r xmml, xmml, xmmlm, size=1, ext=2 + mcmpi2r xmmh, xmmh, xmmhm, size=1, ext=2 +}; + +def macroop PCMPGTB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=1, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=1, ext=2 +}; + +def macroop PCMPGTB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=1, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=1, ext=2 +}; + +def macroop PCMPGTW_XMM_XMM { + mcmpi2r xmml, xmml, xmmlm, size=2, ext=2 + mcmpi2r xmmh, xmmh, xmmhm, size=2, ext=2 +}; + +def macroop PCMPGTW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=2, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PCMPGTW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=2, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PCMPGTD_XMM_XMM { + mcmpi2r xmml, xmml, xmmlm, size=4, ext=2 + mcmpi2r xmmh, xmmh, xmmhm, size=4, ext=2 +}; + +def macroop PCMPGTD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=4, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=4, ext=2 +}; + +def macroop PCMPGTD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=4, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=4, ext=2 +}; ''' diff --git 
a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py index 79b1aca4c..d3bfbb529 100644 --- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py @@ -54,8 +54,83 @@ # Authors: Gabe Black microcode = ''' -# PMAXUB -# PMINUB -# PMAXSW -# PMINSW +def macroop PMINUB_XMM_XMM { + mmini xmml, xmml, xmmlm, size=1, ext=0 + mmini xmmh, xmmh, xmmhm, size=1, ext=0 +}; + +def macroop PMINUB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=1, ext=0 + mmini xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PMINUB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=1, ext=0 + mmini xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PMINSW_XMM_XMM { + mmini xmml, xmml, xmmlm, size=2, ext=2 + mmini xmmh, xmmh, xmmhm, size=2, ext=2 +}; + +def macroop PMINSW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=2, ext=2 + mmini xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PMINSW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=2, ext=2 + mmini xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PMAXUB_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=1, ext=0 + mmaxi xmmh, xmmh, xmmhm, size=1, ext=0 +}; + +def macroop PMAXUB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=1, ext=0 + mmaxi xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PMAXUB_XMM_P { 
+ rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=1, ext=0 + mmaxi xmmh, xmmh, ufp2, size=1, ext=0 +}; + +def macroop PMAXSW_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=2, ext=2 + mmaxi xmmh, xmmh, xmmhm, size=2, ext=2 +}; + +def macroop PMAXSW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=2, ext=2 + mmaxi xmmh, xmmh, ufp2, size=2, ext=2 +}; + +def macroop PMAXSW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=2, ext=2 + mmaxi xmmh, xmmh, ufp2, size=2, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py index b667055dc..8d632a0ac 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_gpr_integer_to_floating_point.py @@ -54,20 +54,35 @@ # Authors: Gabe Black microcode = ''' -# CVTSI2SS +def macroop CVTSI2SS_XMM_R { + mov2fp ufp1, regm, destSize=dsz, srcSize=dsz + cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1 +}; + +def macroop CVTSI2SS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1 +}; + +def macroop CVTSI2SS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvti2f xmml, ufp1, srcSize=dsz, destSize=4, ext=1 +}; -def macroop CVTSI2SD_R_R { - cvtf_i2d xmml, regm +def macroop CVTSI2SD_XMM_R { + mov2fp ufp1, regm, destSize=dsz, srcSize=dsz + cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1 }; -def macroop CVTSI2SD_R_M { - ld t1, seg, sib, disp - cvtf_i2d xmml, t1 +def macroop CVTSI2SD_XMM_M { + ldfp ufp1, seg, 
sib, disp, dataSize=8 + cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1 }; -def macroop CVTSI2SD_R_P { +def macroop CVTSI2SD_XMM_P { rdip t7 - ld t1, seg, riprel, disp - cvtf_i2d xmml, t1 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvti2f xmml, ufp1, srcSize=dsz, destSize=8, ext=1 }; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py index 41e8549b5..21cfdcc68 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_integer_to_floating_point.py @@ -54,6 +54,41 @@ # Authors: Gabe Black microcode = ''' -# CVTDQ2PS -# CVTDQ2PD +def macroop CVTDQ2PS_XMM_XMM { + cvti2f xmml, xmmlm, size=4, ext=0 + cvti2f xmmh, xmmhm, size=4, ext=0 +}; + +def macroop CVTDQ2PS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + cvti2f xmml, ufp1, size=4, ext=0 + cvti2f xmmh, ufp2, size=4, ext=0 +}; + +def macroop CVTDQ2PS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + cvti2f xmml, ufp1, size=4, ext=0 + cvti2f xmmh, ufp2, size=4, ext=0 +}; + +def macroop CVTDQ2PD_XMM_XMM { + cvti2f xmmh, xmmlm, srcSize=4, destSize=8, ext=2 + cvti2f xmml, xmmlm, srcSize=4, destSize=8, ext=0 +}; + +def macroop CVTDQ2PD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvti2f xmml, ufp1, srcSize=4, destSize=8, ext=0 + cvti2f xmmh, ufp1, srcSize=4, destSize=8, ext=2 +}; + +def macroop CVTDQ2PD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvti2f xmml, ufp1, srcSize=4, destSize=8, ext=0 + cvti2f xmmh, ufp1, srcSize=4, destSize=8, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py 
b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py index aeaea26cd..bc09d8b92 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_conversion/convert_mmx_integer_to_floating_point.py @@ -54,6 +54,36 @@ # Authors: Gabe Black microcode = ''' -# CVTPI2PS -# CVTPI2PD +def macroop CVTPI2PS_XMM_MMX { + cvti2f xmml, mmxm, size=4, ext=0 +}; + +def macroop CVTPI2PS_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvti2f xmml, ufp1, size=4, ext=0 +}; + +def macroop CVTPI2PS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvti2f xmml, ufp1, size=4, ext=0 +}; + +def macroop CVTPI2PD_XMM_MMX { + cvti2f xmml, mmxm, srcSize=4, destSize=8, ext=0 + cvti2f xmmh, mmxm, srcSize=4, destSize=8, ext=2 +}; + +def macroop CVTPI2PD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + cvti2f xmml, ufp1, srcSize=4, destSize=8, ext=0 + cvti2f xmmh, ufp1, srcSize=4, destSize=8, ext=2 +}; + +def macroop CVTPI2PD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + cvti2f xmml, ufp1, srcSize=4, destSize=8, ext=0 + cvti2f xmmh, ufp1, srcSize=4, destSize=8, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py index 80f7a3e71..f4f06ca67 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py @@ -54,6 +54,26 @@ # Authors: Gabe Black microcode = ''' -# PEXTRW -# PINSRW +def macroop PEXTRW_R_XMM_I { + mov2int reg, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1 + mov2int reg, xmmhm, "IMMEDIATE & mask(3)", size=2, ext=1 +}; + +def macroop PINSRW_XMM_R_I { + mov2fp xmml, regm, "IMMEDIATE & mask(3)", size=2, ext=1 + mov2fp xmmh, regm, "IMMEDIATE & mask(3)", size=2, ext=1 +}; + +def macroop 
PINSRW_XMM_M_I { + ld t1, seg, sib, disp, dataSize=2 + mov2fp xmml, t1, "IMMEDIATE & mask(3)", size=2, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(3)", size=2, ext=1 +}; + +def macroop PINSRW_XMM_P_I { + rdip t7 + ld t1, seg, riprel, disp, dataSize=2 + mov2fp xmml, t1, "IMMEDIATE & mask(3)", size=2, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(3)", size=2, ext=1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py index 859d2bc7e..9112a7382 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py @@ -54,7 +54,66 @@ # Authors: Gabe Black microcode = ''' -# PACKSSDW -# PACKSSWB -# PACKUSWB +def macroop PACKSSDW_XMM_XMM { + pack ufp1, xmml, xmmh, ext=1, srcSize=4, destSize=2 + pack xmmh, xmmlm, xmmhm, ext=1, srcSize=4, destSize=2 + movfp xmml, ufp1, dataSize=8 +}; + +def macroop PACKSSDW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=1, srcSize=4, destSize=2 + pack xmmh, ufp1, ufp2, ext=1, srcSize=4, destSize=2 +}; + +def macroop PACKSSDW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=1, srcSize=4, destSize=2 + pack xmmh, ufp1, ufp2, ext=1, srcSize=4, destSize=2 +}; + +def macroop PACKSSWB_XMM_XMM { + pack ufp1, xmml, xmmh, ext=1, srcSize=2, destSize=1 + pack xmmh, xmmlm, xmmhm, ext=1, srcSize=2, destSize=1 + movfp xmml, ufp1, dataSize=8 +}; + +def macroop PACKSSWB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=1, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=1, srcSize=2, destSize=1 +}; + +def macroop PACKSSWB_XMM_P { + rdip t7 + 
ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=1, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=1, srcSize=2, destSize=1 +}; + +def macroop PACKUSWB_XMM_XMM { + pack ufp1, xmml, xmmh, ext=0, srcSize=2, destSize=1 + pack xmmh, xmmlm, xmmhm, ext=0, srcSize=2, destSize=1 + movfp xmml, ufp1, dataSize=8 +}; + +def macroop PACKUSWB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1 +}; + +def macroop PACKUSWB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py index 9f66c3f05..46cfbce82 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py @@ -54,7 +54,54 @@ # Authors: Gabe Black microcode = ''' -# PSHUFD -# PSHUFHW -# PSHUFLW +def macroop PSHUFD_XMM_XMM_I { + shuffle ufp1, xmmlm, xmmhm, size=4, ext="IMMEDIATE" + shuffle xmmh, xmmlm, xmmhm, size=4, ext="IMMEDIATE >> 4" + movfp xmml, ufp1, dataSize=8 +}; + +def macroop PSHUFD_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + shuffle xmml, ufp1, ufp2, size=4, ext="IMMEDIATE" + shuffle xmmh, ufp1, ufp2, size=4, ext="IMMEDIATE >> 4" +}; + +def macroop PSHUFD_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + shuffle xmml, ufp1, ufp2, size=4, ext="IMMEDIATE" + shuffle xmmh, ufp1, ufp2, size=4, 
ext="IMMEDIATE >> 4" +}; + +def macroop PSHUFHW_XMM_XMM_I { + shuffle xmmh, xmmhm, xmmhm, size=2, ext=imm +}; + +def macroop PSHUFHW_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT + 8", dataSize=8 + shuffle xmmh, ufp1, ufp1, size=2, ext=imm +}; + +def macroop PSHUFHW_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + shuffle xmmh, ufp1, ufp1, size=2, ext=imm +}; + +def macroop PSHUFLW_XMM_XMM_I { + shuffle xmml, xmmlm, xmmlm, size=2, ext=imm +}; + +def macroop PSHUFLW_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + shuffle xmml, ufp1, ufp1, size=2, ext=imm +}; + +def macroop PSHUFLW_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + shuffle xmml, ufp1, ufp1, size=2, ext=imm +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py index e8c22ea9f..b4dfb4607 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/unpack_and_interleave.py @@ -54,12 +54,150 @@ # Authors: Gabe Black microcode = ''' -# PUNPCKHBW -# PUNPCKHWD -# PUNPCKHDQ -# PUNPCKHQDQ -# PUNPCKLBW -# PUNPCKLWD -# PUNPCKLDQ -# PUNPCKLQDQ +def macroop PUNPCKLBW_XMM_XMM { + unpack xmmh, xmml, xmmlm, ext=1, size=1 + unpack xmml, xmml, xmmlm, ext=0, size=1 +}; + +def macroop PUNPCKLBW_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack xmmh, xmml, ufp1, ext=1, size=1 + unpack xmml, xmml, ufp1, ext=0, size=1 +}; + +def macroop PUNPCKLBW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack xmmh, xmml, ufp1, ext=1, size=1 + unpack xmml, xmml, ufp1, ext=0, size=1 +}; + +def macroop PUNPCKLWD_XMM_XMM { + unpack xmmh, xmml, xmmlm, ext=1, size=2 + unpack xmml, xmml, xmmlm, ext=0, size=2 +}; + +def macroop PUNPCKLWD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack xmmh, xmml, ufp1, ext=1, size=2 + 
unpack xmml, xmml, ufp1, ext=0, size=2 +}; + +def macroop PUNPCKLWD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack xmmh, xmml, ufp1, ext=1, size=2 + unpack xmml, xmml, ufp1, ext=0, size=2 +}; + +def macroop PUNPCKLDQ_XMM_XMM { + unpack xmmh, xmml, xmmlm, ext=1, size=4 + unpack xmml, xmml, xmmlm, ext=0, size=4 +}; + +def macroop PUNPCKLDQ_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack xmmh, xmml, ufp1, ext=1, size=4 + unpack xmml, xmml, ufp1, ext=0, size=4 +}; + +def macroop PUNPCKLDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack xmmh, xmml, ufp1, ext=1, size=4 + unpack xmml, xmml, ufp1, ext=0, size=4 +}; + +def macroop PUNPCKHBW_XMM_XMM { + unpack xmml, xmmh, xmmhm, ext=0, size=1 + unpack xmmh, xmmh, xmmhm, ext=1, size=1 +}; + +def macroop PUNPCKHBW_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + unpack xmml, xmmh, ufp1, ext=0, size=1 + unpack xmmh, xmmh, ufp1, ext=1, size=1 +}; + +def macroop PUNPCKHBW_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + unpack xmml, xmmh, ufp1, ext=0, size=1 + unpack xmmh, xmmh, ufp1, ext=1, size=1 +}; + +def macroop PUNPCKHWD_XMM_XMM { + unpack xmml, xmmh, xmmhm, ext=0, size=2 + unpack xmmh, xmmh, xmmhm, ext=1, size=2 +}; + +def macroop PUNPCKHWD_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + unpack xmml, xmmh, ufp1, ext=0, size=2 + unpack xmmh, xmmh, ufp1, ext=1, size=2 +}; + +def macroop PUNPCKHWD_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + unpack xmml, xmmh, ufp1, ext=0, size=2 + unpack xmmh, xmmh, ufp1, ext=1, size=2 +}; + +def macroop PUNPCKHDQ_XMM_XMM { + unpack xmml, xmmh, xmmhm, ext=0, size=4 + unpack xmmh, xmmh, xmmhm, ext=1, size=4 +}; + +def macroop PUNPCKHDQ_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + 
unpack xmml, xmmh, ufp1, ext=0, size=4 + unpack xmmh, xmmh, ufp1, ext=1, size=4 +}; + +def macroop PUNPCKHDQ_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + unpack xmml, xmmh, ufp1, ext=0, size=4 + unpack xmmh, xmmh, ufp1, ext=1, size=4 +}; + +def macroop PUNPCKHQDQ_XMM_XMM { + movfp xmml, xmmh + movfp xmmh, xmmhm +}; + +def macroop PUNPCKHQDQ_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + movfp xmml, xmmh + movfp xmmh, ufp1 +}; + +def macroop PUNPCKHQDQ_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], 8, dataSize=8 + movfp xmml, xmmh + movfp xmmh, ufp1 +}; + +def macroop PUNPCKLQDQ_XMM_XMM { + movfp xmmh, xmmlm +}; + +def macroop PUNPCKLQDQ_XMM_M { + ldfp xmmh, seg, sib, disp, dataSize=8 +}; + +def macroop PUNPCKLQDQ_XMM_P { + rdip t7 + ldfp xmmh, seg, riprel, disp, dataSize=8 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py index 4bee18c19..c34bd42bb 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py @@ -54,11 +54,40 @@ # Authors: Gabe Black microcode = ''' -# MOVD -# MOVQ +def macroop MOVQ_XMM_XMM { + movfp xmml, xmmlm + lfpimm xmmh, 0 +}; + +def macroop MOVQ_XMM_M { + ldfp xmml, seg, sib, disp, dataSize=8 + lfpimm xmmh, 0 +}; + +def macroop MOVQ_XMM_P { + rdip t7 + ldfp xmml, seg, riprel, disp, dataSize=8 + lfpimm xmmh, 0 +}; + +def macroop MOVQ_M_XMM { + stfp xmml, seg, sib, disp, dataSize=8 +}; + +def macroop MOVQ_P_XMM { + rdip t7 + stfp xmml, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVDQ2Q_MMX_XMM { + movfp mmx, xmmlm, dataSize=8 +}; + +def macroop MOVQ2DQ_XMM_MMX { + movfp xmml, mmxm, dataSize=8 + lfpimm xmmh, 0 +}; +''' # MOVDQA # MOVDQU # LDDQU -''' diff --git
a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py index 558391c6a..0be1229b4 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_mask.py @@ -54,5 +54,9 @@ # Authors: Gabe Black microcode = ''' -# PMOVMSKB +def macroop PMOVMSKB_R_XMM { + limm reg, 0 + movsign reg, xmmlm, size=1, ext=0 + movsign reg, xmmhm, size=1, ext=1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py index 413dddb84..c8df3b403 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_non_temporal.py @@ -55,5 +55,13 @@ microcode = ''' # MOVNTDQ -# MASKMOVDQU + +def macroop MASKMOVDQU_XMM_XMM { + ldfp ufp1, ds, [1, t0, rdi], dataSize=8 + ldfp ufp2, ds, [1, t0, rdi], 8, dataSize=8 + maskmov ufp1, xmml, xmmlm, size=1 + maskmov ufp2, xmmh, xmmhm, size=1 + stfp ufp1, ds, [1, t0, rdi], dataSize=8 + stfp ufp2, ds, [1, t0, rdi], 8, dataSize=8 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py b/src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py index 336796e23..e72deae0d 100644 --- a/src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py +++ b/src/arch/x86/isa/insts/simd128/integer/logical/exclusive_or.py @@ -54,5 +54,25 @@ # Authors: Gabe Black microcode = ''' -# PXOR +def macroop PXOR_XMM_XMM { + mxor xmml, xmml, xmmlm + mxor xmmh, xmmh, xmmhm +}; + +def macroop PXOR_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mxor xmml, xmml, ufp1 + mxor xmmh, xmmh, ufp2 +}; + +def macroop PXOR_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 
+ ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mxor xmml, xmml, ufp1 + mxor xmmh, xmmh, ufp2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/logical/pand.py b/src/arch/x86/isa/insts/simd128/integer/logical/pand.py index 055b7c5f6..69699454d 100644 --- a/src/arch/x86/isa/insts/simd128/integer/logical/pand.py +++ b/src/arch/x86/isa/insts/simd128/integer/logical/pand.py @@ -54,6 +54,47 @@ # Authors: Gabe Black microcode = ''' -# PAND -# PANDN +def macroop PAND_XMM_XMM { + mand xmml, xmml, xmmlm + mand xmmh, xmmh, xmmhm +}; + +def macroop PAND_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mand xmml, xmml, ufp1 + mand xmmh, xmmh, ufp2 +}; + +def macroop PAND_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mand xmml, xmml, ufp1 + mand xmmh, xmmh, ufp2 +}; + +def macroop PANDN_XMM_XMM { + mandn xmml, xmml, xmmlm + mandn xmmh, xmmh, xmmhm +}; + +def macroop PANDN_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mandn xmml, xmml, ufp1 + mandn xmmh, xmmh, ufp2 +}; + +def macroop PANDN_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mandn xmml, xmml, ufp1 + mandn xmmh, xmmh, ufp2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/logical/por.py b/src/arch/x86/isa/insts/simd128/integer/logical/por.py index addb28a60..dc0d7575b 100644 --- a/src/arch/x86/isa/insts/simd128/integer/logical/por.py +++ b/src/arch/x86/isa/insts/simd128/integer/logical/por.py @@ -54,5 +54,25 @@ # Authors: Gabe Black microcode = ''' -# POR +def macroop POR_XMM_XMM { + mor xmml, xmml, xmmlm + mor xmmh, xmmh, xmmhm +}; + +def macroop POR_XMM_M { + lea t1, seg, sib, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, 
t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mor xmml, xmml, ufp1 + mor xmmh, xmmh, ufp2 +}; + +def macroop POR_XMM_P { + rdip t7 + lea t1, seg, riprel, disp, dataSize=asz + ldfp ufp1, seg, [1, t0, t1], dataSize=8 + ldfp ufp2, seg, [1, t0, t1], 8, dataSize=8 + mor xmml, xmml, ufp1 + mor xmmh, xmmh, ufp2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py b/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py index 687391b47..2481b744f 100644 --- a/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py +++ b/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/save_and_restore_control_and_status.py @@ -54,6 +54,25 @@ # Authors: Gabe Black microcode = ''' -# STMXCSR -# LDMXCSR +def macroop STMXCSR_M { + rdval t1, "InstRegIndex(MISCREG_MXCSR)" + st t1, seg, sib, disp +}; + +def macroop STMXCSR_P { + rdval t1, "InstRegIndex(MISCREG_MXCSR)" + rdip t7 + st t1, seg, riprel, disp +}; + +def macroop LDMXCSR_M { + ld t1, seg, sib, disp + wrval "InstRegIndex(MISCREG_MXCSR)", t1 +}; + +def macroop LDMXCSR_P { + rdip t7 + ld t1, seg, riprel, disp + wrval "InstRegIndex(MISCREG_MXCSR)", t1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py b/src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py index 18d6feb24..617033bc0 100644 --- a/src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py +++ b/src/arch/x86/isa/insts/simd128/integer/shift/left_logical_shift.py @@ -54,8 +54,73 @@ # Authors: Gabe Black microcode = ''' -# PSLLW -# PSLLD -# PSLLQ -# PSLLDQ +def macroop PSLLW_XMM_XMM { + msll xmmh, xmmh, xmmlm, size=2, ext=0 + msll xmml, xmml, xmmlm, size=2, ext=0 +}; + +def macroop PSLLW_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msll xmml, xmml, ufp1, size=2, ext=0 + msll xmmh, xmmh, ufp1, size=2, ext=0 +}; + +def macroop 
PSLLW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msll xmml, xmml, ufp1, size=2, ext=0 + msll xmmh, xmmh, ufp1, size=2, ext=0 +}; + +def macroop PSLLW_XMM_I { + mslli xmml, xmml, imm, size=2, ext=0 + mslli xmmh, xmmh, imm, size=2, ext=0 +}; + +def macroop PSLLD_XMM_XMM { + msll xmmh, xmmh, xmmlm, size=4, ext=0 + msll xmml, xmml, xmmlm, size=4, ext=0 +}; + +def macroop PSLLD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msll xmml, xmml, ufp1, size=4, ext=0 + msll xmmh, xmmh, ufp1, size=4, ext=0 +}; + +def macroop PSLLD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msll xmml, xmml, ufp1, size=4, ext=0 + msll xmmh, xmmh, ufp1, size=4, ext=0 +}; + +def macroop PSLLD_XMM_I { + mslli xmml, xmml, imm, size=4, ext=0 + mslli xmmh, xmmh, imm, size=4, ext=0 +}; + +def macroop PSLLQ_XMM_XMM { + msll xmmh, xmmh, xmmlm, size=8, ext=0 + msll xmml, xmml, xmmlm, size=8, ext=0 +}; + +def macroop PSLLQ_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msll xmml, xmml, ufp1, size=8, ext=0 + msll xmmh, xmmh, ufp1, size=8, ext=0 +}; + +def macroop PSLLQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msll xmml, xmml, ufp1, size=8, ext=0 + msll xmmh, xmmh, ufp1, size=8, ext=0 +}; + +def macroop PSLLQ_XMM_I { + mslli xmml, xmml, imm, size=8, ext=0 + mslli xmmh, xmmh, imm, size=8, ext=0 +}; ''' +# PSLLDQ diff --git a/src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py b/src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py index 63750e292..b88457a02 100644 --- a/src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py +++ b/src/arch/x86/isa/insts/simd128/integer/shift/right_arithmetic_shift.py @@ -54,6 +54,49 @@ # Authors: Gabe Black microcode = ''' -# PSRAW -# PSRAD +def macroop PSRAW_XMM_XMM { + msra xmmh, xmmh, xmmlm, size=2, ext=0 + msra xmml, xmml, xmmlm, size=2, ext=0 +}; + +def macroop PSRAW_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msra xmml, xmml, ufp1, size=2, ext=0 + 
msra xmmh, xmmh, ufp1, size=2, ext=0 +}; + +def macroop PSRAW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msra xmml, xmml, ufp1, size=2, ext=0 + msra xmmh, xmmh, ufp1, size=2, ext=0 +}; + +def macroop PSRAW_XMM_I { + msrai xmml, xmml, imm, size=2, ext=0 + msrai xmmh, xmmh, imm, size=2, ext=0 +}; + +def macroop PSRAD_XMM_XMM { + msra xmmh, xmmh, xmmlm, size=4, ext=0 + msra xmml, xmml, xmmlm, size=4, ext=0 +}; + +def macroop PSRAD_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msra xmml, xmml, ufp1, size=4, ext=0 + msra xmmh, xmmh, ufp1, size=4, ext=0 +}; + +def macroop PSRAD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msra xmml, xmml, ufp1, size=4, ext=0 + msra xmmh, xmmh, ufp1, size=4, ext=0 +}; + +def macroop PSRAD_XMM_I { + msrai xmml, xmml, imm, size=4, ext=0 + msrai xmmh, xmmh, imm, size=4, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py b/src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py index fc6fb180b..c904eaf50 100644 --- a/src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py +++ b/src/arch/x86/isa/insts/simd128/integer/shift/right_logical_shift.py @@ -54,8 +54,73 @@ # Authors: Gabe Black microcode = ''' -# PSRLW -# PSRLD -# PSRLQ -# PSRLDQ +def macroop PSRLW_XMM_XMM { + msrl xmmh, xmmh, xmmlm, size=2, ext=0 + msrl xmml, xmml, xmmlm, size=2, ext=0 +}; + +def macroop PSRLW_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msrl xmml, xmml, ufp1, size=2, ext=0 + msrl xmmh, xmmh, ufp1, size=2, ext=0 +}; + +def macroop PSRLW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msrl xmml, xmml, ufp1, size=2, ext=0 + msrl xmmh, xmmh, ufp1, size=2, ext=0 +}; + +def macroop PSRLW_XMM_I { + msrli xmml, xmml, imm, size=2, ext=0 + msrli xmmh, xmmh, imm, size=2, ext=0 +}; + +def macroop PSRLD_XMM_XMM { + msrl xmmh, xmmh, xmmlm, size=4, ext=0 + msrl xmml, xmml, xmmlm, size=4, ext=0 +}; + +def macroop PSRLD_XMM_M { + ldfp ufp1, seg, sib, disp, 
dataSize=8 + msrl xmml, xmml, ufp1, size=4, ext=0 + msrl xmmh, xmmh, ufp1, size=4, ext=0 +}; + +def macroop PSRLD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msrl xmml, xmml, ufp1, size=4, ext=0 + msrl xmmh, xmmh, ufp1, size=4, ext=0 +}; + +def macroop PSRLD_XMM_I { + msrli xmml, xmml, imm, size=4, ext=0 + msrli xmmh, xmmh, imm, size=4, ext=0 +}; + +def macroop PSRLQ_XMM_XMM { + msrl xmmh, xmmh, xmmlm, size=8, ext=0 + msrl xmml, xmml, xmmlm, size=8, ext=0 +}; + +def macroop PSRLQ_XMM_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msrl xmml, xmml, ufp1, size=8, ext=0 + msrl xmmh, xmmh, ufp1, size=8, ext=0 +}; + +def macroop PSRLQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msrl xmml, xmml, ufp1, size=8, ext=0 + msrl xmmh, xmmh, ufp1, size=8, ext=0 +}; + +def macroop PSRLQ_XMM_I { + msrli xmml, xmml, imm, size=8, ext=0 + msrli xmmh, xmmh, imm, size=8, ext=0 +}; ''' +# PSRLDQ diff --git a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py index 54047ec7c..f970018ac 100644 --- a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/multiplication.py @@ -54,5 +54,18 @@ # Authors: Gabe Black microcode = ''' -# PFMUL +def macroop PFMUL_MMX_MMX { + mmulf mmx, mmx, mmxm, size=4, ext=0 +}; + +def macroop PFMUL_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmulf mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PFMUL_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmulf mmx, mmx, ufp1, size=4, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py index 6e4c1804d..363794411 100644 --- a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py +++ 
b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/subtraction.py @@ -54,6 +54,33 @@ # Authors: Gabe Black microcode = ''' -# PFSUB -# PFSUBR +def macroop PFSUB_MMX_MMX { + msubf mmx, mmx, mmxm, size=4, ext=0 +}; + +def macroop PFSUB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubf mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PFSUB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubf mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PFSUBR_MMX_MMX { + msubf mmx, mmxm, mmx, size=4, ext=0 +}; + +def macroop PFSUBR_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubf mmx, ufp1, mmx, size=4, ext=0 +}; + +def macroop PFSUBR_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubf mmx, ufp1, mmx, size=4, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py b/src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py index 6c942065d..1ade48152 100644 --- a/src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py +++ b/src/arch/x86/isa/insts/simd64/floating_point/data_conversion.py @@ -54,10 +54,7 @@ # Authors: Gabe Black microcode = ''' -# CVTPS2PI -# CVTTPS2PI -# CVTPD2PI -# CVTTPD2PI +# CVTPS2PI, CVTTPS2PI, CVTPD2PI, and CVTTPD2PI are implemented in simd128 # PF2IW # PF2ID ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py index 235a4fc7b..b663d15b7 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/addition.py @@ -54,12 +54,123 @@ # Authors: Gabe Black microcode = ''' -# PADDB -# PADDW -# PADDD -# PADDQ -# PADDSB -# PADDSW -# PADDUSB -# PADDUSW +def macroop PADDB_MMX_MMX { + maddi mmx, mmx, mmxm, size=1, ext=0 +}; + +def macroop PADDB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PADDB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + 
maddi mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PADDW_MMX_MMX { + maddi mmx, mmx, mmxm, size=2, ext=0 +}; + +def macroop PADDW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PADDW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PADDD_MMX_MMX { + maddi mmx, mmx, mmxm, size=4, ext=0 +}; + +def macroop PADDD_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PADDD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PADDQ_MMX_MMX { + maddi mmx, mmx, mmxm, size=8, ext=0 +}; + +def macroop PADDQ_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=8, ext=0 +}; + +def macroop PADDQ_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=8, ext=0 +}; + +def macroop PADDSB_MMX_MMX { + maddi mmx, mmx, mmxm, size=1, ext=4 +}; + +def macroop PADDSB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=1, ext=4 +}; + +def macroop PADDSB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=1, ext=4 +}; + +def macroop PADDSW_MMX_MMX { + maddi mmx, mmx, mmxm, size=2, ext=4 +}; + +def macroop PADDSW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=2, ext=4 +}; + +def macroop PADDSW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=2, ext=4 +}; + +def macroop PADDUSB_MMX_MMX { + maddi mmx, mmx, mmxm, size=1, ext=2 +}; + +def macroop PADDUSB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=1, ext=2 +}; + +def macroop PADDUSB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=1, ext=2 +}; + +def macroop PADDUSW_MMX_MMX { + maddi mmx, mmx, mmxm, size=2, ext=2 +}; + +def macroop 
PADDUSW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PADDUSW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + maddi mmx, mmx, ufp1, size=2, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py index af1b39097..54bc9e53c 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/average.py @@ -54,7 +54,34 @@ # Authors: Gabe Black microcode = ''' -# PAVGB -# PAVGW +def macroop PAVGB_MMX_MMX { + mavg mmx, mmx, mmxm, size=1, ext=0 +}; + +def macroop PAVGB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mavg mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PAVGB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mavg mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PAVGW_MMX_MMX { + mavg mmx, mmx, mmxm, size=2, ext=0 +}; + +def macroop PAVGW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mavg mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PAVGW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mavg mmx, mmx, ufp1, size=2, ext=0 +}; # PAVGUSB ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py index 8382bc439..7383a744f 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiplication.py @@ -54,9 +54,78 @@ # Authors: Gabe Black microcode = ''' -# PMULHW -# PMULLW -# PMULHRW -# PMULHUW -# PMULUDQ +def macroop PMULHW_MMX_MMX { + mmuli mmx, mmx, mmxm, size=2, ext=(0x2 | 0x8) +}; + +def macroop PMULHW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x8) +}; + +def macroop PMULHW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x8) 
+}; + +def macroop PMULLW_MMX_MMX { + mmuli mmx, mmx, mmxm, size=2, ext=2 +}; + +def macroop PMULLW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmuli mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PMULLW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmuli mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PMULHRW_MMX_MMX { + mmuli mmx, mmx, mmxm, size=2, ext=(0x2 | 0x4 | 0x8) +}; + +def macroop PMULHRW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x4 | 0x8) +}; + +def macroop PMULHRW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmuli mmx, mmx, ufp1, size=2, ext=(0x2 | 0x4 | 0x8) +}; + +def macroop PMULHUW_MMX_MMX { + mmuli mmx, mmx, mmxm, size=2, ext=8 +}; + +def macroop PMULHUW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmuli mmx, mmx, ufp1, size=2, ext=8 +}; + +def macroop PMULHUW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmuli mmx, mmx, ufp1, size=2, ext=8 +}; + +def macroop PMULUDQ_MMX_MMX { + mmuli mmx, mmx, mmxm, srcSize=4, destSize=8, ext=1 +}; + +def macroop PMULUDQ_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=1 +}; + +def macroop PMULUDQ_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmuli mmx, mmx, ufp1, srcSize=4, destSize=8, ext=1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py index 40b38867b..f6940d159 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/multiply_add.py @@ -54,5 +54,24 @@ # Authors: Gabe Black microcode = ''' -# PMADDWD +def macroop PMADDWD_MMX_MMX { + mmuli ufp3, mmx, mmxm, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, mmx, mmxm, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi mmx, ufp3, ufp4, size=4, ext=0 +}; + +def macroop PMADDWD_MMX_M { + ldfp ufp1, 
seg, sib, "DISPLACEMENT", dataSize=8 + mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi mmx, ufp3, ufp4, size=4, ext=0 +}; + +def macroop PMADDWD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + mmuli ufp3, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10 | 0x20) + mmuli ufp4, mmx, ufp1, srcSize=2, destSize=4, ext=(0x2 | 0x10) + maddi mmx, ufp3, ufp4, size=4, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py index 44781eb55..a60c0b1a8 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/subtraction.py @@ -54,12 +54,123 @@ # Authors: Gabe Black microcode = ''' -# PSUBB -# PSUBW -# PSUBD -# PSUBQ -# PSUBSB -# PSUBSW -# PSUBUSB -# PSUBUSW +def macroop PSUBB_MMX_MMX { + msubi mmx, mmx, mmxm, size=1, ext=0 +}; + +def macroop PSUBB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PSUBB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PSUBW_MMX_MMX { + msubi mmx, mmx, mmxm, size=2, ext=0 +}; + +def macroop PSUBW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PSUBW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PSUBD_MMX_MMX { + msubi mmx, mmx, mmxm, size=4, ext=0 +}; + +def macroop PSUBD_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PSUBD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PSUBQ_MMX_MMX { + msubi mmx, mmx, mmxm, size=8, ext=0 +}; + +def macroop PSUBQ_MMX_M { + ldfp ufp1, seg, sib, 
disp, dataSize=8 + msubi mmx, mmx, ufp1, size=8, ext=0 +}; + +def macroop PSUBQ_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=8, ext=0 +}; + +def macroop PSUBSB_MMX_MMX { + msubi mmx, mmx, mmxm, size=1, ext=4 +}; + +def macroop PSUBSB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=1, ext=4 +}; + +def macroop PSUBSB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=1, ext=4 +}; + +def macroop PSUBSW_MMX_MMX { + msubi mmx, mmx, mmxm, size=2, ext=4 +}; + +def macroop PSUBSW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=2, ext=4 +}; + +def macroop PSUBSW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=2, ext=4 +}; + +def macroop PSUBUSB_MMX_MMX { + msubi mmx, mmx, mmxm, size=1, ext=2 +}; + +def macroop PSUBUSB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=1, ext=2 +}; + +def macroop PSUBUSB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=1, ext=2 +}; + +def macroop PSUBUSW_MMX_MMX { + msubi mmx, mmx, mmxm, size=2, ext=2 +}; + +def macroop PSUBUSW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PSUBUSW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msubi mmx, mmx, ufp1, size=2, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py index df1ca2301..8c3c239bd 100644 --- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py +++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/sum_of_absolute_differences.py @@ -54,5 +54,18 @@ # Authors: Gabe Black microcode = ''' -# PSADBW +def macroop PSADBW_MMX_MMX { + msad mmx, mmx, mmxm, srcSize=1, destSize=2 +}; + +def macroop PSADBW_MMX_M { + 
ldfp ufp1, seg, sib, disp, dataSize=8 + msad mmx, mmx, ufp1, srcSize=1, destSize=2 +}; + +def macroop PSADBW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msad mmx, mmx, ufp1, srcSize=1, destSize=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py index 59380c6a7..60640f45a 100644 --- a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py +++ b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_mask.py @@ -54,10 +54,93 @@ # Authors: Gabe Black microcode = ''' -# PCMPEQB -# PCMPEQW -# PCMPEQD -# PCMPGTB -# PCMPGTW -# PCMPGTD +def macroop PCMPEQB_MMX_MMX { + mcmpi2r mmx, mmx, mmxm, size=1, ext=0 +}; + +def macroop PCMPEQB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PCMPEQB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PCMPEQW_MMX_MMX { + mcmpi2r mmx, mmx, mmxm, size=2, ext=0 +}; + +def macroop PCMPEQW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PCMPEQW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PCMPEQD_MMX_MMX { + mcmpi2r mmx, mmx, mmxm, size=4, ext=0 +}; + +def macroop PCMPEQD_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PCMPEQD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PCMPGTB_MMX_MMX { + mcmpi2r mmx, mmx, mmxm, size=1, ext=2 +}; + +def macroop PCMPGTB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=1, ext=2 +}; + +def macroop PCMPGTB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=1, ext=2 +}; + +def macroop 
PCMPGTW_MMX_MMX { + mcmpi2r mmx, mmx, mmxm, size=2, ext=2 +}; + +def macroop PCMPGTW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PCMPGTW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PCMPGTD_MMX_MMX { + mcmpi2r mmx, mmx, mmxm, size=4, ext=2 +}; + +def macroop PCMPGTD_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=4, ext=2 +}; + +def macroop PCMPGTD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mcmpi2r mmx, mmx, ufp1, size=4, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py index 79b1aca4c..8d8247300 100644 --- a/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd64/integer/compare/compare_and_write_minimum_or_maximum.py @@ -54,8 +54,63 @@ # Authors: Gabe Black microcode = ''' -# PMAXUB -# PMINUB -# PMAXSW -# PMINSW +def macroop PMINUB_MMX_MMX { + mmini mmx, mmx, mmxm, size=1, ext=0 +}; + +def macroop PMINUB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmini mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PMINUB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmini mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PMINSW_MMX_MMX { + mmini mmx, mmx, mmxm, size=2, ext=2 +}; + +def macroop PMINSW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmini mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PMINSW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmini mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PMAXUB_MMX_MMX { + mmaxi mmx, mmx, mmxm, size=1, ext=0 +}; + +def macroop PMAXUB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmaxi mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PMAXUB_MMX_P { + rdip t7 + ldfp ufp1, seg, 
riprel, disp, dataSize=8 + mmaxi mmx, mmx, ufp1, size=1, ext=0 +}; + +def macroop PMAXSW_MMX_MMX { + mmaxi mmx, mmx, mmxm, size=2, ext=2 +}; + +def macroop PMAXSW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mmaxi mmx, mmx, ufp1, size=2, ext=2 +}; + +def macroop PMAXSW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mmaxi mmx, mmx, ufp1, size=2, ext=2 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/data_conversion.py b/src/arch/x86/isa/insts/simd64/integer/data_conversion.py index f41049171..5b1c8703a 100644 --- a/src/arch/x86/isa/insts/simd64/integer/data_conversion.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_conversion.py @@ -54,8 +54,7 @@ # Authors: Gabe Black microcode = ''' -# CVTPI2PS -# CVTPI2PD +# CVTPI2PS and CVTPI2PD are implemented in simd128 # PI2FW # PI2FD ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py index 80f7a3e71..c9ebbcf14 100644 --- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/extract_and_insert.py @@ -54,6 +54,22 @@ # Authors: Gabe Black microcode = ''' -# PEXTRW -# PINSRW +def macroop PEXTRW_R_MMX_I { + mov2int reg, mmxm, "IMMEDIATE & mask(2)", size=2, ext=0 +}; + +def macroop PINSRW_MMX_R_I { + mov2fp mmx, regm, "IMMEDIATE & mask(2)", size=2, ext=0 +}; + +def macroop PINSRW_MMX_M_I { + ld t1, seg, sib, disp, dataSize=2 + mov2fp mmx, t1, "IMMEDIATE & mask(2)", size=2, ext=0 +}; + +def macroop PINSRW_MMX_P_I { + rdip t7 + ld t1, seg, riprel, disp, dataSize=2 + mov2fp mmx, t1, "IMMEDIATE & mask(2)", size=2, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py index 859d2bc7e..4235d7f26 100644 --- 
a/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/pack_with_saturation.py @@ -54,7 +54,48 @@ # Authors: Gabe Black microcode = ''' -# PACKSSDW -# PACKSSWB -# PACKUSWB +def macroop PACKSSDW_MMX_MMX { + pack mmx, mmx, mmxm, ext=1, srcSize=4, destSize=2 +}; + +def macroop PACKSSDW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + pack mmx, mmx, ufp1, ext=1, srcSize=4, destSize=2 +}; + +def macroop PACKSSDW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + pack mmx, mmx, ufp1, ext=1, srcSize=4, destSize=2 +}; + +def macroop PACKSSWB_MMX_MMX { + pack mmx, mmx, mmxm, ext=1, srcSize=2, destSize=1 +}; + +def macroop PACKSSWB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + pack mmx, mmx, ufp1, ext=1, srcSize=2, destSize=1 +}; + +def macroop PACKSSWB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + pack mmx, mmx, ufp1, ext=1, srcSize=2, destSize=1 +}; + +def macroop PACKUSWB_MMX_MMX { + pack mmx, mmx, mmxm, ext=0, srcSize=2, destSize=1 +}; + +def macroop PACKUSWB_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + pack mmx, mmx, ufp1, ext=0, srcSize=2, destSize=1 +}; + +def macroop PACKUSWB_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + pack mmx, mmx, ufp1, ext=0, srcSize=2, destSize=1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py index 8f95f0f48..a6dd8748a 100644 --- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/shuffle_and_swap.py @@ -54,6 +54,20 @@ # Authors: Gabe Black microcode = ''' -# PSHUFW -# PSWAPD +def macroop PSHUFW_MMX_MMX_I { + shuffle mmx, mmxm, mmxm, size=2, ext=imm +}; + +def macroop PSHUFW_MMX_M_I { + ldfp ufp1, seg, sib, disp, dataSize=8 + shuffle mmx, ufp1, ufp1, size=2, ext=imm +}; + +def macroop PSHUFW_MMX_P_I { 
+ rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + shuffle mmx, ufp1, ufp1, size=2, ext=imm +}; + ''' +# PSWAPD diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py index 1f4c617dd..5a58beca3 100644 --- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/unpack_and_interleave.py @@ -54,10 +54,93 @@ # Authors: Gabe Black microcode = ''' -# PUNPCKHBW -# PUNPCKHWD -# PUNPCKHDQ -# PUNPCKLBW -# PUNPCKLWD -# PUNPCKLDQ +def macroop PUNPCKLBW_MMX_MMX { + unpack mmx, mmx, mmxm, ext=0, size=1 +}; + +def macroop PUNPCKLBW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=0, size=1 +}; + +def macroop PUNPCKLBW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=0, size=1 +}; + +def macroop PUNPCKLWD_MMX_MMX { + unpack mmx, mmx, mmxm, ext=0, size=2 +}; + +def macroop PUNPCKLWD_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=0, size=2 +}; + +def macroop PUNPCKLWD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=0, size=2 +}; + +def macroop PUNPCKLDQ_MMX_MMX { + unpack mmx, mmx, mmxm, ext=0, size=4 +}; + +def macroop PUNPCKLDQ_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=0, size=4 +}; + +def macroop PUNPCKLDQ_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=0, size=4 +}; + +def macroop PUNPCKHBW_MMX_MMX { + unpack mmx, mmx, mmxm, ext=1, size=1 +}; + +def macroop PUNPCKHBW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=1, size=1 +}; + +def macroop PUNPCKHBW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=1, size=1 +}; + +def macroop PUNPCKHWD_MMX_MMX { + unpack mmx, mmx, mmxm, ext=1, size=2 +}; + 
+def macroop PUNPCKHWD_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=1, size=2 +}; + +def macroop PUNPCKHWD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=1, size=2 +}; + +def macroop PUNPCKHDQ_MMX_MMX { + unpack mmx, mmx, mmxm, ext=1, size=4 +}; + +def macroop PUNPCKHDQ_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=1, size=4 +}; + +def macroop PUNPCKHDQ_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + unpack mmx, mmx, ufp1, ext=1, size=4 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py index e60273022..f35a1e4c8 100644 --- a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move.py @@ -54,8 +54,53 @@ # Authors: Gabe Black microcode = ''' -# MOVD -# MOVQ +def macroop MOVD_MMX_R { + mov2fp mmx, regm, srcSize=dsz, destSize=8 +}; + +def macroop MOVD_MMX_M { + ldfp mmx, seg, sib, disp, dataSize=8 +}; + +def macroop MOVD_MMX_P { + rdip t7 + ldfp mmx, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVD_R_MMX { + mov2int reg, mmxm, size=dsz +}; + +def macroop MOVD_M_MMX { + stfp mmx, seg, sib, disp, dataSize=8 +}; + +def macroop MOVD_P_MMX { + rdip t7 + stfp mmx, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVQ_MMX_MMX { + movfp mmx, mmxm +}; + +def macroop MOVQ_MMX_M { + ldfp mmx, seg, sib, disp, dataSize=8 +}; + +def macroop MOVQ_MMX_P { + rdip t7 + ldfp mmx, seg, riprel, disp, dataSize=8 +}; + +def macroop MOVQ_M_MMX { + stfp mmx, seg, sib, disp, dataSize=8 +}; + +def macroop MOVQ_P_MMX { + rdip t7 + stfp mmx, seg, riprel, disp, dataSize=8 +}; +''' # MOVDQ2Q # MOVQ2DQ -''' diff --git a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py index 558391c6a..11e670b40 100644 --- 
a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_mask.py @@ -54,5 +54,8 @@ # Authors: Gabe Black microcode = ''' -# PMOVMSKB +def macroop PMOVMSKB_R_MMX { + limm reg, 0 + movsign reg, mmxm, size=1, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py index edd55d35a..f43d75e68 100644 --- a/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py +++ b/src/arch/x86/isa/insts/simd64/integer/data_transfer/move_non_temporal.py @@ -55,5 +55,10 @@ microcode = ''' # MOVNTQ -# MASKMOVQ + +def macroop MASKMOVQ_MMX_MMX { + ldfp ufp1, ds, [1, t0, rdi], dataSize=8 + maskmov ufp1, mmx, mmxm, size=1 + stfp ufp1, ds, [1, t0, rdi], dataSize=8 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py b/src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py index 336796e23..3e70093e0 100644 --- a/src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py +++ b/src/arch/x86/isa/insts/simd64/integer/logical/exclusive_or.py @@ -54,5 +54,18 @@ # Authors: Gabe Black microcode = ''' -# PXOR +def macroop PXOR_MMX_MMX { + mxor mmx, mmx, mmxm +}; + +def macroop PXOR_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mxor mmx, mmx, ufp1 +}; + +def macroop PXOR_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mxor mmx, mmx, ufp1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/logical/pand.py b/src/arch/x86/isa/insts/simd64/integer/logical/pand.py index 055b7c5f6..181c26a62 100644 --- a/src/arch/x86/isa/insts/simd64/integer/logical/pand.py +++ b/src/arch/x86/isa/insts/simd64/integer/logical/pand.py @@ -54,6 +54,33 @@ # Authors: Gabe Black microcode = ''' -# PAND -# PANDN +def macroop PAND_MMX_MMX { + mand mmx, mmx, mmxm +}; + +def macroop PAND_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mand mmx, mmx, ufp1 +}; + 
+def macroop PAND_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mand mmx, mmx, ufp1 +}; + +def macroop PANDN_MMX_MMX { + mandn mmx, mmx, mmxm +}; + +def macroop PANDN_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mandn mmx, mmx, ufp1 +}; + +def macroop PANDN_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mandn mmx, mmx, ufp1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/logical/por.py b/src/arch/x86/isa/insts/simd64/integer/logical/por.py index addb28a60..701ac95b5 100644 --- a/src/arch/x86/isa/insts/simd64/integer/logical/por.py +++ b/src/arch/x86/isa/insts/simd64/integer/logical/por.py @@ -54,5 +54,18 @@ # Authors: Gabe Black microcode = ''' -# POR +def macroop POR_MMX_MMX { + mor mmx, mmx, mmxm +}; + +def macroop POR_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + mor mmx, mmx, ufp1 +}; + +def macroop POR_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + mor mmx, mmx, ufp1 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py b/src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py index 4687cab8d..011337ef7 100644 --- a/src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py +++ b/src/arch/x86/isa/insts/simd64/integer/shift/left_logical_shift.py @@ -54,7 +54,60 @@ # Authors: Gabe Black microcode = ''' -# PSLLW -# PSLLD -# PSLLQ +def macroop PSLLW_MMX_MMX { + msll mmx, mmx, mmxm, size=2, ext=0 +}; + +def macroop PSLLW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msll mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PSLLW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msll mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PSLLW_MMX_I { + mslli mmx, mmx, imm, size=2, ext=0 +}; + +def macroop PSLLD_MMX_MMX { + msll mmx, mmx, mmxm, size=4, ext=0 +}; + +def macroop PSLLD_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msll mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PSLLD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, 
dataSize=8 + msll mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PSLLD_MMX_I { + mslli mmx, mmx, imm, size=4, ext=0 +}; + +def macroop PSLLQ_MMX_MMX { + msll mmx, mmx, mmxm, size=8, ext=0 +}; + +def macroop PSLLQ_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msll mmx, mmx, ufp1, size=8, ext=0 +}; + +def macroop PSLLQ_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msll mmx, mmx, ufp1, size=8, ext=0 +}; + +def macroop PSLLQ_MMX_I { + mslli mmx, mmx, imm, size=8, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py b/src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py index 63750e292..951b3ea9f 100644 --- a/src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py +++ b/src/arch/x86/isa/insts/simd64/integer/shift/right_arithmetic_shift.py @@ -54,6 +54,41 @@ # Authors: Gabe Black microcode = ''' -# PSRAW -# PSRAD +def macroop PSRAW_MMX_MMX { + msra mmx, mmx, mmxm, size=2, ext=0 +}; + +def macroop PSRAW_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msra mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PSRAW_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msra mmx, mmx, ufp1, size=2, ext=0 +}; + +def macroop PSRAW_MMX_I { + msrai mmx, mmx, imm, size=2, ext=0 +}; + +def macroop PSRAD_MMX_MMX { + msra mmx, mmx, mmxm, size=4, ext=0 +}; + +def macroop PSRAD_MMX_M { + ldfp ufp1, seg, sib, disp, dataSize=8 + msra mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PSRAD_MMX_P { + rdip t7 + ldfp ufp1, seg, riprel, disp, dataSize=8 + msra mmx, mmx, ufp1, size=4, ext=0 +}; + +def macroop PSRAD_MMX_I { + msrai mmx, mmx, imm, size=4, ext=0 +}; ''' diff --git a/src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py b/src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py index 1f870dc32..dc6182de7 100644 --- a/src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py +++ b/src/arch/x86/isa/insts/simd64/integer/shift/right_logical_shift.py 
@@ -54,7 +54,60 @@
 # Authors: Gabe Black
 
 microcode = '''
-# PSRLW
-# PSRLD
-# PSRLQ
+def macroop PSRLW_MMX_MMX {
+    msrl mmx, mmx, mmxm, size=2, ext=0
+};
+
+def macroop PSRLW_MMX_M {
+    ldfp ufp1, seg, sib, disp, dataSize=8
+    msrl mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSRLW_MMX_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp, dataSize=8
+    msrl mmx, mmx, ufp1, size=2, ext=0
+};
+
+def macroop PSRLW_MMX_I {
+    msrli mmx, mmx, imm, size=2, ext=0
+};
+
+def macroop PSRLD_MMX_MMX {
+    msrl mmx, mmx, mmxm, size=4, ext=0
+};
+
+def macroop PSRLD_MMX_M {
+    ldfp ufp1, seg, sib, disp, dataSize=8
+    msrl mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSRLD_MMX_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp, dataSize=8
+    msrl mmx, mmx, ufp1, size=4, ext=0
+};
+
+def macroop PSRLD_MMX_I {
+    msrli mmx, mmx, imm, size=4, ext=0
+};
+
+def macroop PSRLQ_MMX_MMX {
+    msrl mmx, mmx, mmxm, size=8, ext=0
+};
+
+def macroop PSRLQ_MMX_M {
+    ldfp ufp1, seg, sib, disp, dataSize=8
+    msrl mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PSRLQ_MMX_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, disp, dataSize=8
+    msrl mmx, mmx, ufp1, size=8, ext=0
+};
+
+def macroop PSRLQ_MMX_I {
+    msrli mmx, mmx, imm, size=8, ext=0
+};
 '''
diff --git a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
index 2a4c3f0ed..4837e1b45 100644
--- a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
+++ b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
@@ -65,6 +65,10 @@ def macroop FLD_P {
     movfp st(-1), ufp1, spm=-1
 };
 
+def macroop FST_R {
+    movfp sti, st(0)
+};
+
 def macroop FST_M {
     stfp st(0), seg, sib, disp
 };
@@ -74,6 +78,10 @@ def macroop FST_P {
     stfp st(0), seg, riprel, disp
 };
 
+def macroop FSTP_R {
+    movfp sti, st(0), spm=1
+};
+
 def macroop FSTP_M {
     movfp ufp1, st(0), spm=1
     stfp ufp1, seg, sib, disp
diff --git 
a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index c6f5e9cdd..25b58dfb7 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -112,6 +112,8 @@ let {{
             "regm" : regIdx("env.regm"),
             "xmmlm" : regIdx("FLOATREG_XMM_LOW(env.regm)"),
             "xmmhm" : regIdx("FLOATREG_XMM_HIGH(env.regm)"),
+            "mmx" : regIdx("FLOATREG_MMX(env.reg)"),
+            "mmxm" : regIdx("FLOATREG_MMX(env.regm)"),
             "imm" : "adjustedImm",
             "disp" : "adjustedDisp",
             "seg" : regIdx("env.seg"),
@@ -217,6 +219,8 @@ let {{
         return regIdx("NUM_FLOATREGS + (((%s) + 8) %% 8)" % index)
 
     assembler.symbols["st"] = stack_index
+    assembler.symbols["sti"] = stack_index("env.reg")
+    assembler.symbols["stim"] = stack_index("env.regm")
 
     macroopDict = assembler.assemble(microcode)
 
diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa
index 94c707f73..912aa3511 100644
--- a/src/arch/x86/isa/microops/ldstop.isa
+++ b/src/arch/x86/isa/microops/ldstop.isa
@@ -529,7 +529,8 @@ let {{
                 dataSize="env.dataSize", addressSize="env.addressSize",
                 atCPL0=False):
             super(CdaOp, self).__init__("InstRegIndex(NUM_INTREGS)", segment,
-                    addr, disp, dataSize, addressSize, "0", atCPL0, False)
+                    addr, disp, dataSize, addressSize, "Request::NO_ACCESS",
+                    atCPL0, False)
             self.className = "Cda"
             self.mnemonic = "cda"
 
diff --git a/src/arch/x86/isa/microops/limmop.isa b/src/arch/x86/isa/microops/limmop.isa
index f7e7728ab..91ddb1465 100644
--- a/src/arch/x86/isa/microops/limmop.isa
+++ b/src/arch/x86/isa/microops/limmop.isa
@@ -164,6 +164,40 @@ let {{
             return allocator
 
     microopClasses["limm"] = LimmOp
+
+    class LfpimmOp(X86Microop):
+        # Microop whose generated code is "FpDestReg.uqw = imm". The imm
+        # argument may be a Python number, which is rendered as a C++
+        # literal below, or a string, which is handed to the allocator
+        # unchanged.
+        def __init__(self, dest, imm, dataSize="env.dataSize"):
+            import struct
+            self.className = "Lfpimm"
+            self.mnemonic = "lfpimm"
+            self.dest = dest
+            if isinstance(imm, (int, long)):
+                imm = "ULL(%d)" % imm
+            elif isinstance(imm, float):
+                # Emit the bit pattern of the value as an IEEE 754 double.
+                # This is computed in Python because a reinterpret_cast
+                # from double to uint64_t is ill-formed C++.
+                imm = "ULL(%#x)" % \
+                        struct.unpack("<Q", struct.pack("<d", imm))[0]
+            self.imm = imm
+            self.dataSize = dataSize
+
+        # Build the C++ "new ..." expression which constructs this microop.
+        def getAllocator(self, *microFlags):
+            allocator = '''new %(class_name)s(machInst, macrocodeBlock
+                    %(flags)s, %(dest)s, %(imm)s, %(dataSize)s)''' % {
+                "class_name" : self.className,
+                "mnemonic" : self.mnemonic,
+                "flags" : self.microFlagsText(microFlags),
+                "dest" : self.dest, "imm" : self.imm,
+                "dataSize" : self.dataSize}
+            return allocator
+
+    microopClasses["lfpimm"] = LfpimmOp
 }};
 
 let {{
@@ -174,4 +208,11 @@ let {{
     decoder_output += MicroLimmOpConstructor.subst(iop)
     decoder_output += MicroLimmOpDisassembly.subst(iop)
     exec_output += MicroLimmOpExecute.subst(iop)
+
+    iop = InstObjParams("lfpimm", "Lfpimm", 'X86MicroopBase',
+            {"code" : "FpDestReg.uqw = imm"})
+    header_output += MicroLimmOpDeclare.subst(iop)
+    decoder_output += MicroLimmOpConstructor.subst(iop)
+    decoder_output += MicroLimmOpDisassembly.subst(iop)
+    exec_output += MicroLimmOpExecute.subst(iop)
 }};
diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa
new file mode 100644
index 000000000..9c53fa0fb
--- /dev/null
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -0,0 +1,1554 @@
+/// Copyright (c) 2009 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +def template MediaOpExecute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + + %(code)s; + + //Write the resulting state to the execution context + if(fault == NoFault) + { + %(op_wb)s; + } + return fault; + } +}}; + +def template MediaOpRegDeclare {{ + class %(class_name)s : public %(base_class)s + { + protected: + void buildMe(); + + public: + %(class_name)s(ExtMachInst _machInst, + const char * instMnem, + bool isMicro, bool isDelayed, bool isFirst, bool isLast, + InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); + + %(class_name)s(ExtMachInst _machInst, + const char * instMnem, + InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); + + %(BasicExecDeclare)s + }; +}}; + +def template MediaOpImmDeclare {{ + + class %(class_name)s : public %(base_class)s + { + protected: + void buildMe(); + + public: + %(class_name)s(ExtMachInst _machInst, + const char * instMnem, + bool isMicro, bool isDelayed, bool isFirst, bool 
isLast, + InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); + + %(class_name)s(ExtMachInst _machInst, + const char * instMnem, + InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint16_t _ext); + + %(BasicExecDeclare)s + }; +}}; + +def template MediaOpRegConstructor {{ + + inline void %(class_name)s::buildMe() + { + %(constructor)s; + } + + inline %(class_name)s::%(class_name)s( + ExtMachInst machInst, const char * instMnem, + InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : + %(base_class)s(machInst, "%(mnemonic)s", instMnem, + false, false, false, false, + _src1, _src2, _dest, _srcSize, _destSize, _ext, + %(op_class)s) + { + buildMe(); + } + + inline %(class_name)s::%(class_name)s( + ExtMachInst machInst, const char * instMnem, + bool isMicro, bool isDelayed, bool isFirst, bool isLast, + InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : + %(base_class)s(machInst, "%(mnemonic)s", instMnem, + isMicro, isDelayed, isFirst, isLast, + _src1, _src2, _dest, _srcSize, _destSize, _ext, + %(op_class)s) + { + buildMe(); + } +}}; + +def template MediaOpImmConstructor {{ + + inline void %(class_name)s::buildMe() + { + %(constructor)s; + } + + inline %(class_name)s::%(class_name)s( + ExtMachInst machInst, const char * instMnem, + InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, + uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) : + %(base_class)s(machInst, "%(mnemonic)s", instMnem, + false, false, false, false, + _src1, _imm8, _dest, _srcSize, _destSize, _ext, + %(op_class)s) + { + buildMe(); + } + + inline %(class_name)s::%(class_name)s( + ExtMachInst machInst, const char * instMnem, + bool isMicro, bool isDelayed, bool isFirst, bool isLast, + InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest, + uint8_t _srcSize, uint8_t 
_destSize, uint16_t _ext) : + %(base_class)s(machInst, "%(mnemonic)s", instMnem, + isMicro, isDelayed, isFirst, isLast, + _src1, _imm8, _dest, _srcSize, _destSize, _ext, + %(op_class)s) + { + buildMe(); + } +}}; + +let {{ + # Make these empty strings so that concatenating onto + # them will always work. + header_output = "" + decoder_output = "" + exec_output = "" + + immTemplates = ( + MediaOpImmDeclare, + MediaOpImmConstructor, + MediaOpExecute) + + regTemplates = ( + MediaOpRegDeclare, + MediaOpRegConstructor, + MediaOpExecute) + + class MediaOpMeta(type): + def buildCppClasses(self, name, Name, suffix, code): + + # Globals to stick the output in + global header_output + global decoder_output + global exec_output + + # If op2 is used anywhere, make register and immediate versions + # of this code. + matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?") + match = matcher.search(code) + if match: + typeQual = "" + if match.group("typeQual"): + typeQual = match.group("typeQual") + src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual) + self.buildCppClasses(name, Name, suffix, + matcher.sub(src2_name, code)) + self.buildCppClasses(name + "i", Name, suffix + "Imm", + matcher.sub("imm8", code)) + return + + base = "X86ISA::MediaOp" + + # If imm8 shows up in the code, use the immediate templates, if + # not, hopefully the register ones will be correct. + matcher = re.compile("(?<!\w)imm8(?!\w)") + if matcher.search(code): + base += "Imm" + templates = immTemplates + else: + base += "Reg" + templates = regTemplates + + # Get everything ready for the substitution + iop = InstObjParams(name, Name + suffix, base, {"code" : code}) + + # Generate the actual code (finally!) 
+ header_output += templates[0].subst(iop) + decoder_output += templates[1].subst(iop) + exec_output += templates[2].subst(iop) + + + def __new__(mcls, Name, bases, dict): + abstract = False + name = Name.lower() + if "abstract" in dict: + abstract = dict['abstract'] + del dict['abstract'] + + cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict) + if not abstract: + cls.className = Name + cls.base_mnemonic = name + code = cls.code + + # Set up the C++ classes + mcls.buildCppClasses(cls, name, Name, "", code) + + # Hook into the microassembler dict + global microopClasses + microopClasses[name] = cls + + # If op2 is used anywhere, make register and immediate versions + # of this code. + matcher = re.compile("op2(?P<typeQual>\\.\\w+)?") + if matcher.search(code): + microopClasses[name + 'i'] = cls + return cls + + + class MediaOp(X86Microop): + __metaclass__ = MediaOpMeta + # This class itself doesn't act as a microop + abstract = True + + def __init__(self, dest, src1, op2, + size = None, destSize = None, srcSize = None, ext = None): + self.dest = dest + self.src1 = src1 + self.op2 = op2 + if size is not None: + self.srcSize = size + self.destSize = size + if srcSize is not None: + self.srcSize = srcSize + if destSize is not None: + self.destSize = destSize + if self.srcSize is None: + raise Exception, "Source size not set." + if self.destSize is None: + raise Exception, "Dest size not set." 
+ if ext is None: + self.ext = 0 + else: + self.ext = ext + + def getAllocator(self, *microFlags): + className = self.className + if self.mnemonic == self.base_mnemonic + 'i': + className += "Imm" + allocator = '''new %(class_name)s(machInst, macrocodeBlock + %(flags)s, %(src1)s, %(op2)s, %(dest)s, + %(srcSize)s, %(destSize)s, %(ext)s)''' % { + "class_name" : className, + "flags" : self.microFlagsText(microFlags), + "src1" : self.src1, "op2" : self.op2, + "dest" : self.dest, + "srcSize" : self.srcSize, + "destSize" : self.destSize, + "ext" : self.ext} + return allocator + + class Mov2int(MediaOp): + def __init__(self, dest, src1, src2 = 0, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Mov2int, self).__init__(dest, src1,\ + src2, size, destSize, srcSize, ext) + code = ''' + int items = sizeof(FloatRegBits) / srcSize; + int offset = imm8; + if (bits(src1, 0) && (ext & 0x1)) + offset -= items; + if (offset >= 0 && offset < items) { + uint64_t fpSrcReg1 = + bits(FpSrcReg1.uqw, + (offset + 1) * srcSize * 8 - 1, + (offset + 0) * srcSize * 8); + DestReg = merge(0, fpSrcReg1, destSize); + } else { + DestReg = DestReg; + } + ''' + + class Mov2fp(MediaOp): + def __init__(self, dest, src1, src2 = 0, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Mov2fp, self).__init__(dest, src1,\ + src2, size, destSize, srcSize, ext) + code = ''' + int items = sizeof(FloatRegBits) / destSize; + int offset = imm8; + if (bits(dest, 0) && (ext & 0x1)) + offset -= items; + if (offset >= 0 && offset < items) { + uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); + FpDestReg.uqw = + insertBits(FpDestReg.uqw, + (offset + 1) * destSize * 8 - 1, + (offset + 0) * destSize * 8, srcReg1); + } else { + FpDestReg.uqw = FpDestReg.uqw; + } + ''' + + class Movsign(MediaOp): + def __init__(self, dest, src, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Movsign, self).__init__(dest, src,\ + "InstRegIndex(0)", size, destSize, srcSize, ext) + 
code = ''' + int items = sizeof(FloatRegBits) / srcSize; + uint64_t result = 0; + int offset = (ext & 0x1) ? items : 0; + for (int i = 0; i < items; i++) { + uint64_t picked = + bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1); + result = insertBits(result, i + offset, i + offset, picked); + } + DestReg = DestReg | result; + ''' + + class Maskmov(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + if (bits(FpSrcReg2.uqw, hiIndex)) + result = insertBits(result, hiIndex, loIndex, arg1Bits); + } + FpDestReg.uqw = result; + ''' + + class shuffle(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = sizeof(FloatRegBits) / size; + int options; + int optionBits; + if (size == 8) { + options = 2; + optionBits = 1; + } else { + options = 4; + optionBits = 2; + } + + uint64_t result = 0; + uint8_t sel = ext; + + for (int i = 0; i < items; i++) { + uint64_t resBits; + uint8_t lsel = sel & mask(optionBits); + if (lsel * size >= sizeof(FloatRegBits)) { + lsel -= options / 2; + resBits = bits(FpSrcReg2.uqw, + (lsel + 1) * sizeBits - 1, + (lsel + 0) * sizeBits); + } else { + resBits = bits(FpSrcReg1.uqw, + (lsel + 1) * sizeBits - 1, + (lsel + 0) * sizeBits); + } + + sel >>= optionBits; + + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Unpack(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = destSize; + int items = (sizeof(FloatRegBits) / size) / 2; + int offset = ext ? 
items : 0; + uint64_t result = 0; + for (int i = 0; i < items; i++) { + uint64_t pickedLow = + bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1, + (i + offset) * 8 * size); + result = insertBits(result, + (2 * i + 1) * 8 * size - 1, + (2 * i + 0) * 8 * size, + pickedLow); + uint64_t pickedHigh = + bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1, + (i + offset) * 8 * size); + result = insertBits(result, + (2 * i + 2) * 8 * size - 1, + (2 * i + 1) * 8 * size, + pickedHigh); + } + FpDestReg.uqw = result; + ''' + + class Pack(MediaOp): + code = ''' + assert(srcSize == destSize * 2); + int items = (sizeof(FloatRegBits) / destSize); + int destBits = destSize * 8; + int srcBits = srcSize * 8; + uint64_t result = 0; + int i; + for (i = 0; i < items / 2; i++) { + uint64_t picked = + bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1, + (i + 0) * srcBits); + unsigned signBit = bits(picked, srcBits - 1); + uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); + + // Handle saturation. + if (signBit) { + if (overflow != mask(destBits - srcBits + 1)) { + if (ext & 0x1) + picked = (1 << (destBits - 1)); + else + picked = 0; + } + } else { + if (overflow != 0) { + if (ext & 0x1) + picked = mask(destBits - 1); + else + picked = mask(destBits); + } + } + result = insertBits(result, + (i + 1) * destBits - 1, + (i + 0) * destBits, + picked); + } + for (;i < items; i++) { + uint64_t picked = + bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1, + (i - items + 0) * srcBits); + unsigned signBit = bits(picked, srcBits - 1); + uint64_t overflow = bits(picked, srcBits - 1, destBits - 1); + + // Handle saturation. 
+ if (signBit) { + if (overflow != mask(destBits - srcBits + 1)) { + if (ext & 0x1) + picked = (1 << (destBits - 1)); + else + picked = 0; + } + } else { + if (overflow != 0) { + if (ext & 0x1) + picked = mask(destBits - 1); + else + picked = mask(destBits); + } + } + result = insertBits(result, + (i + 1) * destBits - 1, + (i + 0) * destBits, + picked); + } + FpDestReg.uqw = result; + ''' + + class Mxor(MediaOp): + def __init__(self, dest, src1, src2): + super(Mxor, self).__init__(dest, src1, src2, 1) + code = ''' + FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw; + ''' + + class Mor(MediaOp): + def __init__(self, dest, src1, src2): + super(Mor, self).__init__(dest, src1, src2, 1) + code = ''' + FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw; + ''' + + class Mand(MediaOp): + def __init__(self, dest, src1, src2): + super(Mand, self).__init__(dest, src1, src2, 1) + code = ''' + FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw; + ''' + + class Mandn(MediaOp): + def __init__(self, dest, src1, src2): + super(Mandn, self).__init__(dest, src1, src2, 1) + code = ''' + FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw; + ''' + + class Mminf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + double arg1, arg2; + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + + if (size == 4) { + floatInt fi; + fi.i = arg1Bits; + arg1 = fi.f; + fi.i = arg2Bits; + arg2 = fi.f; + } else { + doubleInt di; + di.i = arg1Bits; + arg1 = di.d; + di.i = arg2Bits; + arg2 = di.d; + } + + if (arg1 < arg2) { + result = insertBits(result, hiIndex, loIndex, arg1Bits); + } else { + result = insertBits(result, hiIndex, loIndex, arg2Bits); + } + } + FpDestReg.uqw = result; + ''' + + class Mmaxf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + double arg1, arg2; + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + + if (size == 4) { + floatInt fi; + fi.i = arg1Bits; + arg1 = fi.f; + fi.i = arg2Bits; + arg2 = fi.f; + } else { + doubleInt di; + di.i = arg1Bits; + arg1 = di.d; + di.i = arg2Bits; + arg2 = di.d; + } + + if (arg1 > arg2) { + result = insertBits(result, hiIndex, loIndex, arg1Bits); + } else { + result = insertBits(result, hiIndex, loIndex, arg2Bits); + } + } + FpDestReg.uqw = result; + ''' + + class Mmini(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + int64_t arg1 = arg1Bits | + (0 - (arg1Bits & (1 << (sizeBits - 1)))); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + int64_t arg2 = arg2Bits | + (0 - (arg2Bits & (1 << (sizeBits - 1)))); + uint64_t resBits; + + if (ext & 0x2) { + if (arg1 < arg2) { + resBits = arg1Bits; + } else { + resBits = arg2Bits; + } + } else { + if (arg1Bits < arg2Bits) { + resBits = arg1Bits; + } else { + resBits = arg2Bits; + } + } + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mmaxi(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + int64_t arg1 = arg1Bits | + (0 - (arg1Bits & (1 << (sizeBits - 1)))); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + int64_t arg2 = arg2Bits | + (0 - (arg2Bits & (1 << (sizeBits - 1)))); + uint64_t resBits; + + if (ext & 0x2) { + if (arg1 > arg2) { + resBits = arg1Bits; + } else { + resBits = arg2Bits; + } + } else { + if (arg1Bits > arg2Bits) { + resBits = arg1Bits; + } else { + resBits = arg2Bits; + } + } + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msqrt(MediaOp): + def __init__(self, dest, src, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Msqrt, self).__init__(dest, src,\ + "InstRegIndex(0)", size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + 
uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + + if (size == 4) { + floatInt fi; + fi.i = argBits; + fi.f = sqrt(fi.f); + argBits = fi.i; + } else { + doubleInt di; + di.i = argBits; + di.d = sqrt(di.d); + argBits = di.i; + } + result = insertBits(result, hiIndex, loIndex, argBits); + } + FpDestReg.uqw = result; + ''' + + class Maddf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits; + + if (size == 4) { + floatInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.f = arg1.f + arg2.f; + resBits = res.i; + } else { + doubleInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.d = arg1.d + arg2.d; + resBits = res.i; + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msubf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits; + + if (size == 4) { + floatInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.f = arg1.f - arg2.f; + resBits = res.i; + } else { + doubleInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.d = arg1.d - arg2.d; + resBits = res.i; + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mmulf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits; + + if (size == 4) { + floatInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.f = arg1.f * arg2.f; + resBits = res.i; + } else { + doubleInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.d = arg1.d * arg2.d; + resBits = res.i; + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mdivf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits; + + if (size == 4) { + floatInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.f = arg1.f / arg2.f; + resBits = res.i; + } else { + doubleInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.d = arg1.d / arg2.d; + resBits = res.i; + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Maddi(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits = arg1Bits + arg2Bits; + + if (ext & 0x2) { + if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) + resBits = mask(sizeBits); + } else if (ext & 0x4) { + int arg1Sign = bits(arg1Bits, sizeBits - 1); + int arg2Sign = bits(arg2Bits, sizeBits - 1); + int resSign = bits(resBits, sizeBits - 1); + if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { + if (resSign == 0) + resBits = (1 << (sizeBits - 1)); + else + resBits = mask(sizeBits - 1); + } + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msubi(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits = arg1Bits - arg2Bits; + + if (ext & 0x2) { + if (arg2Bits > arg1Bits) { + resBits = 0; + } else if (!findCarry(sizeBits, resBits, + arg1Bits, ~arg2Bits)) { + resBits = mask(sizeBits); + } + } else if (ext & 0x4) { + int arg1Sign = bits(arg1Bits, sizeBits - 1); + int arg2Sign = !bits(arg2Bits, sizeBits - 1); + int resSign = bits(resBits, sizeBits - 1); + if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { + if (resSign == 0) + resBits = (1 << (sizeBits - 1)); + else + resBits = mask(sizeBits - 1); + } + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mmuli(MediaOp): + code = ''' + int srcBits = srcSize * 8; + int destBits = destSize * 8; + assert(destBits <= 64); + assert(destSize >= srcSize); + int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / destSize); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int offset = 0; + if (ext & 16) { + if (ext & 32) + offset = i * (destBits - srcBits); + else + offset = i * (destBits - srcBits) + srcBits; + } + int srcHiIndex = (i + 1) * srcBits - 1 + offset; + int srcLoIndex = (i + 0) * srcBits + offset; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex); + uint64_t resBits; + + if (ext & 0x2) { + int64_t arg1 = arg1Bits | + (0 - (arg1Bits & (1 << (srcBits - 1)))); + int64_t arg2 = arg2Bits | + (0 - (arg2Bits & (1 << (srcBits - 1)))); + resBits = (uint64_t)(arg1 * arg2); + } else { + resBits = arg1Bits * arg2Bits; + } + + if (ext & 0x4) + resBits += (1 << (destBits - 1)); + + if (ext & 0x8) + resBits >>= destBits; + + int destHiIndex = (i + 1) * destBits - 1; + int destLoIndex = (i + 0) * destBits; + result = insertBits(result, destHiIndex, destLoIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mavg(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2; + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msad(MediaOp): + code = ''' + int srcBits = srcSize * 8; + int items = sizeof(FloatRegBits) / srcSize; + + uint64_t sum = 0; + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * srcBits - 1; + int loIndex = (i + 0) * srcBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + int64_t resBits = arg1Bits - arg2Bits; + if (resBits < 0) + resBits = -resBits; + sum += resBits; + } + FpDestReg.uqw = sum & mask(destSize * 8); + ''' + + class Msrl(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t shiftAmt = op2.uqw; + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t resBits; + if (shiftAmt >= sizeBits) { + resBits = 0; + } else { + resBits = (arg1Bits >> shiftAmt) & + mask(sizeBits - shiftAmt); + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msra(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 
1: (sizeof(FloatRegBits) / size); + uint64_t shiftAmt = op2.uqw; + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t resBits; + if (shiftAmt >= sizeBits) { + if (bits(arg1Bits, sizeBits - 1)) + resBits = mask(sizeBits); + else + resBits = 0; + } else { + resBits = (arg1Bits >> shiftAmt); + resBits = resBits | + (0 - (resBits & (1 << (sizeBits - 1 - shiftAmt)))); + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msll(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t shiftAmt = op2.uqw; + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t resBits; + if (shiftAmt >= sizeBits) { + resBits = 0; + } else { + resBits = (arg1Bits << shiftAmt); + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Cvtf2i(MediaOp): + def __init__(self, dest, src, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Cvtf2i, self).__init__(dest, src,\ + "InstRegIndex(0)", size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(destSize == 4 || destSize == 8); + assert(srcSize == 4 || srcSize == 8); + int srcSizeBits = srcSize * 8; + int destSizeBits = destSize * 8; + int items; + int srcStart = 0; + int destStart = 0; + if (srcSize == 2 * destSize) { + items = (ext & 0x1) ? 
1: sizeof(FloatRegBits) / srcSize; + if (ext & 0x2) + destStart = destSizeBits * items; + } else if (destSize == 2 * srcSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + if (ext & 0x2) + srcStart = srcSizeBits * items; + } else { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + } + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; + int srcLoIndex = srcStart + (i + 0) * srcSizeBits; + uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); + double arg; + + if (srcSize == 4) { + floatInt fi; + fi.i = argBits; + arg = fi.f; + } else { + doubleInt di; + di.i = argBits; + arg = di.d; + } + + if (ext & 0x4) { + if (arg >= 0) + arg += 0.5; + else + arg -= 0.5; + } + + if (destSize == 4) { + argBits = (uint32_t)(float)arg; + } else { + argBits = (uint64_t)arg; + } + int destHiIndex = destStart + (i + 1) * destSizeBits - 1; + int destLoIndex = destStart + (i + 0) * destSizeBits; + result = insertBits(result, destHiIndex, destLoIndex, argBits); + } + FpDestReg.uqw = result; + ''' + + class Cvti2f(MediaOp): + def __init__(self, dest, src, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Cvti2f, self).__init__(dest, src,\ + "InstRegIndex(0)", size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(destSize == 4 || destSize == 8); + assert(srcSize == 4 || srcSize == 8); + int srcSizeBits = srcSize * 8; + int destSizeBits = destSize * 8; + int items; + int srcStart = 0; + int destStart = 0; + if (srcSize == 2 * destSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; + if (ext & 0x2) + destStart = destSizeBits * items; + } else if (destSize == 2 * srcSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + if (ext & 0x2) + srcStart = srcSizeBits * items; + } else { + items = (ext & 0x1) ? 
1: sizeof(FloatRegBits) / destSize; + } + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; + int srcLoIndex = srcStart + (i + 0) * srcSizeBits; + uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); + int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex))); + double arg = sArg; + + if (destSize == 4) { + floatInt fi; + fi.f = arg; + argBits = fi.i; + } else { + doubleInt di; + di.d = arg; + argBits = di.i; + } + int destHiIndex = destStart + (i + 1) * destSizeBits - 1; + int destLoIndex = destStart + (i + 0) * destSizeBits; + result = insertBits(result, destHiIndex, destLoIndex, argBits); + } + FpDestReg.uqw = result; + ''' + + class Cvtf2f(MediaOp): + def __init__(self, dest, src, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Cvtf2f, self).__init__(dest, src,\ + "InstRegIndex(0)", size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(destSize == 4 || destSize == 8); + assert(srcSize == 4 || srcSize == 8); + int srcSizeBits = srcSize * 8; + int destSizeBits = destSize * 8; + int items; + int srcStart = 0; + int destStart = 0; + if (srcSize == 2 * destSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; + if (ext & 0x2) + destStart = destSizeBits * items; + } else if (destSize == 2 * srcSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + if (ext & 0x2) + srcStart = srcSizeBits * items; + } else { + items = (ext & 0x1) ? 
1: sizeof(FloatRegBits) / destSize; + } + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; + int srcLoIndex = srcStart + (i + 0) * srcSizeBits; + uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); + double arg; + + if (srcSize == 4) { + floatInt fi; + fi.i = argBits; + arg = fi.f; + } else { + doubleInt di; + di.i = argBits; + arg = di.d; + } + if (destSize == 4) { + floatInt fi; + fi.f = arg; + argBits = fi.i; + } else { + doubleInt di; + di.d = arg; + argBits = di.i; + } + int destHiIndex = destStart + (i + 1) * destSizeBits - 1; + int destLoIndex = destStart + (i + 0) * destSizeBits; + result = insertBits(result, destHiIndex, destLoIndex, argBits); + } + FpDestReg.uqw = result; + ''' + + class Mcmpi2r(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + int64_t arg1 = arg1Bits | + (0 - (arg1Bits & (1 << (sizeBits - 1)))); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + int64_t arg2 = arg2Bits | + (0 - (arg2Bits & (1 << (sizeBits - 1)))); + + uint64_t resBits = 0; + if (((ext & 0x2) == 0 && arg1 == arg2) || + ((ext & 0x2) == 0x2 && arg1 > arg2)) + resBits = mask(sizeBits); + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mcmpf2r(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x8) ? 
1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + double arg1, arg2; + + if (size == 4) { + floatInt fi; + fi.i = arg1Bits; + arg1 = fi.f; + fi.i = arg2Bits; + arg2 = fi.f; + } else { + doubleInt di; + di.i = arg1Bits; + arg1 = di.d; + di.i = arg2Bits; + arg2 = di.d; + } + + uint64_t resBits = 0; + bool nanop = isnan(arg1) || isnan(arg2); + switch (ext & mask(3)) { + case 0: + if (arg1 == arg2 && !nanop) + resBits = mask(sizeBits); + break; + case 1: + if (arg1 < arg2 && !nanop) + resBits = mask(sizeBits); + break; + case 2: + if (arg1 <= arg2 && !nanop) + resBits = mask(sizeBits); + break; + case 3: + if (nanop) + resBits = mask(sizeBits); + break; + case 4: + if (arg1 != arg2 || nanop) + resBits = mask(sizeBits); + break; + case 5: + if (!(arg1 < arg2) || nanop) + resBits = mask(sizeBits); + break; + case 6: + if (!(arg1 <= arg2) || nanop) + resBits = mask(sizeBits); + break; + case 7: + if (!nanop) + resBits = mask(sizeBits); + break; + }; + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mcmpf2rf(MediaOp): + def __init__(self, src1, src2,\ + size = None, destSize = None, srcSize = None, ext = None): + super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\ + src2, size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + assert(srcSize == 4 || srcSize == 8); + int size = srcSize; + int sizeBits = size * 8; + + double arg1, arg2; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0); + if (size == 4) { + floatInt fi; + fi.i = arg1Bits; + arg1 = fi.f; + fi.i = arg2Bits; 
+ arg2 = fi.f; + } else { + doubleInt di; + di.i = arg1Bits; + arg1 = di.d; + di.i = arg2Bits; + arg2 = di.d; + } + + // ZF PF CF + // Unordered 1 1 1 + // Greater than 0 0 0 + // Less than 0 0 1 + // Equal 1 0 0 + // OF = SF = AF = 0 + ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit | + ZFBit | PFBit | CFBit); + if (isnan(arg1) || isnan(arg2)) + ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit); + else if(arg1 < arg2) + ccFlagBits = ccFlagBits | CFBit; + else if(arg1 == arg2) + ccFlagBits = ccFlagBits | ZFBit; + ''' +}}; diff --git a/src/arch/x86/isa/microops/microops.isa b/src/arch/x86/isa/microops/microops.isa index 19266f6d6..a9cdffe0a 100644 --- a/src/arch/x86/isa/microops/microops.isa +++ b/src/arch/x86/isa/microops/microops.isa @@ -68,6 +68,9 @@ //Load/store microop definitions ##include "ldstop.isa" +//Media microop definitions +##include "mediaop.isa" + //Control flow microop definitions ##include "seqop.isa" diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa index b74363470..8d65111b0 100644 --- a/src/arch/x86/isa/specialize.isa +++ b/src/arch/x86/isa/specialize.isa @@ -139,7 +139,7 @@ let {{ opType = OpType(opTypes[0]) opTypes.pop(0) - if opType.tag not in ("I", "J"): + if opType.tag not in ("I", "J", "P", "PR", "Q", "V", "VR", "W"): if opType.size: env.setSize(opType.size) @@ -190,7 +190,12 @@ let {{ env.addReg(ModRMRegIndex) env.addToDisassembly( "printReg(out, %s, regSize);\n" % ModRMRegIndex) - Name += "_R" + if opType.tag == "P": + Name += "_MMX" + elif opType.tag == "V": + Name += "_XMM" + else: + Name += "_R" elif opType.tag in ("E", "Q", "W"): # This might refer to memory or to a register. We need to # divide it up farther. @@ -202,9 +207,16 @@ let {{ # modrm addressing. 
memEnv = copy.copy(env) memEnv.doModRM = True + regSuffix = "_R" + if opType.tag == "Q": + regSuffix = "_MMX" + elif opType.tag == "W": + regSuffix = "_XMM" return doSplitDecode("MODRM_MOD", - {"3" : (specializeInst, Name + "_R", copy.copy(opTypes), regEnv)}, - (doRipRelativeDecode, Name, copy.copy(opTypes), memEnv)) + {"3" : (specializeInst, Name + regSuffix, + copy.copy(opTypes), regEnv)}, + (doRipRelativeDecode, Name, + copy.copy(opTypes), memEnv)) elif opType.tag in ("I", "J"): # Immediates env.addToDisassembly( @@ -218,7 +230,12 @@ let {{ env.addReg(ModRMRMIndex) env.addToDisassembly( "printReg(out, %s, regSize);\n" % ModRMRMIndex) - Name += "_R" + if opType.tag == "PR": + Name += "_MMX" + elif opType.tag == "VR": + Name += "_XMM" + else: + Name += "_R" elif opType.tag in ("X", "Y"): # This type of memory addressing is for string instructions. # They'll use the right index and segment internally. diff --git a/src/arch/x86/predecoder.cc b/src/arch/x86/predecoder.cc index f537f92af..026b733bd 100644 --- a/src/arch/x86/predecoder.cc +++ b/src/arch/x86/predecoder.cc @@ -195,7 +195,7 @@ namespace X86ISA State nextState = ErrorState; emi.opcode.num++; //We can't handle 3+ byte opcodes right now - assert(emi.opcode.num < 3); + assert(emi.opcode.num < 4); consumeByte(); if(emi.opcode.num == 1 && nextByte == 0x0f) { @@ -203,11 +203,8 @@ namespace X86ISA DPRINTF(Predecoder, "Found two byte opcode.\n"); emi.opcode.prefixA = nextByte; } - else if(emi.opcode.num == 2 && - (nextByte == 0x0f || - (nextByte & 0xf8) == 0x38)) + else if(emi.opcode.num == 2 && (nextByte == 0x38 || nextByte == 0x3F)) { - panic("Three byte opcodes aren't yet supported!\n"); nextState = OpcodeState; DPRINTF(Predecoder, "Found three byte opcode.\n"); emi.opcode.prefixB = nextByte; diff --git a/src/arch/x86/process.cc b/src/arch/x86/process.cc index 3434ebd73..4082e568c 100644 --- a/src/arch/x86/process.cc +++ b/src/arch/x86/process.cc @@ -270,6 +270,8 @@ X86_64LiveProcess::startup() // setting it to 
one. cr0.pe = 1; // We're definitely in protected mode. tc->setMiscReg(MISCREG_CR0, cr0); + + tc->setMiscReg(MISCREG_MXCSR, 0x1f80); } } @@ -390,6 +392,8 @@ I386LiveProcess::startup() // setting it to one. cr0.pe = 1; // We're definitely in protected mode. tc->setMiscReg(MISCREG_CR0, cr0); + + tc->setMiscReg(MISCREG_MXCSR, 0x1f80); } } diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index 956ec3216..6fd36b487 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -156,8 +156,10 @@ namespace X86ISA uint8_t num; //The first byte detected in a 2+ byte opcode. Should be 0xF0. uint8_t prefixA; - //The second byte detected in a 3+ byte opcode. Could be 0xF0 for - //3dnow instructions, or 0x38-0x3F for some SSE instructions. + //The second byte detected in a 3+ byte opcode. Could be 0x38-0x3F + //for some SSE instructions. 3dNow! instructions are handled as + //two byte opcodes and then split out further by the immediate + //byte. uint8_t prefixB; //The main opcode byte. The highest addressed byte in the opcode. Opcode op; |