// Copyright (c) 2009 The Regents of The University of Michigan // Copyright (c) 2015 Advanced Micro Devices, Inc. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer; // redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution; // neither the name of the copyright holders nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//
// Authors: Gabe Black

// Body of execute() shared by every media microop: declare and read the
// operands, run the microop specific code, and write the operands back
// only if no fault was raised.
def template MediaOpExecute {{
    Fault %(class_name)s::execute(ExecContext *xc,
            Trace::InstRecord *traceData) const
    {
        Fault fault = NoFault;

        %(op_decl)s;
        %(op_rd)s;

        %(code)s;

        //Write the resulting state to the execution context
        if(fault == NoFault)
        {
            %(op_wb)s;
        }
        return fault;
    }
}};

// Class declaration for the form whose second operand is a register.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem, uint64_t setFlags,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        Fault execute(ExecContext *, Trace::InstRecord *) const;
    };
}};

// Class declaration for the form whose second operand is an immediate.
def template MediaOpImmDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem, uint64_t setFlags,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        Fault execute(ExecContext *, Trace::InstRecord *) const;
    };
}};

// Constructor for the register form; forwards everything to the base class.
def template MediaOpRegConstructor {{
    %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        %(constructor)s;
    }
}};

// Constructor for the immediate form; forwards everything to the base class.
def template MediaOpImmConstructor {{
    %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        %(constructor)s;
    }
}};

let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
    header_output = ""
    decoder_output = ""
    exec_output = ""

    # (declaration, constructor, execute) template triples for the
    # immediate and register forms of a microop.
    immTemplates = (
            MediaOpImmDeclare,
            MediaOpImmConstructor,
            MediaOpExecute)

    regTemplates = (
            MediaOpRegDeclare,
            MediaOpRegConstructor,
            MediaOpExecute)

    # Metaclass which turns every non-abstract MediaOp subclass into
    # generated C++ classes and registers it with the microassembler.
    #
    # NOTE(review): the text of this class reached review with every
    # "<...>" span stripped (the regex look-behinds / named groups and
    # everything between the imm8 matcher and the final typeQual matcher).
    # Those spans are restored here from the upstream file as best
    # remembered -- confirm against version control before merging.
    class MediaOpMeta(type):
        def buildCppClasses(self, name, Name, suffix, code):
            # Emit the C++ for one microop. "name" is the microassembler
            # mnemonic, "Name + suffix" the C++ class name and "code" the
            # C++ body of execute().

            # Globals to stick the output in
            global header_output
            global decoder_output
            global exec_output

            # If op2 is used anywhere, make register and immediate versions
            # of this code.
            matcher = re.compile(
                    r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")
            match = matcher.search(code)
            if match:
                typeQual = ""
                if match.group("typeQual"):
                    typeQual = match.group("typeQual")
                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
                self.buildCppClasses(name, Name, suffix,
                        matcher.sub(src2_name, code))
                self.buildCppClasses(name + "i", Name, suffix + "Imm",
                        matcher.sub("imm8", code))
                return

            base = "X86ISA::MediaOp"

            # If imm8 shows up in the code, use the immediate templates, if
            # not, hopefully the register ones will be correct.
            matcher = re.compile(r"(?<!\w)imm8(?!\w)")
            if matcher.search(code):
                base += "Imm"
                templates = immTemplates
            else:
                base += "Reg"
                templates = regTemplates

            # Get everything ready for the substitution
            iop = InstObjParams(name, Name + suffix, base, {"code" : code})

            # Generate the actual code (finally!)
            header_output += templates[0].subst(iop)
            decoder_output += templates[1].subst(iop)
            exec_output += templates[2].subst(iop)

        def __new__(mcls, Name, bases, dict):
            abstract = False
            name = Name.lower()
            if "abstract" in dict:
                abstract = dict['abstract']
                del dict['abstract']

            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
            if not abstract:
                cls.className = Name
                cls.base_mnemonic = name
                code = dict["code"]

                # Set up the C++ classes
                mcls.buildCppClasses(cls, name, Name, "", code)

                # Hook into the microassembler dict
                global microopClasses
                microopClasses[name] = cls

                # If op2 is used anywhere, make register and immediate
                # versions of this code.
                matcher = re.compile(r"op2(?P<typeQual>_[^\W_]+)?")
                if matcher.search(code):
                    microopClasses[name + 'i'] = cls
            return cls

    # Base class of all media microops. Subclasses supply a "code" string
    # holding the C++ body of execute().
    class MediaOp(X86Microop):
        __metaclass__ = MediaOpMeta
        # This class itself doesn't act as a microop
        abstract = True

        def __init__(self, dest, src1, op2,
                size = None, destSize = None, srcSize = None, ext = None):
            # "size" sets both the source and destination item size;
            # "srcSize" / "destSize" override it individually. "ext"
            # defaults to 0.
            self.dest = dest
            self.src1 = src1
            self.op2 = op2
            if size is not None:
                self.srcSize = size
                self.destSize = size
            if srcSize is not None:
                self.srcSize = srcSize
            if destSize is not None:
                self.destSize = destSize
            # getattr with a default: if no size was passed at all the
            # attribute may never have been assigned, and a plain attribute
            # read would raise AttributeError instead of the message below.
            if getattr(self, "srcSize", None) is None:
                raise Exception("Source size not set.")
            if getattr(self, "destSize", None) is None:
                raise Exception("Dest size not set.")
            if ext is None:
                self.ext = 0
            else:
                self.ext = ext

        def getAllocator(self, microFlags):
            # Return the C++ "new ..." expression which instantiates this
            # microop. The "i" mnemonic selects the Imm class.
            className = self.className
            if self.mnemonic == self.base_mnemonic + 'i':
                className += "Imm"
            allocator = '''new %(class_name)s(machInst, macrocodeBlock,
                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
                "class_name" : className,
                "flags" : self.microFlagsText(microFlags),
                "src1" : self.src1, "op2" : self.op2,
                "dest" : self.dest,
                "srcSize" : self.srcSize,
                "destSize" : self.destSize,
                "ext" : self.ext}
            return allocator

    # Copy the srcSize byte item selected by imm8 out of FpSrcReg1 into
    # DestReg. DestReg is left unchanged when the item index is out of range.
    class Mov2int(MediaOp):
        def __init__(self, dest, src1, src2 = 0,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2int, self).__init__(dest, src1,
                    src2, size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            int offset = imm8;
            if (bits(src1, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t fpSrcReg1 =
                    bits(FpSrcReg1_uqw,
                            (offset + 1) * srcSize * 8 - 1,
                            (offset + 0) * srcSize * 8);
                DestReg = merge(0, fpSrcReg1, destSize);
            } else {
                DestReg = DestReg;
            }
        '''

    # Insert SrcReg1 into the destSize byte item of FpDestReg selected by
    # imm8. FpDestReg is left unchanged when the item index is out of range.
    class Mov2fp(MediaOp):
        def __init__(self, dest, src1, src2 = 0,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2fp, self).__init__(dest, src1,
                    src2, size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / destSize;
            int offset = imm8;
            if (bits(dest, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
                FpDestReg_uqw =
                    insertBits(FpDestReg_uqw,
                            (offset + 1) * destSize * 8 - 1,
                            (offset + 0) * destSize * 8, srcReg1);
            } else {
                FpDestReg_uqw = FpDestReg_uqw;
            }
        '''

    # Collect the top bit of every item of FpSrcReg1 into consecutive bits
    # (starting at bit "items" when ext bit 0 is set) and OR them into
    # DestReg.
    class Movsign(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Movsign, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            uint64_t result = 0;
            int offset = (ext & 0x1) ? items : 0;
            for (int i = 0; i < items; i++) {
                uint64_t picked =
                    bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
                result = insertBits(result, i + offset, i + offset, picked);
            }
            DestReg = DestReg | result;
        '''

    # Per item: take the FpSrcReg1 item when the top bit of the matching
    # FpSrcReg2 item is set, otherwise keep the FpDestReg item.
    class Maskmov(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2_uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg_uqw = result;
        '''

    # Build each result item from the source item named by successive
    # selector fields of ext; selectors which index past the first source
    # pick from FpSrcReg2.
    class shuffle(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = sizeof(FloatRegBits) / size;
            int options;
            int optionBits;
            if (size == 8) {
                options = 2;
                optionBits = 1;
            } else {
                options = 4;
                optionBits = 2;
            }

            uint64_t result = 0;
            uint8_t sel = ext;

            for (int i = 0; i < items; i++) {
                uint64_t resBits;
                uint8_t lsel = sel & mask(optionBits);
                if (lsel * size >= sizeof(FloatRegBits)) {
                    lsel -= options / 2;
                    resBits = bits(FpSrcReg2_uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                } else {
                    resBits = bits(FpSrcReg1_uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }

                sel >>= optionBits;

                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Interleave items of FpSrcReg1 (even result slots) and FpSrcReg2 (odd
    # result slots), reading the upper half of the sources when ext is
    # non-zero and the lower half otherwise.
    class Unpack(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg_uqw = result;
        '''

    # Narrow signed srcSize items to destSize items with saturation. The
    # low half of the result comes from FpSrcReg1, the high half from
    # FpSrcReg2. signedOp() selects signed saturation, otherwise the result
    # is saturated to the unsigned range.
    #
    # Fixes relative to the previous revision:
    #  * the FpSrcReg2 items were indexed with (i - items), i.e. negative
    #    bit positions; they are now indexed from 0 with (i - items / 2);
    #  * the all-ones comparison used mask(destBits - srcBits + 1), a
    #    negative width; the overflow field is srcBits - destBits + 1 wide;
    #  * unsigned saturation treated in-range values with the destination's
    #    top bit set as overflow, and depended on the broken mask to zero
    #    negative inputs; both cases are now handled explicitly.
    class Pack(MediaOp):
        code = '''
            assert(srcSize == destSize * 2);
            int items = (sizeof(FloatRegBits) / destSize);
            int half = items / 2;
            int destBits = destSize * 8;
            int srcBits = srcSize * 8;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t picked;
                if (i < half) {
                    picked = bits(FpSrcReg1_uqw,
                                  (i + 1) * srcBits - 1,
                                  (i + 0) * srcBits);
                } else {
                    picked = bits(FpSrcReg2_uqw,
                                  (i - half + 1) * srcBits - 1,
                                  (i - half + 0) * srcBits);
                }
                unsigned signBit = bits(picked, srcBits - 1);

                // Handle saturation.
                if (signedOp()) {
                    // The value fits iff every bit from the destination's
                    // sign position upwards equals the source sign bit.
                    uint64_t overflow =
                        bits(picked, srcBits - 1, destBits - 1);
                    if (signBit) {
                        if (overflow != mask(srcBits - destBits + 1))
                            picked = (ULL(1) << (destBits - 1));
                    } else {
                        if (overflow != 0)
                            picked = mask(destBits - 1);
                    }
                } else {
                    if (signBit) {
                        // Negative values clamp to zero.
                        picked = 0;
                    } else if (bits(picked, srcBits - 1, destBits) != 0) {
                        // Anything above the destination width clamps to
                        // the largest unsigned value.
                        picked = mask(destBits);
                    }
                }
                result = insertBits(result,
                                    (i + 1) * destBits - 1,
                                    (i + 0) * destBits,
                                    picked);
            }
            FpDestReg_uqw = result;
        '''

    # Bitwise XOR of the two sources.
    class Mxor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mxor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
        '''

    # Bitwise OR of the two sources.
    class Mor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
        '''

    # Bitwise AND of the two sources.
    class Mand(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mand, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
        '''

    # Bitwise AND of the complement of source 1 with source 2.
    class Mandn(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mandn, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
        '''

    # Per item floating point minimum. The source 2 item is used whenever
    # "arg1 < arg2" is false.
    class Mminf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg_uqw = result;
        '''

    # Per item floating point maximum. The source 2 item is used whenever
    # "arg1 > arg2" is false.
    class Mmaxf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg_uqw = result;
        '''

    # Per item integer minimum; signedOp() selects a signed comparison of
    # the sign extended items, otherwise the raw bits are compared.
    class Mmini(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (signedOp()) {
                    if (arg1 < arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits < arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item integer maximum; signedOp() selects a signed comparison of
    # the sign extended items, otherwise the raw bits are compared.
    class Mmaxi(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (signedOp()) {
                    if (arg1 > arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits > arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item floating point square root of FpSrcReg1.
    class Msqrt(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Msqrt, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg_uqw = result;
        '''

    # compute approximate reciprocal --- single-precision only
    class Mrcp(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mrcp, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };

            assert(srcSize == 4);  // ISA defines single-precision only
            assert(srcSize == destSize);
            const int size = 4;
            const int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);

                floatInt fi;
                fi.i = argBits;
                // This is more accuracy than HW provides, but oh well
                fi.f = 1.0 / fi.f;
                argBits = fi.i;
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item floating point addition.
    class Maddf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item floating point subtraction (source 1 minus source 2).
    class Msubf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item floating point multiplication.
    class Mmulf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item floating point division (source 1 over source 2).
    class Mdivf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f / arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d / arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item integer addition. When ext bit 1 is set the result saturates
    # (signed or unsigned according to signedOp()) instead of wrapping.
    class Maddi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits + arg2Bits;

                if (ext & 0x2) {
                    if (signedOp()) {
                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
                        int arg2Sign = bits(arg2Bits, sizeBits - 1);
                        int resSign = bits(resBits, sizeBits - 1);
                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                            if (resSign == 0)
                                resBits = (ULL(1) << (sizeBits - 1));
                            else
                                resBits = mask(sizeBits - 1);
                        }
                    } else {
                        if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
                            resBits = mask(sizeBits);
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item integer subtraction (source 1 minus source 2). When ext bit
    # 1 is set the result saturates instead of wrapping.
    class Msubi(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits = arg1Bits - arg2Bits;

                if (ext & 0x2) {
                    if (signedOp()) {
                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
                        int arg2Sign = !bits(arg2Bits, sizeBits - 1);
                        int resSign = bits(resBits, sizeBits - 1);
                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
                            if (resSign == 0)
                                resBits = (ULL(1) << (sizeBits - 1));
                            else
                                resBits = mask(sizeBits - 1);
                        }
                    } else {
                        if (arg2Bits > arg1Bits) {
                            resBits = 0;
                        } else if (!findCarry(sizeBits, resBits,
                                             arg1Bits, ~arg2Bits)) {
                            resBits = mask(sizeBits);
                        }
                    }
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item integer multiplication of srcSize items into destSize items.
    # ext bits 16/32 pick which source items feed each destination item,
    # ext bit 0x4 adds a rounding constant and multHi() keeps the high part
    # of the product.
    class Mmuli(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int destBits = destSize * 8;
            assert(destBits <= 64);
            assert(destSize >= srcSize);
            int items = numItems(destSize);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int offset = 0;
                if (ext & 16) {
                    if (ext & 32)
                        offset = i * (destBits - srcBits);
                    else
                        offset = i * (destBits - srcBits) + srcBits;
                }
                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
                int srcLoIndex = (i + 0) * srcBits + offset;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
                uint64_t resBits;

                if (signedOp()) {
                    int64_t arg1 = arg1Bits |
                        (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
                    int64_t arg2 = arg2Bits |
                        (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
                    resBits = (uint64_t)(arg1 * arg2);
                } else {
                    resBits = arg1Bits * arg2Bits;
                }

                if (ext & 0x4)
                    resBits += (ULL(1) << (destBits - 1));

                if (multHi())
                    resBits >>= destBits;

                int destHiIndex = (i + 1) * destBits - 1;
                int destLoIndex = (i + 0) * destBits;
                result = insertBits(result, destHiIndex, destLoIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item unsigned average, rounded up: (a + b + 1) / 2.
    class Mavg(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Sum of the absolute differences of all item pairs, truncated to
    # destSize bytes.
    class Msad(MediaOp):
        code = '''
            int srcBits = srcSize * 8;
            int items = sizeof(FloatRegBits) / srcSize;

            uint64_t sum = 0;
            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * srcBits - 1;
                int loIndex = (i + 0) * srcBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t resBits = arg1Bits - arg2Bits;
                if (resBits < 0)
                    resBits = -resBits;
                sum += resBits;
            }
            FpDestReg_uqw = sum & mask(destSize * 8);
        '''

    # Per item logical right shift by op2; shift amounts of the item width
    # or more give zero.
    class Msrl(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t shiftAmt = op2_uqw;
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt) &
                        mask(sizeBits - shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item arithmetic right shift by op2; shift amounts of the item
    # width or more fill the item with its sign bit.
    class Msra(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t shiftAmt = op2_uqw;
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    if (bits(arg1Bits, sizeBits - 1))
                        resBits = mask(sizeBits);
                    else
                        resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt);
                    resBits = resBits |
                        (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item left shift by op2; shift amounts of the item width or more
    # give zero.
    class Msll(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t shiftAmt = op2_uqw;
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits << shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item floating point to signed integer conversion. ext bit 0x2
    # selects the upper half of the wider operand when the sizes differ and
    # ext bit 0x4 rounds by adding +/-0.5 before the truncating cast.
    class Cvtf2i(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2i, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = numItems(srcSize);
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = numItems(destSize);
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = numItems(destSize);
            }
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
                double arg;

                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }

                if (ext & 0x4) {
                    if (arg >= 0)
                        arg += 0.5;
                    else
                        arg -= 0.5;
                }

                if (destSize == 4) {
                    int32_t i_arg = (int32_t)arg;
                    argBits = *((uint32_t*)&i_arg);
                } else {
                    int64_t i_arg = (int64_t)arg;
                    argBits = *((uint64_t*)&i_arg);
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item signed integer to floating point conversion. ext bit 0x2
    # selects the upper half of the wider operand when the sizes differ.
    class Cvti2f(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvti2f, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = numItems(srcSize);
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = numItems(destSize);
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = numItems(destSize);
            }
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);

                int64_t sArg = argBits |
                    (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
                double arg = sArg;

                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item conversion between single and double precision. ext bit 0x2
    # selects the upper half of the wider operand when the sizes differ.
    class Cvtf2f(MediaOp):
        def __init__(self, dest, src,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2f, self).__init__(dest, src,
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = numItems(srcSize);
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = numItems(destSize);
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = numItems(destSize);
            }
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
                double arg;

                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item signed integer compare producing an all ones / all zeros
    # item: equality when ext bit 0x2 is clear, greater-than when it is set.
    class Mcmpi2r(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));

                uint64_t resBits = 0;
                if (((ext & 0x2) == 0 && arg1 == arg2) ||
                    ((ext & 0x2) == 0x2 && arg1 > arg2))
                    resBits = mask(sizeBits);

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Per item floating point compare producing an all ones / all zeros
    # item; the low three bits of ext select the predicate.
    class Mcmpf2r(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                double arg1, arg2;

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                uint64_t resBits = 0;
                bool nanop = std::isnan(arg1) || std::isnan(arg2);
                switch (ext & mask(3)) {
                  case 0:
                    if (arg1 == arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 1:
                    if (arg1 < arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 2:
                    if (arg1 <= arg2 && !nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 3:
                    if (nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 4:
                    if (arg1 != arg2 || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 5:
                    if (!(arg1 < arg2) || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 6:
                    if (!(arg1 <= arg2) || nanop)
                        resBits = mask(sizeBits);
                    break;
                  case 7:
                    if (!nanop)
                        resBits = mask(sizeBits);
                    break;
                };

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''

    # Compare the lowest item of the two sources as floating point values
    # and record the outcome in the condition flags.
    class Mcmpf2rf(MediaOp):
        def __init__(self, src1, src2,
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,
                    src2, size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            assert(srcSize == 4 || srcSize == 8);
            int size = srcSize;
            int sizeBits = size * 8;

            double arg1, arg2;
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            //               ZF PF CF
            // Unordered      1  1  1
            // Greater than   0  0  0
            // Less than      0  0  1
            // Equal          1  0  0
            //           OF = SF = AF = 0
            ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
            cfofBits   = cfofBits   & ~(OFBit | CFBit);

            if (std::isnan(arg1) || std::isnan(arg2)) {
                ccFlagBits = ccFlagBits | (ZFBit | PFBit);
                cfofBits = cfofBits | CFBit;
            }
            else if(arg1 < arg2)
                cfofBits = cfofBits | CFBit;
            else if(arg1 == arg2)
                ccFlagBits = ccFlagBits | ZFBit;
        '''

    # Set FTW to 0xFFFF.
    class Emms(MediaOp):
        def __init__(self):
            super(Emms, self).__init__('InstRegIndex(MISCREG_FTW)',
                    'InstRegIndex(0)', 'InstRegIndex(0)', 2)
        code = 'FTW = 0xFFFF;'
}};