// -*- mode:c++ -*-

// Copyright (c) 2010-2014 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder.  You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Copyright (c) 2007-2008 The Florida State University
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Stephen Hines
//          Gabe Black

////////////////////////////////////////////////////////////////////
//
// Load/store microops
//

let {{
    microLdrUopCode = "IWRa = cSwap(Mem_uw, ((CPSR)Cpsr).e);"
    microLdrUopIop = InstObjParams('ldr_uop', 'MicroLdrUop',
                                   'MicroMemOp',
                                   {'memacc_code': microLdrUopCode,
                                    'ea_code': 'EA = URb + (up ? imm : -imm);',
                                    'predicate_test': predicateTest},
                                   ['IsMicroop'])

    microLdr2UopCode = '''
                        uint64_t data = Mem_ud;
                        Dest = cSwap((uint32_t) data, ((CPSR)Cpsr).e);
                        Dest2 = cSwap((uint32_t) (data >> 32), ((CPSR)Cpsr).e);
                        '''
    microLdr2UopIop = InstObjParams('ldr2_uop', 'MicroLdr2Uop',
                                   'MicroMemPairOp',
                                   {'memacc_code': microLdr2UopCode,
                                    'ea_code': 'EA = URb + (up ? imm : -imm);',
                                    'predicate_test': predicateTest},
                                   ['IsMicroop'])

    microLdrFpUopCode = "Fa_uw = cSwap(Mem_uw, ((CPSR)Cpsr).e);"
    microLdrFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrFpUop',
                                      'MicroMemOp',
                                      {'memacc_code': microLdrFpUopCode,
                                       'ea_code': vfpEnabledCheckCode +
                                           'EA = URb + (up ? imm : -imm);',
                                       'predicate_test': predicateTest},
                                      ['IsMicroop'])

    microLdrDBFpUopCode = "Fa_uw = cSwap(Mem_uw, ((CPSR)Cpsr).e);"
    microLdrDBFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDBFpUop',
                                      'MicroMemOp',
                                      {'memacc_code': microLdrFpUopCode,
                                       'ea_code': vfpEnabledCheckCode + '''
                                        EA = URb + (up ? imm : -imm) +
                                             (((CPSR)Cpsr).e ? 4 : 0);
                                        ''',
                                       'predicate_test': predicateTest},
                                      ['IsMicroop'])

    microLdrDTFpUopCode = "Fa_uw = cSwap(Mem_uw, ((CPSR)Cpsr).e);"
    microLdrDTFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDTFpUop',
                                      'MicroMemOp',
                                      {'memacc_code': microLdrFpUopCode,
                                       'ea_code': vfpEnabledCheckCode + '''
                                        EA = URb + (up ? imm : -imm) -
                                             (((CPSR)Cpsr).e ? 4 : 0);
                                        ''',
                                       'predicate_test': predicateTest},
                                      ['IsMicroop'])

    microRetUopCode = '''
        CPSR old_cpsr = Cpsr;
        SCTLR sctlr = Sctlr;

        CPSR new_cpsr =
            cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true,
                             sctlr.nmfi, xc->tcBase());
        Cpsr = ~CondCodesMask & new_cpsr;
        CondCodesNZ = new_cpsr.nz;
        CondCodesC = new_cpsr.c;
        CondCodesV = new_cpsr.v;
        CondCodesGE = new_cpsr.ge;
        IWNPC = cSwap(%s, old_cpsr.e) | ((Spsr & 0x20) ? 1 : 0);
        NextItState = ((((CPSR)Spsr).it2 << 2) & 0xFC)
                | (((CPSR)Spsr).it1 & 0x3);
        SevMailbox = 1;
    '''

    microLdrRetUopIop = InstObjParams('ldr_ret_uop', 'MicroLdrRetUop',
                                      'MicroMemOp',
                                      {'memacc_code':
                                          microRetUopCode % 'Mem_uw',
                                       'ea_code':
                                          'EA = URb + (up ? imm : -imm);',
                                       'predicate_test': condPredicateTest},
                                      ['IsMicroop','IsNonSpeculative',
                                       'IsSerializeAfter', 'IsSquashAfter'])

    microStrUopCode = "Mem = cSwap(URa_uw, ((CPSR)Cpsr).e);"
    microStrUopIop = InstObjParams('str_uop', 'MicroStrUop',
                                   'MicroMemOp',
                                   {'memacc_code': microStrUopCode,
                                    'postacc_code': "",
                                    'ea_code': 'EA = URb + (up ? imm : -imm);',
                                    'predicate_test': predicateTest},
                                   ['IsMicroop'])

    microStrFpUopCode = "Mem = cSwap(Fa_uw, ((CPSR)Cpsr).e);"
    microStrFpUopIop = InstObjParams('strfp_uop', 'MicroStrFpUop',
                                     'MicroMemOp',
                                     {'memacc_code': microStrFpUopCode,
                                      'postacc_code': "",
                                      'ea_code': vfpEnabledCheckCode +
                                           'EA = URb + (up ? imm : -imm);',
                                      'predicate_test': predicateTest},
                                     ['IsMicroop'])

    microStrDBFpUopCode = "Mem = cSwap(Fa_uw, ((CPSR)Cpsr).e);"
    microStrDBFpUopIop = InstObjParams('strfp_uop', 'MicroStrDBFpUop',
                                       'MicroMemOp',
                                       {'memacc_code': microStrFpUopCode,
                                        'postacc_code': "",
                                        'ea_code': vfpEnabledCheckCode + '''
                                         EA = URb + (up ? imm : -imm) +
                                              (((CPSR)Cpsr).e ? 4 : 0);
                                         ''',
                                        'predicate_test': predicateTest},
                                       ['IsMicroop'])

    microStrDTFpUopCode = "Mem = cSwap(Fa_uw, ((CPSR)Cpsr).e);"
    microStrDTFpUopIop = InstObjParams('strfp_uop', 'MicroStrDTFpUop',
                                       'MicroMemOp',
                                       {'memacc_code': microStrFpUopCode,
                                        'postacc_code': "",
                                        'ea_code': vfpEnabledCheckCode + '''
                                         EA = URb + (up ? imm : -imm) -
                                              (((CPSR)Cpsr).e ? 4 : 0);
                                         ''',
                                        'predicate_test': predicateTest},
                                       ['IsMicroop'])

    header_output = decoder_output = exec_output = ''

    loadIops = (microLdrUopIop, microLdrRetUopIop,
                microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop)
    storeIops = (microStrUopIop, microStrFpUopIop,
                 microStrDBFpUopIop, microStrDTFpUopIop)
    for iop in loadIops + storeIops:
        header_output += MicroMemDeclare.subst(iop)
        decoder_output += MicroMemConstructor.subst(iop)
    for iop in loadIops:
        exec_output += LoadExecute.subst(iop) + \
                       LoadInitiateAcc.subst(iop) + \
                       LoadCompleteAcc.subst(iop)
    for iop in storeIops:
        exec_output += StoreExecute.subst(iop) + \
                       StoreInitiateAcc.subst(iop) + \
                       StoreCompleteAcc.subst(iop)

    header_output += MicroMemPairDeclare.subst(microLdr2UopIop)
    decoder_output += MicroMemPairConstructor.subst(microLdr2UopIop)
    exec_output += LoadExecute.subst(microLdr2UopIop) + \
                   LoadInitiateAcc.subst(microLdr2UopIop) + \
                   LoadCompleteAcc.subst(microLdr2UopIop)
}};

let {{
    exec_output = header_output = ''

    eaCode = 'EA = XURa + imm;'

    for size in (1, 2, 3, 4, 6, 8, 12, 16):
        # Set up the memory access.
        regs = (size + 3) // 4
        subst = { "size" : size, "regs" : regs }
        memDecl = '''
        union MemUnion {
            uint8_t bytes[%(size)d];
            Element elements[%(size)d / sizeof(Element)];
            uint32_t floatRegBits[%(regs)d];
        };
        ''' % subst

        # Do endian conversion for all the elements.
        convCode = '''
            const unsigned eCount = sizeof(memUnion.elements) /
                                    sizeof(memUnion.elements[0]);
            if (((CPSR)Cpsr).e) {
                for (unsigned i = 0; i < eCount; i++) {
                    memUnion.elements[i] = gtobe(memUnion.elements[i]);
                }
            } else {
                for (unsigned i = 0; i < eCount; i++) {
                    memUnion.elements[i] = gtole(memUnion.elements[i]);
                }
            }
        '''

        # Offload everything into registers
        regSetCode = ''
        for reg in range(regs):
            mask = ''
            if reg == regs - 1:
                mask = ' & mask(%d)' % (32 - 8 * (regs * 4 - size))
            regSetCode += '''
            FpDestP%(reg)d_uw = gtoh(memUnion.floatRegBits[%(reg)d])%(mask)s;
            ''' % { "reg" : reg, "mask" : mask }

        # Pull everything in from registers
        regGetCode = ''
        for reg in range(regs):
            regGetCode += '''
            memUnion.floatRegBits[%(reg)d] = htog(FpDestP%(reg)d_uw);
            ''' % { "reg" : reg }

        loadMemAccCode = convCode + regSetCode
        storeMemAccCode = regGetCode + convCode

        loadIop = InstObjParams('ldrneon%(size)d_uop' % subst,
                                'MicroLdrNeon%(size)dUop' % subst,
                                'MicroNeonMemOp',
                                { 'mem_decl' : memDecl,
                                  'size' : size,
                                  'memacc_code' : loadMemAccCode,
                                  'ea_code' : simdEnabledCheckCode + eaCode,
                                  'predicate_test' : predicateTest },
                                [ 'IsMicroop', 'IsMemRef', 'IsLoad' ])
        storeIop = InstObjParams('strneon%(size)d_uop' % subst,
                                 'MicroStrNeon%(size)dUop' % subst,
                                 'MicroNeonMemOp',
                                 { 'mem_decl' : memDecl,
                                   'size' : size,
                                   'memacc_code' : storeMemAccCode,
                                   'ea_code' : simdEnabledCheckCode + eaCode,
                                   'predicate_test' : predicateTest },
                                 [ 'IsMicroop', 'IsMemRef', 'IsStore' ])

        exec_output += NeonLoadExecute.subst(loadIop) + \
                       NeonLoadInitiateAcc.subst(loadIop) + \
                       NeonLoadCompleteAcc.subst(loadIop) + \
                       NeonStoreExecute.subst(storeIop) + \
                       NeonStoreInitiateAcc.subst(storeIop) + \
                       NeonStoreCompleteAcc.subst(storeIop)
        header_output += MicroNeonMemDeclare.subst(loadIop) + \
                         MicroNeonMemDeclare.subst(storeIop)
}};

let {{
    exec_output = ''
    for eSize, type in (1, 'uint8_t'), \
                       (2, 'uint16_t'), \
                       (4, 'uint32_t'), \
                       (8, 'uint64_t'):
        size = eSize
        # An instruction handles no more than 16 bytes and no more than
        # 4 elements, or the number of elements needed to fill 8 or 16 bytes.
        sizes = set((16, 8))
        for count in 1, 2, 3, 4:
            size = count * eSize
            if size <= 16:
                sizes.add(size)
        for size in sizes:
            substDict = {
                "class_name" : "MicroLdrNeon%dUop" % size,
                "targs" : type
            }
            exec_output += MicroNeonMemExecDeclare.subst(substDict)
            substDict["class_name"] = "MicroStrNeon%dUop" % size
            exec_output += MicroNeonMemExecDeclare.subst(substDict)
            size += eSize
}};

////////////////////////////////////////////////////////////////////
//
// Neon (de)interlacing microops
//

let {{
    header_output = exec_output = ''
    for dRegs in (2, 3, 4):
        loadConv = ''
        unloadConv = ''
        for dReg in range(dRegs):
            loadConv += '''
                conv1.cRegs[%(sReg0)d] = htog(FpOp1P%(sReg0)d_uw);
                conv1.cRegs[%(sReg1)d] = htog(FpOp1P%(sReg1)d_uw);
            ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
            unloadConv += '''
                FpDestS%(dReg)dP0_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]);
                FpDestS%(dReg)dP1_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]);
            ''' % { "dReg" : dReg }
        microDeintNeonCode = '''
            const unsigned dRegs = %(dRegs)d;
            const unsigned regs = 2 * dRegs;
            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
                                     sizeof(Element);
            union convStruct {
                FloatRegBits cRegs[regs];
                Element elements[dRegs * perDReg];
            } conv1, conv2;

            %(loadConv)s

            unsigned srcElem = 0;
            for (unsigned destOffset = 0;
                    destOffset < perDReg; destOffset++) {
                for (unsigned dReg = 0; dReg < dRegs; dReg++) {
                    conv2.elements[dReg * perDReg + destOffset] =
                        conv1.elements[srcElem++];
                }
            }

            %(unloadConv)s
        ''' % { "dRegs" : dRegs,
                "loadConv" : loadConv,
                "unloadConv" : unloadConv }
        microDeintNeonIop = \
            InstObjParams('deintneon%duop' % (dRegs * 2),
                          'MicroDeintNeon%dUop' % (dRegs * 2),
                          'MicroNeonMixOp',
                          { 'predicate_test': predicateTest,
                            'code' : microDeintNeonCode },
                            ['IsMicroop'])
        header_output += MicroNeonMixDeclare.subst(microDeintNeonIop)
        exec_output += MicroNeonMixExecute.subst(microDeintNeonIop)

        loadConv = ''
        unloadConv = ''
        for dReg in range(dRegs):
            loadConv += '''
                conv1.cRegs[2 * %(dReg)d + 0] = htog(FpOp1S%(dReg)dP0_uw);
                conv1.cRegs[2 * %(dReg)d + 1] = htog(FpOp1S%(dReg)dP1_uw);
            ''' % { "dReg" : dReg }
            unloadConv += '''
                FpDestP%(sReg0)d_uw = gtoh(conv2.cRegs[%(sReg0)d]);
                FpDestP%(sReg1)d_uw = gtoh(conv2.cRegs[%(sReg1)d]);
            ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
        microInterNeonCode = '''
            const unsigned dRegs = %(dRegs)d;
            const unsigned regs = 2 * dRegs;
            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
                                     sizeof(Element);
            union convStruct {
                FloatRegBits cRegs[regs];
                Element elements[dRegs * perDReg];
            } conv1, conv2;

            %(loadConv)s

            unsigned destElem = 0;
            for (unsigned srcOffset = 0;
                    srcOffset < perDReg; srcOffset++) {
                for (unsigned dReg = 0; dReg < dRegs; dReg++) {
                    conv2.elements[destElem++] =
                        conv1.elements[dReg * perDReg + srcOffset];
                }
            }

            %(unloadConv)s
        ''' % { "dRegs" : dRegs,
                "loadConv" : loadConv,
                "unloadConv" : unloadConv }
        microInterNeonIop = \
            InstObjParams('interneon%duop' % (dRegs * 2),
                          'MicroInterNeon%dUop' % (dRegs * 2),
                          'MicroNeonMixOp',
                          { 'predicate_test': predicateTest,
                            'code' : microInterNeonCode },
                            ['IsMicroop'])
        header_output += MicroNeonMixDeclare.subst(microInterNeonIop)
        exec_output += MicroNeonMixExecute.subst(microInterNeonIop)
}};

let {{
    exec_output = ''
    for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
        for dRegs in (2, 3, 4):
            Name = "MicroDeintNeon%dUop" % (dRegs * 2)
            substDict = { "class_name" : Name, "targs" : type }
            exec_output += MicroNeonExecDeclare.subst(substDict)
            Name = "MicroInterNeon%dUop" % (dRegs * 2)
            substDict = { "class_name" : Name, "targs" : type }
            exec_output += MicroNeonExecDeclare.subst(substDict)
}};

////////////////////////////////////////////////////////////////////
//
// Neon microops to pack/unpack a single lane
//

let {{
    header_output = exec_output = ''
    for sRegs in 1, 2:
        baseLoadRegs = ''
        for reg in range(sRegs):
            baseLoadRegs += '''
                sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d_uw);
                sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d_uw);
            ''' % { "reg0" : (2 * reg + 0),
                    "reg1" : (2 * reg + 1) }
        for dRegs in range(sRegs, 5):
            unloadRegs = ''
            loadRegs = baseLoadRegs
            for reg in range(dRegs):
                loadRegs += '''
                    destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0_uw);
                    destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1_uw);
                ''' % { "reg" : reg }
                unloadRegs += '''
                    FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]);
                    FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]);
                ''' % { "reg" : reg }
            microUnpackNeonCode = '''
            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
                                     sizeof(Element);

            union SourceRegs {
                FloatRegBits fRegs[2 * %(sRegs)d];
                Element elements[%(sRegs)d * perDReg];
            } sourceRegs;

            union DestReg {
                FloatRegBits fRegs[2];
                Element elements[perDReg];
            } destRegs[%(dRegs)d];

            %(loadRegs)s

            for (unsigned i = 0; i < %(dRegs)d; i++) {
                destRegs[i].elements[lane] = sourceRegs.elements[i];
            }

            %(unloadRegs)s
            ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
                    "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }

            microUnpackNeonIop = \
                InstObjParams('unpackneon%dto%duop' % (sRegs * 2, dRegs * 2),
                              'MicroUnpackNeon%dto%dUop' %
                                    (sRegs * 2, dRegs * 2),
                              'MicroNeonMixLaneOp',
                              { 'predicate_test': predicateTest,
                                'code' : microUnpackNeonCode },
                                ['IsMicroop'])
            header_output += MicroNeonMixLaneDeclare.subst(microUnpackNeonIop)
            exec_output += MicroNeonMixExecute.subst(microUnpackNeonIop)

    for sRegs in 1, 2:
        loadRegs = ''
        for reg in range(sRegs):
            loadRegs += '''
                sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d_uw);
                sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d_uw);
            ''' % { "reg0" : (2 * reg + 0),
                    "reg1" : (2 * reg + 1) }
        for dRegs in range(sRegs, 5):
            unloadRegs = ''
            for reg in range(dRegs):
                unloadRegs += '''
                    FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]);
                    FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]);
                ''' % { "reg" : reg }
            microUnpackAllNeonCode = '''
            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
                                     sizeof(Element);

            union SourceRegs {
                FloatRegBits fRegs[2 * %(sRegs)d];
                Element elements[%(sRegs)d * perDReg];
            } sourceRegs;

            union DestReg {
                FloatRegBits fRegs[2];
                Element elements[perDReg];
            } destRegs[%(dRegs)d];

            %(loadRegs)s

            for (unsigned i = 0; i < %(dRegs)d; i++) {
                for (unsigned j = 0; j < perDReg; j++)
                    destRegs[i].elements[j] = sourceRegs.elements[i];
            }

            %(unloadRegs)s
            ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
                    "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }

            microUnpackAllNeonIop = \
                InstObjParams('unpackallneon%dto%duop' % (sRegs * 2, dRegs * 2),
                              'MicroUnpackAllNeon%dto%dUop' %
                                    (sRegs * 2, dRegs * 2),
                              'MicroNeonMixOp',
                              { 'predicate_test': predicateTest,
                                'code' : microUnpackAllNeonCode },
                                ['IsMicroop'])
            header_output += MicroNeonMixDeclare.subst(microUnpackAllNeonIop)
            exec_output += MicroNeonMixExecute.subst(microUnpackAllNeonIop)

    for dRegs in 1, 2:
        unloadRegs = ''
        for reg in range(dRegs):
            unloadRegs += '''
                FpDestP%(reg0)d_uw = gtoh(destRegs.fRegs[%(reg0)d]);
                FpDestP%(reg1)d_uw = gtoh(destRegs.fRegs[%(reg1)d]);
            ''' % { "reg0" : (2 * reg + 0),
                    "reg1" : (2 * reg + 1) }
        for sRegs in range(dRegs, 5):
            loadRegs = ''
            for reg in range(sRegs):
                loadRegs += '''
                    sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0_uw);
                    sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1_uw);
                ''' % { "reg" : reg }
            microPackNeonCode = '''
            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
                                     sizeof(Element);

            union SourceReg {
                FloatRegBits fRegs[2];
                Element elements[perDReg];
            } sourceRegs[%(sRegs)d];

            union DestRegs {
                FloatRegBits fRegs[2 * %(dRegs)d];
                Element elements[%(dRegs)d * perDReg];
            } destRegs;

            %(loadRegs)s

            for (unsigned i = 0; i < %(sRegs)d; i++) {
                destRegs.elements[i] = sourceRegs[i].elements[lane];
            }
            for (unsigned i = %(sRegs)d; i < %(dRegs)d * perDReg; ++i) {
                destRegs.elements[i] = 0;
            }

            %(unloadRegs)s
            ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
                    "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }

            microPackNeonIop = \
                InstObjParams('packneon%dto%duop' % (sRegs * 2, dRegs * 2),
                              'MicroPackNeon%dto%dUop' %
                                    (sRegs * 2, dRegs * 2),
                              'MicroNeonMixLaneOp',
                              { 'predicate_test': predicateTest,
                                'code' : microPackNeonCode },
                                ['IsMicroop'])
            header_output += MicroNeonMixLaneDeclare.subst(microPackNeonIop)
            exec_output += MicroNeonMixExecute.subst(microPackNeonIop)
}};

let {{
    exec_output = ''
    for typeSize in (8, 16, 32):
        for sRegs in 1, 2:
            for dRegs in range(sRegs, min(sRegs * 64 / typeSize + 1, 5)):
                for format in ("MicroUnpackNeon%(sRegs)dto%(dRegs)dUop",
                               "MicroUnpackAllNeon%(sRegs)dto%(dRegs)dUop",
                               "MicroPackNeon%(dRegs)dto%(sRegs)dUop"):
                    Name = format % { "sRegs" : sRegs * 2,
                                      "dRegs" : dRegs * 2 }
                    substDict = { "class_name" : Name,
                                  "targs" : "uint%d_t" % typeSize }
                    exec_output += MicroNeonExecDeclare.subst(substDict)
}};

////////////////////////////////////////////////////////////////////
//
// Integer = Integer op Immediate microops
//

let {{
    microAddiUopIop = InstObjParams('addi_uop', 'MicroAddiUop',
                                    'MicroIntImmOp',
                                    {'code': 'URa = URb + imm;',
                                     'predicate_test': predicateTest},
                                    ['IsMicroop'])

    microAddUopCode = '''
        URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC);
    '''

    microAddXiUopIop = InstObjParams('addxi_uop', 'MicroAddXiUop',
                                     'MicroIntImmXOp',
                                     'XURa = XURb + imm;',
                                     ['IsMicroop'])

    microAddXiSpAlignUopIop = InstObjParams('addxi_uop', 'MicroAddXiSpAlignUop',
                                            'MicroIntImmXOp', '''
        if (isSP((IntRegIndex) urb) && bits(XURb, 3, 0) &&
            SPAlignmentCheckEnabled(xc->tcBase())) {
            return std::make_shared<SPAlignmentFault>();
        }
        XURa = XURb + imm;
    ''', ['IsMicroop'])

    microAddXERegUopIop = InstObjParams('addxr_uop', 'MicroAddXERegUop',
                                        'MicroIntRegXOp',
                                        'XURa = XURb + ' + \
                                            'extendReg64(XURc, type, shiftAmt, 64);',
                                        ['IsMicroop'])

    microAddUopIop = InstObjParams('add_uop', 'MicroAddUop',
                                   'MicroIntRegOp',
                                   {'code': microAddUopCode,
                                    'predicate_test': pickPredicate(microAddUopCode)},
                                   ['IsMicroop'])

    microSubiUopIop = InstObjParams('subi_uop', 'MicroSubiUop',
                                    'MicroIntImmOp',
                                    {'code': 'URa = URb - imm;',
                                     'predicate_test': predicateTest},
                                    ['IsMicroop'])

    microSubXiUopIop = InstObjParams('subxi_uop', 'MicroSubXiUop',
                                     'MicroIntImmXOp',
                                     'XURa = XURb - imm;',
                                     ['IsMicroop'])

    microSubUopCode = '''
        URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC);
    '''
    microSubUopIop = InstObjParams('sub_uop', 'MicroSubUop',
                                   'MicroIntRegOp',
                                   {'code': microSubUopCode,
                                    'predicate_test': pickPredicate(microSubUopCode)},
                                   ['IsMicroop'])

    microUopRegMovIop = InstObjParams('uopReg_uop', 'MicroUopRegMov',
                                   'MicroIntMov',
                                   {'code': 'IWRa = URb;',
                                    'predicate_test': predicateTest},
                                   ['IsMicroop'])

    microUopRegMovRetIop = InstObjParams('movret_uop', 'MicroUopRegMovRet',
                                      'MicroIntMov',
                                     {'code': microRetUopCode % 'URb',
                                      'predicate_test': predicateTest},
                                     ['IsMicroop', 'IsNonSpeculative',
                                      'IsSerializeAfter', 'IsSquashAfter'])

    setPCCPSRDecl = '''
                    CPSR cpsrOrCondCodes = URc;
                    SCTLR sctlr = Sctlr;
                    pNPC = URa;
                    CPSR new_cpsr =
                    cpsrWriteByInstr(cpsrOrCondCodes, URb, Scr, Nsacr,
                                     0xF, true, sctlr.nmfi, xc->tcBase());
                    Cpsr = ~CondCodesMask & new_cpsr;
                    NextThumb = new_cpsr.t;
                    NextJazelle = new_cpsr.j;
                    NextItState = ((((CPSR)URb).it2 << 2) & 0xFC)
                                    | (((CPSR)URb).it1 & 0x3);
                    CondCodesNZ = new_cpsr.nz;
                    CondCodesC = new_cpsr.c;
                    CondCodesV = new_cpsr.v;
                    CondCodesGE = new_cpsr.ge;
                    '''

    microUopSetPCCPSRIop = InstObjParams('uopSet_uop', 'MicroUopSetPCCPSR',
                                         'MicroSetPCCPSR',
                                         {'code': setPCCPSRDecl,
                                          'predicate_test': predicateTest},
                                         ['IsMicroop'])

    header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \
                    MicroIntImmDeclare.subst(microAddXiUopIop) + \
                    MicroIntImmDeclare.subst(microAddXiSpAlignUopIop) + \
                    MicroIntImmDeclare.subst(microSubiUopIop) + \
                    MicroIntImmDeclare.subst(microSubXiUopIop) + \
                    MicroIntRegDeclare.subst(microAddUopIop) + \
                    MicroIntRegDeclare.subst(microSubUopIop) + \
                    MicroIntXERegDeclare.subst(microAddXERegUopIop) + \
                    MicroIntMovDeclare.subst(microUopRegMovIop) + \
                    MicroIntMovDeclare.subst(microUopRegMovRetIop) + \
                    MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop)

    decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \
                     MicroIntImmXConstructor.subst(microAddXiUopIop) + \
                     MicroIntImmXConstructor.subst(microAddXiSpAlignUopIop) + \
                     MicroIntImmConstructor.subst(microSubiUopIop) + \
                     MicroIntImmXConstructor.subst(microSubXiUopIop) + \
                     MicroIntRegConstructor.subst(microAddUopIop) + \
                     MicroIntRegConstructor.subst(microSubUopIop) + \
                     MicroIntXERegConstructor.subst(microAddXERegUopIop) + \
                     MicroIntMovConstructor.subst(microUopRegMovIop) + \
                     MicroIntMovConstructor.subst(microUopRegMovRetIop) + \
                     MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop)

    exec_output = PredOpExecute.subst(microAddiUopIop) + \
                  BasicExecute.subst(microAddXiUopIop) + \
                  BasicExecute.subst(microAddXiSpAlignUopIop) + \
                  PredOpExecute.subst(microSubiUopIop) + \
                  BasicExecute.subst(microSubXiUopIop) + \
                  PredOpExecute.subst(microAddUopIop) + \
                  PredOpExecute.subst(microSubUopIop) + \
                  BasicExecute.subst(microAddXERegUopIop) + \
                  PredOpExecute.subst(microUopRegMovIop) + \
                  PredOpExecute.subst(microUopRegMovRetIop) + \
                  PredOpExecute.subst(microUopSetPCCPSRIop)

}};

let {{
    iop = InstObjParams("ldmstm", "LdmStm", 'MacroMemOp', "", [])
    header_output = MacroMemDeclare.subst(iop)
    decoder_output = MacroMemConstructor.subst(iop)

    iop = InstObjParams("ldpstp", "LdpStp", 'PairMemOp', "", [])
    header_output += PairMemDeclare.subst(iop)
    decoder_output += PairMemConstructor.subst(iop)

    iopImm = InstObjParams("bigfpmemimm", "BigFpMemImm", "BigFpMemImmOp", "")
    iopPre = InstObjParams("bigfpmempre", "BigFpMemPre", "BigFpMemPreOp", "")
    iopPost = InstObjParams("bigfpmempost", "BigFpMemPost", "BigFpMemPostOp", "")
    for iop in (iopImm, iopPre, iopPost):
        header_output += BigFpMemImmDeclare.subst(iop)
        decoder_output += BigFpMemImmConstructor.subst(iop)

    iop = InstObjParams("bigfpmemreg", "BigFpMemReg", "BigFpMemRegOp", "")
    header_output += BigFpMemRegDeclare.subst(iop)
    decoder_output += BigFpMemRegConstructor.subst(iop)

    iop = InstObjParams("bigfpmemlit", "BigFpMemLit", "BigFpMemLitOp", "")
    header_output += BigFpMemLitDeclare.subst(iop)
    decoder_output += BigFpMemLitConstructor.subst(iop)

    iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", [])
    header_output += VMemMultDeclare.subst(iop)
    decoder_output += VMemMultConstructor.subst(iop)

    iop = InstObjParams("vldsingle", "VldSingle", 'VldSingleOp', "", [])
    header_output += VMemSingleDeclare.subst(iop)
    decoder_output += VMemSingleConstructor.subst(iop)

    iop = InstObjParams("vstmult", "VstMult", 'VstMultOp', "", [])
    header_output += VMemMultDeclare.subst(iop)
    decoder_output += VMemMultConstructor.subst(iop)

    iop = InstObjParams("vstsingle", "VstSingle", 'VstSingleOp', "", [])
    header_output += VMemSingleDeclare.subst(iop)
    decoder_output += VMemSingleConstructor.subst(iop)

    vfpIop = InstObjParams("vldmstm", "VLdmStm", 'MacroVFPMemOp', "", [])
    header_output += MacroVFPMemDeclare.subst(vfpIop)
    decoder_output += MacroVFPMemConstructor.subst(vfpIop)
}};