diff options
author | Andreas Sandberg <andreas@sandberg.pp.se> | 2013-09-30 12:00:20 +0200 |
---|---|---|
committer | Andreas Sandberg <andreas@sandberg.pp.se> | 2013-09-30 12:00:20 +0200 |
commit | 654d1e675a3dc1f598aeadb0824bdb3357820a59 (patch) | |
tree | e249108fcd6737dda985b993c95d926a33abca21 | |
parent | c299dcedc6d73aab56d9c659623d7112c2e9c4bb (diff) | |
download | gem5-654d1e675a3dc1f598aeadb0824bdb3357820a59.tar.xz |
x86: Add support for loading 32-bit and 80-bit floats in the x87
The x87 FPU supports three floating point formats: 32-bit, 64-bit, and
80-bit floats. The current gem5 implementation supports 32-bit and
64-bit floats, but only works correctly for 64-bit floats. This
changeset fixes the 32-bit float handling by correctly loading and
rounding (using truncation) 32-bit floats instead of simply truncating
the bit pattern.
80-bit floats are loaded by first loading the 80 bits of the float into
two temporary integer registers. A micro-op (cvtint_fp80) then
converts the contents of the two integer registers to the internal FP
representation (double). Similarly, when storing an 80-bit float,
there are two conversion routines (cvtfp80h_int and cvtfp80l_int) that
convert an internal FP register to 80-bit format and store the upper 64 bits
or lower 16 bits to an integer register, which is then written to
memory using normal integer stores.
-rw-r--r-- | src/arch/x86/isa/decoder/x87.isa | 8 | ||||
-rw-r--r-- | src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py | 43 | ||||
-rw-r--r-- | src/arch/x86/isa/microops/fpop.isa | 33 | ||||
-rw-r--r-- | src/arch/x86/isa/microops/ldstop.isa | 36 |
4 files changed, 106 insertions, 14 deletions
diff --git a/src/arch/x86/isa/decoder/x87.isa b/src/arch/x86/isa/decoder/x87.isa index c29e8a334..03ca8be07 100644 --- a/src/arch/x86/isa/decoder/x87.isa +++ b/src/arch/x86/isa/decoder/x87.isa @@ -67,11 +67,11 @@ format WarnUnimpl { 0x0: fnop(); default: Inst::UD2(); } - default: fst(); + default: Inst::FST(Ed); } 0x3: decode MODRM_MOD { 0x3: Inst::UD2(); - default: fstp(); + default: Inst::FSTP(Ed); } 0x4: decode MODRM_MOD { 0x3: decode MODRM_RM { @@ -189,7 +189,7 @@ format WarnUnimpl { 0x5: decode MODRM_MOD { 0x3: fucomi(); // 80-bit load - default: fld(); + default: Inst::FLD80(M); } 0x6: decode MODRM_MOD { 0x3: fcomi(); @@ -197,7 +197,7 @@ format WarnUnimpl { } 0x7: decode MODRM_MOD { 0x3: Inst::UD2(); - default: fstp(); + default: Inst::FST80P(M); } } //0x4: esc4(); diff --git a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py index 5a19aabd9..6f3a8d3a6 100644 --- a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py +++ b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py @@ -37,13 +37,13 @@ microcode = ''' def macroop FLD_M { - ldfp ufp1, seg, sib, disp + ldfp87 ufp1, seg, sib, disp movfp st(-1), ufp1, spm=-1 }; def macroop FLD_P { rdip t7 - ldfp ufp1, seg, riprel, disp + ldfp87 ufp1, seg, riprel, disp movfp st(-1), ufp1, spm=-1 }; @@ -51,17 +51,30 @@ def macroop FLD_R { movfp st(-1), sti, spm=-1 }; +def macroop FLD80_M { + ld t1, seg, sib, "DISPLACEMENT", dataSize=8 + ld t2, seg, sib, "DISPLACEMENT + 8", dataSize=2 + cvtint_fp80 st(-1), t1, t2, spm=-1 +}; + +def macroop FLD80_P { + rdip t7 + ld t1, seg, riprel, "DISPLACEMENT", dataSize=8 + ld t2, seg, riprel, "DISPLACEMENT + 8", dataSize=2 + cvtint_fp80 st(-1), t1, t2, spm=-1 +}; + def macroop FST_R { movfp sti, st(0) }; def macroop FST_M { - stfp st(0), seg, sib, disp + stfp87 st(0), seg, sib, disp }; def 
macroop FST_P { rdip t7 - stfp st(0), seg, riprel, disp + stfp87 st(0), seg, riprel, disp }; def macroop FSTP_R { @@ -70,14 +83,32 @@ def macroop FSTP_R { def macroop FSTP_M { movfp ufp1, st(0) - stfp ufp1, seg, sib, disp + stfp87 ufp1, seg, sib, disp pop87 }; def macroop FSTP_P { movfp ufp1, st(0) rdip t7 - stfp ufp1, seg, riprel, disp + stfp87 ufp1, seg, riprel, disp pop87 }; + +def macroop FST80P_M { + cvtfp80h_int t1, st(0) + cvtfp80l_int t2, st(0) + st t1, seg, sib, "DISPLACEMENT + 0", dataSize=8 + st t2, seg, sib, "DISPLACEMENT + 8", dataSize=2 + pop87 +}; + +def macroop FST80P_P { + rdip t7 + cvtfp80h_int t1, st(0) + cvtfp80l_int t2, st(0) + st t1, seg, riprel, "DISPLACEMENT + 0", dataSize=8 + st t2, seg, riprel, "DISPLACEMENT + 8", dataSize=2 + pop87 +}; + ''' diff --git a/src/arch/x86/isa/microops/fpop.isa b/src/arch/x86/isa/microops/fpop.isa index 8a77914d9..3c6753712 100644 --- a/src/arch/x86/isa/microops/fpop.isa +++ b/src/arch/x86/isa/microops/fpop.isa @@ -295,9 +295,10 @@ let {{ class ConvOp(FpBinaryOp): abstract = True op_class = 'FloatCvtOp' - def __init__(self, dest, src1): + def __init__(self, dest, src1, **kwargs): super(ConvOp, self).__init__(dest, src1, \ - "InstRegIndex(FLOATREG_MICROFP0)") + "InstRegIndex(FLOATREG_MICROFP0)", \ + **kwargs) # These probably shouldn't look at the ExtMachInst directly to figure # out what size to use and should instead delegate that to the macroop's @@ -324,6 +325,34 @@ let {{ SDestReg = merge(SDestReg, intSrcReg1, 4); ''' + # Convert two integers registers representing an 80-bit floating + # point number to an x87 register. + class cvtint_fp80(FpBinaryOp): + code = ''' + uint8_t bits[10]; + *(uint64_t *)(bits + 0) = SSrcReg1; + *(uint16_t *)(bits + 8) = (uint16_t)SSrcReg2; + FpDestReg = loadFloat80(bits); + ''' + + # Convert an x87 register (double) into extended precision and + # extract the highest 64 bits. 
+ class cvtfp80h_int(ConvOp): + code = ''' + char bits[10]; + storeFloat80(bits, FpSrcReg1); + SDestReg = *(uint64_t *)(bits + 0); + ''' + + # Convert an x87 register (double) into extended precision and + # extract the lowest 16 bits. + class cvtfp80l_int(ConvOp): + code = ''' + char bits[10]; + storeFloat80(bits, FpSrcReg1); + SDestReg = *(uint16_t *)(bits + 8); + ''' + # These need to consider size at some point. They'll always use doubles # for the moment. class addfp(FpBinaryOp): diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 75519f417..1b22b88de 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -410,7 +410,22 @@ let {{ defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);', 'Data = Mem & mask(dataSize * 8);', '(StoreCheck << FlagShift) | Request::LOCKED') - defineMicroLoadOp('Ldfp', 'FpData_uqw = Mem;', big = False) + + defineMicroLoadOp('Ldfp', code='FpData_uqw = Mem', big = False) + + defineMicroLoadOp('Ldfp87', code=''' + switch (dataSize) + { + case 4: + FpData_df = *(float *)&Mem; + break; + case 8: + FpData_df = *(double *)&Mem; + break; + default: + panic("Unhandled data size in LdFp87.\\n"); + } + ''', big = False) def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0"): global header_output @@ -447,7 +462,24 @@ let {{ defineMicroStoreOp('St', 'Mem = pick(Data, 2, dataSize);') defineMicroStoreOp('Stul', 'Mem = pick(Data, 2, dataSize);', mem_flags="Request::LOCKED") - defineMicroStoreOp('Stfp', 'Mem = FpData_uqw;') + + defineMicroStoreOp('Stfp', code='Mem = FpData_uqw;') + + defineMicroStoreOp('Stfp87', code=''' + switch (dataSize) + { + case 4: { + float single(FpData_df); + Mem = *(uint32_t *)&single; + } break; + case 8: + Mem = *(uint64_t *)&FpData_df; + break; + default: + panic("Unhandled data size in StFp87.\\n"); + } + ''') + defineMicroStoreOp('Cda', 'Mem = 0;', mem_flags="Request::NO_ACCESS") iop = InstObjParams("lea", "Lea", 
'X86ISA::LdStOp', |