summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Sandberg <andreas@sandberg.pp.se>2013-09-30 12:00:20 +0200
committerAndreas Sandberg <andreas@sandberg.pp.se>2013-09-30 12:00:20 +0200
commit654d1e675a3dc1f598aeadb0824bdb3357820a59 (patch)
treee249108fcd6737dda985b993c95d926a33abca21
parentc299dcedc6d73aab56d9c659623d7112c2e9c4bb (diff)
downloadgem5-654d1e675a3dc1f598aeadb0824bdb3357820a59.tar.xz
x86: Add support for loading 32-bit and 80-bit floats in the x87
The x87 FPU supports three floating point formats: 32-bit, 64-bit, and 80-bit floats. The current gem5 implementation supports 32-bit and 64-bit floats, but only works correctly for 64-bit floats. This changeset fixes the 32-bit float handling by correctly loading and rounding (using truncation) 32-bit floats instead of simply truncating the bit pattern. 80-bit floats are loaded by first loading the 80 bits of the float into two temporary integer registers. A micro-op (cvtint_fp80) then converts the contents of the two integer registers to the internal FP representation (double). Similarly, when storing an 80-bit float, there are two conversion routines (cvtfp80h_int and cvtfp80l_int) that convert an internal FP register to the 80-bit format and store the upper 64 bits or lower 16 bits in an integer register, which is then written to memory using normal integer stores.
-rw-r--r--src/arch/x86/isa/decoder/x87.isa8
-rw-r--r--src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py43
-rw-r--r--src/arch/x86/isa/microops/fpop.isa33
-rw-r--r--src/arch/x86/isa/microops/ldstop.isa36
4 files changed, 106 insertions, 14 deletions
diff --git a/src/arch/x86/isa/decoder/x87.isa b/src/arch/x86/isa/decoder/x87.isa
index c29e8a334..03ca8be07 100644
--- a/src/arch/x86/isa/decoder/x87.isa
+++ b/src/arch/x86/isa/decoder/x87.isa
@@ -67,11 +67,11 @@ format WarnUnimpl {
0x0: fnop();
default: Inst::UD2();
}
- default: fst();
+ default: Inst::FST(Ed);
}
0x3: decode MODRM_MOD {
0x3: Inst::UD2();
- default: fstp();
+ default: Inst::FSTP(Ed);
}
0x4: decode MODRM_MOD {
0x3: decode MODRM_RM {
@@ -189,7 +189,7 @@ format WarnUnimpl {
0x5: decode MODRM_MOD {
0x3: fucomi();
// 80-bit load
- default: fld();
+ default: Inst::FLD80(M);
}
0x6: decode MODRM_MOD {
0x3: fcomi();
@@ -197,7 +197,7 @@ format WarnUnimpl {
}
0x7: decode MODRM_MOD {
0x3: Inst::UD2();
- default: fstp();
+ default: Inst::FST80P(M);
}
}
//0x4: esc4();
diff --git a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
index 5a19aabd9..6f3a8d3a6 100644
--- a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
+++ b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/load_or_store_floating_point.py
@@ -37,13 +37,13 @@
microcode = '''
def macroop FLD_M {
- ldfp ufp1, seg, sib, disp
+ ldfp87 ufp1, seg, sib, disp
movfp st(-1), ufp1, spm=-1
};
def macroop FLD_P {
rdip t7
- ldfp ufp1, seg, riprel, disp
+ ldfp87 ufp1, seg, riprel, disp
movfp st(-1), ufp1, spm=-1
};
@@ -51,17 +51,30 @@ def macroop FLD_R {
movfp st(-1), sti, spm=-1
};
+def macroop FLD80_M {
+ ld t1, seg, sib, "DISPLACEMENT", dataSize=8
+ ld t2, seg, sib, "DISPLACEMENT + 8", dataSize=2
+ cvtint_fp80 st(-1), t1, t2, spm=-1
+};
+
+def macroop FLD80_P {
+ rdip t7
+ ld t1, seg, riprel, "DISPLACEMENT", dataSize=8
+ ld t2, seg, riprel, "DISPLACEMENT + 8", dataSize=2
+ cvtint_fp80 st(-1), t1, t2, spm=-1
+};
+
def macroop FST_R {
movfp sti, st(0)
};
def macroop FST_M {
- stfp st(0), seg, sib, disp
+ stfp87 st(0), seg, sib, disp
};
def macroop FST_P {
rdip t7
- stfp st(0), seg, riprel, disp
+ stfp87 st(0), seg, riprel, disp
};
def macroop FSTP_R {
@@ -70,14 +83,32 @@ def macroop FSTP_R {
def macroop FSTP_M {
movfp ufp1, st(0)
- stfp ufp1, seg, sib, disp
+ stfp87 ufp1, seg, sib, disp
pop87
};
def macroop FSTP_P {
movfp ufp1, st(0)
rdip t7
- stfp ufp1, seg, riprel, disp
+ stfp87 ufp1, seg, riprel, disp
pop87
};
+
+def macroop FST80P_M {
+ cvtfp80h_int t1, st(0)
+ cvtfp80l_int t2, st(0)
+ st t1, seg, sib, "DISPLACEMENT + 0", dataSize=8
+ st t2, seg, sib, "DISPLACEMENT + 8", dataSize=2
+ pop87
+};
+
+def macroop FST80P_P {
+ rdip t7
+ cvtfp80h_int t1, st(0)
+ cvtfp80l_int t2, st(0)
+ st t1, seg, riprel, "DISPLACEMENT + 0", dataSize=8
+ st t2, seg, riprel, "DISPLACEMENT + 8", dataSize=2
+ pop87
+};
+
'''
diff --git a/src/arch/x86/isa/microops/fpop.isa b/src/arch/x86/isa/microops/fpop.isa
index 8a77914d9..3c6753712 100644
--- a/src/arch/x86/isa/microops/fpop.isa
+++ b/src/arch/x86/isa/microops/fpop.isa
@@ -295,9 +295,10 @@ let {{
class ConvOp(FpBinaryOp):
abstract = True
op_class = 'FloatCvtOp'
- def __init__(self, dest, src1):
+ def __init__(self, dest, src1, **kwargs):
super(ConvOp, self).__init__(dest, src1, \
- "InstRegIndex(FLOATREG_MICROFP0)")
+ "InstRegIndex(FLOATREG_MICROFP0)", \
+ **kwargs)
# These probably shouldn't look at the ExtMachInst directly to figure
# out what size to use and should instead delegate that to the macroop's
@@ -324,6 +325,34 @@ let {{
SDestReg = merge(SDestReg, intSrcReg1, 4);
'''
+ # Convert two integers registers representing an 80-bit floating
+ # point number to an x87 register.
+ class cvtint_fp80(FpBinaryOp):
+ code = '''
+ uint8_t bits[10];
+ *(uint64_t *)(bits + 0) = SSrcReg1;
+ *(uint16_t *)(bits + 8) = (uint16_t)SSrcReg2;
+ FpDestReg = loadFloat80(bits);
+ '''
+
+ # Convert an x87 register (double) into extended precision and
+ # extract the highest 64 bits.
+ class cvtfp80h_int(ConvOp):
+ code = '''
+ char bits[10];
+ storeFloat80(bits, FpSrcReg1);
+ SDestReg = *(uint64_t *)(bits + 0);
+ '''
+
+ # Convert an x87 register (double) into extended precision and
+ # extract the lowest 16 bits.
+ class cvtfp80l_int(ConvOp):
+ code = '''
+ char bits[10];
+ storeFloat80(bits, FpSrcReg1);
+ SDestReg = *(uint16_t *)(bits + 8);
+ '''
+
# These need to consider size at some point. They'll always use doubles
# for the moment.
class addfp(FpBinaryOp):
diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa
index 75519f417..1b22b88de 100644
--- a/src/arch/x86/isa/microops/ldstop.isa
+++ b/src/arch/x86/isa/microops/ldstop.isa
@@ -410,7 +410,22 @@ let {{
defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);',
'Data = Mem & mask(dataSize * 8);',
'(StoreCheck << FlagShift) | Request::LOCKED')
- defineMicroLoadOp('Ldfp', 'FpData_uqw = Mem;', big = False)
+
+ defineMicroLoadOp('Ldfp', code='FpData_uqw = Mem', big = False)
+
+ defineMicroLoadOp('Ldfp87', code='''
+ switch (dataSize)
+ {
+ case 4:
+ FpData_df = *(float *)&Mem;
+ break;
+ case 8:
+ FpData_df = *(double *)&Mem;
+ break;
+ default:
+ panic("Unhandled data size in LdFp87.\\n");
+ }
+ ''', big = False)
def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0"):
global header_output
@@ -447,7 +462,24 @@ let {{
defineMicroStoreOp('St', 'Mem = pick(Data, 2, dataSize);')
defineMicroStoreOp('Stul', 'Mem = pick(Data, 2, dataSize);',
mem_flags="Request::LOCKED")
- defineMicroStoreOp('Stfp', 'Mem = FpData_uqw;')
+
+ defineMicroStoreOp('Stfp', code='Mem = FpData_uqw;')
+
+ defineMicroStoreOp('Stfp87', code='''
+ switch (dataSize)
+ {
+ case 4: {
+ float single(FpData_df);
+ Mem = *(uint32_t *)&single;
+ } break;
+ case 8:
+ Mem = *(uint64_t *)&FpData_df;
+ break;
+ default:
+ panic("Unhandled data size in StFp87.\\n");
+ }
+ ''')
+
defineMicroStoreOp('Cda', 'Mem = 0;', mem_flags="Request::NO_ACCESS")
iop = InstObjParams("lea", "Lea", 'X86ISA::LdStOp',