diff options
-rw-r--r-- | src/arch/arm/isa/insts/neon.isa | 38 | ||||
-rw-r--r-- | src/arch/arm/isa/templates/neon.isa | 26 |
2 files changed, 38 insertions, 26 deletions
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 1568b755b..790c9c3a1 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1718,7 +1718,7 @@ let {{ destElem = (srcElem1 >> shiftAmt); } // Make sure the right shift sign extended when it should. - if (srcElem1 < 0 && destElem >= 0) { + if (ltz(srcElem1) && !ltz(destElem)) { destElem |= -((Element)1 << (sizeof(Element) * 8 - 1 - shiftAmt)); } @@ -1740,7 +1740,7 @@ let {{ Element rBit = 0; if (shiftAmt <= sizeof(Element) * 8) rBit = bits(srcElem1, shiftAmt - 1); - if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) + if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) rBit = 1; if (shiftAmt >= sizeof(Element) * 8) { shiftAmt = sizeof(Element) * 8 - 1; @@ -1749,7 +1749,7 @@ let {{ destElem = (srcElem1 >> shiftAmt); } // Make sure the right shift sign extended when it should. - if (srcElem1 < 0 && destElem >= 0) { + if (ltz(srcElem1) && !ltz(destElem)) { destElem |= -((Element)1 << (sizeof(Element) * 8 - 1 - shiftAmt)); } @@ -1778,11 +1778,6 @@ let {{ } else { destElem = (srcElem1 >> shiftAmt); } - // Make sure the right shift sign extended when it should. - if (srcElem1 < 0 && destElem >= 0) { - destElem |= -((Element)1 << (sizeof(Element) * 8 - - 1 - shiftAmt)); - } } else if (shiftAmt > 0) { if (shiftAmt >= sizeof(Element) * 8) { if (srcElem1 != 0) { @@ -1862,19 +1857,12 @@ let {{ Element rBit = 0; if (shiftAmt <= sizeof(Element) * 8) rBit = bits(srcElem1, shiftAmt - 1); - if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) - rBit = 1; if (shiftAmt >= sizeof(Element) * 8) { shiftAmt = sizeof(Element) * 8 - 1; destElem = 0; } else { destElem = (srcElem1 >> shiftAmt); } - // Make sure the right shift sign extended when it should. 
- if (srcElem1 < 0 && destElem >= 0) { - destElem |= -((Element)1 << (sizeof(Element) * 8 - - 1 - shiftAmt)); - } destElem += rBit; } else { if (shiftAmt >= sizeof(Element) * 8) { @@ -2014,10 +2002,10 @@ let {{ midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); fpscr.qc = 1; } - bool negPreDest = (destElem < 0); + bool negPreDest = ltz(destElem); destElem += midElem; - bool negDest = (destElem < 0); - bool negMid = (midElem < 0); + bool negDest = ltz(destElem); + bool negMid = ltz(midElem); if (negPreDest == negMid && negMid != negDest) { destElem = mask(sizeof(BigElement) * 8 - 1); if (negPreDest) @@ -2039,10 +2027,10 @@ let {{ midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); fpscr.qc = 1; } - bool negPreDest = (destElem < 0); + bool negPreDest = ltz(destElem); destElem -= midElem; - bool negDest = (destElem < 0); - bool posMid = (midElem > 0); + bool negDest = ltz(destElem); + bool posMid = ltz((BigElement)-midElem); if (negPreDest == posMid && posMid != negDest) { destElem = mask(sizeof(BigElement) * 8 - 1); if (negPreDest) @@ -2361,7 +2349,7 @@ let {{ vshrCode = ''' if (imm >= sizeof(srcElem1) * 8) { - if (srcElem1 < 0) + if (ltz(srcElem1)) destElem = -1; else destElem = 0; @@ -2375,10 +2363,10 @@ let {{ vsraCode = ''' Element mid;; if (imm >= sizeof(srcElem1) * 8) { - mid = (srcElem1 < 0) ? -1 : 0; + mid = ltz(srcElem1) ? 
-1 : 0; } else { mid = srcElem1 >> imm; - if (srcElem1 < 0 && mid >= 0) { + if (ltz(srcElem1) && !ltz(mid)) { mid |= -(mid & ((Element)1 << (sizeof(Element) * 8 - 1 - imm))); } @@ -2686,8 +2674,6 @@ let {{ } else { if (srcElem1 != (Element)srcElem1) { destElem = mask(sizeof(Element) * 8 - 1); - if (srcElem1 < 0) - destElem = ~destElem; fpscr.qc = 1; } else { destElem = srcElem1; diff --git a/src/arch/arm/isa/templates/neon.isa b/src/arch/arm/isa/templates/neon.isa index e402979dc..20c1d26b8 100644 --- a/src/arch/arm/isa/templates/neon.isa +++ b/src/arch/arm/isa/templates/neon.isa @@ -142,6 +142,32 @@ def template NeonExecDeclare {{ %(CPU_exec_context)s *, Trace::InstRecord *) const; }}; +output header {{ + template <class T> + // Implement a less-than-zero function: ltz() + // this function exists because some versions of GCC complain when a + // comparison is done between an unsigned variable and 0, and for GCC 4.2 + // there is no way to disable this warning + inline bool ltz(T t); + + template <> + inline bool ltz(uint8_t) { return false; } + template <> + inline bool ltz(uint16_t) { return false; } + template <> + inline bool ltz(uint32_t) { return false; } + template <> + inline bool ltz(uint64_t) { return false; } + template <> + inline bool ltz(int8_t v) { return v < 0; } + template <> + inline bool ltz(int16_t v) { return v < 0; } + template <> + inline bool ltz(int32_t v) { return v < 0; } + template <> + inline bool ltz(int64_t v) { return v < 0; } +}}; + def template NeonEqualRegExecute {{ template <class Element> Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc, |