summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/arch/arm/isa/insts/neon.isa | 38
-rw-r--r-- src/arch/arm/isa/templates/neon.isa | 26
2 files changed, 38 insertions, 26 deletions
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa
index 1568b755b..790c9c3a1 100644
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -1718,7 +1718,7 @@ let {{
destElem = (srcElem1 >> shiftAmt);
}
// Make sure the right shift sign extended when it should.
- if (srcElem1 < 0 && destElem >= 0) {
+ if (ltz(srcElem1) && !ltz(destElem)) {
destElem |= -((Element)1 << (sizeof(Element) * 8 -
1 - shiftAmt));
}
@@ -1740,7 +1740,7 @@ let {{
Element rBit = 0;
if (shiftAmt <= sizeof(Element) * 8)
rBit = bits(srcElem1, shiftAmt - 1);
- if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
+ if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1))
rBit = 1;
if (shiftAmt >= sizeof(Element) * 8) {
shiftAmt = sizeof(Element) * 8 - 1;
@@ -1749,7 +1749,7 @@ let {{
destElem = (srcElem1 >> shiftAmt);
}
// Make sure the right shift sign extended when it should.
- if (srcElem1 < 0 && destElem >= 0) {
+ if (ltz(srcElem1) && !ltz(destElem)) {
destElem |= -((Element)1 << (sizeof(Element) * 8 -
1 - shiftAmt));
}
@@ -1778,11 +1778,6 @@ let {{
} else {
destElem = (srcElem1 >> shiftAmt);
}
- // Make sure the right shift sign extended when it should.
- if (srcElem1 < 0 && destElem >= 0) {
- destElem |= -((Element)1 << (sizeof(Element) * 8 -
- 1 - shiftAmt));
- }
} else if (shiftAmt > 0) {
if (shiftAmt >= sizeof(Element) * 8) {
if (srcElem1 != 0) {
@@ -1862,19 +1857,12 @@ let {{
Element rBit = 0;
if (shiftAmt <= sizeof(Element) * 8)
rBit = bits(srcElem1, shiftAmt - 1);
- if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0)
- rBit = 1;
if (shiftAmt >= sizeof(Element) * 8) {
shiftAmt = sizeof(Element) * 8 - 1;
destElem = 0;
} else {
destElem = (srcElem1 >> shiftAmt);
}
- // Make sure the right shift sign extended when it should.
- if (srcElem1 < 0 && destElem >= 0) {
- destElem |= -((Element)1 << (sizeof(Element) * 8 -
- 1 - shiftAmt));
- }
destElem += rBit;
} else {
if (shiftAmt >= sizeof(Element) * 8) {
@@ -2014,10 +2002,10 @@ let {{
midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
fpscr.qc = 1;
}
- bool negPreDest = (destElem < 0);
+ bool negPreDest = ltz(destElem);
destElem += midElem;
- bool negDest = (destElem < 0);
- bool negMid = (midElem < 0);
+ bool negDest = ltz(destElem);
+ bool negMid = ltz(midElem);
if (negPreDest == negMid && negMid != negDest) {
destElem = mask(sizeof(BigElement) * 8 - 1);
if (negPreDest)
@@ -2039,10 +2027,10 @@ let {{
midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8));
fpscr.qc = 1;
}
- bool negPreDest = (destElem < 0);
+ bool negPreDest = ltz(destElem);
destElem -= midElem;
- bool negDest = (destElem < 0);
- bool posMid = (midElem > 0);
+ bool negDest = ltz(destElem);
+ bool posMid = ltz((BigElement)-midElem);
if (negPreDest == posMid && posMid != negDest) {
destElem = mask(sizeof(BigElement) * 8 - 1);
if (negPreDest)
@@ -2361,7 +2349,7 @@ let {{
vshrCode = '''
if (imm >= sizeof(srcElem1) * 8) {
- if (srcElem1 < 0)
+ if (ltz(srcElem1))
destElem = -1;
else
destElem = 0;
@@ -2375,10 +2363,10 @@ let {{
vsraCode = '''
Element mid;;
if (imm >= sizeof(srcElem1) * 8) {
- mid = (srcElem1 < 0) ? -1 : 0;
+ mid = ltz(srcElem1) ? -1 : 0;
} else {
mid = srcElem1 >> imm;
- if (srcElem1 < 0 && mid >= 0) {
+ if (ltz(srcElem1) && !ltz(mid)) {
mid |= -(mid & ((Element)1 <<
(sizeof(Element) * 8 - 1 - imm)));
}
@@ -2686,8 +2674,6 @@ let {{
} else {
if (srcElem1 != (Element)srcElem1) {
destElem = mask(sizeof(Element) * 8 - 1);
- if (srcElem1 < 0)
- destElem = ~destElem;
fpscr.qc = 1;
} else {
destElem = srcElem1;
diff --git a/src/arch/arm/isa/templates/neon.isa b/src/arch/arm/isa/templates/neon.isa
index e402979dc..20c1d26b8 100644
--- a/src/arch/arm/isa/templates/neon.isa
+++ b/src/arch/arm/isa/templates/neon.isa
@@ -142,6 +142,32 @@ def template NeonExecDeclare {{
%(CPU_exec_context)s *, Trace::InstRecord *) const;
}};
+output header {{
+ template <class T>
+ // ltz(): less-than-zero test, specialized per fixed-width integer type.
+ // The unsigned specializations return false without comparing; the signed
+ // ones return v < 0. It exists because some versions of GCC warn when an
+ // unsigned value is compared against 0, and GCC 4.2 cannot disable that.
+ inline bool ltz(T t);
+
+ template <>
+ inline bool ltz(uint8_t) { return false; }
+ template <>
+ inline bool ltz(uint16_t) { return false; }
+ template <>
+ inline bool ltz(uint32_t) { return false; }
+ template <>
+ inline bool ltz(uint64_t) { return false; }
+ template <>
+ inline bool ltz(int8_t v) { return v < 0; }
+ template <>
+ inline bool ltz(int16_t v) { return v < 0; }
+ template <>
+ inline bool ltz(int32_t v) { return v < 0; }
+ template <>
+ inline bool ltz(int64_t v) { return v < 0; }
+}};
+
def template NeonEqualRegExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,