Improve the multiply instruction.

It now keeps more resolution and is less prone to overflow (unless, of course, the result itself overflows). The code is aimed at the ARM v7, so it might be slow on platforms that do not support an instruction for: 32-bit signed * 32-bit signed = 64-bit signed. The alternative was to write code like that used for the division, but that would be more code and slower on platforms with a fast 64-bit multiplication result. The output of the titlescreen image now looks less dark than before this modification.
2012-03-11 13:14:29 +00:00 · 2012-03-11 13:14:29 +00:00 · 6f21500ce0
commit 6f21500ce0
parent db4adda228
1 changed files with 26 additions and 1 deletions
--- a/src/floating_point_emulation.hpp
+++ b/src/floating_point_emulation.hpp
@ -732,7 +732,13 @@ public:
 	operator*=(const tfloat rhs)
 	{
 		value_ *= rhs.value_;
-		FLOATING_POINT_EMULATION_RANGE_CHECK;
+
+		/*
+		 * There is no need to check the range at this point. The specialised
+		 * version widens the product to 64 bits, so the multiplication itself
+		 * cannot overflow.
+		 */
+
 		detail::tscale<T, S>::down(value_);
 		FLOATING_POINT_EMULATION_RANGE_CHECK;
 		return *this;
@ -963,6 +969,25 @@ operator!=(const double lhs, const tfloat<T, S> rhs)

 /***** Mul *****/

+/**
+ * Multiply
+ *
+ * Specialised for the Sint32 with a shift of 8.
+ *
+ * Instead of figuring out the optimal shift before multiplying simply multiply
+ * as a 64-bit value and then perform the shift. This is rather cheap on the
+ * Pandora and also keeps the code short on that platform (only two extra
+ * instructions on the ARM v7; a `logical shift right' instruction followed by
+ * a `logical left shifted or' instruction.)
+ */
+template<>
+inline tfloat<Sint32, 8>&
+tfloat<Sint32, 8>::operator*=(const tfloat<Sint32, 8> rhs)
+{
+	value_ = (static_cast<Sint64>(value_) * rhs.value_) >> 8; /* 64-bit product, then shift right by 8 before storing back */
+	return *this; /* NOTE(review): no FLOATING_POINT_EMULATION_RANGE_CHECK after the store, unlike the generic operator*= -- confirm intended */
+}
+
 template<class T, unsigned S>
 inline tfloat<T, S>
 operator*(tfloat<T, S> lhs, const tfloat<T, S> rhs)