Add count_leading_ones function

Add the following function in `src/util.hpp`: template<typename N> inline unsigned int count_leading_ones(N n); This function returns the quantity of leading `1` bits in `n`. Conflicts: src/util.hpp
2014-03-18 00:11:50 +00:00 · 2014-03-18 00:11:50 +00:00 · c86b905fef
commit c86b905fef
parent c2f6072b9b
2 changed files with 229 additions and 0 deletions
--- a/src/tests/test_util.cpp
+++ b/src/tests/test_util.cpp
@ -19,6 +19,8 @@
 #include "serialization/string_utils.hpp"
 #include "util.hpp"

+#include <boost/cstdint.hpp>
+
 BOOST_AUTO_TEST_SUITE( util )

 BOOST_AUTO_TEST_CASE( test_lexical_cast )
@ -80,6 +82,19 @@ BOOST_AUTO_TEST_CASE( test_lowercase )
 	BOOST_CHECK_EQUAL ( utils::lowercase("FOO") , "foo" );
 }

+BOOST_AUTO_TEST_CASE( test_count_leading_ones )
+{
+	BOOST_CHECK( count_leading_ones(0) == 0 );
+	BOOST_CHECK( count_leading_ones(1) == 0 );
+	BOOST_CHECK( count_leading_ones((boost::uint8_t) 0xFF) == 8 );
+	BOOST_CHECK( count_leading_ones((boost::uint16_t) 0xFFFF) == 16 );
+	BOOST_CHECK( count_leading_ones((boost::uint32_t) 0xFFFFFFFF) == 32 );
+	BOOST_CHECK( count_leading_ones((boost::uint64_t) 0xFFFFFFFFFFFFFFFF)
+		== 64 );
+	BOOST_CHECK( count_leading_ones((boost::uint8_t) 0xF8) == 5 );
+	BOOST_CHECK( count_leading_ones((boost::uint16_t) 54321) == 2 );
+}
+
 /* vim: set ts=4 sw=4: */

 BOOST_AUTO_TEST_SUITE_END()
--- a/src/util.hpp
+++ b/src/util.hpp
@ -22,6 +22,8 @@

 #include "global.hpp"
 #include <cmath>
+#include <cstddef>
+#include <limits>
 #include <math.h> // cmath may not provide round()
 #include <vector>
 #include <sstream>
@ -209,6 +211,218 @@ bool in_ranges(const Cmp c, const std::vector<std::pair<Cmp, Cmp> >&ranges) {
 inline bool chars_equal_insensitive(char a, char b) { return tolower(a) == tolower(b); }
 inline bool chars_less_insensitive(char a, char b) { return tolower(a) < tolower(b); }

+/**
+ * Returns the size, in bits, of an instance of type `T`, providing a
+ * convenient and self-documenting name for the underlying expression:
+ *
+ *     sizeof(T) * std::numeric_limits<unsigned char>::digits
+ *
+ * @tparam T The return value is the size, in bits, of an instance of this
+ * type.
+ *
+ * @returns the size, in bits, of an instance of type `T`.
+ */
+template<typename T>
+inline std::size_t bit_width() {
+	return sizeof(T) * std::numeric_limits<unsigned char>::digits;
+}
+
+/**
+ * Returns the size, in bits, of `x`, providing a convenient and
+ * self-documenting name for the underlying expression:
+ *
+ *     sizeof(x) * std::numeric_limits<unsigned char>::digits
+ *
+ * @tparam T The type of `x`.
+ *
+ * @param x The return value is the size, in bits, of this object.
+ *
+ * @returns the size, in bits, of an instance of type `T`.
+ */
+template<typename T>
+inline std::size_t bit_width(const T& x) {
+	return sizeof(x) * std::numeric_limits<unsigned char>::digits;
+}
+
+/**
+ * Returns the quantity of `1` bits in `n` — i.e., `n`’s population count.
+ *
+ * Algorithm adapted from:
+ * <https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan>
+ *
+ * This algorithm was chosen for relative simplicity, not for speed.
+ *
+ * @tparam N The type of `n`. This should be a fundamental integer type no
+ * greater than `UINT_MAX` bits in width; if it is not, the return value is
+ * undefined.
+ *
+ * @param n An integer upon which to operate.
+ *
+ * @returns the quantity of `1` bits in `n`, if `N` is a fundamental integer
+ * type.
+ */
+template<typename N>
+inline unsigned int count_ones(N n) {
+	unsigned int r = 0;
+	while (n) {
+		n &= n-1;
+		++r;
+	}
+	return r;
+}
+
+// Support functions for `count_leading_zeros`.
+#if defined(__GNUC__) || defined(__clang__)
+inline unsigned int count_leading_zeros_impl(
+		unsigned char n, std::size_t w) {
+	// Returns the result of the compiler built-in function, adjusted for
+	// the difference between the width, in bits, of the built-in
+	// function’s parameter’s type (which is `unsigned int`, at the
+	// smallest) and the width, in bits, of the input to this function, as
+	// specified at the call-site in `count_leading_zeros`.
+	return static_cast<unsigned int>(__builtin_clz(n))
+		- static_cast<unsigned int>(
+			bit_width<unsigned int>() - w);
+}
+inline unsigned int count_leading_zeros_impl(
+		unsigned short int n, std::size_t w) {
+	return static_cast<unsigned int>(__builtin_clz(n))
+		- static_cast<unsigned int>(
+			bit_width<unsigned int>() - w);
+}
+inline unsigned int count_leading_zeros_impl(
+		unsigned int n, std::size_t w) {
+	return static_cast<unsigned int>(__builtin_clz(n))
+		- static_cast<unsigned int>(
+			bit_width<unsigned int>() - w);
+}
+inline unsigned int count_leading_zeros_impl(
+		unsigned long int n, std::size_t w) {
+	return static_cast<unsigned int>(__builtin_clzl(n))
+		- static_cast<unsigned int>(
+			bit_width<unsigned long int>() - w);
+}
+inline unsigned int count_leading_zeros_impl(
+		unsigned long long int n, std::size_t w) {
+	return static_cast<unsigned int>(__builtin_clzll(n))
+		- static_cast<unsigned int>(
+			bit_width<unsigned long long int>() - w);
+}
+inline unsigned int count_leading_zeros_impl(
+		char n, std::size_t w) {
+	return count_leading_zeros_impl(
+		static_cast<unsigned char>(n), w);
+}
+inline unsigned int count_leading_zeros_impl(
+		signed char n, std::size_t w) {
+	return count_leading_zeros_impl(
+		static_cast<unsigned char>(n), w);
+}
+inline unsigned int count_leading_zeros_impl(
+		signed short int n, std::size_t w) {
+	return count_leading_zeros_impl(
+		static_cast<unsigned short int>(n), w);
+}
+inline unsigned int count_leading_zeros_impl(
+		signed int n, std::size_t w) {
+	return count_leading_zeros_impl(
+		static_cast<unsigned int>(n), w);
+}
+inline unsigned int count_leading_zeros_impl(
+		signed long int n, std::size_t w) {
+	return count_leading_zeros_impl(
+		static_cast<unsigned long int>(n), w);
+}
+inline unsigned int count_leading_zeros_impl(
+		signed long long int n, std::size_t w) {
+	return count_leading_zeros_impl(
+		static_cast<unsigned long long int>(n), w);
+}
+#else
+template<typename N>
+inline unsigned int count_leading_zeros_impl(N n, std::size_t w) {
+	// Algorithm adapted from:
+	// <http://aggregate.org/MAGIC/#Leading%20Zero%20Count>
+	for (unsigned int shift = 1; shift < w; shift *= 2) {
+		n |= (n >> shift);
+	}
+	return static_cast<unsigned int>(w) - count_ones(n);
+}
+#endif
+
+/**
+ * Returns the quantity of leading `0` bits in `n` — i.e., the quantity of
+ * bits in `n`, minus the 1-based bit index of the most significant `1` bit in
+ * `n`, or minus 0 if `n` is 0.
+ *
+ * @tparam N The type of `n`. This should be a fundamental integer type that
+ *  (a) is not wider than `unsigned long long int` (the GCC
+ *   count-leading-zeros built-in functions are defined for `unsigned int`,
+ *   `unsigned long int`, and `unsigned long long int`), and
+ *  (b) is no greater than `INT_MAX` bits in width (the GCC built-in functions
+ *   return instances of type `int`);
+ * if `N` does not satisfy these constraints, the return value is undefined.
+ *
+ * @param n An integer upon which to operate.
+ *
+ * @returns the quantity of leading `0` bits in `n`, if `N` satisfies the
+ * above constraints.
+ *
+ * @see count_leading_ones()
+ */
+template<typename N>
+inline unsigned int count_leading_zeros(N n) {
+#if defined(__GNUC__) || defined(__clang__)
+	// GCC’s `__builtin_clz` returns an undefined value when called with 0
+	// as argument.
+	// [<http://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html>]
+	if (n == 0) {
+		// Return the quantity of zero bits in `n` rather than
+		// returning that undefined value.
+		return static_cast<unsigned int>(bit_width(n));
+	}
+#endif
+	// Dispatch, on the static type of `n`, to one of the
+	// `count_leading_zeros_impl` functions.
+	return count_leading_zeros_impl(n, bit_width(n));
+	// The second argument to `count_leading_zeros_impl` specifies the
+	// width, in bits, of `n`.
+	//
+	// This is necessary because `n` may be widened (or, alas, shrunk),
+	// and thus the information of `n`’s true width may be lost.
+	//
+	// At least, this *was* necessary before there were so many overloads
+	// of `count_leading_zeros_impl`, but I’ve kept it anyway as an extra
+	// precautionary measure, that will (I hope) be optimized out.
+	//
+	// To be clear, `n` would only be shrunk in cases noted above as
+	// having an undefined result.
+}
+
+/**
+ * Returns the quantity of leading `1` bits in `n` — i.e., the quantity of
+ * bits in `n`, minus the 1-based bit index of the most significant `0` bit in
+ * `n`, or minus 0 if `n` contains no `0` bits.
+ *
+ * @tparam N The type of `n`. This should be a fundamental integer type that
+ *  (a) is not wider than `unsigned long long int`, and
+ *  (b) is no greater than `INT_MAX` bits in width;
+ * if `N` does not satisfy these constraints, the return value is undefined.
+ *
+ * @param n An integer upon which to operate.
+ *
+ * @returns the quantity of leading `1` bits in `n`, if `N` satisfies the
+ * above constraints.
+ *
+ * @see count_leading_zeros()
+ */
+template<typename N>
+inline unsigned int count_leading_ones(N n) {
+	// Explicitly specify the type for which to instantiate
+	// `count_leading_zeros` in case `~n` is of a different type.
+	return count_leading_zeros<N>(~n);
+}
+
 #ifdef __GNUC__
 #define LIKELY(a)    __builtin_expect((a),1) // Tells GCC to optimize code so that if is likely to happen
 #define UNLIKELY(a)  __builtin_expect((a),0) // Tells GCC to optimize code so that if is unlikely to happen