Some progress on testing and implementing the IBMFloat.

2026-06-27 19:00:53 +08:00 · 2015-03-10 19:52:06 +01:00
parent 1779f11b44
commit 52e2967ec2
4 changed files with 385 additions and 25 deletions
@@ -0,0 +1,48 @@
+def lfu_cache(maxsize=100):
+    '''Least-frequently-used cache decorator.
+
+    Arguments to the cached function must be hashable.
+    Cache performance statistics stored in f.hits and f.misses.
+    Clear the cache with f.clear().
+    http://en.wikipedia.org/wiki/Least_Frequently_Used
+
+    '''
+    def decorating_function(user_function):
+        cache = {}                      # mapping of args to results
+        use_count = Counter()           # times each key has been accessed
+        kwd_mark = object()             # separate positional and keyword args
+
+        @functools.wraps(user_function)
+        def wrapper(*args, **kwds):
+            key = args
+            if kwds:
+                key += (kwd_mark,) + tuple(sorted(kwds.items()))
+            use_count[key] += 1
+
+            # get cache entry or compute if not found
+            try:
+                result = cache[key]
+                wrapper.hits += 1
+            except KeyError:
+                result = user_function(*args, **kwds)
+                cache[key] = result
+                wrapper.misses += 1
+
+                # purge least frequently used cache entry
+                if len(cache) > maxsize:
+                    for key, _ in nsmallest(maxsize // 10,
+                                            use_count.iteritems(),
+                                            key=itemgetter(1)):
+                        del cache[key], use_count[key]
+
+            return result
+
+        def clear():
+            cache.clear()
+            use_count.clear()
+            wrapper.hits = wrapper.misses = 0
+
+        wrapper.hits = wrapper.misses = 0
+        wrapper.clear = clear
+        return wrapper
+    return decorating_function
@@ -1,17 +1,31 @@
-from math import frexp, isnan, isinf
+from math import frexp, isnan, isinf, ceil, floor, trunc
+from numbers import Real

 from segpy.portability import long_int, byte_string, four_bytes

+
+
+IBM_ZERO_BYTES = b'\x00\x00\x00\x00'
+IBM_NEGATIVE_ONE_BYTES = b'\xc1\x10\x00\x00'
+IBM_POSITIVE_ONE_BYTES = b'A\x10\x00\x00'
+
 MIN_IBM_FLOAT = -7.2370051459731155e+75
 LARGEST_NEGATIVE_NORMAL_IBM_FLOAT = -5.397605346934028e-79
 SMALLEST_POSITIVE_NORMAL_IBM_FLOAT = 5.397605346934028e-79
 MAX_IBM_FLOAT = 7.2370051459731155e+75

-IBM_FLOAT32_MAX_BITS_PRECISION = 24
-IBM_FLOAT32_MIN_BITS_PRECISION = 21  # The first 3 bits of the mantissa may be zero
-IBM_FLOAT32_EPSILON = pow(2.0, -(IBM_FLOAT32_MIN_BITS_PRECISION - 1))
-_L24 = long_int(2) ** IBM_FLOAT32_MAX_BITS_PRECISION
-_F24 = float(pow(2, IBM_FLOAT32_MAX_BITS_PRECISION))
+MAX_BITS_PRECISION_IBM_FLOAT = 24
+MIN_BITS_PRECISION_IBM_FLOAT = 21  # The first 3 bits of the mantissa may be zero
+EPSILON_IBM_FLOAT = pow(2.0, -(MIN_BITS_PRECISION_IBM_FLOAT - 1))
+_L24 = long_int(2) ** MAX_BITS_PRECISION_IBM_FLOAT
+_F24 = float(pow(2, MAX_BITS_PRECISION_IBM_FLOAT))
+
+_L21 = long_int(2) ** MIN_BITS_PRECISION_IBM_FLOAT
+
+EXPONENT_BIAS = 64
+
+MIN_EXACT_INTEGER_IBM_FLOAT = -2**MAX_BITS_PRECISION_IBM_FLOAT
+MAX_EXACT_INTEGER_IBM_FLOAT = 2**MIN_BITS_PRECISION_IBM_FLOAT


 def ibm2ieee(big_endian_bytes):
@@ -29,12 +43,52 @@ def ibm2ieee(big_endian_bytes):
        return 0.0

    sign = -1 if (a & 0x80) else 1
-    exponent = a & 0x7f
+    exponent_16_biased = a & 0x7f
    mantissa = ((b << 16) | (c << 8) | d) / _F24

-    value = sign * mantissa * pow(16, exponent - 64)
+    value = sign * mantissa * pow(16, exponent_16_biased - EXPONENT_BIAS)
    return value

+BITS_PER_NYBBLE = 4
+
+
+def truncate(big_endian_bytes):
+    a, b, c, d = four_bytes(big_endian_bytes)
+
+    sign = -1 if (a & 0x80) else 1
+    exponent_16_biased = a & 0x7f
+    exponent_16 = exponent_16_biased - EXPONENT_BIAS
+    mantissa = ((b << 16) | (c << 8) | d)
+
+    print("sign =", sign)
+    print("exponent_16", exponent_16, hex(exponent_16), bin(exponent_16))
+    print("mantissa", mantissa, hex(mantissa), bin(mantissa))
+
+    num_nybbles_to_preserve = min(exponent_16, MAX_BITS_PRECISION_IBM_FLOAT // BITS_PER_NYBBLE)
+    num_bits_to_clear = MAX_BITS_PRECISION_IBM_FLOAT - num_nybbles_to_preserve * BITS_PER_NYBBLE
+    clear_mask = 2**num_bits_to_clear - 1
+    preserve_mask = (2**MAX_BITS_PRECISION_IBM_FLOAT - 1) & ~clear_mask
+
+    print("num_nybbles_to_preserve", num_nybbles_to_preserve)
+    print("num_bits_to_clear", num_bits_to_clear)
+    print("clear_mask   ", bin(clear_mask))
+    print("preserve_mask", bin(preserve_mask))
+
+    truncated_mantissa = mantissa & preserve_mask
+
+    value = truncated_mantissa * pow(16, exponent_16)
+
+    scaled_value = value >> MAX_BITS_PRECISION_IBM_FLOAT
+
+    return scaled_value
+
+    #
+    # tb = (preserve_mask >> 16) & b
+    # tc = (preserve_mask >> 8) & c
+    # td = preserve_mask & d
+    #
+    # return byte_string((a, tb, tc, td))
+

 def ieee2ibm(f):
    """Convert a float to four big-endian bytes representing an IBM float.
@@ -116,23 +170,32 @@ def ieee2ibm(f):
    return byte_string((a, b, c, d))


-class IBMFloat(object):
+class IBMFloat(Real):
+
    __slots__ = ['_data']

-    def __init__(self, b):
-        """Initialise IBMFloat from an IEEE float.
+    _INTERNED = {IBM_ZERO_BYTES: None,
+                 IBM_NEGATIVE_ONE_BYTES: None,
+                 IBM_POSITIVE_ONE_BYTES: None}

-        Args:
-            b: A byte sequence containing exactly four bytes
+    # noinspection PyUnresolvedReferences
+    def __new__(cls, b):
+        obj = object.__new__(cls)

-        Raises:
-            ValueError: If b does not contain exactly four values in the range 0-255.
-        """
        data = bytes(b)
        num_bytes = len(data)
        if num_bytes != 4:
-            raise ValueError("{} cannot be constructed from {} values".format(self.__class__.__name__, num_bytes))
-        self._data = data
+            raise ValueError("{} cannot be constructed from {} values".format(cls.__name__, num_bytes))
+        obj._data = data
+
+        # Intern common values
+        if data in cls._INTERNED:
+            if cls._INTERNED[data] is None:
+                cls._INTERNED[data] = obj
+            return cls._INTERNED[data]
+
+        return obj
+

    @classmethod
    def from_float(cls, f):
@@ -151,10 +214,20 @@ class IBMFloat(object):
        """
        return cls(ieee2ibm(f))

+    @classmethod
+    def from_real(cls, f):
+        if isinstance(f, IBMFloat):
+            return f
+        return cls.from_float(f)
+
    @classmethod
    def from_bytes(cls, b):
        return cls(b)

+    @property
+    def signbit(self):
+        return self._data[0] & 0x80
+
    def __float__(self):
        return ibm2ieee(self._data)

@@ -162,7 +235,10 @@ class IBMFloat(object):
        return self._data

    def __repr__(self):
-        return "{}({!r})".format(self.__class__.__name__, self._data)
+        return "{}.from_float({!r}) ~{!r}".format(self.__class__.__name__, self._data, float(self))
+
+    def __str__(self):
+        return str(float(self))

    def __bool__(self):
        return not self.is_zero()
@@ -174,5 +250,172 @@ class IBMFloat(object):
        return not self.is_zero()

    def is_subnormal(self):
-        return (not self.is_zero()) and (self._data[1] < 32)
+        return (not self.is_zero()) and (self._data[1] < 16)
+
+    def zero_subnormal(self):
+        return IBM_FLOAT_ZERO if self.is_subnormal() else self
+
+    def frexp(self):
+        raise NotImplemented
+        # return the mantissa and exponent
+
+    def __pos__(self):
+        return self
+
+    def __neg__(self):
+        data = self._data
+        return IBMFloat((data[0] ^ 0b10000000,
+                         data[1],
+                         data[2],
+                         data[3]))
+
+    def __abs__(self):
+        data = self._data
+        return IBMFloat((data[0] & 0b01111111,
+                         data[1],
+                         data[2],
+                         data[3]))
+
+    def __eq__(self, rhs):
+        if not isinstance(rhs, IBMFloat):
+            return NotImplemented
+        # TODO: Consider forcing normalisation
+        return self._data == rhs._data
+
+    def __floordiv__(self, rhs):
+        return float(self) // float(rhs)
+
+    def __rfloordiv__(self, lhs):
+        return float(lhs) // float(self)
+
+    def __rtruediv__(self, lhs):
+        q = float(lhs) / float(self)
+        return IBMFloat.from_float(q) if isinstance(lhs, float) else q
+
+    def __pow__(self, exponent):
+        p = pow(float(self), float(exponent))
+        return IBMFloat.from_float(p) if isinstance(exponent, IBMFloat) else p
+
+    def __rpow__(self, base):
+        return IBMFloat.from_float(pow(float(base), float(self)))
+
+    def __mod__(self, rhs):
+        m = float(self) % float(rhs)
+        return IBMFloat.from_float(m) if isinstance(rhs, IBMFloat) else m
+
+    def __rmod__(self, lhs):
+        m = float(lhs) % float(self)
+        return IBMFloat.from_float(m) if isinstance(lhs, IBMFloat) else m
+
+    def __rmul__(self, lhs):
+        p = float(lhs) * float(self)
+        return IBMFloat.from_float(p) if isinstance(lhs, IBMFloat) else p
+
+    def __radd__(self, lhs):
+        s = float(lhs) + float(self)
+        return IBMFloat.from_float(s) if isinstance(lhs, IBMFloat) else s
+
+    def __lt__(self, rhs):
+        return float(self) < float(rhs)
+
+    def __le__(self, rhs):
+        return float(self) <= float(rhs)
+
+    def __ceil__(self):
+        return ceil(float(self))
+
+    def __floor__(self):
+        return floor(float(self))
+
+    @property
+    def exp16(self):
+        """The base 16 exponent."""
+        exponent_16_biased = self._data[0] & 0x7f
+        exponent_16 = exponent_16_biased - EXPONENT_BIAS
+        return exponent_16
+
+    @property
+    def int_mantissa(self):
+        data = self._data
+        return (data[1] << 16) | (data[2] << 8) | data[3]
+
+    def __trunc__(self):
+        sign = -1 if self.signbit else 1
+        exponent_16 = self.exp16
+        mantissa = self.int_mantissa
+
+        # print("sign =", sign)
+        # print("exponent_16", exponent_16, hex(exponent_16), bin(exponent_16))
+        # print("mantissa", mantissa, hex(mantissa), bin(mantissa))
+
+        num_nybbles_to_preserve = min(exponent_16, MAX_BITS_PRECISION_IBM_FLOAT // BITS_PER_NYBBLE)
+        num_bits_to_clear = MAX_BITS_PRECISION_IBM_FLOAT - num_nybbles_to_preserve * BITS_PER_NYBBLE
+        clear_mask = 2**num_bits_to_clear - 1
+        preserve_mask = (2**MAX_BITS_PRECISION_IBM_FLOAT - 1) & ~clear_mask
+
+        # print("num_nybbles_to_preserve", num_nybbles_to_preserve)
+        # print("num_bits_to_clear", num_bits_to_clear)
+        # print("clear_mask   ", bin(clear_mask))
+        # print("preserve_mask", bin(preserve_mask))
+
+        truncated_mantissa = mantissa & preserve_mask
+        magnitude = truncated_mantissa * pow(16, exponent_16) >> MAX_BITS_PRECISION_IBM_FLOAT
+        return sign * magnitude
+
+    def normalize(self):
+        """Attempt to normalize the floating point value.
+
+        Returns:
+            A normalized IBMFloat equal in value to this object.
+
+        Raises:
+            FloatingPointError: If the number could not be normalized.
+        """
+        exponent_16 = self.exp16
+        mantissa = self.int_mantissa
+
+        if mantissa == 0:
+            return IBM_FLOAT_ZERO
+
+        while mantissa < (1 << 20):
+            new_exponent_16 = exponent_16 - 1
+            if not (-64 <= new_exponent_16 < 64):
+                raise FloatingPointError("Could not normalize {!r} without causing exponent overflow.".format(self))
+
+            mantissa <<= 4
+            exponent_16 = new_exponent_16
+
+        exponent_16_biased = exponent_16 + EXPONENT_BIAS
+
+        sign = self.signbit << 7
+
+        a = sign | exponent_16_biased
+        b = (mantissa >> 16) & 0xff
+        c = (mantissa >> 8) & 0xff
+        d = mantissa & 0xff
+
+        return IBMFloat.from_bytes((a, b, c, d))
+
+    def __round__(self, ndigits=None):
+        return IBMFloat.from_float(round(float(self), ndigits))
+
+    def __truediv__(self, rhs):
+        q = float(self) / float(rhs)
+        return IBMFloat.from_float(q) if isinstance(rhs, IBMFloat) else q
+
+    def __mul__(self, rhs):
+        p = float(self) * float(rhs)
+        return IBMFloat.from_float(p) if isinstance(rhs, IBMFloat) else p
+
+    def __add__(self, rhs):
+        p = float(self) + float(rhs)
+        return IBMFloat.from_float(p) if isinstance(rhs, IBMFloat) else p
+
+    def __int__(self):
+        raise trunc(self)
+
+
+IBM_FLOAT_ZERO = IBMFloat.from_bytes(IBM_ZERO_BYTES)
+
+

@@ -15,7 +15,7 @@ from segpy.catalog import CatalogBuilder
 from segpy.datatypes import CTYPES, size_in_bytes
 from segpy.encoding import guess_encoding, is_supported_encoding, UnsupportedEncodingError
 from segpy.binary_reel_header_definition import HEADER_DEF
-from segpy.ibm_float import ibm2ieee, ieee2ibm
+from segpy.ibm_float import ibm2ieee, ieee2ibm, IBMFloat
 from segpy.revisions import canonicalize_revision
 from segpy.trace_header_definition import TRACE_HEADER_DEF
 from segpy.util import file_length, batched, pad, complementary_intervals, NATIVE_ENDIANNESS
@@ -461,7 +461,7 @@ def unpack_ibm_floats(data, count):
    Returns:
        A sequence of floats.
    """
-    return array('d', (ibm2ieee(data[i: i+4]) for i in range(0, count * 4, 4)))
+    return array('d', (IBMFloat.from_bytes(data[i: i+4]) for i in range(0, count * 4, 4)))


 def unpack_values(buf, count, fmt, endian='>'):
@@ -1,11 +1,13 @@
+from math import trunc, floor
 import unittest

-from hypothesis import given
+from hypothesis import given, assume
 from hypothesis.descriptors import integers_in_range, floats_in_range

 from segpy.portability import byte_string
 from segpy.ibm_float import (ieee2ibm, ibm2ieee, MAX_IBM_FLOAT, SMALLEST_POSITIVE_NORMAL_IBM_FLOAT,
-                             LARGEST_NEGATIVE_NORMAL_IBM_FLOAT, MIN_IBM_FLOAT, IBMFloat, IBM_FLOAT32_EPSILON)
+                             LARGEST_NEGATIVE_NORMAL_IBM_FLOAT, MIN_IBM_FLOAT, IBMFloat, EPSILON_IBM_FLOAT, truncate,
+                             MAX_EXACT_INTEGER_IBM_FLOAT, MIN_EXACT_INTEGER_IBM_FLOAT, EXPONENT_BIAS)
 from segpy.util import almost_equal


@@ -192,7 +194,74 @@ class TestIBMFloat(unittest.TestCase):
    @given(floats_in_range(MIN_IBM_FLOAT, MAX_IBM_FLOAT))
    def test_floats_roundtrip(self, f):
        ibm = IBMFloat.from_float(f)
-        self.assertTrue(almost_equal(f, float(ibm), epsilon=IBM_FLOAT32_EPSILON))
+        self.assertTrue(almost_equal(f, float(ibm), epsilon=EPSILON_IBM_FLOAT))
+
+    @given(integers_in_range(0, MAX_EXACT_INTEGER_IBM_FLOAT - 1))
+    @given(floats_in_range(0.0, 1.0))
+    def test_trunc_above_zero(self, i, f):
+        assume(f != 1.0)
+        ieee = i + f
+        ibm = IBMFloat.from_float(ieee)
+        self.assertEqual(trunc(ibm), i)
+
+    @given(integers_in_range(MIN_EXACT_INTEGER_IBM_FLOAT + 1, 0))
+    @given(floats_in_range(0.0, 1.0))
+    def test_trunc_below_zero(self, i, f):
+        assume(f != 1.0)
+        ieee = i - f
+        ibm = IBMFloat.from_float(ieee)
+        self.assertEqual(trunc(ibm), i)
+
+    def test_normalise_subnormal_expect_failure(self):
+        # This float has an base-16 exponent of -64 (the minimum) and cannot be normalised
+        ibm = IBMFloat.from_float(1.6472184286297693e-83)
+        assert ibm.is_subnormal()
+        with self.assertRaises(FloatingPointError):
+            ibm.normalize()
+
+    def test_normalise_subnormal(self):
+        ibm = IBMFloat.from_bytes((0b01000000, 0b00000000, 0b11111111, 0b00000000))
+        assert ibm.is_subnormal()
+        normalized = ibm.normalize()
+        self.assertFalse(normalized.is_subnormal())
+
+    def test_normalise_subnormal2(self):
+        ibm = IBMFloat.from_bytes((64, 1, 0, 0))
+        assert ibm.is_subnormal()
+        normalized = ibm.normalize()
+        self.assertFalse(normalized.is_subnormal())
+
+    @given(integers_in_range(128, 255),
+           integers_in_range(0, 255),
+           integers_in_range(0, 255),
+           integers_in_range(4, 23))
+    def test_normalise_subnormal(self, b, c, d, shift):
+        mantissa = (b << 16) | (c << 8) | d
+        assume(mantissa != 0)
+        mantissa >>= shift
+        assert mantissa != 0
+
+        sa = EXPONENT_BIAS
+        sb = (mantissa >> 16) & 0xff
+        sc = (mantissa >> 8) & 0xff
+        sd = mantissa & 0xff
+
+        ibm = IBMFloat.from_bytes((sa, sb, sc, sd))
+        assert ibm.is_subnormal()
+        normalized = ibm.normalize()
+        self.assertFalse(normalized.is_subnormal())
+
+
+    @given(integers_in_range(0, 255),
+           integers_in_range(0, 255),
+           integers_in_range(0, 255),
+           integers_in_range(0, 255))
+    def test_abs(self, a, b, c, d):
+        b = byte_string((a, b, c, d))
+        ibm = IBMFloat.from_bytes(b)
+        abs_ibm = abs(ibm)
+        self.assertGreaterEqual(abs_ibm.signbit, 0)
+


 if __name__ == '__main__':