diff --git a/segpy/caching.py b/segpy/caching.py new file mode 100644 index 0000000..ae95d6f --- /dev/null +++ b/segpy/caching.py @@ -0,0 +1,48 @@ +def lfu_cache(maxsize=100): + '''Least-frequently-used cache decorator. + + Arguments to the cached function must be hashable. + Cache performance statistics stored in f.hits and f.misses. + Clear the cache with f.clear(). + http://en.wikipedia.org/wiki/Least_Frequently_Used + + ''' + def decorating_function(user_function): + cache = {} # mapping of args to results + use_count = Counter() # times each key has been accessed + kwd_mark = object() # separate positional and keyword args + + @functools.wraps(user_function) + def wrapper(*args, **kwds): + key = args + if kwds: + key += (kwd_mark,) + tuple(sorted(kwds.items())) + use_count[key] += 1 + + # get cache entry or compute if not found + try: + result = cache[key] + wrapper.hits += 1 + except KeyError: + result = user_function(*args, **kwds) + cache[key] = result + wrapper.misses += 1 + + # purge least frequently used cache entry + if len(cache) > maxsize: + for key, _ in nsmallest(maxsize // 10, + use_count.iteritems(), + key=itemgetter(1)): + del cache[key], use_count[key] + + return result + + def clear(): + cache.clear() + use_count.clear() + wrapper.hits = wrapper.misses = 0 + + wrapper.hits = wrapper.misses = 0 + wrapper.clear = clear + return wrapper + return decorating_function diff --git a/segpy/ibm_float.py b/segpy/ibm_float.py index e5ecd66..254b82a 100644 --- a/segpy/ibm_float.py +++ b/segpy/ibm_float.py @@ -1,17 +1,31 @@ -from math import frexp, isnan, isinf +from math import frexp, isnan, isinf, ceil, floor, trunc +from numbers import Real from segpy.portability import long_int, byte_string, four_bytes + + +IBM_ZERO_BYTES = b'\x00\x00\x00\x00' +IBM_NEGATIVE_ONE_BYTES = b'\xc1\x10\x00\x00' +IBM_POSITIVE_ONE_BYTES = b'A\x10\x00\x00' + MIN_IBM_FLOAT = -7.2370051459731155e+75 LARGEST_NEGATIVE_NORMAL_IBM_FLOAT = -5.397605346934028e-79 SMALLEST_POSITIVE_NORMAL_IBM_FLOAT = 5.397605346934028e-79 MAX_IBM_FLOAT = 7.2370051459731155e+75 -IBM_FLOAT32_MAX_BITS_PRECISION = 24 -IBM_FLOAT32_MIN_BITS_PRECISION = 21 # The first 3 bits of the mantissa may be zero -IBM_FLOAT32_EPSILON = pow(2.0, -(IBM_FLOAT32_MIN_BITS_PRECISION - 1)) -_L24 = long_int(2) ** IBM_FLOAT32_MAX_BITS_PRECISION -_F24 = float(pow(2, IBM_FLOAT32_MAX_BITS_PRECISION)) +MAX_BITS_PRECISION_IBM_FLOAT = 24 +MIN_BITS_PRECISION_IBM_FLOAT = 21 # The first 3 bits of the mantissa may be zero +EPSILON_IBM_FLOAT = pow(2.0, -(MIN_BITS_PRECISION_IBM_FLOAT - 1)) +_L24 = long_int(2) ** MAX_BITS_PRECISION_IBM_FLOAT +_F24 = float(pow(2, MAX_BITS_PRECISION_IBM_FLOAT)) + +_L21 = long_int(2) ** MIN_BITS_PRECISION_IBM_FLOAT + +EXPONENT_BIAS = 64 + +MIN_EXACT_INTEGER_IBM_FLOAT = -2**MAX_BITS_PRECISION_IBM_FLOAT +MAX_EXACT_INTEGER_IBM_FLOAT = 2**MIN_BITS_PRECISION_IBM_FLOAT def ibm2ieee(big_endian_bytes): @@ -29,12 +43,52 @@ def ibm2ieee(big_endian_bytes): return 0.0 sign = -1 if (a & 0x80) else 1 - exponent = a & 0x7f + exponent_16_biased = a & 0x7f mantissa = ((b << 16) | (c << 8) | d) / _F24 - value = sign * mantissa * pow(16, exponent - 64) + value = sign * mantissa * pow(16, exponent_16_biased - EXPONENT_BIAS) return value +BITS_PER_NYBBLE = 4 + + +def truncate(big_endian_bytes): + a, b, c, d = four_bytes(big_endian_bytes) + + sign = -1 if (a & 0x80) else 1 + exponent_16_biased = a & 0x7f + exponent_16 = exponent_16_biased - EXPONENT_BIAS + mantissa = ((b << 16) | (c << 8) | d) + + print("sign =", sign) + print("exponent_16", exponent_16, hex(exponent_16), bin(exponent_16)) + print("mantissa", mantissa, hex(mantissa), bin(mantissa)) + + num_nybbles_to_preserve = min(exponent_16, MAX_BITS_PRECISION_IBM_FLOAT // BITS_PER_NYBBLE) + num_bits_to_clear = MAX_BITS_PRECISION_IBM_FLOAT - num_nybbles_to_preserve * BITS_PER_NYBBLE + clear_mask = 2**num_bits_to_clear - 1 + preserve_mask = (2**MAX_BITS_PRECISION_IBM_FLOAT - 1) & ~clear_mask + + print("num_nybbles_to_preserve", num_nybbles_to_preserve) + print("num_bits_to_clear", num_bits_to_clear) + print("clear_mask ", bin(clear_mask)) + print("preserve_mask", bin(preserve_mask)) + + truncated_mantissa = mantissa & preserve_mask + + value = truncated_mantissa * pow(16, exponent_16) + + scaled_value = value >> MAX_BITS_PRECISION_IBM_FLOAT + + return scaled_value + + # + # tb = (preserve_mask >> 16) & b + # tc = (preserve_mask >> 8) & c + # td = preserve_mask & d + # + # return byte_string((a, tb, tc, td)) + def ieee2ibm(f): """Convert a float to four big-endian bytes representing an IBM float. @@ -116,23 +170,32 @@ def ieee2ibm(f): return byte_string((a, b, c, d)) -class IBMFloat(object): +class IBMFloat(Real): + __slots__ = ['_data'] - def __init__(self, b): - """Initialise IBMFloat from an IEEE float. + _INTERNED = {IBM_ZERO_BYTES: None, + IBM_NEGATIVE_ONE_BYTES: None, + IBM_POSITIVE_ONE_BYTES: None} - Args: - b: A byte sequence containing exactly four bytes + # noinspection PyUnresolvedReferences + def __new__(cls, b): + obj = object.__new__(cls) - Raises: - ValueError: If b does not contain exactly four values in the range 0-255. - """ data = bytes(b) num_bytes = len(data) if num_bytes != 4: - raise ValueError("{} cannot be constructed from {} values".format(self.__class__.__name__, num_bytes)) - self._data = data + raise ValueError("{} cannot be constructed from {} values".format(cls.__name__, num_bytes)) + obj._data = data + + # Intern common values + if data in cls._INTERNED: + if cls._INTERNED[data] is None: + cls._INTERNED[data] = obj + return cls._INTERNED[data] + + return obj + @classmethod def from_float(cls, f): @@ -151,10 +214,20 @@ class IBMFloat(object): """ return cls(ieee2ibm(f)) + @classmethod + def from_real(cls, f): + if isinstance(f, IBMFloat): + return f + return cls.from_float(f) + @classmethod def from_bytes(cls, b): return cls(b) + @property + def signbit(self): + return self._data[0] & 0x80 + def __float__(self): return ibm2ieee(self._data) @@ -162,7 +235,10 @@ class IBMFloat(object): return self._data def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, self._data) + return "{}.from_float({!r}) ~{!r}".format(self.__class__.__name__, self._data, float(self)) + + def __str__(self): + return str(float(self)) def __bool__(self): return not self.is_zero() @@ -174,5 +250,172 @@ class IBMFloat(object): return not self.is_zero() def is_subnormal(self): - return (not self.is_zero()) and (self._data[1] < 32) + return (not self.is_zero()) and (self._data[1] < 16) + + def zero_subnormal(self): + return IBM_FLOAT_ZERO if self.is_subnormal() else self + + def frexp(self): + raise NotImplemented + # return the mantissa and exponent + + def __pos__(self): + return self + + def __neg__(self): + data = self._data + return IBMFloat((data[0] ^ 0b10000000, + data[1], + data[2], + data[3])) + + def __abs__(self): + data = self._data + return IBMFloat((data[0] & 0b01111111, + data[1], + data[2], + data[3])) + + def __eq__(self, rhs): + if not isinstance(rhs, IBMFloat): + return NotImplemented + # TODO: Consider forcing normalisation + return self._data == rhs._data + + def __floordiv__(self, rhs): + return float(self) // float(rhs) + + def __rfloordiv__(self, lhs): + return float(lhs) // float(self) + + def __rtruediv__(self, lhs): + q = float(lhs) / float(self) + return IBMFloat.from_float(q) if isinstance(lhs, float) else q + + def __pow__(self, exponent): + p = pow(float(self), float(exponent)) + return IBMFloat.from_float(p) if isinstance(exponent, IBMFloat) else p + + def __rpow__(self, base): + return IBMFloat.from_float(pow(float(base), float(self))) + + def __mod__(self, rhs): + m = float(self) % float(rhs) + return IBMFloat.from_float(m) if isinstance(rhs, IBMFloat) else m + + def __rmod__(self, lhs): + m = float(lhs) % float(self) + return IBMFloat.from_float(m) if isinstance(lhs, IBMFloat) else m + + def __rmul__(self, lhs): + p = float(lhs) * float(self) + return IBMFloat.from_float(p) if isinstance(lhs, IBMFloat) else p + + def __radd__(self, lhs): + s = float(lhs) + float(self) + return IBMFloat.from_float(s) if isinstance(lhs, IBMFloat) else s + + def __lt__(self, rhs): + return float(self) < float(rhs) + + def __le__(self, rhs): + return float(self) <= float(rhs) + + def __ceil__(self): + return ceil(float(self)) + + def __floor__(self): + return floor(float(self)) + + @property + def exp16(self): + """The base 16 exponent.""" + exponent_16_biased = self._data[0] & 0x7f + exponent_16 = exponent_16_biased - EXPONENT_BIAS + return exponent_16 + + @property + def int_mantissa(self): + data = self._data + return (data[1] << 16) | (data[2] << 8) | data[3] + + def __trunc__(self): + sign = -1 if self.signbit else 1 + exponent_16 = self.exp16 + mantissa = self.int_mantissa + + # print("sign =", sign) + # print("exponent_16", exponent_16, hex(exponent_16), bin(exponent_16)) + # print("mantissa", mantissa, hex(mantissa), bin(mantissa)) + + num_nybbles_to_preserve = min(exponent_16, MAX_BITS_PRECISION_IBM_FLOAT // BITS_PER_NYBBLE) + num_bits_to_clear = MAX_BITS_PRECISION_IBM_FLOAT - num_nybbles_to_preserve * BITS_PER_NYBBLE + clear_mask = 2**num_bits_to_clear - 1 + preserve_mask = (2**MAX_BITS_PRECISION_IBM_FLOAT - 1) & ~clear_mask + + # print("num_nybbles_to_preserve", num_nybbles_to_preserve) + # print("num_bits_to_clear", num_bits_to_clear) + # print("clear_mask ", bin(clear_mask)) + # print("preserve_mask", bin(preserve_mask)) + + truncated_mantissa = mantissa & preserve_mask + magnitude = truncated_mantissa * pow(16, exponent_16) >> MAX_BITS_PRECISION_IBM_FLOAT + return sign * magnitude + + def normalize(self): + """Attempt to normalize the floating point value. + + Returns: + A normalized IBMFloat equal in value to this object. + + Raises: + FloatingPointError: If the number could not be normalized. + """ + exponent_16 = self.exp16 + mantissa = self.int_mantissa + + if mantissa == 0: + return IBM_FLOAT_ZERO + + while mantissa < (1 << 20): + new_exponent_16 = exponent_16 - 1 + if not (-64 <= new_exponent_16 < 64): + raise FloatingPointError("Could not normalize {!r} without causing exponent overflow.".format(self)) + + mantissa <<= 4 + exponent_16 = new_exponent_16 + + exponent_16_biased = exponent_16 + EXPONENT_BIAS + + sign = self.signbit << 7 + + a = sign | exponent_16_biased + b = (mantissa >> 16) & 0xff + c = (mantissa >> 8) & 0xff + d = mantissa & 0xff + + return IBMFloat.from_bytes((a, b, c, d)) + + def __round__(self, ndigits=None): + return IBMFloat.from_float(round(float(self), ndigits)) + + def __truediv__(self, rhs): + q = float(self) / float(rhs) + return IBMFloat.from_float(q) if isinstance(rhs, IBMFloat) else q + + def __mul__(self, rhs): + p = float(self) * float(rhs) + return IBMFloat.from_float(p) if isinstance(rhs, IBMFloat) else p + + def __add__(self, rhs): + p = float(self) + float(rhs) + return IBMFloat.from_float(p) if isinstance(rhs, IBMFloat) else p + + def __int__(self): + raise trunc(self) + + +IBM_FLOAT_ZERO = IBMFloat.from_bytes(IBM_ZERO_BYTES) + + diff --git a/segpy/toolkit.py b/segpy/toolkit.py index 293903c..fb2a155 100644 --- a/segpy/toolkit.py +++ b/segpy/toolkit.py @@ -15,7 +15,7 @@ from segpy.catalog import CatalogBuilder from segpy.datatypes import CTYPES, size_in_bytes from segpy.encoding import guess_encoding, is_supported_encoding, UnsupportedEncodingError from segpy.binary_reel_header_definition import HEADER_DEF -from segpy.ibm_float import ibm2ieee, ieee2ibm +from segpy.ibm_float import ibm2ieee, ieee2ibm, IBMFloat from segpy.revisions import canonicalize_revision from segpy.trace_header_definition import TRACE_HEADER_DEF from segpy.util import file_length, batched, pad, complementary_intervals, NATIVE_ENDIANNESS @@ -461,7 +461,7 @@ def unpack_ibm_floats(data, count): Returns: A sequence of floats. """ - return array('d', (ibm2ieee(data[i: i+4]) for i in range(0, count * 4, 4))) + return array('d', (IBMFloat.from_bytes(data[i: i+4]) for i in range(0, count * 4, 4))) def unpack_values(buf, count, fmt, endian='>'): diff --git a/test/test_float.py b/test/test_float.py index a04e903..e3a7d12 100644 --- a/test/test_float.py +++ b/test/test_float.py @@ -1,11 +1,13 @@ +from math import trunc, floor import unittest -from hypothesis import given +from hypothesis import given, assume from hypothesis.descriptors import integers_in_range, floats_in_range from segpy.portability import byte_string from segpy.ibm_float import (ieee2ibm, ibm2ieee, MAX_IBM_FLOAT, SMALLEST_POSITIVE_NORMAL_IBM_FLOAT, - LARGEST_NEGATIVE_NORMAL_IBM_FLOAT, MIN_IBM_FLOAT, IBMFloat, IBM_FLOAT32_EPSILON) + LARGEST_NEGATIVE_NORMAL_IBM_FLOAT, MIN_IBM_FLOAT, IBMFloat, EPSILON_IBM_FLOAT, truncate, + MAX_EXACT_INTEGER_IBM_FLOAT, MIN_EXACT_INTEGER_IBM_FLOAT, EXPONENT_BIAS) from segpy.util import almost_equal @@ -192,7 +194,74 @@ class TestIBMFloat(unittest.TestCase): @given(floats_in_range(MIN_IBM_FLOAT, MAX_IBM_FLOAT)) def test_floats_roundtrip(self, f): ibm = IBMFloat.from_float(f) - self.assertTrue(almost_equal(f, float(ibm), epsilon=IBM_FLOAT32_EPSILON)) + self.assertTrue(almost_equal(f, float(ibm), epsilon=EPSILON_IBM_FLOAT)) + + @given(integers_in_range(0, MAX_EXACT_INTEGER_IBM_FLOAT - 1)) + @given(floats_in_range(0.0, 1.0)) + def test_trunc_above_zero(self, i, f): + assume(f != 1.0) + ieee = i + f + ibm = IBMFloat.from_float(ieee) + self.assertEqual(trunc(ibm), i) + + @given(integers_in_range(MIN_EXACT_INTEGER_IBM_FLOAT + 1, 0)) + @given(floats_in_range(0.0, 1.0)) + def test_trunc_below_zero(self, i, f): + assume(f != 1.0) + ieee = i - f + ibm = IBMFloat.from_float(ieee) + self.assertEqual(trunc(ibm), i) + + def test_normalise_subnormal_expect_failure(self): + # This float has an base-16 exponent of -64 (the minimum) and cannot be normalised + ibm = IBMFloat.from_float(1.6472184286297693e-83) + assert ibm.is_subnormal() + with self.assertRaises(FloatingPointError): + ibm.normalize() + + def test_normalise_subnormal(self): + ibm = IBMFloat.from_bytes((0b01000000, 0b00000000, 0b11111111, 0b00000000)) + assert ibm.is_subnormal() + normalized = ibm.normalize() + self.assertFalse(normalized.is_subnormal()) + + def test_normalise_subnormal2(self): + ibm = IBMFloat.from_bytes((64, 1, 0, 0)) + assert ibm.is_subnormal() + normalized = ibm.normalize() + self.assertFalse(normalized.is_subnormal()) + + @given(integers_in_range(128, 255), + integers_in_range(0, 255), + integers_in_range(0, 255), + integers_in_range(4, 23)) + def test_normalise_subnormal(self, b, c, d, shift): + mantissa = (b << 16) | (c << 8) | d + assume(mantissa != 0) + mantissa >>= shift + assert mantissa != 0 + + sa = EXPONENT_BIAS + sb = (mantissa >> 16) & 0xff + sc = (mantissa >> 8) & 0xff + sd = mantissa & 0xff + + ibm = IBMFloat.from_bytes((sa, sb, sc, sd)) + assert ibm.is_subnormal() + normalized = ibm.normalize() + self.assertFalse(normalized.is_subnormal()) + + + @given(integers_in_range(0, 255), + integers_in_range(0, 255), + integers_in_range(0, 255), + integers_in_range(0, 255)) + def test_abs(self, a, b, c, d): + b = byte_string((a, b, c, d)) + ibm = IBMFloat.from_bytes(b) + abs_ibm = abs(ibm) + self.assertGreaterEqual(abs_ibm.signbit, 0) + if __name__ == '__main__':