Some progress on testing and implementing the IBMFloat.

This commit is contained in:
Robert Smallshire
2015-03-10 19:52:06 +01:00
parent 1779f11b44
commit 52e2967ec2
4 changed files with 385 additions and 25 deletions
+48
View File
@@ -0,0 +1,48 @@
def lfu_cache(maxsize=100):
'''Least-frequently-used cache decorator.
Arguments to the cached function must be hashable.
Cache performance statistics stored in f.hits and f.misses.
Clear the cache with f.clear().
http://en.wikipedia.org/wiki/Least_Frequently_Used
'''
def decorating_function(user_function):
cache = {} # mapping of args to results
use_count = Counter() # times each key has been accessed
kwd_mark = object() # separate positional and keyword args
@functools.wraps(user_function)
def wrapper(*args, **kwds):
key = args
if kwds:
key += (kwd_mark,) + tuple(sorted(kwds.items()))
use_count[key] += 1
# get cache entry or compute if not found
try:
result = cache[key]
wrapper.hits += 1
except KeyError:
result = user_function(*args, **kwds)
cache[key] = result
wrapper.misses += 1
# purge least frequently used cache entry
if len(cache) > maxsize:
for key, _ in nsmallest(maxsize // 10,
use_count.iteritems(),
key=itemgetter(1)):
del cache[key], use_count[key]
return result
def clear():
cache.clear()
use_count.clear()
wrapper.hits = wrapper.misses = 0
wrapper.hits = wrapper.misses = 0
wrapper.clear = clear
return wrapper
return decorating_function
+263 -20
View File
@@ -1,17 +1,31 @@
from math import frexp, isnan, isinf
from math import frexp, isnan, isinf, ceil, floor, trunc
from numbers import Real
from segpy.portability import long_int, byte_string, four_bytes
IBM_ZERO_BYTES = b'\x00\x00\x00\x00'
IBM_NEGATIVE_ONE_BYTES = b'\xc1\x10\x00\x00'
IBM_POSITIVE_ONE_BYTES = b'A\x10\x00\x00'
MIN_IBM_FLOAT = -7.2370051459731155e+75
LARGEST_NEGATIVE_NORMAL_IBM_FLOAT = -5.397605346934028e-79
SMALLEST_POSITIVE_NORMAL_IBM_FLOAT = 5.397605346934028e-79
MAX_IBM_FLOAT = 7.2370051459731155e+75
IBM_FLOAT32_MAX_BITS_PRECISION = 24
IBM_FLOAT32_MIN_BITS_PRECISION = 21 # The first 3 bits of the mantissa may be zero
IBM_FLOAT32_EPSILON = pow(2.0, -(IBM_FLOAT32_MIN_BITS_PRECISION - 1))
_L24 = long_int(2) ** IBM_FLOAT32_MAX_BITS_PRECISION
_F24 = float(pow(2, IBM_FLOAT32_MAX_BITS_PRECISION))
MAX_BITS_PRECISION_IBM_FLOAT = 24
MIN_BITS_PRECISION_IBM_FLOAT = 21 # The first 3 bits of the mantissa may be zero
EPSILON_IBM_FLOAT = pow(2.0, -(MIN_BITS_PRECISION_IBM_FLOAT - 1))
_L24 = long_int(2) ** MAX_BITS_PRECISION_IBM_FLOAT
_F24 = float(pow(2, MAX_BITS_PRECISION_IBM_FLOAT))
_L21 = long_int(2) ** MIN_BITS_PRECISION_IBM_FLOAT
EXPONENT_BIAS = 64
MIN_EXACT_INTEGER_IBM_FLOAT = -2**MAX_BITS_PRECISION_IBM_FLOAT
MAX_EXACT_INTEGER_IBM_FLOAT = 2**MIN_BITS_PRECISION_IBM_FLOAT
def ibm2ieee(big_endian_bytes):
@@ -29,12 +43,52 @@ def ibm2ieee(big_endian_bytes):
return 0.0
sign = -1 if (a & 0x80) else 1
exponent = a & 0x7f
exponent_16_biased = a & 0x7f
mantissa = ((b << 16) | (c << 8) | d) / _F24
value = sign * mantissa * pow(16, exponent - 64)
value = sign * mantissa * pow(16, exponent_16_biased - EXPONENT_BIAS)
return value
BITS_PER_NYBBLE = 4
def truncate(big_endian_bytes):
a, b, c, d = four_bytes(big_endian_bytes)
sign = -1 if (a & 0x80) else 1
exponent_16_biased = a & 0x7f
exponent_16 = exponent_16_biased - EXPONENT_BIAS
mantissa = ((b << 16) | (c << 8) | d)
print("sign =", sign)
print("exponent_16", exponent_16, hex(exponent_16), bin(exponent_16))
print("mantissa", mantissa, hex(mantissa), bin(mantissa))
num_nybbles_to_preserve = min(exponent_16, MAX_BITS_PRECISION_IBM_FLOAT // BITS_PER_NYBBLE)
num_bits_to_clear = MAX_BITS_PRECISION_IBM_FLOAT - num_nybbles_to_preserve * BITS_PER_NYBBLE
clear_mask = 2**num_bits_to_clear - 1
preserve_mask = (2**MAX_BITS_PRECISION_IBM_FLOAT - 1) & ~clear_mask
print("num_nybbles_to_preserve", num_nybbles_to_preserve)
print("num_bits_to_clear", num_bits_to_clear)
print("clear_mask ", bin(clear_mask))
print("preserve_mask", bin(preserve_mask))
truncated_mantissa = mantissa & preserve_mask
value = truncated_mantissa * pow(16, exponent_16)
scaled_value = value >> MAX_BITS_PRECISION_IBM_FLOAT
return scaled_value
#
# tb = (preserve_mask >> 16) & b
# tc = (preserve_mask >> 8) & c
# td = preserve_mask & d
#
# return byte_string((a, tb, tc, td))
def ieee2ibm(f):
"""Convert a float to four big-endian bytes representing an IBM float.
@@ -116,23 +170,32 @@ def ieee2ibm(f):
return byte_string((a, b, c, d))
class IBMFloat(object):
class IBMFloat(Real):
__slots__ = ['_data']
def __init__(self, b):
"""Initialise IBMFloat from an IEEE float.
_INTERNED = {IBM_ZERO_BYTES: None,
IBM_NEGATIVE_ONE_BYTES: None,
IBM_POSITIVE_ONE_BYTES: None}
Args:
b: A byte sequence containing exactly four bytes
# noinspection PyUnresolvedReferences
def __new__(cls, b):
obj = object.__new__(cls)
Raises:
ValueError: If b does not contain exactly four values in the range 0-255.
"""
data = bytes(b)
num_bytes = len(data)
if num_bytes != 4:
raise ValueError("{} cannot be constructed from {} values".format(self.__class__.__name__, num_bytes))
self._data = data
raise ValueError("{} cannot be constructed from {} values".format(cls.__name__, num_bytes))
obj._data = data
# Intern common values
if data in cls._INTERNED:
if cls._INTERNED[data] is None:
cls._INTERNED[data] = obj
return cls._INTERNED[data]
return obj
@classmethod
def from_float(cls, f):
@@ -151,10 +214,20 @@ class IBMFloat(object):
"""
return cls(ieee2ibm(f))
@classmethod
def from_real(cls, f):
if isinstance(f, IBMFloat):
return f
return cls.from_float(f)
@classmethod
def from_bytes(cls, b):
return cls(b)
@property
def signbit(self):
return self._data[0] & 0x80
def __float__(self):
return ibm2ieee(self._data)
@@ -162,7 +235,10 @@ class IBMFloat(object):
return self._data
def __repr__(self):
return "{}({!r})".format(self.__class__.__name__, self._data)
return "{}.from_float({!r}) ~{!r}".format(self.__class__.__name__, self._data, float(self))
def __str__(self):
return str(float(self))
def __bool__(self):
return not self.is_zero()
@@ -174,5 +250,172 @@ class IBMFloat(object):
return not self.is_zero()
def is_subnormal(self):
return (not self.is_zero()) and (self._data[1] < 32)
return (not self.is_zero()) and (self._data[1] < 16)
def zero_subnormal(self):
return IBM_FLOAT_ZERO if self.is_subnormal() else self
def frexp(self):
raise NotImplemented
# return the mantissa and exponent
def __pos__(self):
return self
def __neg__(self):
data = self._data
return IBMFloat((data[0] ^ 0b10000000,
data[1],
data[2],
data[3]))
def __abs__(self):
data = self._data
return IBMFloat((data[0] & 0b01111111,
data[1],
data[2],
data[3]))
def __eq__(self, rhs):
if not isinstance(rhs, IBMFloat):
return NotImplemented
# TODO: Consider forcing normalisation
return self._data == rhs._data
def __floordiv__(self, rhs):
return float(self) // float(rhs)
def __rfloordiv__(self, lhs):
return float(lhs) // float(self)
def __rtruediv__(self, lhs):
q = float(lhs) / float(self)
return IBMFloat.from_float(q) if isinstance(lhs, float) else q
def __pow__(self, exponent):
p = pow(float(self), float(exponent))
return IBMFloat.from_float(p) if isinstance(exponent, IBMFloat) else p
def __rpow__(self, base):
return IBMFloat.from_float(pow(float(base), float(self)))
def __mod__(self, rhs):
m = float(self) % float(rhs)
return IBMFloat.from_float(m) if isinstance(rhs, IBMFloat) else m
def __rmod__(self, lhs):
m = float(lhs) % float(self)
return IBMFloat.from_float(m) if isinstance(lhs, IBMFloat) else m
def __rmul__(self, lhs):
p = float(lhs) * float(self)
return IBMFloat.from_float(p) if isinstance(lhs, IBMFloat) else p
def __radd__(self, lhs):
s = float(lhs) + float(self)
return IBMFloat.from_float(s) if isinstance(lhs, IBMFloat) else s
def __lt__(self, rhs):
return float(self) < float(rhs)
def __le__(self, rhs):
return float(self) <= float(rhs)
def __ceil__(self):
return ceil(float(self))
def __floor__(self):
return floor(float(self))
@property
def exp16(self):
"""The base 16 exponent."""
exponent_16_biased = self._data[0] & 0x7f
exponent_16 = exponent_16_biased - EXPONENT_BIAS
return exponent_16
@property
def int_mantissa(self):
data = self._data
return (data[1] << 16) | (data[2] << 8) | data[3]
def __trunc__(self):
sign = -1 if self.signbit else 1
exponent_16 = self.exp16
mantissa = self.int_mantissa
# print("sign =", sign)
# print("exponent_16", exponent_16, hex(exponent_16), bin(exponent_16))
# print("mantissa", mantissa, hex(mantissa), bin(mantissa))
num_nybbles_to_preserve = min(exponent_16, MAX_BITS_PRECISION_IBM_FLOAT // BITS_PER_NYBBLE)
num_bits_to_clear = MAX_BITS_PRECISION_IBM_FLOAT - num_nybbles_to_preserve * BITS_PER_NYBBLE
clear_mask = 2**num_bits_to_clear - 1
preserve_mask = (2**MAX_BITS_PRECISION_IBM_FLOAT - 1) & ~clear_mask
# print("num_nybbles_to_preserve", num_nybbles_to_preserve)
# print("num_bits_to_clear", num_bits_to_clear)
# print("clear_mask ", bin(clear_mask))
# print("preserve_mask", bin(preserve_mask))
truncated_mantissa = mantissa & preserve_mask
magnitude = truncated_mantissa * pow(16, exponent_16) >> MAX_BITS_PRECISION_IBM_FLOAT
return sign * magnitude
def normalize(self):
"""Attempt to normalize the floating point value.
Returns:
A normalized IBMFloat equal in value to this object.
Raises:
FloatingPointError: If the number could not be normalized.
"""
exponent_16 = self.exp16
mantissa = self.int_mantissa
if mantissa == 0:
return IBM_FLOAT_ZERO
while mantissa < (1 << 20):
new_exponent_16 = exponent_16 - 1
if not (-64 <= new_exponent_16 < 64):
raise FloatingPointError("Could not normalize {!r} without causing exponent overflow.".format(self))
mantissa <<= 4
exponent_16 = new_exponent_16
exponent_16_biased = exponent_16 + EXPONENT_BIAS
sign = self.signbit << 7
a = sign | exponent_16_biased
b = (mantissa >> 16) & 0xff
c = (mantissa >> 8) & 0xff
d = mantissa & 0xff
return IBMFloat.from_bytes((a, b, c, d))
def __round__(self, ndigits=None):
return IBMFloat.from_float(round(float(self), ndigits))
def __truediv__(self, rhs):
q = float(self) / float(rhs)
return IBMFloat.from_float(q) if isinstance(rhs, IBMFloat) else q
def __mul__(self, rhs):
p = float(self) * float(rhs)
return IBMFloat.from_float(p) if isinstance(rhs, IBMFloat) else p
def __add__(self, rhs):
p = float(self) + float(rhs)
return IBMFloat.from_float(p) if isinstance(rhs, IBMFloat) else p
def __int__(self):
raise trunc(self)
IBM_FLOAT_ZERO = IBMFloat.from_bytes(IBM_ZERO_BYTES)
+2 -2
View File
@@ -15,7 +15,7 @@ from segpy.catalog import CatalogBuilder
from segpy.datatypes import CTYPES, size_in_bytes
from segpy.encoding import guess_encoding, is_supported_encoding, UnsupportedEncodingError
from segpy.binary_reel_header_definition import HEADER_DEF
from segpy.ibm_float import ibm2ieee, ieee2ibm
from segpy.ibm_float import ibm2ieee, ieee2ibm, IBMFloat
from segpy.revisions import canonicalize_revision
from segpy.trace_header_definition import TRACE_HEADER_DEF
from segpy.util import file_length, batched, pad, complementary_intervals, NATIVE_ENDIANNESS
@@ -461,7 +461,7 @@ def unpack_ibm_floats(data, count):
Returns:
A sequence of floats.
"""
return array('d', (ibm2ieee(data[i: i+4]) for i in range(0, count * 4, 4)))
return array('d', (IBMFloat.from_bytes(data[i: i+4]) for i in range(0, count * 4, 4)))
def unpack_values(buf, count, fmt, endian='>'):
+72 -3
View File
@@ -1,11 +1,13 @@
from math import trunc, floor
import unittest
from hypothesis import given
from hypothesis import given, assume
from hypothesis.descriptors import integers_in_range, floats_in_range
from segpy.portability import byte_string
from segpy.ibm_float import (ieee2ibm, ibm2ieee, MAX_IBM_FLOAT, SMALLEST_POSITIVE_NORMAL_IBM_FLOAT,
LARGEST_NEGATIVE_NORMAL_IBM_FLOAT, MIN_IBM_FLOAT, IBMFloat, IBM_FLOAT32_EPSILON)
LARGEST_NEGATIVE_NORMAL_IBM_FLOAT, MIN_IBM_FLOAT, IBMFloat, EPSILON_IBM_FLOAT, truncate,
MAX_EXACT_INTEGER_IBM_FLOAT, MIN_EXACT_INTEGER_IBM_FLOAT, EXPONENT_BIAS)
from segpy.util import almost_equal
@@ -192,7 +194,74 @@ class TestIBMFloat(unittest.TestCase):
@given(floats_in_range(MIN_IBM_FLOAT, MAX_IBM_FLOAT))
def test_floats_roundtrip(self, f):
ibm = IBMFloat.from_float(f)
self.assertTrue(almost_equal(f, float(ibm), epsilon=IBM_FLOAT32_EPSILON))
self.assertTrue(almost_equal(f, float(ibm), epsilon=EPSILON_IBM_FLOAT))
@given(integers_in_range(0, MAX_EXACT_INTEGER_IBM_FLOAT - 1))
@given(floats_in_range(0.0, 1.0))
def test_trunc_above_zero(self, i, f):
assume(f != 1.0)
ieee = i + f
ibm = IBMFloat.from_float(ieee)
self.assertEqual(trunc(ibm), i)
@given(integers_in_range(MIN_EXACT_INTEGER_IBM_FLOAT + 1, 0))
@given(floats_in_range(0.0, 1.0))
def test_trunc_below_zero(self, i, f):
assume(f != 1.0)
ieee = i - f
ibm = IBMFloat.from_float(ieee)
self.assertEqual(trunc(ibm), i)
def test_normalise_subnormal_expect_failure(self):
# This float has an base-16 exponent of -64 (the minimum) and cannot be normalised
ibm = IBMFloat.from_float(1.6472184286297693e-83)
assert ibm.is_subnormal()
with self.assertRaises(FloatingPointError):
ibm.normalize()
def test_normalise_subnormal(self):
ibm = IBMFloat.from_bytes((0b01000000, 0b00000000, 0b11111111, 0b00000000))
assert ibm.is_subnormal()
normalized = ibm.normalize()
self.assertFalse(normalized.is_subnormal())
def test_normalise_subnormal2(self):
ibm = IBMFloat.from_bytes((64, 1, 0, 0))
assert ibm.is_subnormal()
normalized = ibm.normalize()
self.assertFalse(normalized.is_subnormal())
@given(integers_in_range(128, 255),
integers_in_range(0, 255),
integers_in_range(0, 255),
integers_in_range(4, 23))
def test_normalise_subnormal(self, b, c, d, shift):
mantissa = (b << 16) | (c << 8) | d
assume(mantissa != 0)
mantissa >>= shift
assert mantissa != 0
sa = EXPONENT_BIAS
sb = (mantissa >> 16) & 0xff
sc = (mantissa >> 8) & 0xff
sd = mantissa & 0xff
ibm = IBMFloat.from_bytes((sa, sb, sc, sd))
assert ibm.is_subnormal()
normalized = ibm.normalize()
self.assertFalse(normalized.is_subnormal())
@given(integers_in_range(0, 255),
integers_in_range(0, 255),
integers_in_range(0, 255),
integers_in_range(0, 255))
def test_abs(self, a, b, c, d):
b = byte_string((a, b, c, d))
ibm = IBMFloat.from_bytes(b)
abs_ibm = abs(ibm)
self.assertGreaterEqual(abs_ibm.signbit, 0)
if __name__ == '__main__':