From ee228e2dc9c94131ec06915019b7c9ce052d9c9a Mon Sep 17 00:00:00 2001
From: Robert Smallshire <robert@smallshire.org.uk>
Date: Tue, 21 Oct 2014 14:54:49 +0200
Subject: [PATCH] Improved portability between Python 2 and Python 3.
 Beginnings of code for writing SEG Y.

---
 catalog.py         |   8 ++--
 datatypes.py       |   1 +
 ibm_float.py       |  82 ++++++++++++++++++++++++++++++---
 portability.py     |  32 ++++++++++++-
 reader.py          |   9 ++--
 test/__init__.py   |   1 +
 test/test_float.py |  50 +++++++++++++++++++++
 toolkit.py         | 110 +++++++++++++++++++++++++++++++++++++++++++--
 util.py            |   4 +-
 9 files changed, 274 insertions(+), 23 deletions(-)
 create mode 100644 test/__init__.py
 create mode 100644 test/test_float.py

diff --git a/catalog.py b/catalog.py
index f9d2877..a97eeeb 100644
--- a/catalog.py
+++ b/catalog.py
@@ -1,6 +1,6 @@
 from collections import Mapping, Sequence, OrderedDict
 from fractions import Fraction
-import repr
+from portability import reprlib
 from util import contains_duplicates, measure_stride, minmax
 
 
@@ -261,7 +261,7 @@ class DictionaryCatalog(Mapping):
 
     def __repr__(self):
         return '{}({})'.format(
-            self.__class__.__name__, repr.repr(self._items.items()))
+            self.__class__.__name__, reprlib.repr(self._items.items()))
 
 
 class RegularConstantCatalog(Mapping):
@@ -357,7 +357,7 @@ class ConstantCatalog(Mapping):
     def __repr__(self):
         return '{}({}, {})'.format(
             self.__class__.__name__,
-            repr.repr(self._items),
+            reprlib.repr(self._items),
             self._value)
 
 
@@ -424,7 +424,7 @@ class RegularCatalog(Mapping):
             self._key_min,
             self._key_max,
             self._key_stride,
-            repr.repr(self._values))
+            reprlib.repr(self._values))
 
 
 class LinearRegularCatalog(Mapping):
diff --git a/datatypes.py b/datatypes.py
index ad94441..11c1f2e 100644
--- a/datatypes.py
+++ b/datatypes.py
@@ -34,6 +34,7 @@ CTYPES = {'l': 'l',
 
           'ibm': 'ibm'}
 
+
 # TODO This is redundant with data in the SH_def below
 CTYPE_DESCRIPTION = {'ibm': 'IBM float',
                      'l':   '32 bit signed integer',
diff --git a/ibm_float.py b/ibm_float.py
index 828c1cd..2808c13 100644
--- a/ibm_float.py
+++ b/ibm_float.py
@@ -1,6 +1,13 @@
-import sys
+from __future__ import print_function
 
-_P24 = float(pow(2, 24))
+import sys
+from math import frexp, isnan, isinf
+from portability import long_int, byte_string
+
+
+_IBM_FLOAT32_BITS_PRECISION = 24
+_L24 = long_int(2) ** _IBM_FLOAT32_BITS_PRECISION
+_F24 = float(pow(2, _IBM_FLOAT32_BITS_PRECISION))
 
 if sys.version_info >= (3, 0):
 
@@ -18,11 +25,12 @@ if sys.version_info >= (3, 0):
         if a == b == c == c == 0:
             return 0.0
 
-        sign = 1 if (a & 0x80) else -1
+        sign = -1 if (a & 0x80) else 1
         exponent = a & 0x7f
-        mantissa = ((b << 16) | (c << 8) | d) / _P24
+        mantissa = ((b << 16) | (c << 8) | d) / _F24
         value = sign * mantissa * pow(16, exponent - 64)
         return value
+
 else:
 
     def ibm2ieee(big_endian_bytes):
@@ -42,8 +50,68 @@ else:
         if a == b == c == c == 0:
             return 0.0
 
-        sign = 1 if (a & 0x80) else -1
+        sign = -1 if (a & 0x80) else 1
         exponent = a & 0x7f
-        mantissa = ((b << 16) | (c << 8) | d) / _P24
+        mantissa = ((b << 16) | (c << 8) | d) / _F24
         value = sign * mantissa * pow(16, exponent - 64)
-        return value
\ No newline at end of file
+        return value
+
+
+def ieee2ibm(f):
+    """Covert a float to four big-endian bytes representing an IBM float.
+
+    Args:
+        f (float): The value to be converted.
+
+    Returns:
+        A bytes object (Python 3) or a string (Python 2) containing four
+        bytes representing a big-endian IBM float.
+    """
+    if f == 0:
+        # There are many potential representations of zero - this is the standard one
+        return b'\x00\x00\x00\x00'
+
+    if isnan(f):
+        raise ValueError("NaN cannot be represented in IBM floating point")
+
+    if isinf(f):
+        raise ValueError("Infinities cannot be represented in IBM floating point")
+
+    # Now compute m and e to satisfy:
+    #
+    #  f = m * 2^e
+    #
+    # where 0.5 <= abs(m) < 1
+    # except when f == 0 in which case m == 0 and e == 0, which we've already
+    # dealt with.
+    m, e = frexp(f)
+
+    # Convert the fraction (m) into an integer representation. IEEE float32
+    # numbers have 23 explicit (24 implicit) bits of precision.
+
+    mantissa = abs(long_int(m * _L24))
+    exponent = e
+    sign = 0x80 if f < 0 else 0x00
+
+    # IBM single precision floats are of the form
+    # (-1)^sign * 0.significand * 16^(exponent-64)
+
+    # Adjust the exponent, and the mantissa in sympathy so it is
+    # a multiple of four, so it can be expressed in base 16
+    remainder = exponent % 4
+    if remainder != 0:
+        shift = 4 - remainder
+        mantissa >>= shift
+        exponent += shift
+
+    exponent_16 = exponent >> 2            # Divide by four to convert to base 16
+    exponent_16_biased = exponent_16 + 64  # Add the exponent bias of 64
+
+    # TODO: I'm not entirely sure we're producing properly normalised representations.
+
+    a = sign | exponent_16_biased
+    b = (mantissa >> 16) & 0xff
+    c = (mantissa >> 8) & 0xff
+    d = mantissa & 0xff
+
+    return byte_string((a, b, c, d))
diff --git a/portability.py b/portability.py
index 0e8a758..ba975e2 100644
--- a/portability.py
+++ b/portability.py
@@ -1,4 +1,7 @@
 import os
+import sys
+
+EMPTY_BYTE_STRING = b'' if sys.version_info >= (3, 0) else ''
 
 
 def seekable(fh):
@@ -21,4 +24,31 @@ def seekable(fh):
                 fh.seek(pos)
         except AttributeError:
             return False
-    return True
\ No newline at end of file
+    return True
+
+
+if sys.version_info >= (3, 0):
+    long_int = int
+else:
+    long_int = long
+
+
+if sys.version_info >= (3, 0):
+    def byte_string(integers):
+        return bytes(integers)
+else:
+    def byte_string(integers):
+        return ''.join(chr(i) for i in integers)
+
+
+if sys.version_info >= (3, 0):
+    import reprlib
+    reprlib = reprlib  # Keep the static analyzer happy
+else:
+    import repr as reprlib
+
+if sys.version_info >= (3, 0):
+    izip = zip
+else:
+    from itertools import izip
+    izip = izip    # Keep the static analyzer happy
diff --git a/reader.py b/reader.py
index 17e23c4..e061526 100644
--- a/reader.py
+++ b/reader.py
@@ -6,10 +6,10 @@ from datatypes import DATA_SAMPLE_FORMAT, CTYPE_DESCRIPTION, CTYPES, size_in_byt
 from toolkit import (extract_revision,
                      bytes_per_sample,
                      read_reel_header,
+                     read_trace_header,
                      catalog_traces,
                      read_binary_values,
                      compile_trace_header_format,
-                     TraceHeader,
                      REEL_HEADER_NUM_BYTES,
                      TRACE_HEADER_NUM_BYTES)
 
@@ -60,7 +60,7 @@ def create_reader(fh, endian='>', progress=None):
             print(reader.num_traces())
 
     """
-    if fh.encoding is not None:
+    if hasattr(fh, 'encoding') and fh.encoding is not None:
         raise TypeError(
             "SegYReader must be provided with a binary mode file object")
 
@@ -217,10 +217,7 @@ class SegYReader(object):
         if not (0 <= trace_index < self.num_traces()):
             raise ValueError("Trace index {} out of range".format(trace_index))
         pos = self._trace_offset_catalog[trace_index]
-        self._fh.seek(pos)
-        data = self._fh.read(TRACE_HEADER_NUM_BYTES)
-        trace_header = TraceHeader._make(
-            self._trace_header_format.unpack(data))
+        trace_header = read_trace_header(self._fh, pos)
         return trace_header
 
     @property
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/test/__init__.py
@@ -0,0 +1 @@
+
diff --git a/test/test_float.py b/test/test_float.py
new file mode 100644
index 0000000..b553c5d
--- /dev/null
+++ b/test/test_float.py
@@ -0,0 +1,50 @@
+import unittest
+from ibm_float import ieee2ibm, ibm2ieee
+
+
+class Ibm2Ieee(unittest.TestCase):
+
+    def test_zero(self):
+        self.assertEqual(ibm2ieee(b'\0\0\0\0'), 0.0)
+
+    def test_positive_half(self):
+        self.assertEqual(ibm2ieee((0b11000000, 0x80, 0x00, 0x00)), -0.5)
+
+    def test_negative_half(self):
+        self.assertEqual(ibm2ieee((0b01000000, 0x80, 0x00, 0x00)), 0.5)
+
+    def test_one(self):
+        self.assertEqual(ibm2ieee(b'\x41\x10\x00\x00'), 1.0)
+
+    def test_negative_118_625(self):
+        # Example taken from Wikipedia http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture
+        self.assertEqual(ibm2ieee((0b11000010, 0b01110110, 0b10100000, 0b00000000)), -118.625)
+
+
+
+class Ieee2Ibm(unittest.TestCase):
+
+    def test_zero(self):
+        self.assertEqual(ieee2ibm(0.0), b'\0\0\0\0')
+
+    def test_positive_half(self):
+        self.assertEqual(ieee2ibm(-0.5), bytes((0b11000000, 0x80, 0x00, 0x00)))
+
+    def test_negative_half(self):
+        self.assertEqual(ieee2ibm(0.5), bytes((0b01000000, 0x80, 0x00, 0x00)))
+
+    def test_one(self):
+        self.assertEqual(ieee2ibm(1.0), b'\x41\x10\x00\x00')
+
+    def test_negative_118_625(self):
+        # Example taken from Wikipedia http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture
+        self.assertEqual(ieee2ibm(-118.625), bytes((0b11000010, 0b01110110, 0b10100000, 0b00000000)))
+
+    def test_0_1(self):
+        # Note, this is different from the Wikipedia example, but I'll stick my neck out and say
+        # Wikipedia is wrong.
+        self.assertEqual(ieee2ibm(0.1), bytes((0b01000000, 0b00011001, 0b10011001, 0b10011001)))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/toolkit.py b/toolkit.py
index 8ecba83..d77729c 100644
--- a/toolkit.py
+++ b/toolkit.py
@@ -7,10 +7,11 @@ import struct
 from catalog import CatalogBuilder
 from datatypes import CTYPES, size_in_bytes
 from reel_header_definition import HEADER_DEF
-from ibm_float import ibm2ieee
+from ibm_float import ibm2ieee, ieee2ibm
 from revisions import canonicalize_revision
 from trace_header_definition import TRACE_HEADER_DEF
 from util import file_length
+from portability import EMPTY_BYTE_STRING
 
 
 REEL_HEADER_NUM_BYTES = 3600
@@ -208,16 +209,36 @@ def catalog_traces(fh, bps, endian='>', progress=None):
             cdp_catalog,
             line_catalog)
 
+def read_trace_header(fh, trace_header_format, pos=None):
+    """Read a trace header.
+
+    Args:
+        fh: A file-like-object open in binary mode.
+
+        trace_header_format: A Struct object, such as obtained from a
+            call to compile_trace_header_format()
+
+        pos: The file offset in bytes from the beginning from which the data
+            is to be read.
+
+    Returns:
+        A TraceHeader object.
+    """
+    if pos is not None:
+        fh.seek(pos)
+    data = fh.read(TRACE_HEADER_NUM_BYTES)
+    trace_header = TraceHeader._make(
+        trace_header_format.unpack(data))
+    return trace_header
 
 
-def read_binary_values(fh, pos, ctype='l', count=1, endian='>'):
+def read_binary_values(fh, pos=None, ctype='l', count=1, endian='>'):
     """Read a series of values from a binary file.
 
     Args:
         fh: A file-like-object open in binary mode.
 
-        pos: The file offset in bytes from the beginning from which the data
-            is to be read.
+c
 
         ctype: The SEG Y data type.
 
@@ -270,6 +291,9 @@ def unpack_values(buf, count, item_size, fmt, endian='>'):
         fmt: A format code (one of the values in the datatype.CTYPES
             dictionary)
 
+        endian: '>' for big-endian data (the standard and default), '<'
+            for little-endian (non-standard)
+
     Returns:
         A sequence of objects with type corresponding to the format code.
     """
@@ -283,6 +307,84 @@ def unpack_values(buf, count, item_size, fmt, endian='>'):
     # swapping ourselves.
 
 
+def write_trace_header(fh, trace_header, trace_header_format, pos=None):
+    """Write a TraceHeader to file.
+
+    Args:
+        fh: A file-like object open in binary mode for writing.
+
+        trace_header: A TraceHeader object.
+
+        trace_header_format: A Struct object, such as obtained from a
+            call to compile_trace_header_format()
+
+        pos: An optional file offset in bytes from the beginning of the
+            file. Defaults to the current file position.
+    """
+    if pos is not None:
+        fh.seek(pos, os.SEEK_SET)
+    buf = trace_header_format.pack(trace_header)
+    fh.write(buf)
+
+
+def write_trace_values(fh, values, ctype='l', pos=None):
+    write_binary_values(fh, values, ctype, pos)
+
+
+def write_binary_values(fh, values, ctype='l', pos=None):
+    """Write a series on values to a file.
+
+    Args:
+        fh: A file-like-object open for writing in binary mode.
+
+        values: An iterable series of values.
+
+        ctype: The SEG Y data type.
+
+        pos: An optional offset from the beginning of the file. If omitted,
+            any writing is done at the current file position.
+    """
+    fmt = CTYPES[ctype]
+
+    if pos is not None:
+        fh.seek(pos, os.SEEK_SET)
+
+    buf = (pack_ibm_floats(values)
+           if fmt == 'ibm'
+           else pack_values(values, fmt))
+
+    fh.write(buf)
+
+
+def pack_ibm_floats(values):
+    """Pack floats into binary-encoded big-endian single-precision IBM floats.
+
+    Args:
+        values: An iterable series of numeric values.
+
+    Returns:
+        A sequence of bytes. (Python 2 - a str object, Python 3 - a bytes
+            object)
+    """
+    return EMPTY_BYTE_STRING.join(ieee2ibm(value) for value in values)
+
+
+def pack_values(values, fmt, endian='>'):
+    """Pack values into binary encoded big-endian byte strings.
+
+    Args:
+        values: An iterable series of values.
+
+        fmt: A format code (one of the values in the datatype.CTYPES
+            dictionary)
+
+        endian: '>' for big-endian data (the standard and default), '<'
+            for little-endian (non-standard)
+    """
+    c_format = '{}{}{}'.format(endian, len(values), fmt)
+    return struct.pack(c_format, *values)
+
+
 _TraceAttributeSpec = namedtuple('Record', ['name', 'pos', 'type'])
 
 
diff --git a/util.py b/util.py
index 956e30b..ac74425 100644
--- a/util.py
+++ b/util.py
@@ -2,6 +2,8 @@ import itertools
 import time
 import os
 
+from portability import izip
+
 
 def pairwise(iterable):
     """Pairwise iteration.
@@ -14,7 +16,7 @@ def pairwise(iterable):
     """
     a, b = itertools.tee(iterable)
     next(b, None)
-    return itertools.izip(a, b)
+    return izip(a, b)
 
 
 def contains_duplicates(sorted_iterable):