Improved portability between Python 2 and Python 3. Beginnings of code for writing SEG Y.

2026-06-27 17:17:13 +08:00 · 2014-10-21 14:54:49 +02:00
parent 1abff28a40
commit ee228e2dc9
9 changed files with 274 additions and 23 deletions
@@ -1,6 +1,6 @@
 from collections import Mapping, Sequence, OrderedDict
 from fractions import Fraction
-import repr
+from portability import reprlib
 from util import contains_duplicates, measure_stride, minmax


@@ -261,7 +261,7 @@ class DictionaryCatalog(Mapping):

    def __repr__(self):
        return '{}({})'.format(
-            self.__class__.__name__, repr.repr(self._items.items()))
+            self.__class__.__name__, reprlib.repr(self._items.items()))


 class RegularConstantCatalog(Mapping):
@@ -357,7 +357,7 @@ class ConstantCatalog(Mapping):
    def __repr__(self):
        return '{}({}, {})'.format(
            self.__class__.__name__,
-            repr.repr(self._items),
+            reprlib.repr(self._items),
            self._value)


@@ -424,7 +424,7 @@ class RegularCatalog(Mapping):
            self._key_min,
            self._key_max,
            self._key_stride,
-            repr.repr(self._values))
+            reprlib.repr(self._values))


 class LinearRegularCatalog(Mapping):
@@ -34,6 +34,7 @@ CTYPES = {'l': 'l',

          'ibm': 'ibm'}

+
 # TODO This is redundant with data in the SH_def below
 CTYPE_DESCRIPTION = {'ibm': 'IBM float',
                     'l':   '32 bit signed integer',
@@ -1,6 +1,13 @@
-import sys
+from __future__ import print_function

-_P24 = float(pow(2, 24))
+import sys
+from math import frexp, isnan, isinf
+from portability import long_int, byte_string
+
+
+_IBM_FLOAT32_BITS_PRECISION = 24
+_L24 = long_int(2) ** _IBM_FLOAT32_BITS_PRECISION
+_F24 = float(pow(2, _IBM_FLOAT32_BITS_PRECISION))

 if sys.version_info >= (3, 0):

@@ -18,11 +25,12 @@ if sys.version_info >= (3, 0):
        if a == b == c == c == 0:
            return 0.0

-        sign = 1 if (a & 0x80) else -1
+        sign = -1 if (a & 0x80) else 1
        exponent = a & 0x7f
-        mantissa = ((b << 16) | (c << 8) | d) / _P24
+        mantissa = ((b << 16) | (c << 8) | d) / _F24
        value = sign * mantissa * pow(16, exponent - 64)
        return value
+
 else:

    def ibm2ieee(big_endian_bytes):
@@ -42,8 +50,68 @@ else:
        if a == b == c == c == 0:
            return 0.0

-        sign = 1 if (a & 0x80) else -1
+        sign = -1 if (a & 0x80) else 1
        exponent = a & 0x7f
-        mantissa = ((b << 16) | (c << 8) | d) / _P24
+        mantissa = ((b << 16) | (c << 8) | d) / _F24
        value = sign * mantissa * pow(16, exponent - 64)
-        return value
+        return value
+
+
+def ieee2ibm(f):
+    """Covert a float to four big-endian bytes representing an IBM float.
+
+    Args:
+        f (float): The value to be converted.
+
+    Returns:
+        A bytes object (Python 3) or a string (Python 2) containing four
+        bytes representing a big-endian IBM float.
+    """
+    if f == 0:
+        # There are many potential representations of zero - this is the standard one
+        return b'\x00\x00\x00\x00'
+
+    if isnan(f):
+        raise ValueError("NaN cannot be represented in IBM floating point")
+
+    if isinf(f):
+        raise ValueError("Infinities cannot be represented in IBM floating point")
+
+    # Now compute m and e to satisfy:
+    #
+    #  f = m * 2^e
+    #
+    # where 0.5 <= abs(m) < 1
+    # except when f == 0 in which case m == 0 and e == 0, which we've already
+    # dealt with.
+    m, e = frexp(f)
+
+    # Convert the fraction (m) into an integer representation. IEEE float32
+    # numbers have 23 explicit (24 implicit) bits of precision.
+
+    mantissa = abs(long_int(m * _L24))
+    exponent = e
+    sign = 0x80 if f < 0 else 0x00
+
+    # IBM single precision floats are of the form
+    # (-1)^sign * 0.significand * 16^(exponent-64)
+
+    # Adjust the exponent, and the mantissa in sympathy so it is
+    # a multiple of four, so it can be expressed in base 16
+    remainder = exponent % 4
+    if remainder != 0:
+        shift = 4 - remainder
+        mantissa >>= shift
+        exponent += shift
+
+    exponent_16 = exponent >> 2            # Divide by four to convert to base 16
+    exponent_16_biased = exponent_16 + 64  # Add the exponent bias of 64
+
+    # TODO: I'm not entirely sure we're producing properly normalised representations.
+
+    a = sign | exponent_16_biased
+    b = (mantissa >> 16) & 0xff
+    c = (mantissa >> 8) & 0xff
+    d = mantissa & 0xff
+
+    return byte_string((a, b, c, d))
@@ -1,4 +1,7 @@
 import os
+import sys
+
+EMPTY_BYTE_STRING = b'' if sys.version_info >= (3, 0) else ''


 def seekable(fh):
@@ -21,4 +24,31 @@ def seekable(fh):
                fh.seek(pos)
        except AttributeError:
            return False
-    return True
+    return True
+
+
+if sys.version_info >= (3, 0):
+    long_int = int
+else:
+    long_int = long
+
+
+if sys.version_info >= (3, 0):
+    def byte_string(integers):
+        return bytes(integers)
+else:
+    def byte_string(integers):
+        return ''.join(chr(i) for i in integers)
+
+
+if sys.version_info >= (3, 0):
+    import reprlib
+    reprlib = reprlib  # Keep the static analyzer happy
+else:
+    import repr as reprlib
+
+if sys.version_info >= (3, 0):
+    izip = zip
+else:
+    from itertools import izip
+    izip = izip    # Keep the static analyzer happy
@@ -6,10 +6,10 @@ from datatypes import DATA_SAMPLE_FORMAT, CTYPE_DESCRIPTION, CTYPES, size_in_byt
 from toolkit import (extract_revision,
                     bytes_per_sample,
                     read_reel_header,
+                     read_trace_header,
                     catalog_traces,
                     read_binary_values,
                     compile_trace_header_format,
-                     TraceHeader,
                     REEL_HEADER_NUM_BYTES,
                     TRACE_HEADER_NUM_BYTES)

@@ -60,7 +60,7 @@ def create_reader(fh, endian='>', progress=None):
            print(reader.num_traces())

    """
-    if fh.encoding is not None:
+    if hasattr(fh, 'encoding') and fh.encoding is not None:
        raise TypeError(
            "SegYReader must be provided with a binary mode file object")

@@ -217,10 +217,7 @@ class SegYReader(object):
        if not (0 <= trace_index < self.num_traces()):
            raise ValueError("Trace index {} out of range".format(trace_index))
        pos = self._trace_offset_catalog[trace_index]
-        self._fh.seek(pos)
-        data = self._fh.read(TRACE_HEADER_NUM_BYTES)
-        trace_header = TraceHeader._make(
-            self._trace_header_format.unpack(data))
+        trace_header = read_trace_header(self._fh, pos)
        return trace_header

    @property
@@ -0,0 +1 @@
+
@@ -0,0 +1,50 @@
+import unittest
+from ibm_float import ieee2ibm, ibm2ieee
+
+
+class Ibm2Ieee(unittest.TestCase):
+
+    def test_zero(self):
+        self.assertEqual(ibm2ieee(b'\0\0\0\0'), 0.0)
+
+    def test_positive_half(self):
+        self.assertEqual(ibm2ieee((0b11000000, 0x80, 0x00, 0x00)), -0.5)
+
+    def test_negative_half(self):
+        self.assertEqual(ibm2ieee((0b01000000, 0x80, 0x00, 0x00)), 0.5)
+
+    def test_one(self):
+        self.assertEqual(ibm2ieee(b'\x41\x10\x00\x00'), 1.0)
+
+    def test_negative_118_625(self):
+        # Example taken from Wikipedia http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture
+        self.assertEqual(ibm2ieee((0b11000010, 0b01110110, 0b10100000, 0b00000000)), -118.625)
+
+
+
+class Ieee2Ibm(unittest.TestCase):
+
+    def test_zero(self):
+        self.assertEqual(ieee2ibm(0.0), b'\0\0\0\0')
+
+    def test_positive_half(self):
+        self.assertEqual(ieee2ibm(-0.5), bytes((0b11000000, 0x80, 0x00, 0x00)))
+
+    def test_negative_half(self):
+        self.assertEqual(ieee2ibm(0.5), bytes((0b01000000, 0x80, 0x00, 0x00)))
+
+    def test_one(self):
+        self.assertEqual(ieee2ibm(1.0), b'\x41\x10\x00\x00')
+
+    def test_negative_118_625(self):
+        # Example taken from Wikipedia http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture
+        self.assertEqual(ieee2ibm(-118.625), bytes((0b11000010, 0b01110110, 0b10100000, 0b00000000)))
+
+    def test_0_1(self):
+        # Note, this is different from the Wikipedia example, but I'll stick my neck out and say
+        # Wikipedia is wrong.
+        self.assertEqual(ieee2ibm(0.1), bytes((0b01000000, 0b00011001, 0b10011001, 0b10011001)))
+
+
+if __name__ == '__main__':
+    unittest.main()
@@ -7,10 +7,11 @@ import struct
 from catalog import CatalogBuilder
 from datatypes import CTYPES, size_in_bytes
 from reel_header_definition import HEADER_DEF
-from ibm_float import ibm2ieee
+from ibm_float import ibm2ieee, ieee2ibm
 from revisions import canonicalize_revision
 from trace_header_definition import TRACE_HEADER_DEF
 from util import file_length
+from portability import EMPTY_BYTE_STRING


 REEL_HEADER_NUM_BYTES = 3600
@@ -208,16 +209,36 @@ def catalog_traces(fh, bps, endian='>', progress=None):
            cdp_catalog,
            line_catalog)

+def read_trace_header(fh, trace_header_format, pos=None):
+    """Read a trace header.
+
+    Args:
+        fh: A file-like-object open in binary mode.
+
+        trace_header_format: A Struct object, such as obtained from a
+            call to compile_trace_header_format()
+
+        pos: The file offset in bytes from the beginning from which the data
+            is to be read.
+
+    Returns:
+        A TraceHeader object.
+    """
+    if pos is not None:
+        fh.seek(pos)
+    data = fh.read(TRACE_HEADER_NUM_BYTES)
+    trace_header = TraceHeader._make(
+        trace_header_format.unpack(data))
+    return trace_header


-def read_binary_values(fh, pos, ctype='l', count=1, endian='>'):
+def read_binary_values(fh, pos=None, ctype='l', count=1, endian='>'):
    """Read a series of values from a binary file.

    Args:
        fh: A file-like-object open in binary mode.

-        pos: The file offset in bytes from the beginning from which the data
-            is to be read.
+c

        ctype: The SEG Y data type.

@@ -270,6 +291,9 @@ def unpack_values(buf, count, item_size, fmt, endian='>'):
        fmt: A format code (one of the values in the datatype.CTYPES
            dictionary)

+        endian: '>' for big-endian data (the standard and default), '<'
+            for little-endian (non-standard)
+
    Returns:
        A sequence of objects with type corresponding to the format code.
    """
@@ -283,6 +307,84 @@ def unpack_values(buf, count, item_size, fmt, endian='>'):
    # swapping ourselves.


+def write_trace_header(fh, trace_header, trace_header_format, pos=None):
+    """Write a TraceHeader to file.
+
+    Args:
+        fh: A file-like object open in binary mode for writing.
+
+        trace_header: A TraceHeader object.
+
+        trace_header_format: A Struct object, such as obtained from a
+            call to compile_trace_header_format()
+
+        pos: An optional file offset in bytes from the beginning of the
+            file. Defaults to the current file position.
+    """
+    if pos is not None:
+        fh.seek(pos, os.SEEK_SET)
+    buf = trace_header_format.pack(trace_header)
+    fh.write(buf)
+
+
+def write_trace_values(fh, values, ctype='l', pos=None):
+    write_binary_values(fh, values, ctype, pos)
+
+
+def write_binary_values(fh, values, ctype='l', pos=None):
+    """Write a series on values to a file.
+
+    Args:
+        fh: A file-like-object open for writing in binary mode.
+
+        values: An iterable series of values.
+
+        ctype: The SEG Y data type.
+
+        pos: An optional offset from the beginning of the file. If omitted,
+            any writing is done at the current file position.
+    """
+    fmt = CTYPES[ctype]
+
+    if pos is not None:
+        fh.seek(pos, os.SEEK_SET)
+
+    buf = (pack_ibm_floats(values)
+           if fmt == 'ibm'
+           else pack_values(values, fmt))
+
+    fh.write(buf)
+
+
+def pack_ibm_floats(values):
+    """Pack floats into binary-encoded big-endian single-precision IBM floats.
+
+    Args:
+        values: An iterable series of numeric values.
+
+    Returns:
+        A sequence of bytes. (Python 2 - a str object, Python 3 - a bytes
+            object)
+    """
+    return EMPTY_BYTE_STRING.join(ieee2ibm(value) for value in values)
+
+
+def pack_values(values, fmt, endian='>'):
+    """Pack values into binary encoded big-endian byte strings.
+
+    Args:
+        values: An iterable series of values.
+
+        fmt: A format code (one of the values in the datatype.CTYPES
+            dictionary)
+
+        endian: '>' for big-endian data (the standard and default), '<'
+            for little-endian (non-standard)
+    """
+    c_format = '{}{}{}'.format(endian, len(values), fmt)
+    return struct.pack(c_format, *values)
+
+
 _TraceAttributeSpec = namedtuple('Record', ['name', 'pos', 'type'])


@@ -2,6 +2,8 @@ import itertools
 import time
 import os

+from portability import izip
+

 def pairwise(iterable):
    """Pairwise iteration.
@@ -14,7 +16,7 @@ def pairwise(iterable):
    """
    a, b = itertools.tee(iterable)
    next(b, None)
-    return itertools.izip(a, b)
+    return izip(a, b)


 def contains_duplicates(sorted_iterable):