Improved portability between Python 2 and Python 3. Beginnings of code for writing SEG Y.

This commit is contained in:
Robert Smallshire
2014-10-21 14:54:49 +02:00
parent 1abff28a40
commit ee228e2dc9
9 changed files with 274 additions and 23 deletions
+4 -4
View File
@@ -1,6 +1,6 @@
from collections import Mapping, Sequence, OrderedDict
from fractions import Fraction
import repr
from portability import reprlib
from util import contains_duplicates, measure_stride, minmax
@@ -261,7 +261,7 @@ class DictionaryCatalog(Mapping):
def __repr__(self):
return '{}({})'.format(
self.__class__.__name__, repr.repr(self._items.items()))
self.__class__.__name__, reprlib.repr(self._items.items()))
class RegularConstantCatalog(Mapping):
@@ -357,7 +357,7 @@ class ConstantCatalog(Mapping):
def __repr__(self):
return '{}({}, {})'.format(
self.__class__.__name__,
repr.repr(self._items),
reprlib.repr(self._items),
self._value)
@@ -424,7 +424,7 @@ class RegularCatalog(Mapping):
self._key_min,
self._key_max,
self._key_stride,
repr.repr(self._values))
reprlib.repr(self._values))
class LinearRegularCatalog(Mapping):
+1
View File
@@ -34,6 +34,7 @@ CTYPES = {'l': 'l',
'ibm': 'ibm'}
# TODO This is redundant with data in the SH_def below
CTYPE_DESCRIPTION = {'ibm': 'IBM float',
'l': '32 bit signed integer',
+75 -7
View File
@@ -1,6 +1,13 @@
import sys
from __future__ import print_function
_P24 = float(pow(2, 24))
import sys
from math import frexp, isnan, isinf
from portability import long_int, byte_string
_IBM_FLOAT32_BITS_PRECISION = 24
_L24 = long_int(2) ** _IBM_FLOAT32_BITS_PRECISION
_F24 = float(pow(2, _IBM_FLOAT32_BITS_PRECISION))
if sys.version_info >= (3, 0):
@@ -18,11 +25,12 @@ if sys.version_info >= (3, 0):
if a == b == c == c == 0:
return 0.0
sign = 1 if (a & 0x80) else -1
sign = -1 if (a & 0x80) else 1
exponent = a & 0x7f
mantissa = ((b << 16) | (c << 8) | d) / _P24
mantissa = ((b << 16) | (c << 8) | d) / _F24
value = sign * mantissa * pow(16, exponent - 64)
return value
else:
def ibm2ieee(big_endian_bytes):
@@ -42,8 +50,68 @@ else:
if a == b == c == c == 0:
return 0.0
sign = 1 if (a & 0x80) else -1
sign = -1 if (a & 0x80) else 1
exponent = a & 0x7f
mantissa = ((b << 16) | (c << 8) | d) / _P24
mantissa = ((b << 16) | (c << 8) | d) / _F24
value = sign * mantissa * pow(16, exponent - 64)
return value
return value
def ieee2ibm(f):
    """Convert a float to four big-endian bytes representing an IBM float.

    Args:
        f (float): The value to be converted.

    Returns:
        A bytes object (Python 3) or a string (Python 2) containing four
        bytes representing a big-endian IBM float.

    Raises:
        ValueError: If f is NaN, infinite, or has a magnitude which is too
            large or too small for the seven-bit base-16 exponent of a
            single precision IBM float.
    """
    if f == 0:
        # There are many potential representations of zero - this is the
        # standard one
        return b'\x00\x00\x00\x00'

    if isnan(f):
        raise ValueError("NaN cannot be represented in IBM floating point")

    if isinf(f):
        raise ValueError("Infinities cannot be represented in IBM floating point")

    # Now compute m and e to satisfy:
    #
    #  f = m * 2^e
    #
    # where 0.5 <= abs(m) < 1
    # except when f == 0 in which case m == 0 and e == 0, which we've already
    # dealt with.
    m, e = frexp(f)

    # Convert the fraction (m) into an integer representation. IEEE float32
    # numbers have 23 explicit (24 implicit) bits of precision.
    mantissa = abs(long_int(m * _L24))
    exponent = e
    sign = 0x80 if f < 0 else 0x00

    # IBM single precision floats are of the form
    #   (-1)^sign * 0.significand * 16^(exponent-64)

    # Adjust the exponent, and the mantissa in sympathy so it is
    # a multiple of four, so it can be expressed in base 16
    remainder = exponent % 4
    if remainder != 0:
        shift = 4 - remainder
        mantissa >>= shift
        exponent += shift

    exponent_16 = exponent >> 2  # Divide by four to convert to base 16
    exponent_16_biased = exponent_16 + 64  # Add the exponent bias of 64

    # The biased exponent shares its byte with the sign bit, so it must fit
    # in seven bits. Without this check an over-large exponent would spill
    # into the sign bit and silently encode a different number.
    if not (0 <= exponent_16_biased <= 0x7f):
        raise ValueError(
            "{!r} is outside the range representable in IBM floating "
            "point".format(f))

    # Since abs(m) >= 0.5 the top bit of the 24 bit mantissa was set before
    # the adjustment above, which shifts by at most three bits, so the most
    # significant hexadecimal digit is non-zero and the result is normalised.
    a = sign | exponent_16_biased
    b = (mantissa >> 16) & 0xff
    c = (mantissa >> 8) & 0xff
    d = mantissa & 0xff

    return byte_string((a, b, c, d))
+31 -1
View File
@@ -1,4 +1,7 @@
import os
import sys
EMPTY_BYTE_STRING = b'' if sys.version_info >= (3, 0) else ''
def seekable(fh):
@@ -21,4 +24,31 @@ def seekable(fh):
fh.seek(pos)
except AttributeError:
return False
return True
return True
# Version-dependent aliases, all selected by a single interpreter check.
#
#   long_int    - the arbitrary precision integer type
#   byte_string - builds a byte string from an iterable of integers
#   reprlib     - the size-limited repr module (called 'repr' on Python 2)
#   izip        - the lazy variant of zip
if sys.version_info < (3, 0):
    import repr as reprlib
    from itertools import izip

    long_int = long

    def byte_string(integers):
        return ''.join(map(chr, integers))
else:
    import reprlib

    long_int = int
    izip = zip

    def byte_string(integers):
        return bytes(integers)
+3 -6
View File
@@ -6,10 +6,10 @@ from datatypes import DATA_SAMPLE_FORMAT, CTYPE_DESCRIPTION, CTYPES, size_in_byt
from toolkit import (extract_revision,
bytes_per_sample,
read_reel_header,
read_trace_header,
catalog_traces,
read_binary_values,
compile_trace_header_format,
TraceHeader,
REEL_HEADER_NUM_BYTES,
TRACE_HEADER_NUM_BYTES)
@@ -60,7 +60,7 @@ def create_reader(fh, endian='>', progress=None):
print(reader.num_traces())
"""
if fh.encoding is not None:
if hasattr(fh, 'encoding') and fh.encoding is not None:
raise TypeError(
"SegYReader must be provided with a binary mode file object")
@@ -217,10 +217,7 @@ class SegYReader(object):
if not (0 <= trace_index < self.num_traces()):
raise ValueError("Trace index {} out of range".format(trace_index))
pos = self._trace_offset_catalog[trace_index]
self._fh.seek(pos)
data = self._fh.read(TRACE_HEADER_NUM_BYTES)
trace_header = TraceHeader._make(
self._trace_header_format.unpack(data))
trace_header = read_trace_header(self._fh, pos)
return trace_header
@property
+1
View File
@@ -0,0 +1 @@
+50
View File
@@ -0,0 +1,50 @@
import unittest
from ibm_float import ieee2ibm, ibm2ieee
class Ibm2Ieee(unittest.TestCase):
    """Tests for decoding four big-endian IBM float bytes with ibm2ieee()."""

    # Inputs given as bit patterns are built with bytes(bytearray(...)) so
    # that every test passes a byte string, as test_zero and test_one do, on
    # both Python 2 and Python 3.

    def test_zero(self):
        self.assertEqual(ibm2ieee(b'\0\0\0\0'), 0.0)

    def test_negative_half(self):
        # Sign bit set, so the value is negative
        self.assertEqual(
            ibm2ieee(bytes(bytearray((0b11000000, 0x80, 0x00, 0x00)))), -0.5)

    def test_positive_half(self):
        # Sign bit clear, so the value is positive
        self.assertEqual(
            ibm2ieee(bytes(bytearray((0b01000000, 0x80, 0x00, 0x00)))), 0.5)

    def test_one(self):
        self.assertEqual(ibm2ieee(b'\x41\x10\x00\x00'), 1.0)

    def test_negative_118_625(self):
        # Example taken from Wikipedia http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture
        self.assertEqual(
            ibm2ieee(bytes(bytearray(
                (0b11000010, 0b01110110, 0b10100000, 0b00000000)))),
            -118.625)
class Ieee2Ibm(unittest.TestCase):
    """Tests for encoding floats as big-endian IBM float bytes with ieee2ibm()."""

    # Expected values are built with bytes(bytearray(...)) rather than
    # bytes(tuple): on Python 2 bytes is str, so bytes(tuple) would produce
    # the textual repr of the tuple instead of four raw bytes.

    def test_zero(self):
        self.assertEqual(ieee2ibm(0.0), b'\0\0\0\0')

    def test_negative_half(self):
        self.assertEqual(
            ieee2ibm(-0.5),
            bytes(bytearray((0b11000000, 0x80, 0x00, 0x00))))

    def test_positive_half(self):
        self.assertEqual(
            ieee2ibm(0.5),
            bytes(bytearray((0b01000000, 0x80, 0x00, 0x00))))

    def test_one(self):
        self.assertEqual(ieee2ibm(1.0), b'\x41\x10\x00\x00')

    def test_negative_118_625(self):
        # Example taken from Wikipedia http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture
        self.assertEqual(
            ieee2ibm(-118.625),
            bytes(bytearray(
                (0b11000010, 0b01110110, 0b10100000, 0b00000000))))

    def test_0_1(self):
        # Note, this is different from the Wikipedia example, but I'll stick my neck out and say
        # Wikipedia is wrong.
        # NOTE(review): the difference is presumably truncation versus
        # rounding of the final mantissa bit - confirm.
        self.assertEqual(
            ieee2ibm(0.1),
            bytes(bytearray(
                (0b01000000, 0b00011001, 0b10011001, 0b10011001))))
if __name__ == '__main__':
    # Run the test cases in this module when it is executed as a script.
    unittest.main()
+106 -4
View File
@@ -7,10 +7,11 @@ import struct
from catalog import CatalogBuilder
from datatypes import CTYPES, size_in_bytes
from reel_header_definition import HEADER_DEF
from ibm_float import ibm2ieee
from ibm_float import ibm2ieee, ieee2ibm
from revisions import canonicalize_revision
from trace_header_definition import TRACE_HEADER_DEF
from util import file_length
from portability import EMPTY_BYTE_STRING
REEL_HEADER_NUM_BYTES = 3600
@@ -208,16 +209,36 @@ def catalog_traces(fh, bps, endian='>', progress=None):
cdp_catalog,
line_catalog)
def read_trace_header(fh, trace_header_format, pos=None):
    """Read one trace header from a file.

    Args:
        fh: A file-like-object open in binary mode.

        trace_header_format: A Struct object, such as obtained from a
            call to compile_trace_header_format()

        pos: An optional file offset in bytes from the beginning of the
            file. If omitted, reading starts at the current file position.

    Returns:
        A TraceHeader object.
    """
    if pos is not None:
        fh.seek(pos)

    # A trace header always occupies TRACE_HEADER_NUM_BYTES bytes on disk
    raw = fh.read(TRACE_HEADER_NUM_BYTES)
    fields = trace_header_format.unpack(raw)
    return TraceHeader._make(fields)
def read_binary_values(fh, pos, ctype='l', count=1, endian='>'):
def read_binary_values(fh, pos=None, ctype='l', count=1, endian='>'):
"""Read a series of values from a binary file.
Args:
fh: A file-like-object open in binary mode.
pos: The file offset in bytes from the beginning from which the data
is to be read.
c
ctype: The SEG Y data type.
@@ -270,6 +291,9 @@ def unpack_values(buf, count, item_size, fmt, endian='>'):
fmt: A format code (one of the values in the datatype.CTYPES
dictionary)
endian: '>' for big-endian data (the standard and default), '<'
for little-endian (non-standard)
Returns:
A sequence of objects with type corresponding to the format code.
"""
@@ -283,6 +307,84 @@ def unpack_values(buf, count, item_size, fmt, endian='>'):
# swapping ourselves.
def write_trace_header(fh, trace_header, trace_header_format, pos=None):
    """Write a TraceHeader to file.

    Args:
        fh: A file-like object open in binary mode for writing.

        trace_header: A TraceHeader object.

        trace_header_format: A Struct object, such as obtained from a
            call to compile_trace_header_format()

        pos: An optional file offset in bytes from the beginning of the
            file. Defaults to the current file position.
    """
    if pos is not None:
        fh.seek(pos, os.SEEK_SET)

    # Struct.pack() expects one positional argument per field in the format,
    # so the header record must be unpacked rather than passed as one object.
    buf = trace_header_format.pack(*trace_header)
    fh.write(buf)
def write_trace_values(fh, values, ctype='l', pos=None):
    """Write the sample values of a trace to a file.

    All arguments are forwarded unchanged to write_binary_values().

    Args:
        fh: A file-like-object open for writing in binary mode.

        values: An iterable series of values.

        ctype: The SEG Y data type.

        pos: An optional offset from the beginning of the file. If omitted,
            any writing is done at the current file position.
    """
    write_binary_values(fh, values, ctype=ctype, pos=pos)
def write_binary_values(fh, values, ctype='l', pos=None):
    """Write a series of values to a file.

    Args:
        fh: A file-like-object open for writing in binary mode.

        values: An iterable series of values.

        ctype: The SEG Y data type.

        pos: An optional offset from the beginning of the file. If omitted,
            any writing is done at the current file position.
    """
    fmt = CTYPES[ctype]

    if pos is not None:
        fh.seek(pos, os.SEEK_SET)

    # IBM floats have no struct format code, so they are packed separately
    if fmt == 'ibm':
        buf = pack_ibm_floats(values)
    else:
        buf = pack_values(values, fmt)

    fh.write(buf)
def pack_ibm_floats(values):
    """Pack floats into binary-encoded big-endian single-precision IBM floats.

    Args:
        values: An iterable series of numeric values.

    Returns:
        A sequence of bytes. (Python 2 - a str object, Python 3 - a bytes
        object)
    """
    # Each value is encoded by ieee2ibm() and the pieces are concatenated
    encoded = map(ieee2ibm, values)
    return EMPTY_BYTE_STRING.join(encoded)
def pack_values(values, fmt, endian='>'):
    """Pack values into binary encoded big-endian byte strings.

    Args:
        values: An iterable series of values.

        fmt: A format code (one of the values in the datatype.CTYPES
            dictionary)

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard)

    Returns:
        The packed values as a byte string. (Python 2 - a str object,
        Python 3 - a bytes object)
    """
    # Materialise the values so that iterables without a length, such as
    # generators, can be counted for the repeat count in the format.
    values = tuple(values)
    c_format = '{}{}{}'.format(endian, len(values), fmt)
    return struct.pack(c_format, *values)
_TraceAttributeSpec = namedtuple('Record', ['name', 'pos', 'type'])
+3 -1
View File
@@ -2,6 +2,8 @@ import itertools
import time
import os
from portability import izip
def pairwise(iterable):
"""Pairwise iteration.
@@ -14,7 +16,7 @@ def pairwise(iterable):
"""
a, b = itertools.tee(iterable)
next(b, None)
return itertools.izip(a, b)
return izip(a, b)
def contains_duplicates(sorted_iterable):