Merge branch 'ext-numpy'

This commit is contained in:
Robert Smallshire
2015-06-14 17:38:42 +02:00
12 changed files with 814 additions and 9 deletions
@@ -0,0 +1,114 @@
#!/usr/bin/env python3
"""Extract a single field from the trace headers of 3D seismic volume to a 2D Numpy array.
This utility assumes the inline and crossline numbers are evenly spaced.
Each inline of the source data will be represented as a single row, and
each crossline as a single column in the resulting 2D array.
Usage: extract_trace_header_field.py [-h] [--null NULL]
segy-file npy-file field-name
Positional arguments:
segy-file Path to an existing SEG Y file of 3D seismic data
npy-file Path to the Numpy array file to be created for the timeslice
field-name Zero based index of the time slice to be extracted
Optional arguments:
-h, --help show this help message and exit
--dtype DTYPE Numpy data type. If not provided a dtype compatible with the
SEG Y data will be used.
--null NULL Sample value to use for missing or short traces.
Example:
extract_trace_header_field.py stack_final_int8.sgy slice_800.npy ensemble_num
"""
import argparse
import os
import sys
import traceback
import numpy as np
from numpy import s_
from segpy.reader import create_reader
from segpy.trace_header import TraceHeaderRev1
from segpy_numpy.dtypes import make_dtype
from segpy_numpy.extract import extract_inline_3d, extract_trace_header_field_3d
class DimensionalityError(Exception):
pass
def extract_header_field(segy_filename, out_filename, field_name, null=None):
"""Extract a timeslice from a 3D SEG Y file to a Numpy NPY file.
Args:
segy_filename: Filename of a SEG Y file.
out_filename: Filename of the NPY file.
inline_index: The zero-based index (increasing with depth) of the slice to be extracted.
null: Optional sample value to use for missing or short traces. Defaults to zero.
"""
header_field = getattr(TraceHeaderRev1, field_name)
with open(segy_filename, 'rb') as segy_file:
segy_reader = create_reader(segy_file)
header_field_arrays = extract_trace_header_field_3d(segy_reader, [header_field], null)
return header_field_arrays
def nullable_int(s):
return None if not bool(s) else int(s)
def main(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument("segy_file", metavar="segy-file",
help="Path to an existing SEG Y file of 3D seismic data")
parser.add_argument("npy_file", metavar="npy-file",
help="Path to the Numpy array file to be created for the timeslice")
parser.add_argument("field_name", metavar="field-name", type=str,
help="Name of the trace header field to be extracted", )
parser.add_argument("--null", type=nullable_int, default="",
help="Header value to use for missing or short traces.")
if argv is None:
argv = sys.argv[1:]
args = parser.parse_args(argv)
try:
extract_header_field(
segy_filename=args.segy_file,
out_filename=args.npy_file,
field_name=args.field_name,
null=args.null)
except (FileNotFoundError, IsADirectoryError) as e:
print(e, file=sys.stderr)
return os.EX_NOINPUT
except PermissionError as e:
print(e, file=sys.stderr)
return os.EX_NOPERM
except Exception as e:
traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
return os.EX_SOFTWARE
return os.EX_OK
if __name__ == '__main__':
sys.exit(main())
+108
View File
@@ -0,0 +1,108 @@
#!/usr/bin/env python3
"""Extract an inline from a 3D seismic volume to a Numpy array.
This utility assumes the inline and crossline numbers are evenly spaced.
Each inline of the source data will be represented as a single row, and
each crossline as a single column in the resulting 2D array.
Usage: inline.py [-h] [--dtype DTYPE] [--null NULL]
segy-file npy-file inline-number
Positional arguments:
segy-file Path to an existing SEG Y file of 3D seismic data
npy-file Path to the Numpy array file to be created for the timeslice
slice-index Zero based index of the time slice to be extracted
Optional arguments:
-h, --help show this help message and exit
--dtype DTYPE Numpy data type. If not provided a dtype compatible with the
SEG Y data will be used.
--null NULL Sample value to use for missing or short traces.
Example:
inline.py stack_final_int8.sgy slice_800.npy 800 --null=42.0 --dtype=f
"""
import argparse
import os
import sys
import traceback
import numpy as np
from numpy import s_
from segpy.reader import create_reader
from segpy_numpy.dtypes import make_dtype
from segpy_numpy.extract import extract_inline_3d
class DimensionalityError(Exception):
pass
def extract_inline(segy_filename, out_filename, inline_number, null=None):
"""Extract a timeslice from a 3D SEG Y file to a Numpy NPY file.
Args:
segy_filename: Filename of a SEG Y file.
out_filename: Filename of the NPY file.
inline_index: The zero-based index (increasing with depth) of the slice to be extracted.
null: Optional sample value to use for missing or short traces. Defaults to zero.
"""
with open(segy_filename, 'rb') as segy_file:
segy_reader = create_reader(segy_file)
inline_array = extract_inline_3d(segy_reader, inline_number, null=null)
return inline_array
def nullable_float(s):
return None if not bool(s) else float(s)
def main(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument("segy_file", metavar="segy-file",
help="Path to an existing SEG Y file of 3D seismic data")
parser.add_argument("npy_file", metavar="npy-file",
help="Path to the Numpy array file to be created for the timeslice")
parser.add_argument("inline_number", metavar="inline-number", type=int,
help="Zero based index of the inline to be extracted", )
parser.add_argument("--null", type=nullable_float, default="",
help="Sample value to use for missing or short traces.")
if argv is None:
argv = sys.argv[1:]
args = parser.parse_args(argv)
try:
extract_inline(
segy_filename=args.segy_file,
out_filename=args.npy_file,
inline_number=args.inline_number,
null=args.null)
except (FileNotFoundError, IsADirectoryError) as e:
print(e, file=sys.stderr)
return os.EX_NOINPUT
except PermissionError as e:
print(e, file=sys.stderr)
return os.EX_NOPERM
except Exception as e:
traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
return os.EX_SOFTWARE
return os.EX_OK
if __name__ == '__main__':
sys.exit(main())
+1 -1
View File
@@ -9,7 +9,7 @@ NUMPY_DTYPES = {'ibm': numpy.dtype('f4'),
'int8': numpy.dtype('i1')}
def make_dtype(data_sample_format):
def make_dtype(data_sample_format): # TODO: What is the correct name for this arg?
"""Convert a SEG Y data sample format to a compatible numpy dtype.
Note :
@@ -0,0 +1,402 @@
"""Tools for interoperability between Segpy and Numpy arrays."""
from collections import namedtuple
import numpy as np
from segpy.header import Header, SubFormatMeta
from segpy.packer import make_header_packer
from segpy.util import ensure_superset
from segpy_numpy.dtypes import make_dtype
def extract_trace_header_field_3d(reader_3d, fields, inline_numbers=None, xline_numbers=None, null=None):
"""Extract a single trace header field from all trace headers as an array.
Args:
reader_3d: A SegYReader3D
fields: A an iterable series where each item is either the name of a field as a string
or an object such as a NamedField with a 'name' attribute which in turn is the name
of a field as a string, such as a NamedField.
inline_numbers: The inline numbers for which traces are to be extracted.
This argument can be specified in three ways:
None (the default) - All traces within the each crossline will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those traces at
inline numbers corresponding to the items in the sequence will be extracted. The
traces will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate
traces from inline number 100 to inline number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
inline numbers. For example inline_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred traces, irrespective of their numbers.
xline_numbers: The crossline numbers at which traces are to be extracted.
This argument can be specified in three ways:
None (the default) - All traces at within each inline will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those traces at
crossline numbers corresponding to the items in the sequence will be extracted. The
traces will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate
traces from crossline number 100 to crossline number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
crossline numbers. For example xline_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred traces, irrespective of their numbers.
null: An optional null value for missing traces. The null value must be convertible
to all field value types.
Returns:
An namedtuple object with attributes which are two-dimensional Numpy arrays.
If a null value was specified the arrays will be ndarrays, otherwise they
will be masked arrays. The attributes of the named tuple are in the same
order as the fields specified in the `fields` argument.
Raises:
AttributeError: If the the named fields do not exist in the trace header definition.
"""
field_names = [_extract_field_name(field) for field in fields]
inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers)
xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers)
shape = (len(inline_numbers), len(xline_numbers))
class SubHeader(metaclass=SubFormatMeta,
parent_format=reader_3d.trace_header_format_class,
parent_field_names=field_names):
pass
sub_header_packer = make_header_packer(SubHeader, reader_3d.endian)
TraceHeaderArrays = namedtuple('TraceHeaderArrays', field_names)
arrays = (_make_array(shape,
make_dtype(getattr(SubHeader, field_name).value_type.SEG_Y_TYPE),
null)
for field_name in field_names)
trace_header_arrays = TraceHeaderArrays(*arrays)
for inline_index, inline_number in enumerate(inline_numbers):
for xline_index, xline_number in enumerate(xline_numbers):
inline_xline_number = (inline_number, xline_number)
if reader_3d.has_trace_index(inline_xline_number):
trace_index = reader_3d.trace_index((inline_number, xline_number))
trace_header = reader_3d.trace_header(trace_index, sub_header_packer)
for field_name, a in zip(field_names, trace_header_arrays):
field_value = getattr(trace_header, field_name)
a[inline_index, xline_index] = field_value
return trace_header_arrays
def extract_trace(reader, trace_index, sample_numbers):
"""Extract an single trace as a one-dimensional array.
Args:
reader: A SegYReader3D object.
trace_index: The index of the trace to be extracted.
sample_numbers: The sample numbers within each trace at which samples are to be extracted.
This argument can be specified in three ways:
None (the default) - All samples within the trace will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those samples at
sample numbers corresponding to the items in the sequence will be extracted. The
samples will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate
samples from sample number 100 to sample number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
sample numbers. For example sample_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred samples, irrespective of their numbers.
Returns:
A one-dimensional array.
"""
if not reader.has_trace_index(trace_index):
raise ValueError("Inline number {} not present in {}".format(trace_index, reader))
sample_numbers = ensure_superset(range(0, reader.max_num_trace_samples()), sample_numbers)
trace_sample_start = sample_numbers[0]
trace_sample_stop = min(sample_numbers[-1] + 1, reader.num_trace_samples(trace_index))
trace_samples = reader.trace_samples(trace_index, trace_sample_start, trace_sample_stop)
arr = np.fromiter((trace_samples[sample_number - trace_sample_start] for sample_number in sample_numbers),
make_dtype(reader.data_sample_format))
return arr
def extract_inline_3d(reader_3d, inline_number, xline_numbers=None, sample_numbers=None, null=None):
"""Extract an inline as a two-dimensional array.
Args:
reader: A SegYReader3D object.
inline_number: The number of the inline to be extracted.
xline_numbers: The crossline numbers within the inline at which traces are to be extracted.
This argument can be specified in three ways:
None (the default) - All traces within the inline will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those traces at
crossline numbers corresponding to the items in the sequence will be extracted. The
traces will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate
traces from crossline number 100 to crossline number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
crossline numbers. For example xline_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred traces, irrespective of their numbers.
sample_numbers: The sample numbers within each trace at which samples are to be extracted.
This argument can be specified in three ways:
None (the default) - All samples within the trace will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those samples at
sample numbers corresponding to the items in the sequence will be extracted. The
samples will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate
samples from sample number 100 to sample number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
sample numbers. For example sample_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred samples, irrespective of their numbers.
null: A null value. When None is specified as the null value a masked array will be returned.
Returns:
A two-dimensional array. If null is None a masked array will be returned, otherwise
a regular array will be returned. The first (slowest changing) index will correspond
to the traces (index zero will correspond to the first crossline number). The
second (fastest changing) index will correspond to the samples (index zero will
correspond to the first sample number).
"""
if inline_number not in reader_3d.inline_numbers():
raise ValueError("Inline number {} not present in {}".format(inline_number, reader_3d))
xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers)
sample_numbers = ensure_superset(range(0, reader_3d.max_num_trace_samples()), sample_numbers)
shape = (len(xline_numbers), len(sample_numbers))
dtype = make_dtype(reader_3d.data_sample_format)
array = _make_array(shape, dtype, null)
if isinstance(sample_numbers, range):
_populate_inline_array_over_sample_range(reader_3d, inline_number, xline_numbers, sample_numbers, array)
else:
_populate_inline_array_numbered_samples(reader_3d, inline_number, xline_numbers, sample_numbers, array)
return array
def _populate_inline_array_numbered_samples(reader_3d, inline_number, xline_numbers, sample_numbers, array):
for xline_index, xline_number in enumerate(xline_numbers):
inline_xline_number = (inline_number, xline_number)
if reader_3d.has_trace_index(inline_xline_number):
trace_index = reader_3d.trace_index(inline_xline_number)
num_trace_samples = reader_3d.num_trace_samples(trace_index)
trace_sample_start = sample_numbers[0]
trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples)
trace_samples = reader_3d.trace_samples(trace_index, trace_sample_start, trace_sample_stop)
for sample_index, sample_number in enumerate(sample_numbers):
array[xline_index, sample_index] = trace_samples[sample_number - trace_sample_start]
def _populate_inline_array_over_sample_range(reader_3d, inline_number, xline_numbers, sample_numbers, array):
for xline_index, xline_number in enumerate(xline_numbers):
inline_xline_number = (inline_number, xline_number)
if reader_3d.has_trace_index(inline_xline_number):
trace_index = reader_3d.trace_index(inline_xline_number)
num_trace_samples = reader_3d.num_trace_samples(trace_index)
trace_sample_stop = min(sample_numbers.stop, num_trace_samples)
trace_samples = reader_3d.trace_samples(trace_index, sample_numbers.start, trace_sample_stop)
source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step)
array[xline_index, :] = trace_samples[source_slice]
def extract_xline_3d(reader_3d, xline_number, inline_numbers=None, sample_numbers=None, null=None):
"""Extract an inline as a two-dimensional array.
Args:
reader: A SegYReader3D object.
xline_number: The number of the xline to be extracted.
inline_numbers: The inline numbers within the crossline at which traces are to be extracted.
This argument can be specified in three ways:
None (the default) - All traces within the crossline will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those traces at
inline numbers corresponding to the items in the sequence will be extracted. The
traces will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate
traces from inline number 100 to inline number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
inline numbers. For example inline_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred traces, irrespective of their numbers.
sample_numbers: The sample numbers within each trace at which samples are to be extracted.
This argument can be specified in three ways:
None (the default) - All samples within the trace will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those samples at
sample numbers corresponding to the items in the sequence will be extracted. The
samples will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate
samples from sample number 100 to sample number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
sample numbers. For example sample_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred samples, irrespective of their numbers.
null: A null value. When None is specified as the null value a masked array will be returned.
Returns:
A two-dimensional array. If null is None a masked array will be returned, otherwise
a regular array will be returned. The first (slowest changing) index will correspond
to the traces (index zero will correspond to the first inline number). The
second (fastest changing) index will correspond to the samples (index zero will
correspond to the first sample number).
"""
if xline_number not in reader_3d.xline_numbers():
raise ValueError("Crossline number {} not present in {}".format(xline_number, reader_3d))
inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers)
sample_numbers = ensure_superset(range(0, reader_3d.max_num_trace_samples()), sample_numbers)
shape = (len(inline_numbers), len(sample_numbers))
dtype = make_dtype(reader_3d.data_sample_format)
array = _make_array(shape, dtype, null)
if isinstance(sample_numbers, range):
_populate_xline_array_over_sample_range(reader_3d, xline_number, inline_numbers, sample_numbers, array)
else:
_populate_xline_array_numbered_samples(reader_3d, xline_number, inline_numbers, sample_numbers, array)
return array
def _populate_xline_array_numbered_samples(reader_3d, xline_number, inline_numbers, sample_numbers, array):
for inline_index, inline_number in enumerate(inline_numbers):
inline_xline_number = (inline_number, xline_number)
if reader_3d.has_trace_index(inline_xline_number):
trace_index = reader_3d.trace_index(inline_xline_number)
num_trace_samples = reader_3d.num_trace_samples(trace_index)
trace_sample_start = sample_numbers[0]
trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples)
trace_samples = reader_3d.trace_samples(trace_index, trace_sample_start, trace_sample_stop)
for sample_index, sample_number in enumerate(sample_numbers):
array[inline_index, sample_index] = trace_samples[sample_number - trace_sample_start]
def _populate_xline_array_over_sample_range(reader_3d, xline_number, inline_numbers, sample_numbers, array):
for inline_index, inline_number in enumerate(inline_numbers):
inline_xline_number = (inline_number, xline_number)
if reader_3d.has_trace_index(inline_xline_number):
trace_index = reader_3d.trace_index(inline_xline_number)
num_trace_samples = reader_3d.num_trace_samples(trace_index)
trace_sample_stop = min(sample_numbers.stop, num_trace_samples)
trace_samples = reader_3d.trace_samples(trace_index, sample_numbers.start, trace_sample_stop)
source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step)
array[inline_index, :] = trace_samples[source_slice]
def extract_timeslice_3d(reader_3d, sample_number, inline_numbers=None, xline_numbers=None, null=None):
"""Extract a single timeslice header field from all trace headers as an array.
Args:
reader_3d: A SegYReader3D
sample_number: The zero-based sample index.
inline_numbers: The inline numbers for which traces are to be extracted.
This argument can be specified in three ways:
None (the default) - All traces within the each crossline will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those traces at
inline numbers corresponding to the items in the sequence will be extracted. The
traces will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate
traces from inline number 100 to inline number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
inline numbers. For example inline_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred traces, irrespective of their numbers.
xline_numbers: The crossline numbers at which traces are to be extracted.
This argument can be specified in three ways:
None (the default) - All traces at within each inline will be be extracted.
sequence - When a sequence, such as a range or a list is provided only those traces at
crossline numbers corresponding to the items in the sequence will be extracted. The
traces will always be extracted in increasing numeric order and duplicate entries
will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate
traces from crossline number 100 to crossline number 198 inclusive.
slice - When a slice object is provided the slice will be applied to the sequence of all
crossline numbers. For example xline_numbers=slice(100, -100) will omit the first
one hundred and the last one hundred traces, irrespective of their numbers.
null: An optional null value for missing traces. The null value must be convertible
to all field value types.
Returns:
An namedtuple object with attributes which are two-dimensional Numpy arrays.
If a null value was specified the arrays will be ndarrays, otherwise they
will be masked arrays. The attributes of the named tuple are in the same
order as the fields specified in the `fields` argument.
Raises:
AttributeError: If the the named fields do not exist in the trace header definition.
"""
inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers)
xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers)
shape = (len(inline_numbers), len(xline_numbers))
dtype = make_dtype(reader_3d.data_sample_format)
array = _make_array(shape, dtype, null)
sample_number_stop = sample_number + 1
for inline_index, inline_number in enumerate(inline_numbers):
for xline_index, xline_number in enumerate(xline_numbers):
inline_xline_number = (inline_number, xline_number)
if reader_3d.has_trace_index(inline_xline_number):
trace_index = reader_3d.trace_index((inline_number, xline_number))
trace_samples = reader_3d.trace_samples(trace_index, sample_number, sample_number_stop)
array[inline_index, xline_index] = trace_samples[0]
return array
def _make_array(shape, dtype, null=None):
"""Make an array"""
if null is None:
return np.ma.masked_all(shape, dtype)
array = np.empty(shape, dtype)
array.fill(null)
return array
def _extract_field_name(field):
"""Args:
field: If field in an object with a name attribute the name is returned. If field is a string it is returned
unmodified.
Raises:
TypeError:
"""
if isinstance(field, str):
return field
try:
return field.name
except AttributeError:
raise TypeError("{!r} neither is a string nor has a 'name' attribute".format(field))
+1
View File
@@ -0,0 +1 @@
@@ -0,0 +1,10 @@
import unittest
class MyTestCase(unittest.TestCase):
def test_something(self):
self.assertEqual(True, False)
if __name__ == '__main__':
unittest.main()
+2 -1
View File
@@ -511,7 +511,8 @@ class LinearRegularCatalog(Mapping):
key_max: The maximum key.
key_stride: The difference between successive keys.
value_start: The value corresponding to the minimum key.
value_max: The value corresponding to the maximum key.
value_stop: The value corresponding to the maximum key.
value_stride:
Raises:
ValueError: There is any inconsistency in the keys, strides,
+59 -2
View File
@@ -4,7 +4,7 @@ from itertools import chain
from segpy import __version__
from segpy.docstring import docstring_property
from segpy.util import underscores_to_camelcase, first_sentence, super_class
from segpy.util import underscores_to_camelcase, first_sentence, super_class, collect_attributes
class Header:
@@ -96,7 +96,7 @@ class FormatMeta(type):
"""
@classmethod
def __prepare__(mcs, name, bases):
def __prepare__(mcs, name, bases, *args, **kwargs):
return OrderedDict()
def __new__(mcs, name, bases, namespace):
@@ -134,6 +134,63 @@ class FormatMeta(type):
return super().__new__(mcs, name, bases, namespace)
def is_public_non_field_attr(name, attr):
return (not name.startswith('_')) and (not isinstance(attr, HeaderFieldDescriptor) and (not isinstance(attr, classmethod)))
class SubFormatMeta(FormatMeta):
"""A metaclass for a format class which has a subset of the fields in an existing format class.
SubFormat classes can be used to reduce storage requirements and increase performance, since they can be
used to generate simpler HeaderPackers.
SubFormat classes must be declared as:
class MySubFormat(metaclass=SubFormatMeta,
format_class=MyFormatClass,
field_names=['first_field_name',
'second_field_name'])
pass
"""
def __new__(mcs, name, bases, namespace, parent_format, parent_field_names):
"""
Args:
name: The name of the actual class being created by this metaclass.
bases: The base classes of the actual class.
parent_format: An existing Format (?Header) on which (sort out terminology here)
of which this format has a subset of fields.
field_names: An iterable series of field names which this format should
duplicate from the parent_format.
"""
# TODO: We need an inheritance aware getattr for class attributes!
# Copy the requested fields, by creating a new descriptor based
# on information retrieved from the existing descriptor
for field_name in parent_field_names:
named_field = getattr(parent_format, field_name)
assert named_field.name == field_name
field_copy = field(named_field.value_type,
named_field.offset,
named_field.default,
named_field.documentation)
namespace[field_name] = field_copy
# Copy other non-field class attributes
non_field_attributes = list(collect_attributes(parent_format, Header, is_public_non_field_attr))
namespace.update((name, value) for _, name, value in non_field_attributes)
# Add a reference back to the original format
namespace['_parent_format'] = parent_format
return super().__new__(mcs, name, bases, namespace)
def __init__(mcs, name, bases, namespace, parent_format, parent_field_names):
# Absorb the additional arguments
super().__init__(name, bases, namespace)
class NamedField:
"""Instances of NamedField can be detected by the NamedDescriptorResolver metaclass."""
+20 -3
View File
@@ -292,6 +292,7 @@ class SegYReader(object):
self._revision = extract_revision(self._binary_reel_header)
self._bytes_per_sample = bytes_per_sample(
self._binary_reel_header, self.revision)
self._max_num_trace_samples = None
def __getstate__(self):
"""Copy the reader's state to a pickleable dictionary.
@@ -311,6 +312,8 @@ class SegYReader(object):
file_pos = self._fh.tell()
file_mode = self._fh.mode
_ = self.max_num_trace_samples()
state = self.__dict__.copy()
state['__version__'] = __version__
state['_file_name'] = filename
@@ -363,7 +366,9 @@ class SegYReader(object):
def max_num_trace_samples(self):
"""The number of samples in the trace_samples with the most samples."""
return max(self._trace_length_catalog.values())
if self._max_num_trace_samples is None:
self._max_num_trace_samples = max(self._trace_length_catalog.values())
return self._max_num_trace_samples
def num_trace_samples(self, trace_index):
"""The number of samples in the specified trace_samples."""
@@ -416,12 +421,15 @@ class SegYReader(object):
self._fh, start_pos, seg_y_type, num_samples_to_read, self._endian)
return trace_values
def trace_header(self, trace_index):
def trace_header(self, trace_index, header_packer_override=None):
"""Read a specific trace_samples.
Args:
trace_index: An integer in the range zero to num_traces() - 1
header_packer_override: Override the default header packer (for example
to more efficiently extract only a few fields)
Returns:
A TraceHeader corresponding to the requested trace_samples.
@@ -431,10 +439,19 @@ class SegYReader(object):
"""
if not (0 <= trace_index < self.num_traces()):
raise ValueError("Trace index {} out of range".format(trace_index))
header_packer = self._trace_header_packer if header_packer_override is None else header_packer_override
pos = self._trace_offset_catalog[trace_index]
trace_header = read_trace_header(self._fh, self._trace_header_packer, pos)
trace_header = read_trace_header(self._fh, header_packer, pos)
return trace_header
@property
def trace_header_format_class(self):
"""The trace header format class.
Instances of this class are what is returned from trace_header() unless the
header_packer has been overridden."""
return self._trace_header_packer.header_format_class
@property
def dimensionality(self):
"""The spatial dimensionality of the data.
+6 -2
View File
@@ -6,8 +6,12 @@ from itertools import chain
class SortedFrozenSet(Sequence, Set):
def __init__(self, items=None):
self._items = sorted(set(items)) if items is not None else []
def __new__(cls, items=None):
if type(items) == cls:
return items
obj = object.__new__(cls)
obj._items = sorted(set(items)) if items is not None else []
return obj
def __contains__(self, item):
try:
+2
View File
@@ -347,6 +347,8 @@ def catalog_traces(fh, bps, trace_header_format=TraceHeaderRev1, endian='>', pro
alt_line_catalog_builder = CatalogBuilder()
cdp_catalog_builder = CatalogBuilder()
# TODO: Use a SubHeaderFormat to only load the fields we're interestedin
for trace_number in count():
progress_callback(_READ_PROPORTION * pos_begin / length)
fh.seek(pos_begin)
+89
View File
@@ -3,7 +3,9 @@ import time
import os
import sys
from collections.abc import Set
from itertools import (islice, cycle, tee, chain, repeat)
from segpy.sorted_set import SortedFrozenSet
UNKNOWN_FILENAME = '<unknown>'
@@ -346,6 +348,11 @@ def make_sorted_distinct_sequence(iterable):
An immutable collection which supports the Sized, Iterable,
Container and Sequence protocols.
"""
if isinstance(iterable, range):
if iterable.step > 0:
return iterable
else:
return reversed(iterable)
sorted_set = SortedFrozenSet(iterable)
if len(sorted_set) == 1:
return single_item_range(sorted_set[0])
@@ -386,3 +393,85 @@ def hash_for_file(fh, *args):
sha1.update(encoded_arg)
digest = sha1.hexdigest()
return digest
def is_range_superset_of_range(superset_range, subset_range):
"""Are all the elements of
"""
if subset_range.start not in superset_range:
return False
if subset_range.step % superset_range.step != 0:
return False
if subset_range[-1] > superset_range[-1]:
return False
assert set(subset_range).issubset(set(superset_range))
return True
def is_superset(superset, subset):
"""A more general version of set.issuperset that is smart enough to work with ranges."""
if isinstance(subset, range) and isinstance(superset, range):
return is_range_superset_of_range(superset, subset)
if isinstance(superset, range):
return all(item in superset for item in subset)
if isinstance(superset, set):
return superset.issuperset(subset)
if isinstance(subset, set):
return subset.issubset(superset)
return set(superset).issuperset(subset)
def ensure_superset(superset, subset):
"""Ensure that one collection is a subset of another.
Args:
all_items: A sequence containing all items.
subset: Subset must either be a collection the elements of which are a subset of
all_items, or a slice object, in which case the subset items will be sliced
from all_items.
Returns:
A sorted, distinct collection which is a subset of all_items.
Raises:
ValueError: If the items in subset are not a subset of the items in all_items.
"""
if subset is None:
return superset
elif isinstance(subset, slice):
return superset[subset]
else:
subset = make_sorted_distinct_sequence(subset)
if not is_superset(superset, subset):
raise ValueError("subset_or_slice {!r} is not a subset of all_items {!r}"
.format(subset, superset))
return subset
def true(*args, **kwargs):
return True
def collect_attributes(derived_class, base_class=object, predicate=None):
"""
Args:
derived_class: The class at which to start searching.
base_class: The class at which to stop searching
predicate: A predicate which accepts
Returns:
A generator of items containing the (class, attribute_name)
"""
# TODO: Consider using the inspect module to do this
if predicate is None:
predicate = true
for cls in derived_class.__mro__:
for key, value in vars(cls).items():
if predicate(key, value):
yield cls, key, value
if cls is base_class:
break