diff --git a/segpy-ext/segpy-numpy/examples/extract_trace_header_field.py b/segpy-ext/segpy-numpy/examples/extract_trace_header_field.py new file mode 100644 index 0000000..e8b2985 --- /dev/null +++ b/segpy-ext/segpy-numpy/examples/extract_trace_header_field.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +"""Extract a single field from the trace headers of 3D seismic volume to a 2D Numpy array. + +This utility assumes the inline and crossline numbers are evenly spaced. +Each inline of the source data will be represented as a single row, and +each crossline as a single column in the resulting 2D array. + +Usage: extract_trace_header_field.py [-h] [--null NULL] + segy-file npy-file field-name + +Positional arguments: + segy-file Path to an existing SEG Y file of 3D seismic data + npy-file Path to the Numpy array file to be created for the timeslice + field-name Zero based index of the time slice to be extracted + +Optional arguments: + -h, --help show this help message and exit + --dtype DTYPE Numpy data type. If not provided a dtype compatible with the + SEG Y data will be used. + --null NULL Sample value to use for missing or short traces. + +Example: + + extract_trace_header_field.py stack_final_int8.sgy slice_800.npy ensemble_num +""" + +import argparse +import os +import sys +import traceback + +import numpy as np + +from numpy import s_ + +from segpy.reader import create_reader +from segpy.trace_header import TraceHeaderRev1 +from segpy_numpy.dtypes import make_dtype +from segpy_numpy.extract import extract_inline_3d, extract_trace_header_field_3d + + +class DimensionalityError(Exception): + pass + + + +def extract_header_field(segy_filename, out_filename, field_name, null=None): + """Extract a timeslice from a 3D SEG Y file to a Numpy NPY file. + + Args: + segy_filename: Filename of a SEG Y file. + + out_filename: Filename of the NPY file. + + inline_index: The zero-based index (increasing with depth) of the slice to be extracted. + + null: Optional sample value to use for missing or short traces. Defaults to zero. + """ + header_field = getattr(TraceHeaderRev1, field_name) + + with open(segy_filename, 'rb') as segy_file: + + segy_reader = create_reader(segy_file) + + header_field_arrays = extract_trace_header_field_3d(segy_reader, [header_field], null) + return header_field_arrays + + +def nullable_int(s): + return None if not bool(s) else int(s) + + +def main(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument("segy_file", metavar="segy-file", + help="Path to an existing SEG Y file of 3D seismic data") + + parser.add_argument("npy_file", metavar="npy-file", + help="Path to the Numpy array file to be created for the timeslice") + + parser.add_argument("field_name", metavar="field-name", type=str, + help="Name of the trace header field to be extracted", ) + + parser.add_argument("--null", type=nullable_int, default="", + help="Header value to use for missing or short traces.") + + if argv is None: + argv = sys.argv[1:] + + args = parser.parse_args(argv) + + try: + extract_header_field( + segy_filename=args.segy_file, + out_filename=args.npy_file, + field_name=args.field_name, + null=args.null) + except (FileNotFoundError, IsADirectoryError) as e: + print(e, file=sys.stderr) + return os.EX_NOINPUT + except PermissionError as e: + print(e, file=sys.stderr) + return os.EX_NOPERM + except Exception as e: + traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr) + return os.EX_SOFTWARE + return os.EX_OK + + +if __name__ == '__main__': + sys.exit(main()) + + diff --git a/segpy-ext/segpy-numpy/examples/inline.py b/segpy-ext/segpy-numpy/examples/inline.py new file mode 100644 index 0000000..0fb15f2 --- /dev/null +++ b/segpy-ext/segpy-numpy/examples/inline.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +"""Extract an inline from a 3D seismic volume to a Numpy array. + +This utility assumes the inline and crossline numbers are evenly spaced. +Each inline of the source data will be represented as a single row, and +each crossline as a single column in the resulting 2D array. + +Usage: inline.py [-h] [--dtype DTYPE] [--null NULL] + segy-file npy-file inline-number + +Positional arguments: + segy-file Path to an existing SEG Y file of 3D seismic data + npy-file Path to the Numpy array file to be created for the timeslice + slice-index Zero based index of the time slice to be extracted + +Optional arguments: + -h, --help show this help message and exit + --dtype DTYPE Numpy data type. If not provided a dtype compatible with the + SEG Y data will be used. + --null NULL Sample value to use for missing or short traces. + +Example: + + inline.py stack_final_int8.sgy slice_800.npy 800 --null=42.0 --dtype=f +""" + +import argparse +import os +import sys +import traceback + +import numpy as np + +from numpy import s_ + +from segpy.reader import create_reader +from segpy_numpy.dtypes import make_dtype +from segpy_numpy.extract import extract_inline_3d + + +class DimensionalityError(Exception): + pass + + +def extract_inline(segy_filename, out_filename, inline_number, null=None): + """Extract a timeslice from a 3D SEG Y file to a Numpy NPY file. + + Args: + segy_filename: Filename of a SEG Y file. + + out_filename: Filename of the NPY file. + + inline_index: The zero-based index (increasing with depth) of the slice to be extracted. + + null: Optional sample value to use for missing or short traces. Defaults to zero. + """ + with open(segy_filename, 'rb') as segy_file: + + segy_reader = create_reader(segy_file) + inline_array = extract_inline_3d(segy_reader, inline_number, null=null) + return inline_array + + +def nullable_float(s): + return None if not bool(s) else float(s) + + +def main(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument("segy_file", metavar="segy-file", + help="Path to an existing SEG Y file of 3D seismic data") + + parser.add_argument("npy_file", metavar="npy-file", + help="Path to the Numpy array file to be created for the timeslice") + + parser.add_argument("inline_number", metavar="inline-number", type=int, + help="Zero based index of the inline to be extracted", ) + + parser.add_argument("--null", type=nullable_float, default="", + help="Sample value to use for missing or short traces.") + + if argv is None: + argv = sys.argv[1:] + + args = parser.parse_args(argv) + + try: + extract_inline( + segy_filename=args.segy_file, + out_filename=args.npy_file, + inline_number=args.inline_number, + null=args.null) + except (FileNotFoundError, IsADirectoryError) as e: + print(e, file=sys.stderr) + return os.EX_NOINPUT + except PermissionError as e: + print(e, file=sys.stderr) + return os.EX_NOPERM + except Exception as e: + traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr) + return os.EX_SOFTWARE + return os.EX_OK + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/segpy-ext/segpy-numpy/segpy_numpy/dtypes.py b/segpy-ext/segpy-numpy/segpy_numpy/dtypes.py index 2583caf..74431f3 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/dtypes.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/dtypes.py @@ -9,7 +9,7 @@ NUMPY_DTYPES = {'ibm': numpy.dtype('f4'), 'int8': numpy.dtype('i1')} -def make_dtype(data_sample_format): +def make_dtype(data_sample_format): # TODO: What is the correct name for this arg? """Convert a SEG Y data sample format to a compatible numpy dtype. Note : diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py new file mode 100644 index 0000000..7ccb53a --- /dev/null +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -0,0 +1,402 @@ +"""Tools for interoperability between Segpy and Numpy arrays.""" +from collections import namedtuple +import numpy as np +from segpy.header import Header, SubFormatMeta +from segpy.packer import make_header_packer + +from segpy.util import ensure_superset +from segpy_numpy.dtypes import make_dtype + + +def extract_trace_header_field_3d(reader_3d, fields, inline_numbers=None, xline_numbers=None, null=None): + """Extract a single trace header field from all trace headers as an array. + + Args: + reader_3d: A SegYReader3D + + fields: A an iterable series where each item is either the name of a field as a string + or an object such as a NamedField with a 'name' attribute which in turn is the name + of a field as a string, such as a NamedField. + + inline_numbers: The inline numbers for which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces within the each crossline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + inline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate + traces from inline number 100 to inline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + inline numbers. For example inline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + xline_numbers: The crossline numbers at which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces at within each inline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + crossline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate + traces from crossline number 100 to crossline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + crossline numbers. For example xline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + null: An optional null value for missing traces. The null value must be convertible + to all field value types. + + Returns: + An namedtuple object with attributes which are two-dimensional Numpy arrays. + If a null value was specified the arrays will be ndarrays, otherwise they + will be masked arrays. The attributes of the named tuple are in the same + order as the fields specified in the `fields` argument. + + Raises: + AttributeError: If the the named fields do not exist in the trace header definition. + """ + field_names = [_extract_field_name(field) for field in fields] + + inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers) + xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers) + shape = (len(inline_numbers), len(xline_numbers)) + + class SubHeader(metaclass=SubFormatMeta, + parent_format=reader_3d.trace_header_format_class, + parent_field_names=field_names): + pass + + sub_header_packer = make_header_packer(SubHeader, reader_3d.endian) + TraceHeaderArrays = namedtuple('TraceHeaderArrays', field_names) + + arrays = (_make_array(shape, + make_dtype(getattr(SubHeader, field_name).value_type.SEG_Y_TYPE), + null) + for field_name in field_names) + + trace_header_arrays = TraceHeaderArrays(*arrays) + + for inline_index, inline_number in enumerate(inline_numbers): + for xline_index, xline_number in enumerate(xline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index((inline_number, xline_number)) + trace_header = reader_3d.trace_header(trace_index, sub_header_packer) + + for field_name, a in zip(field_names, trace_header_arrays): + field_value = getattr(trace_header, field_name) + a[inline_index, xline_index] = field_value + + return trace_header_arrays + + +def extract_trace(reader, trace_index, sample_numbers): + """Extract an single trace as a one-dimensional array. + + Args: + reader: A SegYReader3D object. + + trace_index: The index of the trace to be extracted. + + sample_numbers: The sample numbers within each trace at which samples are to be extracted. + This argument can be specified in three ways: + + None (the default) - All samples within the trace will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those samples at + sample numbers corresponding to the items in the sequence will be extracted. The + samples will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate + samples from sample number 100 to sample number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + sample numbers. For example sample_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred samples, irrespective of their numbers. + + Returns: + A one-dimensional array. + """ + if not reader.has_trace_index(trace_index): + raise ValueError("Inline number {} not present in {}".format(trace_index, reader)) + + sample_numbers = ensure_superset(range(0, reader.max_num_trace_samples()), sample_numbers) + + trace_sample_start = sample_numbers[0] + trace_sample_stop = min(sample_numbers[-1] + 1, reader.num_trace_samples(trace_index)) + trace_samples = reader.trace_samples(trace_index, trace_sample_start, trace_sample_stop) + arr = np.fromiter((trace_samples[sample_number - trace_sample_start] for sample_number in sample_numbers), + make_dtype(reader.data_sample_format)) + return arr + + +def extract_inline_3d(reader_3d, inline_number, xline_numbers=None, sample_numbers=None, null=None): + """Extract an inline as a two-dimensional array. + + Args: + reader: A SegYReader3D object. + + inline_number: The number of the inline to be extracted. + + xline_numbers: The crossline numbers within the inline at which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces within the inline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + crossline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate + traces from crossline number 100 to crossline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + crossline numbers. For example xline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + sample_numbers: The sample numbers within each trace at which samples are to be extracted. + This argument can be specified in three ways: + + None (the default) - All samples within the trace will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those samples at + sample numbers corresponding to the items in the sequence will be extracted. The + samples will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate + samples from sample number 100 to sample number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + sample numbers. For example sample_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred samples, irrespective of their numbers. + + null: A null value. When None is specified as the null value a masked array will be returned. + + Returns: + A two-dimensional array. If null is None a masked array will be returned, otherwise + a regular array will be returned. The first (slowest changing) index will correspond + to the traces (index zero will correspond to the first crossline number). The + second (fastest changing) index will correspond to the samples (index zero will + correspond to the first sample number). + """ + if inline_number not in reader_3d.inline_numbers(): + raise ValueError("Inline number {} not present in {}".format(inline_number, reader_3d)) + + xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers) + sample_numbers = ensure_superset(range(0, reader_3d.max_num_trace_samples()), sample_numbers) + shape = (len(xline_numbers), len(sample_numbers)) + dtype = make_dtype(reader_3d.data_sample_format) + array = _make_array(shape, dtype, null) + + if isinstance(sample_numbers, range): + _populate_inline_array_over_sample_range(reader_3d, inline_number, xline_numbers, sample_numbers, array) + else: + _populate_inline_array_numbered_samples(reader_3d, inline_number, xline_numbers, sample_numbers, array) + + return array + + +def _populate_inline_array_numbered_samples(reader_3d, inline_number, xline_numbers, sample_numbers, array): + for xline_index, xline_number in enumerate(xline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index(inline_xline_number) + num_trace_samples = reader_3d.num_trace_samples(trace_index) + trace_sample_start = sample_numbers[0] + trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples) + trace_samples = reader_3d.trace_samples(trace_index, trace_sample_start, trace_sample_stop) + for sample_index, sample_number in enumerate(sample_numbers): + array[xline_index, sample_index] = trace_samples[sample_number - trace_sample_start] + + +def _populate_inline_array_over_sample_range(reader_3d, inline_number, xline_numbers, sample_numbers, array): + for xline_index, xline_number in enumerate(xline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index(inline_xline_number) + num_trace_samples = reader_3d.num_trace_samples(trace_index) + trace_sample_stop = min(sample_numbers.stop, num_trace_samples) + trace_samples = reader_3d.trace_samples(trace_index, sample_numbers.start, trace_sample_stop) + source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step) + array[xline_index, :] = trace_samples[source_slice] + + +def extract_xline_3d(reader_3d, xline_number, inline_numbers=None, sample_numbers=None, null=None): + """Extract an inline as a two-dimensional array. + + Args: + reader: A SegYReader3D object. + + xline_number: The number of the xline to be extracted. + + inline_numbers: The inline numbers within the crossline at which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces within the crossline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + inline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate + traces from inline number 100 to inline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + inline numbers. For example inline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + sample_numbers: The sample numbers within each trace at which samples are to be extracted. + This argument can be specified in three ways: + + None (the default) - All samples within the trace will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those samples at + sample numbers corresponding to the items in the sequence will be extracted. The + samples will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate + samples from sample number 100 to sample number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + sample numbers. For example sample_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred samples, irrespective of their numbers. + + null: A null value. When None is specified as the null value a masked array will be returned. + + Returns: + A two-dimensional array. If null is None a masked array will be returned, otherwise + a regular array will be returned. The first (slowest changing) index will correspond + to the traces (index zero will correspond to the first inline number). The + second (fastest changing) index will correspond to the samples (index zero will + correspond to the first sample number). + """ + if xline_number not in reader_3d.xline_numbers(): + raise ValueError("Crossline number {} not present in {}".format(xline_number, reader_3d)) + + inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers) + sample_numbers = ensure_superset(range(0, reader_3d.max_num_trace_samples()), sample_numbers) + shape = (len(inline_numbers), len(sample_numbers)) + dtype = make_dtype(reader_3d.data_sample_format) + array = _make_array(shape, dtype, null) + + if isinstance(sample_numbers, range): + _populate_xline_array_over_sample_range(reader_3d, xline_number, inline_numbers, sample_numbers, array) + else: + _populate_xline_array_numbered_samples(reader_3d, xline_number, inline_numbers, sample_numbers, array) + + return array + + +def _populate_xline_array_numbered_samples(reader_3d, xline_number, inline_numbers, sample_numbers, array): + for inline_index, inline_number in enumerate(inline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index(inline_xline_number) + num_trace_samples = reader_3d.num_trace_samples(trace_index) + trace_sample_start = sample_numbers[0] + trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples) + trace_samples = reader_3d.trace_samples(trace_index, trace_sample_start, trace_sample_stop) + for sample_index, sample_number in enumerate(sample_numbers): + array[inline_index, sample_index] = trace_samples[sample_number - trace_sample_start] + + +def _populate_xline_array_over_sample_range(reader_3d, xline_number, inline_numbers, sample_numbers, array): + for inline_index, inline_number in enumerate(inline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index(inline_xline_number) + num_trace_samples = reader_3d.num_trace_samples(trace_index) + trace_sample_stop = min(sample_numbers.stop, num_trace_samples) + trace_samples = reader_3d.trace_samples(trace_index, sample_numbers.start, trace_sample_stop) + source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step) + array[inline_index, :] = trace_samples[source_slice] + + +def extract_timeslice_3d(reader_3d, sample_number, inline_numbers=None, xline_numbers=None, null=None): + """Extract a single timeslice header field from all trace headers as an array. + + Args: + reader_3d: A SegYReader3D + + sample_number: The zero-based sample index. + + inline_numbers: The inline numbers for which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces within the each crossline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + inline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate + traces from inline number 100 to inline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + inline numbers. For example inline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + xline_numbers: The crossline numbers at which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces at within each inline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + crossline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate + traces from crossline number 100 to crossline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + crossline numbers. For example xline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + null: An optional null value for missing traces. The null value must be convertible + to all field value types. + + Returns: + An namedtuple object with attributes which are two-dimensional Numpy arrays. + If a null value was specified the arrays will be ndarrays, otherwise they + will be masked arrays. The attributes of the named tuple are in the same + order as the fields specified in the `fields` argument. + + Raises: + AttributeError: If the the named fields do not exist in the trace header definition. + """ + + inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers) + xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers) + shape = (len(inline_numbers), len(xline_numbers)) + dtype = make_dtype(reader_3d.data_sample_format) + array = _make_array(shape, dtype, null) + sample_number_stop = sample_number + 1 + + for inline_index, inline_number in enumerate(inline_numbers): + for xline_index, xline_number in enumerate(xline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index((inline_number, xline_number)) + trace_samples = reader_3d.trace_samples(trace_index, sample_number, sample_number_stop) + array[inline_index, xline_index] = trace_samples[0] + return array + +def _make_array(shape, dtype, null=None): + """Make an array""" + if null is None: + return np.ma.masked_all(shape, dtype) + array = np.empty(shape, dtype) + array.fill(null) + return array + +def _extract_field_name(field): + """Args: + field: If field in an object with a name attribute the name is returned. If field is a string it is returned + unmodified. + Raises: + TypeError: + """ + if isinstance(field, str): + return field + try: + return field.name + except AttributeError: + raise TypeError("{!r} neither is a string nor has a 'name' attribute".format(field)) \ No newline at end of file diff --git a/segpy-ext/segpy-numpy/test/__init__.py b/segpy-ext/segpy-numpy/test/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/segpy-ext/segpy-numpy/test/__init__.py @@ -0,0 +1 @@ + diff --git a/segpy-ext/segpy-numpy/test/test_extract.py b/segpy-ext/segpy-numpy/test/test_extract.py new file mode 100644 index 0000000..58e5dfe --- /dev/null +++ b/segpy-ext/segpy-numpy/test/test_extract.py @@ -0,0 +1,10 @@ +import unittest + + +class MyTestCase(unittest.TestCase): + def test_something(self): + self.assertEqual(True, False) + + +if __name__ == '__main__': + unittest.main() diff --git a/segpy/catalog.py b/segpy/catalog.py index ab0eb08..56459d7 100644 --- a/segpy/catalog.py +++ b/segpy/catalog.py @@ -511,7 +511,8 @@ class LinearRegularCatalog(Mapping): key_max: The maximum key. key_stride: The difference between successive keys. value_start: The value corresponding to the minimum key. - value_max: The value corresponding to the maximum key. + value_stop: The value corresponding to the maximum key. + value_stride: Raises: ValueError: There is any inconsistency in the keys, strides, diff --git a/segpy/header.py b/segpy/header.py index 2083cdc..82a9f01 100644 --- a/segpy/header.py +++ b/segpy/header.py @@ -4,7 +4,7 @@ from itertools import chain from segpy import __version__ from segpy.docstring import docstring_property -from segpy.util import underscores_to_camelcase, first_sentence, super_class +from segpy.util import underscores_to_camelcase, first_sentence, super_class, collect_attributes class Header: @@ -96,7 +96,7 @@ class FormatMeta(type): """ @classmethod - def __prepare__(mcs, name, bases): + def __prepare__(mcs, name, bases, *args, **kwargs): return OrderedDict() def __new__(mcs, name, bases, namespace): @@ -134,6 +134,63 @@ class FormatMeta(type): return super().__new__(mcs, name, bases, namespace) +def is_public_non_field_attr(name, attr): + return (not name.startswith('_')) and (not isinstance(attr, HeaderFieldDescriptor) and (not isinstance(attr, classmethod))) + + +class SubFormatMeta(FormatMeta): + """A metaclass for a format class which has a subset of the fields in an existing format class. + + SubFormat classes can be used to reduce storage requirements and increase performance, since they can be + used to generate simpler HeaderPackers. + + SubFormat classes must be declared as: + + class MySubFormat(metaclass=SubFormatMeta, + format_class=MyFormatClass, + field_names=['first_field_name', + 'second_field_name']) + pass + """ + + def __new__(mcs, name, bases, namespace, parent_format, parent_field_names): + """ + Args: + name: The name of the actual class being created by this metaclass. + bases: The base classes of the actual class. + parent_format: An existing Format (?Header) on which (sort out terminology here) + of which this format has a subset of fields. + field_names: An iterable series of field names which this format should + duplicate from the parent_format. + """ + + # TODO: We need an inheritance aware getattr for class attributes! + # Copy the requested fields, by creating a new descriptor based + # on information retrieved from the existing descriptor + + for field_name in parent_field_names: + named_field = getattr(parent_format, field_name) + assert named_field.name == field_name + field_copy = field(named_field.value_type, + named_field.offset, + named_field.default, + named_field.documentation) + namespace[field_name] = field_copy + + # Copy other non-field class attributes + non_field_attributes = list(collect_attributes(parent_format, Header, is_public_non_field_attr)) + namespace.update((name, value) for _, name, value in non_field_attributes) + + # Add a reference back to the original format + namespace['_parent_format'] = parent_format + + return super().__new__(mcs, name, bases, namespace) + + def __init__(mcs, name, bases, namespace, parent_format, parent_field_names): + # Absorb the additional arguments + super().__init__(name, bases, namespace) + + class NamedField: """Instances of NamedField can be detected by the NamedDescriptorResolver metaclass.""" diff --git a/segpy/reader.py b/segpy/reader.py index 9eee0d8..62db921 100644 --- a/segpy/reader.py +++ b/segpy/reader.py @@ -292,6 +292,7 @@ class SegYReader(object): self._revision = extract_revision(self._binary_reel_header) self._bytes_per_sample = bytes_per_sample( self._binary_reel_header, self.revision) + self._max_num_trace_samples = None def __getstate__(self): """Copy the reader's state to a pickleable dictionary. @@ -311,6 +312,8 @@ class SegYReader(object): file_pos = self._fh.tell() file_mode = self._fh.mode + _ = self.max_num_trace_samples() + state = self.__dict__.copy() state['__version__'] = __version__ state['_file_name'] = filename @@ -363,7 +366,9 @@ class SegYReader(object): def max_num_trace_samples(self): """The number of samples in the trace_samples with the most samples.""" - return max(self._trace_length_catalog.values()) + if self._max_num_trace_samples is None: + self._max_num_trace_samples = max(self._trace_length_catalog.values()) + return self._max_num_trace_samples def num_trace_samples(self, trace_index): """The number of samples in the specified trace_samples.""" @@ -416,12 +421,15 @@ class SegYReader(object): self._fh, start_pos, seg_y_type, num_samples_to_read, self._endian) return trace_values - def trace_header(self, trace_index): + def trace_header(self, trace_index, header_packer_override=None): """Read a specific trace_samples. Args: trace_index: An integer in the range zero to num_traces() - 1 + header_packer_override: Override the default header packer (for example + to more efficiently extract only a few fields) + Returns: A TraceHeader corresponding to the requested trace_samples. @@ -431,10 +439,19 @@ class SegYReader(object): """ if not (0 <= trace_index < self.num_traces()): raise ValueError("Trace index {} out of range".format(trace_index)) + header_packer = self._trace_header_packer if header_packer_override is None else header_packer_override pos = self._trace_offset_catalog[trace_index] - trace_header = read_trace_header(self._fh, self._trace_header_packer, pos) + trace_header = read_trace_header(self._fh, header_packer, pos) return trace_header + @property + def trace_header_format_class(self): + """The trace header format class. + + Instances of this class are what is returned from trace_header() unless the + header_packer has been overridden.""" + return self._trace_header_packer.header_format_class + @property def dimensionality(self): """The spatial dimensionality of the data. diff --git a/segpy/sorted_set.py b/segpy/sorted_set.py index 507ae93..08e709f 100644 --- a/segpy/sorted_set.py +++ b/segpy/sorted_set.py @@ -6,8 +6,12 @@ from itertools import chain class SortedFrozenSet(Sequence, Set): - def __init__(self, items=None): - self._items = sorted(set(items)) if items is not None else [] + def __new__(cls, items=None): + if type(items) == cls: + return items + obj = object.__new__(cls) + obj._items = sorted(set(items)) if items is not None else [] + return obj def __contains__(self, item): try: diff --git a/segpy/toolkit.py b/segpy/toolkit.py index a741afb..d7ba87f 100644 --- a/segpy/toolkit.py +++ b/segpy/toolkit.py @@ -347,6 +347,8 @@ def catalog_traces(fh, bps, trace_header_format=TraceHeaderRev1, endian='>', pro alt_line_catalog_builder = CatalogBuilder() cdp_catalog_builder = CatalogBuilder() + # TODO: Use a SubHeaderFormat to only load the fields we're interestedin + for trace_number in count(): progress_callback(_READ_PROPORTION * pos_begin / length) fh.seek(pos_begin) diff --git a/segpy/util.py b/segpy/util.py index 01dbac2..3534ee9 100644 --- a/segpy/util.py +++ b/segpy/util.py @@ -3,7 +3,9 @@ import time import os import sys +from collections.abc import Set from itertools import (islice, cycle, tee, chain, repeat) + from segpy.sorted_set import SortedFrozenSet UNKNOWN_FILENAME = '' @@ -346,6 +348,11 @@ def make_sorted_distinct_sequence(iterable): An immutable collection which supports the Sized, Iterable, Container and Sequence protocols. """ + if isinstance(iterable, range): + if iterable.step > 0: + return iterable + else: + return reversed(iterable) sorted_set = SortedFrozenSet(iterable) if len(sorted_set) == 1: return single_item_range(sorted_set[0]) @@ -386,3 +393,85 @@ def hash_for_file(fh, *args): sha1.update(encoded_arg) digest = sha1.hexdigest() return digest + + +def is_range_superset_of_range(superset_range, subset_range): + """Are all the elements of + + """ + if subset_range.start not in superset_range: + return False + if subset_range.step % superset_range.step != 0: + return False + if subset_range[-1] > superset_range[-1]: + return False + assert set(subset_range).issubset(set(superset_range)) + return True + + +def is_superset(superset, subset): + """A more general version of set.issuperset that is smart enough to work with ranges.""" + if isinstance(subset, range) and isinstance(superset, range): + return is_range_superset_of_range(superset, subset) + if isinstance(superset, range): + return all(item in superset for item in subset) + if isinstance(superset, set): + return superset.issuperset(subset) + if isinstance(subset, set): + return subset.issubset(superset) + return set(superset).issuperset(subset) + + +def ensure_superset(superset, subset): + """Ensure that one collection is a subset of another. + + Args: + all_items: A sequence containing all items. + + subset: Subset must either be a collection the elements of which are a subset of + all_items, or a slice object, in which case the subset items will be sliced + from all_items. + Returns: + A sorted, distinct collection which is a subset of all_items. + + Raises: + ValueError: If the items in subset are not a subset of the items in all_items. + """ + if subset is None: + return superset + elif isinstance(subset, slice): + return superset[subset] + else: + subset = make_sorted_distinct_sequence(subset) + if not is_superset(superset, subset): + raise ValueError("subset_or_slice {!r} is not a subset of all_items {!r}" + .format(subset, superset)) + return subset + +def true(*args, **kwargs): + return True + + +def collect_attributes(derived_class, base_class=object, predicate=None): + """ + + Args: + derived_class: The class at which to start searching. + base_class: The class at which to stop searching + predicate: A predicate which accepts + + Returns: + A generator of items containing the (class, attribute_name) + """ + # TODO: Consider using the inspect module to do this + if predicate is None: + predicate = true + + for cls in derived_class.__mro__: + for key, value in vars(cls).items(): + if predicate(key, value): + yield cls, key, value + if cls is base_class: + break + +