From 6d986a21a99d8052409a7711670b8358919a7990 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sat, 6 Jun 2015 20:08:39 +0200 Subject: [PATCH 01/15] Adds code for extraction of an inline. --- examples/inline.py | 108 +++++++++++++++++++ segpy-ext/segpy-numpy/segpy_numpy/extract.py | 97 +++++++++++++++++ segpy/catalog.py | 3 +- segpy/reader.py | 5 +- segpy/util.py | 5 + 5 files changed, 216 insertions(+), 2 deletions(-) create mode 100644 examples/inline.py create mode 100644 segpy-ext/segpy-numpy/segpy_numpy/extract.py diff --git a/examples/inline.py b/examples/inline.py new file mode 100644 index 0000000..ac02931 --- /dev/null +++ b/examples/inline.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +"""Extract an inline from a 3D seismic volume to a Numpy array. + +This utility assumes the inline and crossline numbers are evenly spaced. +Each inline of the source data will be represented as a single row, and +each crossline as a single column in the resulting 2D array. + +Usage: inline.py [-h] [--dtype DTYPE] [--null NULL] + segy-file npy-file inline-number + +Positional arguments: + segy-file Path to an existing SEG Y file of 3D seismic data + npy-file Path to the Numpy array file to be created for the timeslice + slice-index Zero based index of the time slice to be extracted + +Optional arguments: + -h, --help show this help message and exit + --dtype DTYPE Numpy data type. If not provided a dtype compatible with the + SEG Y data will be used. + --null NULL Sample value to use for missing or short traces. + +Example: + + timeslice.py stack_final_int8.sgy slice_800.npy 800 --null=42.0 --dtype=f +""" + +import argparse +import os +import sys +import traceback + +import numpy as np + +from numpy import s_ + +from segpy.reader import create_reader +from segpy_numpy.dtypes import make_dtype +from segpy_numpy.extract import extract_inline_3d + + +class DimensionalityError(Exception): + pass + + +def extract_inline(segy_filename, out_filename, inline_number, null=None): + """Extract a timeslice from a 3D SEG Y file to a Numpy NPY file. + + Args: + segy_filename: Filename of a SEG Y file. + + out_filename: Filename of the NPY file. + + inline_index: The zero-based index (increasing with depth) of the slice to be extracted. + + null: Optional sample value to use for missing or short traces. Defaults to zero. + """ + with open(segy_filename, 'rb') as segy_file: + + segy_reader = create_reader(segy_file) + inline_array = extract_inline_3d(segy_reader, inline_number, sample_numbers=s_[::2], null=null) + return inline_number + + +def nullable_float(s): + return None if not bool(s) else float(s) + + +def main(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument("segy_file", metavar="segy-file", + help="Path to an existing SEG Y file of 3D seismic data") + + parser.add_argument("npy_file", metavar="npy-file", + help="Path to the Numpy array file to be created for the timeslice") + + parser.add_argument("inline_number", metavar="inline-number", type=int, + help="Zero based index of the inline to be extracted", ) + + parser.add_argument("--null", type=nullable_float, default="", + help="Sample value to use for missing or short traces.") + + if argv is None: + argv = sys.argv[1:] + + args = parser.parse_args(argv) + + try: + extract_inline( + segy_filename=args.segy_file, + out_filename=args.npy_file, + inline_number=args.inline_number, + null=args.null) + except (FileNotFoundError, IsADirectoryError) as e: + print(e, file=sys.stderr) + return os.EX_NOINPUT + except PermissionError as e: + print(e, file=sys.stderr) + return os.EX_NOPERM + except Exception as e: + traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr) + return os.EX_SOFTWARE + return os.EX_OK + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py new file mode 100644 index 0000000..052f6d8 --- /dev/null +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -0,0 +1,97 @@ +"""Tools for interoperability between Segpy and Numpy arrays.""" +from functools import singledispatch +import numpy as np +from segpy.util import make_sorted_distinct_sequence + +from segpy_numpy.dtypes import make_dtype + + +class DimensionalityError: + pass + + +def extract_inline_3d(reader, inline_number, xline_numbers=None, sample_numbers=None, null=None, packed=False): + """Extract an inline as a two-dimensional array. + + Args: + reader: + + inline_number: The number of the inline to be extracted. + + xline_numbers: An optional sorted sequence of crossline numbers at which to + extract samples. If not provided, samples will be extracted at all + crosslines. + + sample_numbers: An optional sorted sequence of samples numbers at which to + extract samples. If not provided, samples will be extracted at all + depths. + + null: A null value + + Returns: + A two-dimensional array. If null is None a masked array will be returned, otherwise + a simple array will be returned.. The first (slowest changing) index will correspond + to the traces (index zero will correspond to the first crossline number). The + second (fastest changing) index will correspond to the samples (index zero will + correspond to the first sample number. + """ + if inline_number not in reader.inline_numbers(): + raise ValueError("Inline number {} not present in {}".format(inline_number, reader)) + + if xline_numbers is None: + xline_numbers = reader.xline_numbers() + elif isinstance(xline_numbers, slice): + xline_numbers = reader.xline_numbers()[xline_numbers] + else: + xline_numbers = make_sorted_distinct_sequence(xline_numbers) + + if sample_numbers is None: + sample_numbers = range(0, reader.max_num_trace_samples()) + elif isinstance(sample_numbers, slice): + sample_numbers = range(0, reader.max_num_trace_samples())[sample_numbers] + else: + sample_numbers = make_sorted_distinct_sequence(sample_numbers) + + shape = (len(xline_numbers), len(sample_numbers)) + dtype = make_dtype(reader.data_sample_format) + + if null is None: + array = np.ma.masked_all(shape, dtype) + else: + array = np.empty(shape, dtype) + array.fill(null) + + for xline_index, xline_number in enumerate(xline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader.has_trace_index(inline_xline_number): + # Read the trace + trace_index = reader.trace_index(inline_xline_number) + + sample_start = sample_numbers[0] + try: + sample_stop = sample_numbers.stop + except AttributeError: + sample_stop = sample_numbers[-1] + 1 + + num_trace_samples = reader.num_trace_samples(trace_index) + sample_stop = max(sample_stop, num_trace_samples) + + trace_samples = reader.trace_samples(trace_index, sample_start, sample_stop) + + try: + sample_step = sample_numbers.step + except AttributeError: + sample_step = None + + if sample_step is not None: + # Assign to a slice of the target array + source_slice = slice( + sample_numbers.start - sample_start, + sample_numbers.stop - sample_start, + sample_step) + array[xline_index, :] = trace_samples[source_slice] + else: + # Assign element by element + for sample_index, sample_number in enumerate(sample_numbers): + array[xline_number, sample_index] = trace_samples[sample_number - sample_start] + return array \ No newline at end of file diff --git a/segpy/catalog.py b/segpy/catalog.py index ab0eb08..56459d7 100644 --- a/segpy/catalog.py +++ b/segpy/catalog.py @@ -511,7 +511,8 @@ class LinearRegularCatalog(Mapping): key_max: The maximum key. key_stride: The difference between successive keys. value_start: The value corresponding to the minimum key. - value_max: The value corresponding to the maximum key. + value_stop: The value corresponding to the maximum key. + value_stride: Raises: ValueError: There is any inconsistency in the keys, strides, diff --git a/segpy/reader.py b/segpy/reader.py index 9eee0d8..ce154d9 100644 --- a/segpy/reader.py +++ b/segpy/reader.py @@ -292,6 +292,7 @@ class SegYReader(object): self._revision = extract_revision(self._binary_reel_header) self._bytes_per_sample = bytes_per_sample( self._binary_reel_header, self.revision) + self._max_num_trace_samples = None def __getstate__(self): """Copy the reader's state to a pickleable dictionary. @@ -363,7 +364,9 @@ class SegYReader(object): def max_num_trace_samples(self): """The number of samples in the trace_samples with the most samples.""" - return max(self._trace_length_catalog.values()) + if self._max_num_trace_samples is None: + self._max_num_trace_samples = max(self._trace_length_catalog.values()) + return self._max_num_trace_samples def num_trace_samples(self, trace_index): """The number of samples in the specified trace_samples.""" diff --git a/segpy/util.py b/segpy/util.py index 01dbac2..f30975a 100644 --- a/segpy/util.py +++ b/segpy/util.py @@ -346,6 +346,11 @@ def make_sorted_distinct_sequence(iterable): An immutable collection which supports the Sized, Iterable, Container and Sequence protocols. """ + if isinstance(iterable, range): + if iterable.step > 0: + return iterable + else: + return reversed(iterable) sorted_set = SortedFrozenSet(iterable) if len(sorted_set) == 1: return single_item_range(sorted_set[0]) From ee9f2cbb528a0ee4e4a2ccb9e51bdbc11a8ecf76 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Mon, 8 Jun 2015 21:37:03 +0200 Subject: [PATCH 02/15] Adds an extract_inline_3d() function to segpy_numpy --- segpy-ext/segpy-numpy/segpy_numpy/extract.py | 90 ++++++++++---------- segpy/sorted_set.py | 7 +- segpy/util.py | 50 +++++++++++ 3 files changed, 99 insertions(+), 48 deletions(-) diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py index 052f6d8..e0504de 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/extract.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -1,8 +1,7 @@ """Tools for interoperability between Segpy and Numpy arrays.""" -from functools import singledispatch import numpy as np -from segpy.util import make_sorted_distinct_sequence +from segpy.util import ensure_superset from segpy_numpy.dtypes import make_dtype @@ -18,9 +17,10 @@ def extract_inline_3d(reader, inline_number, xline_numbers=None, sample_numbers= inline_number: The number of the inline to be extracted. - xline_numbers: An optional sorted sequence of crossline numbers at which to - extract samples. If not provided, samples will be extracted at all - crosslines. + xline_numbers: Either a sequence of crossline numbers from which traces are + to be extracted or a slice object indicating that some slice of all + crossline numbers is to be used. If None, traces will be extracted at + all crosslines. sample_numbers: An optional sorted sequence of samples numbers at which to extract samples. If not provided, samples will be extracted at all @@ -38,60 +38,58 @@ def extract_inline_3d(reader, inline_number, xline_numbers=None, sample_numbers= if inline_number not in reader.inline_numbers(): raise ValueError("Inline number {} not present in {}".format(inline_number, reader)) - if xline_numbers is None: - xline_numbers = reader.xline_numbers() - elif isinstance(xline_numbers, slice): - xline_numbers = reader.xline_numbers()[xline_numbers] - else: - xline_numbers = make_sorted_distinct_sequence(xline_numbers) - - if sample_numbers is None: - sample_numbers = range(0, reader.max_num_trace_samples()) - elif isinstance(sample_numbers, slice): - sample_numbers = range(0, reader.max_num_trace_samples())[sample_numbers] - else: - sample_numbers = make_sorted_distinct_sequence(sample_numbers) - + xline_numbers = ensure_superset(reader.xline_numbers(), xline_numbers) + sample_numbers = ensure_superset(range(0, reader.max_num_trace_samples()), sample_numbers) shape = (len(xline_numbers), len(sample_numbers)) dtype = make_dtype(reader.data_sample_format) + sample_start, sample_stop = start_and_stop(sample_numbers) + array = make_array(shape, dtype, null) - if null is None: - array = np.ma.masked_all(shape, dtype) - else: - array = np.empty(shape, dtype) - array.fill(null) + src_start = sample_numbers.start - sample_start + + try: + src_step = sample_numbers.step + except AttributeError: + src_step = None for xline_index, xline_number in enumerate(xline_numbers): inline_xline_number = (inline_number, xline_number) if reader.has_trace_index(inline_xline_number): - # Read the trace trace_index = reader.trace_index(inline_xline_number) - - sample_start = sample_numbers[0] - try: - sample_stop = sample_numbers.stop - except AttributeError: - sample_stop = sample_numbers[-1] + 1 - num_trace_samples = reader.num_trace_samples(trace_index) - sample_stop = max(sample_stop, num_trace_samples) + trace_sample_stop = max(sample_stop, num_trace_samples) + trace_samples = reader.trace_samples(trace_index, sample_start, trace_sample_stop) + src_stop = trace_sample_stop - sample_start - trace_samples = reader.trace_samples(trace_index, sample_start, sample_stop) - - try: - sample_step = sample_numbers.step - except AttributeError: - sample_step = None - - if sample_step is not None: + if src_step is not None: # Assign to a slice of the target array source_slice = slice( - sample_numbers.start - sample_start, - sample_numbers.stop - sample_start, - sample_step) + src_start, + src_stop, + src_step) array[xline_index, :] = trace_samples[source_slice] else: # Assign element by element for sample_index, sample_number in enumerate(sample_numbers): - array[xline_number, sample_index] = trace_samples[sample_number - sample_start] - return array \ No newline at end of file + array[xline_index, sample_index] = trace_samples[sample_number - sample_start] + return array + + +def make_array(shape, dtype, null=None): + """Make an array""" + if null is None: + return np.ma.masked_all(shape, dtype) + + array = np.empty(shape, dtype) + array.fill(null) + return array + + +def start_and_stop(sequence): + """Obtain start and stop values from a sequence.""" + sample_start = sequence[0] + try: + sample_stop = sequence.stop + except AttributeError: + sample_stop = sequence[-1] + 1 + return sample_start, sample_stop diff --git a/segpy/sorted_set.py b/segpy/sorted_set.py index 507ae93..2b406f0 100644 --- a/segpy/sorted_set.py +++ b/segpy/sorted_set.py @@ -6,8 +6,11 @@ from itertools import chain class SortedFrozenSet(Sequence, Set): - def __init__(self, items=None): - self._items = sorted(set(items)) if items is not None else [] + def __new__(cls, items=None): + if type(items) == cls: + return items + obj = object.__new__(cls) + obj._items = sorted(set(items)) if items is not None else [] def __contains__(self, item): try: diff --git a/segpy/util.py b/segpy/util.py index f30975a..b8ca65d 100644 --- a/segpy/util.py +++ b/segpy/util.py @@ -391,3 +391,53 @@ def hash_for_file(fh, *args): sha1.update(encoded_arg) digest = sha1.hexdigest() return digest + + +def is_range_superset_of_range(superset_range, subset_range): + """Are all the elements of + + """ + if subset_range.start not in superset_range: + return False + if subset_range.step % superset_range.step != 0: + return False + if subset_range[-1] > superset_range[-1]: + return False + assert set(subset_range).issubset(set(superset_range)) + return True + + +def is_subset(superset, subset): + """A more general version of set.issubset that is smart enough to work with ranges.""" + if isinstance(subset, range) and isinstance(superset, range): + return is_range_superset_of_range(superset, subset) + if isinstance(superset, range): + return all(item in superset for item in subset) + return set(superset).issuperset(subset) + + +def ensure_superset(superset, subset): + """Obtains a subset of all items. + + Args: + all_items: A sequence containing all items. + + subset: Subset must either be a collection the elements of which are a subset of + all_items, or a slice object, in which case the subset items will be sliced + from all_items. + Returns: + A sorted, distinct collection which is a subset of all_items. + + Raises: + ValueError: If the items in subset are not a subset of the items in all_items. + """ + if subset is None: + return superset + elif isinstance(subset, slice): + return superset[subset] + else: + subset = make_sorted_distinct_sequence(subset) + if not is_subset(superset, subset): + raise ValueError("subset_or_slice {!r} is not a subset of all_items {!r}" + .format(subset, superset)) + return subset \ No newline at end of file From ebb62fbc7b7004bc232489fcd7d4b3176939ca33 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Mon, 8 Jun 2015 23:36:42 +0200 Subject: [PATCH 03/15] Fixes SortedSet.__new__ failure to return object --- segpy/sorted_set.py | 1 + 1 file changed, 1 insertion(+) diff --git a/segpy/sorted_set.py b/segpy/sorted_set.py index 2b406f0..08e709f 100644 --- a/segpy/sorted_set.py +++ b/segpy/sorted_set.py @@ -11,6 +11,7 @@ class SortedFrozenSet(Sequence, Set): return items obj = object.__new__(cls) obj._items = sorted(set(items)) if items is not None else [] + return obj def __contains__(self, item): try: From 7f50044b430d34f077020db0c7ef8f999fdd1ed2 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Mon, 8 Jun 2015 23:38:24 +0200 Subject: [PATCH 04/15] Makes SegYReader compute the maximum number of samples per trace before serializing (to the cache). --- segpy/reader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/segpy/reader.py b/segpy/reader.py index ce154d9..ef7ccec 100644 --- a/segpy/reader.py +++ b/segpy/reader.py @@ -312,6 +312,8 @@ class SegYReader(object): file_pos = self._fh.tell() file_mode = self._fh.mode + _ = self.max_num_trace_samples() + state = self.__dict__.copy() state['__version__'] = __version__ state['_file_name'] = filename From e5d409eb93ea17c95457b04559678dda855d123a Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Mon, 8 Jun 2015 23:39:09 +0200 Subject: [PATCH 05/15] Refactors extract_inline_3d --- segpy-ext/segpy-numpy/segpy_numpy/extract.py | 46 ++++++++++---------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py index e0504de..137acdb 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/extract.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -9,7 +9,7 @@ class DimensionalityError: pass -def extract_inline_3d(reader, inline_number, xline_numbers=None, sample_numbers=None, null=None, packed=False): +def extract_inline_3d(reader, inline_number, xline_numbers=None, sample_numbers=None, null=None): """Extract an inline as a two-dimensional array. Args: @@ -42,37 +42,39 @@ def extract_inline_3d(reader, inline_number, xline_numbers=None, sample_numbers= sample_numbers = ensure_superset(range(0, reader.max_num_trace_samples()), sample_numbers) shape = (len(xline_numbers), len(sample_numbers)) dtype = make_dtype(reader.data_sample_format) - sample_start, sample_stop = start_and_stop(sample_numbers) array = make_array(shape, dtype, null) - src_start = sample_numbers.start - sample_start + if isinstance(sample_numbers, range): + _populate_inline_array_over_sample_range(reader, inline_number, xline_numbers, sample_numbers, array) + else: + _populate_inline_array_numbered_samples(reader, inline_number, xline_numbers, sample_numbers, array) - try: - src_step = sample_numbers.step - except AttributeError: - src_step = None + return array + +def _populate_inline_array_numbered_samples(reader, inline_number, xline_numbers, sample_numbers, array): for xline_index, xline_number in enumerate(xline_numbers): inline_xline_number = (inline_number, xline_number) if reader.has_trace_index(inline_xline_number): trace_index = reader.trace_index(inline_xline_number) num_trace_samples = reader.num_trace_samples(trace_index) - trace_sample_stop = max(sample_stop, num_trace_samples) - trace_samples = reader.trace_samples(trace_index, sample_start, trace_sample_stop) - src_stop = trace_sample_stop - sample_start + trace_sample_start = sample_numbers[0] + trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples) + trace_samples = reader.trace_samples(trace_index, trace_sample_start, trace_sample_stop) + for sample_index, sample_number in enumerate(sample_numbers): + array[xline_index, sample_index] = trace_samples[sample_number - trace_sample_start] - if src_step is not None: - # Assign to a slice of the target array - source_slice = slice( - src_start, - src_stop, - src_step) - array[xline_index, :] = trace_samples[source_slice] - else: - # Assign element by element - for sample_index, sample_number in enumerate(sample_numbers): - array[xline_index, sample_index] = trace_samples[sample_number - sample_start] - return array + +def _populate_inline_array_over_sample_range(reader, inline_number, xline_numbers, sample_numbers, array): + for xline_index, xline_number in enumerate(xline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader.has_trace_index(inline_xline_number): + trace_index = reader.trace_index(inline_xline_number) + num_trace_samples = reader.num_trace_samples(trace_index) + trace_sample_stop = min(sample_numbers.stop, num_trace_samples) + trace_samples = reader.trace_samples(trace_index, sample_numbers.start, trace_sample_stop) + source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step) + array[xline_index, :] = trace_samples[source_slice] def make_array(shape, dtype, null=None): From c4703af67a022dabb20a966a93aaac2ff4fb49de Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Thu, 11 Jun 2015 12:32:00 +0200 Subject: [PATCH 06/15] Work in progress on extraction to numpy data structures. --- examples/extract_trace_header_field.py | 113 ++++++++++++++++ examples/inline.py | 6 +- segpy-ext/segpy-numpy/segpy_numpy/dtypes.py | 2 +- segpy-ext/segpy-numpy/segpy_numpy/extract.py | 132 ++++++++++++++----- segpy-ext/segpy-numpy/test/__init__.py | 1 + segpy-ext/segpy-numpy/test/test_extract.py | 10 ++ segpy/util.py | 14 +- 7 files changed, 239 insertions(+), 39 deletions(-) create mode 100644 examples/extract_trace_header_field.py create mode 100644 segpy-ext/segpy-numpy/test/__init__.py create mode 100644 segpy-ext/segpy-numpy/test/test_extract.py diff --git a/examples/extract_trace_header_field.py b/examples/extract_trace_header_field.py new file mode 100644 index 0000000..eaacf47 --- /dev/null +++ b/examples/extract_trace_header_field.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 + +"""Extract a single field from the trace headers of 3D seismic volume to a 2D Numpy array. + +This utility assumes the inline and crossline numbers are evenly spaced. +Each inline of the source data will be represented as a single row, and +each crossline as a single column in the resulting 2D array. + +Usage: extract_trace_header_field.py [-h] [--null NULL] + segy-file npy-file field-name + +Positional arguments: + segy-file Path to an existing SEG Y file of 3D seismic data + npy-file Path to the Numpy array file to be created for the timeslice + field-name Zero based index of the time slice to be extracted + +Optional arguments: + -h, --help show this help message and exit + --dtype DTYPE Numpy data type. If not provided a dtype compatible with the + SEG Y data will be used. + --null NULL Sample value to use for missing or short traces. + +Example: + + extract_trace_header_field.py stack_final_int8.sgy slice_800.npy ensemble_num +""" + +import argparse +import os +import sys +import traceback + +import numpy as np + +from numpy import s_ + +from segpy.reader import create_reader +from segpy.trace_header import TraceHeaderRev1 +from segpy_numpy.dtypes import make_dtype +from segpy_numpy.extract import extract_inline_3d, extract_trace_header_field_3d + + +class DimensionalityError(Exception): + pass + + + +def extract_header_field(segy_filename, out_filename, field_name, null=None): + """Extract a timeslice from a 3D SEG Y file to a Numpy NPY file. + + Args: + segy_filename: Filename of a SEG Y file. + + out_filename: Filename of the NPY file. + + inline_index: The zero-based index (increasing with depth) of the slice to be extracted. + + null: Optional sample value to use for missing or short traces. Defaults to zero. + """ + header_field = getattr(TraceHeaderRev1, field_name) + + with open(segy_filename, 'rb') as segy_file: + + segy_reader = create_reader(segy_file) + header_field_array = extract_trace_header_field_3d(segy_reader, header_field, null=null) + return header_field_array + + +def nullable_int(s): + return None if not bool(s) else int(s) + + +def main(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument("segy_file", metavar="segy-file", + help="Path to an existing SEG Y file of 3D seismic data") + + parser.add_argument("npy_file", metavar="npy-file", + help="Path to the Numpy array file to be created for the timeslice") + + parser.add_argument("field_name", metavar="field-name", type=str, + help="Name of the trace header field to be extracted", ) + + parser.add_argument("--null", type=nullable_int, default="", + help="Header value to use for missing or short traces.") + + if argv is None: + argv = sys.argv[1:] + + args = parser.parse_args(argv) + + try: + extract_header_field( + segy_filename=args.segy_file, + out_filename=args.npy_file, + field_name=args.field_name, + null=args.null) + except (FileNotFoundError, IsADirectoryError) as e: + print(e, file=sys.stderr) + return os.EX_NOINPUT + except PermissionError as e: + print(e, file=sys.stderr) + return os.EX_NOPERM + except Exception as e: + traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr) + return os.EX_SOFTWARE + return os.EX_OK + + +if __name__ == '__main__': + sys.exit(main()) + + diff --git a/examples/inline.py b/examples/inline.py index ac02931..0fb15f2 100644 --- a/examples/inline.py +++ b/examples/inline.py @@ -22,7 +22,7 @@ Optional arguments: Example: - timeslice.py stack_final_int8.sgy slice_800.npy 800 --null=42.0 --dtype=f + inline.py stack_final_int8.sgy slice_800.npy 800 --null=42.0 --dtype=f """ import argparse @@ -58,8 +58,8 @@ def extract_inline(segy_filename, out_filename, inline_number, null=None): with open(segy_filename, 'rb') as segy_file: segy_reader = create_reader(segy_file) - inline_array = extract_inline_3d(segy_reader, inline_number, sample_numbers=s_[::2], null=null) - return inline_number + inline_array = extract_inline_3d(segy_reader, inline_number, null=null) + return inline_array def nullable_float(s): diff --git a/segpy-ext/segpy-numpy/segpy_numpy/dtypes.py b/segpy-ext/segpy-numpy/segpy_numpy/dtypes.py index 2583caf..74431f3 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/dtypes.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/dtypes.py @@ -9,7 +9,7 @@ NUMPY_DTYPES = {'ibm': numpy.dtype('f4'), 'int8': numpy.dtype('i1')} -def make_dtype(data_sample_format): +def make_dtype(data_sample_format): # TODO: What is the correct name for this arg? """Convert a SEG Y data sample format to a compatible numpy dtype. Note : diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py index 137acdb..b9b4f8c 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/extract.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -8,46 +8,127 @@ from segpy_numpy.dtypes import make_dtype class DimensionalityError: pass +def extract_trace_header_field_3d(reader, field, null=None): + """Extract a single trace header field from all trace headers as an array. -def extract_inline_3d(reader, inline_number, xline_numbers=None, sample_numbers=None, null=None): + Args: + reader: A SegYReader + field: A header field + """ + shape = (reader.num_inlines(), reader.num_xlines()) + dtype = make_dtype(field.value_type.SEG_Y_TYPE) + arr = _make_array(shape, dtype, null) + field_name = field.name + for inline_xline in reader.inline_xline_numbers(): + inline_number, xline_number = inline_xline + trace_index = reader.trace_index(inline_xline) + trace_header = reader.trace_header(trace_index) + inline_index = reader.inline_numbers().index(inline_number) + xline_index = reader.xline_numbers().index(xline_number) + field_value = getattr(trace_header, field_name) + arr[inline_index, xline_index] = field_value + return arr + + +def extract_trace(reader, trace_index, sample_numbers): + """Extract an single trace as a one-dimensional array. + + Args: + reader: A SegYReader3D object. + + trace_index: The index of the trace to be extracted. + + sample_numbers: The sample numbers within each trace at which samples are to be extracted. + This argument can be specified in three ways: + + None (the default) - All samples within the trace will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those samples at + sample numbers corresponding to the items in the sequence will be extracted. The + samples will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate + samples from sample number 100 to sample number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + sample numbers. For example sample_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred samples, irrespective of their numbers. + + Returns: + A one-dimensional array. + """ + if not reader.has_trace_index(trace_index): + raise ValueError("Inline number {} not present in {}".format(trace_index, reader)) + + sample_numbers = ensure_superset(range(0, reader.max_num_trace_samples()), sample_numbers) + + trace_sample_start = sample_numbers[0] + trace_sample_stop = min(sample_numbers[-1] + 1, reader.num_trace_samples(trace_index)) + trace_samples = reader.trace_samples(trace_index, trace_sample_start, trace_sample_stop) + arr = np.fromiter((trace_samples[sample_number - trace_sample_start] for sample_number in sample_numbers), + make_dtype(reader.data_sample_format)) + return arr + + +def extract_inline_3d(reader_3d, inline_number, xline_numbers=None, sample_numbers=None, null=None): """Extract an inline as a two-dimensional array. Args: - reader: + reader: A SegYReader3D object. inline_number: The number of the inline to be extracted. - xline_numbers: Either a sequence of crossline numbers from which traces are - to be extracted or a slice object indicating that some slice of all - crossline numbers is to be used. If None, traces will be extracted at - all crosslines. + xline_numbers: The crossline numbers within the inline at which traces are to be extracted. + This argument can be specified in three ways: - sample_numbers: An optional sorted sequence of samples numbers at which to - extract samples. If not provided, samples will be extracted at all - depths. + None (the default) - All traces within the inline will be be extracted. - null: A null value + sequence - When a sequence, such as a range or a list is provided only those traces at + crossline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate + traces from crossline number 100 to crossline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + crossline numbers. For example xline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + sample_numbers: The sample numbers within each trace at which samples are to be extracted. + This argument can be specified in three ways: + + None (the default) - All samples within the trace will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those samples at + sample numbers corresponding to the items in the sequence will be extracted. The + samples will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate + samples from sample number 100 to sample number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + sample numbers. For example sample_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred samples, irrespective of their numbers. + + null: A null value. When None is specified as the null value a masked array will be returned. Returns: A two-dimensional array. If null is None a masked array will be returned, otherwise - a simple array will be returned.. The first (slowest changing) index will correspond + a regular array will be returned. The first (slowest changing) index will correspond to the traces (index zero will correspond to the first crossline number). The second (fastest changing) index will correspond to the samples (index zero will correspond to the first sample number. """ - if inline_number not in reader.inline_numbers(): - raise ValueError("Inline number {} not present in {}".format(inline_number, reader)) + if inline_number not in reader_3d.inline_numbers(): + raise ValueError("Inline number {} not present in {}".format(inline_number, reader_3d)) - xline_numbers = ensure_superset(reader.xline_numbers(), xline_numbers) - sample_numbers = ensure_superset(range(0, reader.max_num_trace_samples()), sample_numbers) + xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers) + sample_numbers = ensure_superset(range(0, reader_3d.max_num_trace_samples()), sample_numbers) shape = (len(xline_numbers), len(sample_numbers)) - dtype = make_dtype(reader.data_sample_format) - array = make_array(shape, dtype, null) + dtype = make_dtype(reader_3d.data_sample_format) + array = _make_array(shape, dtype, null) if isinstance(sample_numbers, range): - _populate_inline_array_over_sample_range(reader, inline_number, xline_numbers, sample_numbers, array) + _populate_inline_array_over_sample_range(reader_3d, inline_number, xline_numbers, sample_numbers, array) else: - _populate_inline_array_numbered_samples(reader, inline_number, xline_numbers, sample_numbers, array) + _populate_inline_array_numbered_samples(reader_3d, inline_number, xline_numbers, sample_numbers, array) return array @@ -77,21 +158,10 @@ def _populate_inline_array_over_sample_range(reader, inline_number, xline_number array[xline_index, :] = trace_samples[source_slice] -def make_array(shape, dtype, null=None): +def _make_array(shape, dtype, null=None): """Make an array""" if null is None: return np.ma.masked_all(shape, dtype) - array = np.empty(shape, dtype) array.fill(null) return array - - -def start_and_stop(sequence): - """Obtain start and stop values from a sequence.""" - sample_start = sequence[0] - try: - sample_stop = sequence.stop - except AttributeError: - sample_stop = sequence[-1] + 1 - return sample_start, sample_stop diff --git a/segpy-ext/segpy-numpy/test/__init__.py b/segpy-ext/segpy-numpy/test/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/segpy-ext/segpy-numpy/test/__init__.py @@ -0,0 +1 @@ + diff --git a/segpy-ext/segpy-numpy/test/test_extract.py b/segpy-ext/segpy-numpy/test/test_extract.py new file mode 100644 index 0000000..58e5dfe --- /dev/null +++ b/segpy-ext/segpy-numpy/test/test_extract.py @@ -0,0 +1,10 @@ +import unittest + + +class MyTestCase(unittest.TestCase): + def test_something(self): + self.assertEqual(True, False) + + +if __name__ == '__main__': + unittest.main() diff --git a/segpy/util.py b/segpy/util.py index b8ca65d..9649071 100644 --- a/segpy/util.py +++ b/segpy/util.py @@ -3,7 +3,9 @@ import time import os import sys +from collections.abc import Set from itertools import (islice, cycle, tee, chain, repeat) + from segpy.sorted_set import SortedFrozenSet UNKNOWN_FILENAME = '' @@ -407,17 +409,21 @@ def is_range_superset_of_range(superset_range, subset_range): return True -def is_subset(superset, subset): - """A more general version of set.issubset that is smart enough to work with ranges.""" +def is_superset(superset, subset): + """A more general version of set.issuperset that is smart enough to work with ranges.""" if isinstance(subset, range) and isinstance(superset, range): return is_range_superset_of_range(superset, subset) if isinstance(superset, range): return all(item in superset for item in subset) + if isinstance(superset, set): + return superset.issuperset(subset) + if isinstance(subset, set): + return subset.issubset(superset) return set(superset).issuperset(subset) def ensure_superset(superset, subset): - """Obtains a subset of all items. + """Ensure that one collection is a subset of another. Args: all_items: A sequence containing all items. @@ -437,7 +443,7 @@ def ensure_superset(superset, subset): return superset[subset] else: subset = make_sorted_distinct_sequence(subset) - if not is_subset(superset, subset): + if not is_superset(superset, subset): raise ValueError("subset_or_slice {!r} is not a subset of all_items {!r}" .format(subset, superset)) return subset \ No newline at end of file From 5312cca05201c76221dd0c50d3e850337d5c69a9 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 16:39:34 +0200 Subject: [PATCH 07/15] Adds a property to reader to retrieve the default trace header class. Makes it possible to override the trace header class when retrieving a trace header. --- segpy/reader.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/segpy/reader.py b/segpy/reader.py index ef7ccec..62db921 100644 --- a/segpy/reader.py +++ b/segpy/reader.py @@ -421,12 +421,15 @@ class SegYReader(object): self._fh, start_pos, seg_y_type, num_samples_to_read, self._endian) return trace_values - def trace_header(self, trace_index): + def trace_header(self, trace_index, header_packer_override=None): """Read a specific trace_samples. Args: trace_index: An integer in the range zero to num_traces() - 1 + header_packer_override: Override the default header packer (for example + to more efficiently extract only a few fields) + Returns: A TraceHeader corresponding to the requested trace_samples. @@ -436,10 +439,19 @@ class SegYReader(object): """ if not (0 <= trace_index < self.num_traces()): raise ValueError("Trace index {} out of range".format(trace_index)) + header_packer = self._trace_header_packer if header_packer_override is None else header_packer_override pos = self._trace_offset_catalog[trace_index] - trace_header = read_trace_header(self._fh, self._trace_header_packer, pos) + trace_header = read_trace_header(self._fh, header_packer, pos) return trace_header + @property + def trace_header_format_class(self): + """The trace header format class. + + Instances of this class are what is returned from trace_header() unless the + header_packer has been overridden.""" + return self._trace_header_packer.header_format_class + @property def dimensionality(self): """The spatial dimensionality of the data. From a7d658995a80be91346442ae331b5a7d1dcd7b9f Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 16:40:30 +0200 Subject: [PATCH 08/15] Adds a true() function which always returns True. --- segpy/util.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/segpy/util.py b/segpy/util.py index 9649071..3534ee9 100644 --- a/segpy/util.py +++ b/segpy/util.py @@ -446,4 +446,32 @@ def ensure_superset(superset, subset): if not is_superset(superset, subset): raise ValueError("subset_or_slice {!r} is not a subset of all_items {!r}" .format(subset, superset)) - return subset \ No newline at end of file + return subset + +def true(*args, **kwargs): + return True + + +def collect_attributes(derived_class, base_class=object, predicate=None): + """ + + Args: + derived_class: The class at which to start searching. + base_class: The class at which to stop searching + predicate: A predicate which accepts + + Returns: + A generator of items containing the (class, attribute_name) + """ + # TODO: Consider using the inspect module to do this + if predicate is None: + predicate = true + + for cls in derived_class.__mro__: + for key, value in vars(cls).items(): + if predicate(key, value): + yield cls, key, value + if cls is base_class: + break + + From a3ecf62a09b784457fb74066daab9cbedf08ae8a Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 16:42:43 +0200 Subject: [PATCH 09/15] Adds SubFormatMeta for subsetting header formats. --- segpy/header.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/segpy/header.py b/segpy/header.py index 2083cdc..82a9f01 100644 --- a/segpy/header.py +++ b/segpy/header.py @@ -4,7 +4,7 @@ from itertools import chain from segpy import __version__ from segpy.docstring import docstring_property -from segpy.util import underscores_to_camelcase, first_sentence, super_class +from segpy.util import underscores_to_camelcase, first_sentence, super_class, collect_attributes class Header: @@ -96,7 +96,7 @@ class FormatMeta(type): """ @classmethod - def __prepare__(mcs, name, bases): + def __prepare__(mcs, name, bases, *args, **kwargs): return OrderedDict() def __new__(mcs, name, bases, namespace): @@ -134,6 +134,63 @@ class FormatMeta(type): return super().__new__(mcs, name, bases, namespace) +def is_public_non_field_attr(name, attr): + return (not name.startswith('_')) and (not isinstance(attr, HeaderFieldDescriptor) and (not isinstance(attr, classmethod))) + + +class SubFormatMeta(FormatMeta): + """A metaclass for a format class which has a subset of the fields in an existing format class. + + SubFormat classes can be used to reduce storage requirements and increase performance, since they can be + used to generate simpler HeaderPackers. + + SubFormat classes must be declared as: + + class MySubFormat(metaclass=SubFormatMeta, + format_class=MyFormatClass, + field_names=['first_field_name', + 'second_field_name']) + pass + """ + + def __new__(mcs, name, bases, namespace, parent_format, parent_field_names): + """ + Args: + name: The name of the actual class being created by this metaclass. + bases: The base classes of the actual class. + parent_format: An existing Format (?Header) on which (sort out terminology here) + of which this format has a subset of fields. + field_names: An iterable series of field names which this format should + duplicate from the parent_format. + """ + + # TODO: We need an inheritance aware getattr for class attributes! + # Copy the requested fields, by creating a new descriptor based + # on information retrieved from the existing descriptor + + for field_name in parent_field_names: + named_field = getattr(parent_format, field_name) + assert named_field.name == field_name + field_copy = field(named_field.value_type, + named_field.offset, + named_field.default, + named_field.documentation) + namespace[field_name] = field_copy + + # Copy other non-field class attributes + non_field_attributes = list(collect_attributes(parent_format, Header, is_public_non_field_attr)) + namespace.update((name, value) for _, name, value in non_field_attributes) + + # Add a reference back to the original format + namespace['_parent_format'] = parent_format + + return super().__new__(mcs, name, bases, namespace) + + def __init__(mcs, name, bases, namespace, parent_format, parent_field_names): + # Absorb the additional arguments + super().__init__(name, bases, namespace) + + class NamedField: """Instances of NamedField can be detected by the NamedDescriptorResolver metaclass.""" From 480bb024beec29906459d96caa6960d85184a310 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 16:43:18 +0200 Subject: [PATCH 10/15] Adds a reminder to use a SubFormat when building catalogs. --- segpy/toolkit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/segpy/toolkit.py b/segpy/toolkit.py index a741afb..d7ba87f 100644 --- a/segpy/toolkit.py +++ b/segpy/toolkit.py @@ -347,6 +347,8 @@ def catalog_traces(fh, bps, trace_header_format=TraceHeaderRev1, endian='>', pro alt_line_catalog_builder = CatalogBuilder() cdp_catalog_builder = CatalogBuilder() + # TODO: Use a SubHeaderFormat to only load the fields we're interestedin + for trace_number in count(): progress_callback(_READ_PROPORTION * pos_begin / length) fh.seek(pos_begin) From f7920298293fb2666d6b0d5fa07fb7583983a12a Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 16:44:16 +0200 Subject: [PATCH 11/15] Use SubFormats to make extract_trace_header_3d about four times faster when only extracting a single field. --- examples/extract_trace_header_field.py | 5 +- segpy-ext/segpy-numpy/segpy_numpy/extract.py | 71 +++++++++++++++++--- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/examples/extract_trace_header_field.py b/examples/extract_trace_header_field.py index eaacf47..e8b2985 100644 --- a/examples/extract_trace_header_field.py +++ b/examples/extract_trace_header_field.py @@ -62,8 +62,9 @@ def extract_header_field(segy_filename, out_filename, field_name, null=None): with open(segy_filename, 'rb') as segy_file: segy_reader = create_reader(segy_file) - header_field_array = extract_trace_header_field_3d(segy_reader, header_field, null=null) - return header_field_array + + header_field_arrays = extract_trace_header_field_3d(segy_reader, [header_field], null) + return header_field_arrays def nullable_int(s): diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py index b9b4f8c..f783efb 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/extract.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -1,5 +1,8 @@ """Tools for interoperability between Segpy and Numpy arrays.""" +from collections import namedtuple import numpy as np +from segpy.header import Header, SubFormatMeta +from segpy.packer import make_header_packer from segpy.util import ensure_superset from segpy_numpy.dtypes import make_dtype @@ -8,26 +11,60 @@ from segpy_numpy.dtypes import make_dtype class DimensionalityError: pass -def extract_trace_header_field_3d(reader, field, null=None): + +# TODO: Add inline_numbers and xline_numbers arguments +def extract_trace_header_field_3d(reader, fields, null=None): """Extract a single trace header field from all trace headers as an array. Args: - reader: A SegYReader - field: A header field + reader: A SegYReader3D + + fields: A an iterable series where each item is either the string name of a field + or an object with a 'name' attribute which is the name of a field, such as a + NamedField. + + null: An optional null value for missing traces. The null value must be convertible + to all field value types. + + Returns: + An namedtuple object with attributes which are two-dimensional Numpy arrays. + If a null value was specified the arrays will be ndarrays, otherwise they + will be masked arrays. The attributes of the named tuple are in the same + order as the fields specified in the `fields` argument. + + Raises: + AttributeError: If the the named fields do not exist in the trace header definition. """ + field_names = [_extract_field_name(field) for field in fields] + + class SubHeader(metaclass=SubFormatMeta, + parent_format=reader.trace_header_format_class, + parent_field_names=field_names): + pass + + sub_header_packer = make_header_packer(SubHeader, reader.endian) + TraceHeaderArrays = namedtuple('TraceHeaderArrays', field_names) shape = (reader.num_inlines(), reader.num_xlines()) - dtype = make_dtype(field.value_type.SEG_Y_TYPE) - arr = _make_array(shape, dtype, null) - field_name = field.name + + arrays = (_make_array(shape, + make_dtype(getattr(SubHeader, field_name).value_type.SEG_Y_TYPE), + null) + for field_name in field_names) + + trace_header_arrays = TraceHeaderArrays(*arrays) + for inline_xline in reader.inline_xline_numbers(): inline_number, xline_number = inline_xline trace_index = reader.trace_index(inline_xline) - trace_header = reader.trace_header(trace_index) + trace_header = reader.trace_header(trace_index, sub_header_packer) inline_index = reader.inline_numbers().index(inline_number) xline_index = reader.xline_numbers().index(xline_number) - field_value = getattr(trace_header, field_name) - arr[inline_index, xline_index] = field_value - return arr + + for field_name, a in zip(field_names, trace_header_arrays): + field_value = getattr(trace_header, field_name) + a[inline_index, xline_index] = field_value + + return trace_header_arrays def extract_trace(reader, trace_index, sample_numbers): @@ -165,3 +202,17 @@ def _make_array(shape, dtype, null=None): array = np.empty(shape, dtype) array.fill(null) return array + +def _extract_field_name(field): + """Args: + field: If field in an object with a name attribute the name is returned. If field is a string it is returned + unmodified. + Raises: + TypeError: + """ + if isinstance(field, str): + return field + try: + return field.name + except AttributeError: + raise TypeError("{!r} neither is a string nor has a 'name' attribute".format(field)) \ No newline at end of file From 9652730586e99282485000e571a764b54d783df7 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 16:46:46 +0200 Subject: [PATCH 12/15] Move examples dependent on segpy-numpy to segpy-numpy/examples --- .../segpy-numpy/examples}/extract_trace_header_field.py | 0 {examples => segpy-ext/segpy-numpy/examples}/inline.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {examples => segpy-ext/segpy-numpy/examples}/extract_trace_header_field.py (100%) rename {examples => segpy-ext/segpy-numpy/examples}/inline.py (100%) diff --git a/examples/extract_trace_header_field.py b/segpy-ext/segpy-numpy/examples/extract_trace_header_field.py similarity index 100% rename from examples/extract_trace_header_field.py rename to segpy-ext/segpy-numpy/examples/extract_trace_header_field.py diff --git a/examples/inline.py b/segpy-ext/segpy-numpy/examples/inline.py similarity index 100% rename from examples/inline.py rename to segpy-ext/segpy-numpy/examples/inline.py From 2bee1e4f99dd03c8103b4d89c6d1905c8c5247e4 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 17:20:29 +0200 Subject: [PATCH 13/15] Adds inline/xline range support to extract_trace_header_3d. Adds extract_xline_3d --- segpy-ext/segpy-numpy/segpy_numpy/extract.py | 158 ++++++++++++++++--- 1 file changed, 140 insertions(+), 18 deletions(-) diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py index f783efb..d6dbc2f 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/extract.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -13,15 +13,45 @@ class DimensionalityError: # TODO: Add inline_numbers and xline_numbers arguments -def extract_trace_header_field_3d(reader, fields, null=None): +def extract_trace_header_field_3d(reader_3d, fields, inline_numbers=None, xline_numbers=None, null=None): """Extract a single trace header field from all trace headers as an array. Args: - reader: A SegYReader3D + reader_3d: A SegYReader3D - fields: A an iterable series where each item is either the string name of a field - or an object with a 'name' attribute which is the name of a field, such as a - NamedField. + fields: A an iterable series where each item is either the name of a field as a string + or an object such as a NamedField with a 'name' attribute which in turn is the name + of a field as a string, such as a NamedField. + + inline_numbers: The inline numbers for which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces within the each crossline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + inline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate + traces from inline number 100 to inline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + inline numbers. For example inline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + xline_numbers: The crossline numbers at which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces at within each inline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + crossline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate + traces from crossline number 100 to crossline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + crossline numbers. For example xline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. null: An optional null value for missing traces. The null value must be convertible to all field value types. @@ -37,14 +67,17 @@ def extract_trace_header_field_3d(reader, fields, null=None): """ field_names = [_extract_field_name(field) for field in fields] + inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers) + xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers) + shape = (len(inline_numbers), len(xline_numbers)) + class SubHeader(metaclass=SubFormatMeta, - parent_format=reader.trace_header_format_class, + parent_format=reader_3d.trace_header_format_class, parent_field_names=field_names): pass - sub_header_packer = make_header_packer(SubHeader, reader.endian) + sub_header_packer = make_header_packer(SubHeader, reader_3d.endian) TraceHeaderArrays = namedtuple('TraceHeaderArrays', field_names) - shape = (reader.num_inlines(), reader.num_xlines()) arrays = (_make_array(shape, make_dtype(getattr(SubHeader, field_name).value_type.SEG_Y_TYPE), @@ -53,16 +86,16 @@ def extract_trace_header_field_3d(reader, fields, null=None): trace_header_arrays = TraceHeaderArrays(*arrays) - for inline_xline in reader.inline_xline_numbers(): - inline_number, xline_number = inline_xline - trace_index = reader.trace_index(inline_xline) - trace_header = reader.trace_header(trace_index, sub_header_packer) - inline_index = reader.inline_numbers().index(inline_number) - xline_index = reader.xline_numbers().index(xline_number) + for inline_index, inline_number in enumerate(inline_numbers): + for xline_index, xline_number in enumerate(xline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index((inline_number, xline_number)) + trace_header = reader_3d.trace_header(trace_index, sub_header_packer) - for field_name, a in zip(field_names, trace_header_arrays): - field_value = getattr(trace_header, field_name) - a[inline_index, xline_index] = field_value + for field_name, a in zip(field_names, trace_header_arrays): + field_value = getattr(trace_header, field_name) + a[inline_index, xline_index] = field_value return trace_header_arrays @@ -151,7 +184,7 @@ def extract_inline_3d(reader_3d, inline_number, xline_numbers=None, sample_numbe a regular array will be returned. The first (slowest changing) index will correspond to the traces (index zero will correspond to the first crossline number). The second (fastest changing) index will correspond to the samples (index zero will - correspond to the first sample number. + correspond to the first sample number). """ if inline_number not in reader_3d.inline_numbers(): raise ValueError("Inline number {} not present in {}".format(inline_number, reader_3d)) @@ -195,6 +228,95 @@ def _populate_inline_array_over_sample_range(reader, inline_number, xline_number array[xline_index, :] = trace_samples[source_slice] +def extract_xline_3d(reader_3d, xline_number, inline_numbers=None, sample_numbers=None, null=None): + """Extract an inline as a two-dimensional array. + + Args: + reader: A SegYReader3D object. + + xline_number: The number of the xline to be extracted. + + inline_numbers: The inline numbers within the crossline at which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces within the crossline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + inline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate + traces from inline number 100 to inline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + inline numbers. For example inline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + sample_numbers: The sample numbers within each trace at which samples are to be extracted. + This argument can be specified in three ways: + + None (the default) - All samples within the trace will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those samples at + sample numbers corresponding to the items in the sequence will be extracted. The + samples will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example sample_numbers=range(100, 200, 2) will extract alternate + samples from sample number 100 to sample number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + sample numbers. For example sample_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred samples, irrespective of their numbers. + + null: A null value. When None is specified as the null value a masked array will be returned. + + Returns: + A two-dimensional array. If null is None a masked array will be returned, otherwise + a regular array will be returned. The first (slowest changing) index will correspond + to the traces (index zero will correspond to the first inline number). The + second (fastest changing) index will correspond to the samples (index zero will + correspond to the first sample number). + """ + if xline_number not in reader_3d.xline_numbers(): + raise ValueError("Crossline number {} not present in {}".format(xline_number, reader_3d)) + + inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers) + sample_numbers = ensure_superset(range(0, reader_3d.max_num_trace_samples()), sample_numbers) + shape = (len(inline_numbers), len(sample_numbers)) + dtype = make_dtype(reader_3d.data_sample_format) + array = _make_array(shape, dtype, null) + + if isinstance(sample_numbers, range): + _populate_xline_array_over_sample_range(reader_3d, xline_number, inline_numbers, sample_numbers, array) + else: + _populate_xline_array_numbered_samples(reader_3d, xline_number, inline_numbers, sample_numbers, array) + + return array + + +def _populate_xline_array_numbered_samples(reader, xline_number, inline_numbers, sample_numbers, array): + for inline_index, inline_number in enumerate(inline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader.has_trace_index(inline_xline_number): + trace_index = reader.trace_index(inline_xline_number) + num_trace_samples = reader.num_trace_samples(trace_index) + trace_sample_start = sample_numbers[0] + trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples) + trace_samples = reader.trace_samples(trace_index, trace_sample_start, trace_sample_stop) + for sample_index, sample_number in enumerate(sample_numbers): + array[inline_index, sample_index] = trace_samples[sample_number - trace_sample_start] + + +def _populate_xline_array_over_sample_range(reader, xline_number, inline_numbers, sample_numbers, array): + for inline_index, inline_number in enumerate(inline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader.has_trace_index(inline_xline_number): + trace_index = reader.trace_index(inline_xline_number) + num_trace_samples = reader.num_trace_samples(trace_index) + trace_sample_stop = min(sample_numbers.stop, num_trace_samples) + trace_samples = reader.trace_samples(trace_index, sample_numbers.start, trace_sample_stop) + source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step) + array[inline_index, :] = trace_samples[source_slice] + + def _make_array(shape, dtype, null=None): """Make an array""" if null is None: From 965b9482dab26a49ca379ffaf095441e10731cae Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 17:30:46 +0200 Subject: [PATCH 14/15] Adds extract_timeslice_3d to segpy_numpy --- segpy-ext/segpy-numpy/segpy_numpy/extract.py | 107 +++++++++++++++---- 1 file changed, 87 insertions(+), 20 deletions(-) diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py index d6dbc2f..1e9e3ce 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/extract.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -203,27 +203,27 @@ def extract_inline_3d(reader_3d, inline_number, xline_numbers=None, sample_numbe return array -def _populate_inline_array_numbered_samples(reader, inline_number, xline_numbers, sample_numbers, array): +def _populate_inline_array_numbered_samples(reader_3d, inline_number, xline_numbers, sample_numbers, array): for xline_index, xline_number in enumerate(xline_numbers): inline_xline_number = (inline_number, xline_number) - if reader.has_trace_index(inline_xline_number): - trace_index = reader.trace_index(inline_xline_number) - num_trace_samples = reader.num_trace_samples(trace_index) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index(inline_xline_number) + num_trace_samples = reader_3d.num_trace_samples(trace_index) trace_sample_start = sample_numbers[0] trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples) - trace_samples = reader.trace_samples(trace_index, trace_sample_start, trace_sample_stop) + trace_samples = reader_3d.trace_samples(trace_index, trace_sample_start, trace_sample_stop) for sample_index, sample_number in enumerate(sample_numbers): array[xline_index, sample_index] = trace_samples[sample_number - trace_sample_start] -def _populate_inline_array_over_sample_range(reader, inline_number, xline_numbers, sample_numbers, array): +def _populate_inline_array_over_sample_range(reader_3d, inline_number, xline_numbers, sample_numbers, array): for xline_index, xline_number in enumerate(xline_numbers): inline_xline_number = (inline_number, xline_number) - if reader.has_trace_index(inline_xline_number): - trace_index = reader.trace_index(inline_xline_number) - num_trace_samples = reader.num_trace_samples(trace_index) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index(inline_xline_number) + num_trace_samples = reader_3d.num_trace_samples(trace_index) trace_sample_stop = min(sample_numbers.stop, num_trace_samples) - trace_samples = reader.trace_samples(trace_index, sample_numbers.start, trace_sample_stop) + trace_samples = reader_3d.trace_samples(trace_index, sample_numbers.start, trace_sample_stop) source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step) array[xline_index, :] = trace_samples[source_slice] @@ -292,31 +292,98 @@ def extract_xline_3d(reader_3d, xline_number, inline_numbers=None, sample_number return array -def _populate_xline_array_numbered_samples(reader, xline_number, inline_numbers, sample_numbers, array): +def _populate_xline_array_numbered_samples(reader_3d, xline_number, inline_numbers, sample_numbers, array): for inline_index, inline_number in enumerate(inline_numbers): inline_xline_number = (inline_number, xline_number) - if reader.has_trace_index(inline_xline_number): - trace_index = reader.trace_index(inline_xline_number) - num_trace_samples = reader.num_trace_samples(trace_index) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index(inline_xline_number) + num_trace_samples = reader_3d.num_trace_samples(trace_index) trace_sample_start = sample_numbers[0] trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples) - trace_samples = reader.trace_samples(trace_index, trace_sample_start, trace_sample_stop) + trace_samples = reader_3d.trace_samples(trace_index, trace_sample_start, trace_sample_stop) for sample_index, sample_number in enumerate(sample_numbers): array[inline_index, sample_index] = trace_samples[sample_number - trace_sample_start] -def _populate_xline_array_over_sample_range(reader, xline_number, inline_numbers, sample_numbers, array): +def _populate_xline_array_over_sample_range(reader_3d, xline_number, inline_numbers, sample_numbers, array): for inline_index, inline_number in enumerate(inline_numbers): inline_xline_number = (inline_number, xline_number) - if reader.has_trace_index(inline_xline_number): - trace_index = reader.trace_index(inline_xline_number) - num_trace_samples = reader.num_trace_samples(trace_index) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index(inline_xline_number) + num_trace_samples = reader_3d.num_trace_samples(trace_index) trace_sample_stop = min(sample_numbers.stop, num_trace_samples) - trace_samples = reader.trace_samples(trace_index, sample_numbers.start, trace_sample_stop) + trace_samples = reader_3d.trace_samples(trace_index, sample_numbers.start, trace_sample_stop) source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step) array[inline_index, :] = trace_samples[source_slice] +def extract_timeslice_3d(reader_3d, sample_number, inline_numbers=None, xline_numbers=None, null=None): + """Extract a single timeslice header field from all trace headers as an array. + + Args: + reader_3d: A SegYReader3D + + sample_number: The zero-based sample index. + + inline_numbers: The inline numbers for which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces within the each crossline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + inline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example inline_numbers=range(100, 200, 2) will extract alternate + traces from inline number 100 to inline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + inline numbers. For example inline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + xline_numbers: The crossline numbers at which traces are to be extracted. + This argument can be specified in three ways: + + None (the default) - All traces at within each inline will be be extracted. + + sequence - When a sequence, such as a range or a list is provided only those traces at + crossline numbers corresponding to the items in the sequence will be extracted. The + traces will always be extracted in increasing numeric order and duplicate entries + will be ignored. For example xline_numbers=range(100, 200, 2) will extract alternate + traces from crossline number 100 to crossline number 198 inclusive. + + slice - When a slice object is provided the slice will be applied to the sequence of all + crossline numbers. For example xline_numbers=slice(100, -100) will omit the first + one hundred and the last one hundred traces, irrespective of their numbers. + + null: An optional null value for missing traces. The null value must be convertible + to all field value types. + + Returns: + An namedtuple object with attributes which are two-dimensional Numpy arrays. + If a null value was specified the arrays will be ndarrays, otherwise they + will be masked arrays. The attributes of the named tuple are in the same + order as the fields specified in the `fields` argument. + + Raises: + AttributeError: If the the named fields do not exist in the trace header definition. + """ + + inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers) + xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers) + shape = (len(inline_numbers), len(xline_numbers)) + dtype = make_dtype(reader_3d.data_sample_format) + array = _make_array(shape, dtype, null) + sample_number_stop = sample_number + 1 + + for inline_index, inline_number in enumerate(inline_numbers): + for xline_index, xline_number in enumerate(xline_numbers): + inline_xline_number = (inline_number, xline_number) + if reader_3d.has_trace_index(inline_xline_number): + trace_index = reader_3d.trace_index((inline_number, xline_number)) + trace_samples = reader_3d.trace_samples(trace_index, sample_number, sample_number_stop) + array[inline_index, xline_index] = trace_samples[0] + return array + def _make_array(shape, dtype, null=None): """Make an array""" if null is None: From 2c5774c0b497acc33118d86542ade19b8073eb53 Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Sun, 14 Jun 2015 17:31:45 +0200 Subject: [PATCH 15/15] Removes unnecessary code and todos --- segpy-ext/segpy-numpy/segpy_numpy/extract.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/segpy-ext/segpy-numpy/segpy_numpy/extract.py b/segpy-ext/segpy-numpy/segpy_numpy/extract.py index 1e9e3ce..7ccb53a 100644 --- a/segpy-ext/segpy-numpy/segpy_numpy/extract.py +++ b/segpy-ext/segpy-numpy/segpy_numpy/extract.py @@ -8,11 +8,6 @@ from segpy.util import ensure_superset from segpy_numpy.dtypes import make_dtype -class DimensionalityError: - pass - - -# TODO: Add inline_numbers and xline_numbers arguments def extract_trace_header_field_3d(reader_3d, fields, inline_numbers=None, xline_numbers=None, null=None): """Extract a single trace header field from all trace headers as an array.