Merge branch 'ext-numpy'

2026-06-27 17:17:13 +08:00 · 2015-06-14 17:38:42 +02:00
parent 042b415ca1 2c5774c0b4
commit c7209f909a
12 changed files with 814 additions and 9 deletions
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+
+"""Extract a single field from the trace headers of  3D seismic volume to a 2D Numpy array.
+
+This utility assumes the inline and crossline numbers are evenly spaced.
+Each inline of the source data will be represented as a single row, and
+each crossline as a single column in the resulting 2D array.
+
+Usage: extract_trace_header_field.py [-h] [--null NULL]
+                    segy-file npy-file field-name
+
+Positional arguments:
+  segy-file      Path to an existing SEG Y file of 3D seismic data
+  npy-file       Path to the Numpy array file to be created for the timeslice
+  field-name     Zero based index of the time slice to be extracted
+
+Optional arguments:
+  -h, --help     show this help message and exit
+  --dtype DTYPE  Numpy data type. If not provided a dtype compatible with the
+                 SEG Y data will be used.
+  --null NULL    Sample value to use for missing or short traces.
+
+Example:
+
+  extract_trace_header_field.py stack_final_int8.sgy slice_800.npy ensemble_num
+"""
+
+import argparse
+import os
+import sys
+import traceback
+
+import numpy as np
+
+from numpy import s_
+
+from segpy.reader import create_reader
+from segpy.trace_header import TraceHeaderRev1
+from segpy_numpy.dtypes import make_dtype
+from segpy_numpy.extract import extract_inline_3d, extract_trace_header_field_3d
+
+
+class DimensionalityError(Exception):
+    pass
+
+
+
+def extract_header_field(segy_filename, out_filename, field_name, null=None):
+    """Extract a timeslice from a 3D SEG Y file to a Numpy NPY file.
+
+    Args:
+        segy_filename: Filename of a SEG Y file.
+
+        out_filename: Filename of the NPY file.
+
+        inline_index: The zero-based index (increasing with depth) of the slice to be extracted.
+
+        null: Optional sample value to use for missing or short traces. Defaults to zero.
+    """
+    header_field = getattr(TraceHeaderRev1, field_name)
+
+    with open(segy_filename, 'rb') as segy_file:
+
+        segy_reader = create_reader(segy_file)
+
+        header_field_arrays = extract_trace_header_field_3d(segy_reader, [header_field], null)
+        return header_field_arrays
+
+
+def nullable_int(s):
+    return None if not bool(s) else int(s)
+
+
+def main(argv=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument("segy_file", metavar="segy-file",
+                        help="Path to an existing SEG Y file of 3D seismic data")
+
+    parser.add_argument("npy_file", metavar="npy-file",
+                        help="Path to the Numpy array file to be created for the timeslice")
+
+    parser.add_argument("field_name", metavar="field-name", type=str,
+                        help="Name of the trace header field to be extracted", )
+
+    parser.add_argument("--null",  type=nullable_int, default="",
+                        help="Header value to use for missing or short traces.")
+
+    if argv is None:
+        argv = sys.argv[1:]
+
+    args = parser.parse_args(argv)
+
+    try:
+        extract_header_field(
+            segy_filename=args.segy_file,
+            out_filename=args.npy_file,
+            field_name=args.field_name,
+            null=args.null)
+    except (FileNotFoundError, IsADirectoryError) as e:
+        print(e, file=sys.stderr)
+        return os.EX_NOINPUT
+    except PermissionError as e:
+        print(e, file=sys.stderr)
+        return os.EX_NOPERM
+    except Exception as e:
+        traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
+        return os.EX_SOFTWARE
+    return os.EX_OK
+
+
+if __name__ == '__main__':
+    sys.exit(main())
+
+
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+
+"""Extract an inline from a 3D seismic volume to a Numpy array.
+
+This utility assumes the inline and crossline numbers are evenly spaced.
+Each inline of the source data will be represented as a single row, and
+each crossline as a single column in the resulting 2D array.
+
+Usage: inline.py [-h] [--dtype DTYPE] [--null NULL]
+                    segy-file npy-file inline-number
+
+Positional arguments:
+  segy-file      Path to an existing SEG Y file of 3D seismic data
+  npy-file       Path to the Numpy array file to be created for the timeslice
+  slice-index    Zero based index of the time slice to be extracted
+
+Optional arguments:
+  -h, --help     show this help message and exit
+  --dtype DTYPE  Numpy data type. If not provided a dtype compatible with the
+                 SEG Y data will be used.
+  --null NULL    Sample value to use for missing or short traces.
+
+Example:
+
+  inline.py stack_final_int8.sgy slice_800.npy 800 --null=42.0 --dtype=f
+"""
+
+import argparse
+import os
+import sys
+import traceback
+
+import numpy as np
+
+from numpy import s_
+
+from segpy.reader import create_reader
+from segpy_numpy.dtypes import make_dtype
+from segpy_numpy.extract import extract_inline_3d
+
+
+class DimensionalityError(Exception):
+    pass
+
+
+def extract_inline(segy_filename, out_filename, inline_number, null=None):
+    """Extract a timeslice from a 3D SEG Y file to a Numpy NPY file.
+
+    Args:
+        segy_filename: Filename of a SEG Y file.
+
+        out_filename: Filename of the NPY file.
+
+        inline_index: The zero-based index (increasing with depth) of the slice to be extracted.
+
+        null: Optional sample value to use for missing or short traces. Defaults to zero.
+    """
+    with open(segy_filename, 'rb') as segy_file:
+
+        segy_reader = create_reader(segy_file)
+        inline_array = extract_inline_3d(segy_reader, inline_number, null=null)
+        return inline_array
+
+
+def nullable_float(s):
+    return None if not bool(s) else float(s)
+
+
+def main(argv=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument("segy_file", metavar="segy-file",
+                        help="Path to an existing SEG Y file of 3D seismic data")
+
+    parser.add_argument("npy_file", metavar="npy-file",
+                        help="Path to the Numpy array file to be created for the timeslice")
+
+    parser.add_argument("inline_number", metavar="inline-number", type=int,
+                        help="Zero based index of the inline to be extracted", )
+
+    parser.add_argument("--null",  type=nullable_float, default="",
+                        help="Sample value to use for missing or short traces.")
+
+    if argv is None:
+        argv = sys.argv[1:]
+
+    args = parser.parse_args(argv)
+
+    try:
+        extract_inline(
+            segy_filename=args.segy_file,
+            out_filename=args.npy_file,
+            inline_number=args.inline_number,
+            null=args.null)
+    except (FileNotFoundError, IsADirectoryError) as e:
+        print(e, file=sys.stderr)
+        return os.EX_NOINPUT
+    except PermissionError as e:
+        print(e, file=sys.stderr)
+        return os.EX_NOPERM
+    except Exception as e:
+        traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
+        return os.EX_SOFTWARE
+    return os.EX_OK
+
+
+if __name__ == '__main__':
+    sys.exit(main())
+
@@ -9,7 +9,7 @@ NUMPY_DTYPES = {'ibm':     numpy.dtype('f4'),
                'int8':    numpy.dtype('i1')}


-def make_dtype(data_sample_format):
+def make_dtype(data_sample_format): # TODO: What is the correct name for this arg?
    """Convert a SEG Y data sample format to a compatible numpy dtype.

    Note :
@@ -0,0 +1,402 @@
+"""Tools for interoperability between Segpy and Numpy arrays."""
+from collections import namedtuple
+import numpy as np
+from segpy.header import Header, SubFormatMeta
+from segpy.packer import make_header_packer
+
+from segpy.util import ensure_superset
+from segpy_numpy.dtypes import make_dtype
+
+
+def extract_trace_header_field_3d(reader_3d, fields, inline_numbers=None, xline_numbers=None, null=None):
+    """Extract a single trace header field from all trace headers as an array.
+
+    Args:
+        reader_3d: A SegYReader3D
+
+        fields: A an iterable series where each item is either the name of a field as a string
+            or an object such as a NamedField with a 'name' attribute which in turn is the name
+            of a field as a string, such as a NamedField.
+
+        inline_numbers: The inline numbers for which traces are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All traces within the each crossline will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those traces at
+                inline numbers corresponding to the items in the sequence will be extracted. The
+                traces will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example inline_numbers=range(100, 200, 2) will extract alternate
+                traces from inline number 100 to inline number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                inline numbers. For example inline_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred traces, irrespective of their numbers.
+
+        xline_numbers: The crossline numbers at which traces are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All traces at within each inline will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those traces at
+                crossline numbers corresponding to the items in the sequence will be extracted. The
+                traces will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example xline_numbers=range(100, 200, 2) will extract alternate
+                traces from crossline number 100 to crossline number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                crossline numbers. For example xline_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred traces, irrespective of their numbers.
+
+        null: An optional null value for missing traces. The null value must be convertible
+            to all field value types.
+
+    Returns:
+        An namedtuple object with attributes which are two-dimensional Numpy arrays.
+        If a null value was specified the arrays will be ndarrays, otherwise they
+        will be masked arrays.  The attributes of the named tuple are in the same
+        order as the fields specified in the `fields` argument.
+
+    Raises:
+        AttributeError: If the the named fields do not exist in the trace header definition.
+    """
+    field_names = [_extract_field_name(field) for field in fields]
+
+    inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers)
+    xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers)
+    shape = (len(inline_numbers), len(xline_numbers))
+
+    class SubHeader(metaclass=SubFormatMeta,
+                    parent_format=reader_3d.trace_header_format_class,
+                    parent_field_names=field_names):
+        pass
+
+    sub_header_packer = make_header_packer(SubHeader, reader_3d.endian)
+    TraceHeaderArrays = namedtuple('TraceHeaderArrays', field_names)
+
+    arrays = (_make_array(shape,
+                          make_dtype(getattr(SubHeader, field_name).value_type.SEG_Y_TYPE),
+                          null)
+              for field_name in field_names)
+
+    trace_header_arrays = TraceHeaderArrays(*arrays)
+
+    for inline_index, inline_number in enumerate(inline_numbers):
+        for xline_index, xline_number in enumerate(xline_numbers):
+            inline_xline_number = (inline_number, xline_number)
+            if reader_3d.has_trace_index(inline_xline_number):
+                trace_index = reader_3d.trace_index((inline_number, xline_number))
+                trace_header = reader_3d.trace_header(trace_index, sub_header_packer)
+
+                for field_name, a in zip(field_names, trace_header_arrays):
+                    field_value = getattr(trace_header, field_name)
+                    a[inline_index, xline_index] = field_value
+
+    return trace_header_arrays
+
+
+def extract_trace(reader, trace_index, sample_numbers):
+    """Extract an single trace as a one-dimensional array.
+
+    Args:
+        reader: A SegYReader3D object.
+
+        trace_index: The index of the trace to be extracted.
+
+        sample_numbers: The sample numbers within each trace at which samples are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All samples within the trace will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those samples at
+                sample numbers corresponding to the items in the sequence will be extracted. The
+                samples will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example sample_numbers=range(100, 200, 2) will extract alternate
+                samples from sample number 100 to sample number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                sample numbers. For example sample_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred samples, irrespective of their numbers.
+
+    Returns:
+        A one-dimensional array.
+    """
+    if not reader.has_trace_index(trace_index):
+        raise ValueError("Inline number {} not present in {}".format(trace_index, reader))
+
+    sample_numbers = ensure_superset(range(0, reader.max_num_trace_samples()), sample_numbers)
+
+    trace_sample_start = sample_numbers[0]
+    trace_sample_stop = min(sample_numbers[-1] + 1, reader.num_trace_samples(trace_index))
+    trace_samples = reader.trace_samples(trace_index, trace_sample_start, trace_sample_stop)
+    arr = np.fromiter((trace_samples[sample_number - trace_sample_start] for sample_number in sample_numbers),
+                      make_dtype(reader.data_sample_format))
+    return arr
+
+
+def extract_inline_3d(reader_3d, inline_number, xline_numbers=None, sample_numbers=None, null=None):
+    """Extract an inline as a two-dimensional array.
+
+    Args:
+        reader: A SegYReader3D object.
+
+        inline_number: The number of the inline to be extracted.
+
+        xline_numbers: The crossline numbers within the inline at which traces are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All traces within the inline will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those traces at
+                crossline numbers corresponding to the items in the sequence will be extracted. The
+                traces will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example xline_numbers=range(100, 200, 2) will extract alternate
+                traces from crossline number 100 to crossline number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                crossline numbers. For example xline_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred traces, irrespective of their numbers.
+
+        sample_numbers: The sample numbers within each trace at which samples are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All samples within the trace will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those samples at
+                sample numbers corresponding to the items in the sequence will be extracted. The
+                samples will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example sample_numbers=range(100, 200, 2) will extract alternate
+                samples from sample number 100 to sample number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                sample numbers. For example sample_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred samples, irrespective of their numbers.
+
+        null: A null value. When None is specified as the null value a masked array will be returned.
+
+    Returns:
+        A two-dimensional array. If null is None a masked array will be returned, otherwise
+        a regular array will be returned. The first (slowest changing) index will correspond
+        to the traces (index zero will correspond to the first crossline number). The
+        second (fastest changing) index will correspond to the samples (index zero will
+        correspond to the first sample number).
+    """
+    if inline_number not in reader_3d.inline_numbers():
+        raise ValueError("Inline number {} not present in {}".format(inline_number, reader_3d))
+
+    xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers)
+    sample_numbers = ensure_superset(range(0, reader_3d.max_num_trace_samples()), sample_numbers)
+    shape = (len(xline_numbers), len(sample_numbers))
+    dtype = make_dtype(reader_3d.data_sample_format)
+    array = _make_array(shape, dtype, null)
+
+    if isinstance(sample_numbers, range):
+        _populate_inline_array_over_sample_range(reader_3d, inline_number, xline_numbers, sample_numbers, array)
+    else:
+        _populate_inline_array_numbered_samples(reader_3d, inline_number, xline_numbers, sample_numbers, array)
+
+    return array
+
+
+def _populate_inline_array_numbered_samples(reader_3d, inline_number, xline_numbers, sample_numbers, array):
+    for xline_index, xline_number in enumerate(xline_numbers):
+        inline_xline_number = (inline_number, xline_number)
+        if reader_3d.has_trace_index(inline_xline_number):
+            trace_index = reader_3d.trace_index(inline_xline_number)
+            num_trace_samples = reader_3d.num_trace_samples(trace_index)
+            trace_sample_start = sample_numbers[0]
+            trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples)
+            trace_samples = reader_3d.trace_samples(trace_index, trace_sample_start, trace_sample_stop)
+            for sample_index, sample_number in enumerate(sample_numbers):
+                array[xline_index, sample_index] = trace_samples[sample_number - trace_sample_start]
+
+
+def _populate_inline_array_over_sample_range(reader_3d, inline_number, xline_numbers, sample_numbers, array):
+    for xline_index, xline_number in enumerate(xline_numbers):
+        inline_xline_number = (inline_number, xline_number)
+        if reader_3d.has_trace_index(inline_xline_number):
+            trace_index = reader_3d.trace_index(inline_xline_number)
+            num_trace_samples = reader_3d.num_trace_samples(trace_index)
+            trace_sample_stop = min(sample_numbers.stop, num_trace_samples)
+            trace_samples = reader_3d.trace_samples(trace_index, sample_numbers.start, trace_sample_stop)
+            source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step)
+            array[xline_index, :] = trace_samples[source_slice]
+
+
+def extract_xline_3d(reader_3d, xline_number, inline_numbers=None, sample_numbers=None, null=None):
+    """Extract an inline as a two-dimensional array.
+
+    Args:
+        reader: A SegYReader3D object.
+
+        xline_number: The number of the xline to be extracted.
+
+        inline_numbers: The inline numbers within the crossline at which traces are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All traces within the crossline will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those traces at
+                inline numbers corresponding to the items in the sequence will be extracted. The
+                traces will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example inline_numbers=range(100, 200, 2) will extract alternate
+                traces from inline number 100 to inline number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                inline numbers. For example inline_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred traces, irrespective of their numbers.
+
+        sample_numbers: The sample numbers within each trace at which samples are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All samples within the trace will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those samples at
+                sample numbers corresponding to the items in the sequence will be extracted. The
+                samples will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example sample_numbers=range(100, 200, 2) will extract alternate
+                samples from sample number 100 to sample number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                sample numbers. For example sample_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred samples, irrespective of their numbers.
+
+        null: A null value. When None is specified as the null value a masked array will be returned.
+
+    Returns:
+        A two-dimensional array. If null is None a masked array will be returned, otherwise
+        a regular array will be returned. The first (slowest changing) index will correspond
+        to the traces (index zero will correspond to the first inline number). The
+        second (fastest changing) index will correspond to the samples (index zero will
+        correspond to the first sample number).
+    """
+    if xline_number not in reader_3d.xline_numbers():
+        raise ValueError("Crossline number {} not present in {}".format(xline_number, reader_3d))
+
+    inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers)
+    sample_numbers = ensure_superset(range(0, reader_3d.max_num_trace_samples()), sample_numbers)
+    shape = (len(inline_numbers), len(sample_numbers))
+    dtype = make_dtype(reader_3d.data_sample_format)
+    array = _make_array(shape, dtype, null)
+
+    if isinstance(sample_numbers, range):
+        _populate_xline_array_over_sample_range(reader_3d, xline_number, inline_numbers, sample_numbers, array)
+    else:
+        _populate_xline_array_numbered_samples(reader_3d, xline_number, inline_numbers, sample_numbers, array)
+
+    return array
+
+
+def _populate_xline_array_numbered_samples(reader_3d, xline_number, inline_numbers, sample_numbers, array):
+    for inline_index, inline_number in enumerate(inline_numbers):
+        inline_xline_number = (inline_number, xline_number)
+        if reader_3d.has_trace_index(inline_xline_number):
+            trace_index = reader_3d.trace_index(inline_xline_number)
+            num_trace_samples = reader_3d.num_trace_samples(trace_index)
+            trace_sample_start = sample_numbers[0]
+            trace_sample_stop = min(sample_numbers[-1] + 1, num_trace_samples)
+            trace_samples = reader_3d.trace_samples(trace_index, trace_sample_start, trace_sample_stop)
+            for sample_index, sample_number in enumerate(sample_numbers):
+                array[inline_index, sample_index] = trace_samples[sample_number - trace_sample_start]
+
+
+def _populate_xline_array_over_sample_range(reader_3d, xline_number, inline_numbers, sample_numbers, array):
+    for inline_index, inline_number in enumerate(inline_numbers):
+        inline_xline_number = (inline_number, xline_number)
+        if reader_3d.has_trace_index(inline_xline_number):
+            trace_index = reader_3d.trace_index(inline_xline_number)
+            num_trace_samples = reader_3d.num_trace_samples(trace_index)
+            trace_sample_stop = min(sample_numbers.stop, num_trace_samples)
+            trace_samples = reader_3d.trace_samples(trace_index, sample_numbers.start, trace_sample_stop)
+            source_slice = slice(sample_numbers.start, trace_sample_stop, sample_numbers.step)
+            array[inline_index, :] = trace_samples[source_slice]
+
+
+def extract_timeslice_3d(reader_3d, sample_number, inline_numbers=None, xline_numbers=None, null=None):
+    """Extract a single timeslice header field from all trace headers as an array.
+
+    Args:
+        reader_3d: A SegYReader3D
+
+        sample_number: The zero-based sample index.
+
+        inline_numbers: The inline numbers for which traces are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All traces within the each crossline will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those traces at
+                inline numbers corresponding to the items in the sequence will be extracted. The
+                traces will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example inline_numbers=range(100, 200, 2) will extract alternate
+                traces from inline number 100 to inline number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                inline numbers. For example inline_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred traces, irrespective of their numbers.
+
+        xline_numbers: The crossline numbers at which traces are to be extracted.
+            This argument can be specified in three ways:
+
+            None (the default) - All traces at within each inline will be be extracted.
+
+            sequence - When a sequence, such as a range or a list is provided only those traces at
+                crossline numbers corresponding to the items in the sequence will be extracted. The
+                traces will always be extracted in increasing numeric order and duplicate entries
+                will be ignored.  For example xline_numbers=range(100, 200, 2) will extract alternate
+                traces from crossline number 100 to crossline number 198 inclusive.
+
+            slice - When a slice object is provided the slice will be applied to the sequence of all
+                crossline numbers. For example xline_numbers=slice(100, -100) will omit the first
+                one hundred and the last one hundred traces, irrespective of their numbers.
+
+        null: An optional null value for missing traces. The null value must be convertible
+            to all field value types.
+
+    Returns:
+        An namedtuple object with attributes which are two-dimensional Numpy arrays.
+        If a null value was specified the arrays will be ndarrays, otherwise they
+        will be masked arrays.  The attributes of the named tuple are in the same
+        order as the fields specified in the `fields` argument.
+
+    Raises:
+        AttributeError: If the the named fields do not exist in the trace header definition.
+    """
+
+    inline_numbers = ensure_superset(reader_3d.inline_numbers(), inline_numbers)
+    xline_numbers = ensure_superset(reader_3d.xline_numbers(), xline_numbers)
+    shape = (len(inline_numbers), len(xline_numbers))
+    dtype = make_dtype(reader_3d.data_sample_format)
+    array = _make_array(shape, dtype, null)
+    sample_number_stop = sample_number + 1
+
+    for inline_index, inline_number in enumerate(inline_numbers):
+        for xline_index, xline_number in enumerate(xline_numbers):
+            inline_xline_number = (inline_number, xline_number)
+            if reader_3d.has_trace_index(inline_xline_number):
+                trace_index = reader_3d.trace_index((inline_number, xline_number))
+                trace_samples = reader_3d.trace_samples(trace_index, sample_number, sample_number_stop)
+                array[inline_index, xline_index] = trace_samples[0]
+    return array
+
+def _make_array(shape, dtype, null=None):
+    """Make an array"""
+    if null is None:
+        return np.ma.masked_all(shape, dtype)
+    array = np.empty(shape, dtype)
+    array.fill(null)
+    return array
+
+def _extract_field_name(field):
+    """Args:
+        field: If field in an object with a name attribute the name is returned. If field is a string it is returned
+            unmodified.
+      Raises:
+        TypeError:
+    """
+    if isinstance(field, str):
+        return field
+    try:
+        return field.name
+    except AttributeError:
+        raise TypeError("{!r} neither is a string nor has a 'name' attribute".format(field))
@@ -0,0 +1 @@
+
@@ -0,0 +1,10 @@
+import unittest
+
+
+class MyTestCase(unittest.TestCase):
+    def test_something(self):
+        self.assertEqual(True, False)
+
+
+if __name__ == '__main__':
+    unittest.main()
@@ -511,7 +511,8 @@ class LinearRegularCatalog(Mapping):
            key_max: The maximum key.
            key_stride: The difference between successive keys.
            value_start: The value corresponding to the minimum key.
-            value_max: The value corresponding to the maximum key.
+            value_stop: The value corresponding to the maximum key.
+            value_stride:

        Raises:
            ValueError: There is any inconsistency in the keys, strides,
@@ -4,7 +4,7 @@ from itertools import chain

 from segpy import __version__
 from segpy.docstring import docstring_property
-from segpy.util import underscores_to_camelcase, first_sentence, super_class
+from segpy.util import underscores_to_camelcase, first_sentence, super_class, collect_attributes


 class Header:
@@ -96,7 +96,7 @@ class FormatMeta(type):
    """

    @classmethod
-    def __prepare__(mcs, name, bases):
+    def __prepare__(mcs, name, bases, *args, **kwargs):
        return OrderedDict()

    def __new__(mcs, name, bases, namespace):
@@ -134,6 +134,63 @@ class FormatMeta(type):
        return super().__new__(mcs, name, bases, namespace)


+def is_public_non_field_attr(name, attr):
+    return (not name.startswith('_')) and (not isinstance(attr, HeaderFieldDescriptor) and (not isinstance(attr, classmethod)))
+
+
+class SubFormatMeta(FormatMeta):
+    """A metaclass for a format class which has a subset of the fields in an existing format class.
+
+    SubFormat classes can be used to reduce storage requirements and increase performance, since they can be
+    used to generate simpler HeaderPackers.
+
+    SubFormat classes must be declared as:
+
+    class MySubFormat(metaclass=SubFormatMeta,
+                     format_class=MyFormatClass,
+                     field_names=['first_field_name',
+                                  'second_field_name'])
+        pass
+    """
+
+    def __new__(mcs, name, bases, namespace, parent_format, parent_field_names):
+        """
+        Args:
+            name: The name of the actual class being created by this metaclass.
+            bases: The base classes of the actual class.
+            parent_format: An existing Format (?Header) on which (sort out terminology here)
+                of which this format has a subset of fields.
+            field_names: An iterable series of field names which this format should
+                duplicate from the parent_format.
+        """
+
+        # TODO: We need an inheritance aware getattr for class attributes!
+        # Copy the requested fields, by creating a new descriptor based
+        # on information retrieved from the existing descriptor
+
+        for field_name in parent_field_names:
+            named_field = getattr(parent_format, field_name)
+            assert named_field.name == field_name
+            field_copy = field(named_field.value_type,
+                               named_field.offset,
+                               named_field.default,
+                               named_field.documentation)
+            namespace[field_name] = field_copy
+
+        # Copy other non-field class attributes
+        non_field_attributes = list(collect_attributes(parent_format, Header, is_public_non_field_attr))
+        namespace.update((name, value) for _, name, value in non_field_attributes)
+
+        # Add a reference back to the original format
+        namespace['_parent_format'] = parent_format
+
+        return super().__new__(mcs, name, bases, namespace)
+
+    def __init__(mcs, name, bases, namespace, parent_format, parent_field_names):
+        # Absorb the additional arguments
+        super().__init__(name, bases, namespace)
+
+
 class NamedField:
    """Instances of NamedField can be detected by the NamedDescriptorResolver metaclass."""

@@ -292,6 +292,7 @@ class SegYReader(object):
        self._revision = extract_revision(self._binary_reel_header)
        self._bytes_per_sample = bytes_per_sample(
            self._binary_reel_header, self.revision)
+        self._max_num_trace_samples = None

    def __getstate__(self):
        """Copy the reader's state to a pickleable dictionary.
@@ -311,6 +312,8 @@ class SegYReader(object):
        file_pos = self._fh.tell()
        file_mode = self._fh.mode

+        _ = self.max_num_trace_samples()
+
        state = self.__dict__.copy()
        state['__version__'] = __version__
        state['_file_name'] = filename
@@ -363,7 +366,9 @@ class SegYReader(object):

    def max_num_trace_samples(self):
        """The number of samples in the trace_samples with the most samples."""
-        return max(self._trace_length_catalog.values())
+        if self._max_num_trace_samples is None:
+            self._max_num_trace_samples = max(self._trace_length_catalog.values())
+        return self._max_num_trace_samples

    def num_trace_samples(self, trace_index):
        """The number of samples in the specified trace_samples."""
@@ -416,12 +421,15 @@ class SegYReader(object):
            self._fh, start_pos, seg_y_type, num_samples_to_read, self._endian)
        return trace_values

-    def trace_header(self, trace_index):
+    def trace_header(self, trace_index, header_packer_override=None):
        """Read a specific trace_samples.

        Args:
            trace_index: An integer in the range zero to num_traces() - 1

+            header_packer_override: Override the default header packer (for example
+               to more efficiently extract only a few fields)
+
        Returns:
            A TraceHeader corresponding to the requested trace_samples.

@@ -431,10 +439,19 @@ class SegYReader(object):
        """
        if not (0 <= trace_index < self.num_traces()):
            raise ValueError("Trace index {} out of range".format(trace_index))
+        header_packer = self._trace_header_packer if header_packer_override is None else header_packer_override
        pos = self._trace_offset_catalog[trace_index]
-        trace_header = read_trace_header(self._fh, self._trace_header_packer, pos)
+        trace_header = read_trace_header(self._fh, header_packer, pos)
        return trace_header

+    @property
+    def trace_header_format_class(self):
+        """The trace header format class.
+
+        Instances of this class are what is returned from trace_header() unless the
+        header_packer has been overridden."""
+        return self._trace_header_packer.header_format_class
+
    @property
    def dimensionality(self):
        """The spatial dimensionality of the data.
@@ -6,8 +6,12 @@ from itertools import chain

 class SortedFrozenSet(Sequence, Set):

-    def __init__(self, items=None):
-        self._items = sorted(set(items)) if items is not None else []
+    def __new__(cls, items=None):
+        if type(items) == cls:
+            return items
+        obj = object.__new__(cls)
+        obj._items = sorted(set(items)) if items is not None else []
+        return obj

    def __contains__(self, item):
        try:
@@ -347,6 +347,8 @@ def catalog_traces(fh, bps, trace_header_format=TraceHeaderRev1, endian='>', pro
    alt_line_catalog_builder = CatalogBuilder()
    cdp_catalog_builder = CatalogBuilder()

+    # TODO: Use a SubHeaderFormat to only load the fields we're interestedin
+
    for trace_number in count():
        progress_callback(_READ_PROPORTION * pos_begin / length)
        fh.seek(pos_begin)
@@ -3,7 +3,9 @@ import time
 import os
 import sys

+from collections.abc import Set
 from itertools import (islice, cycle, tee, chain, repeat)
+
 from segpy.sorted_set import SortedFrozenSet

 UNKNOWN_FILENAME = '<unknown>'
@@ -346,6 +348,11 @@ def make_sorted_distinct_sequence(iterable):
        An immutable collection which supports the Sized, Iterable,
        Container and Sequence protocols.
    """
+    if isinstance(iterable, range):
+        if iterable.step > 0:
+            return iterable
+        else:
+            return reversed(iterable)
    sorted_set = SortedFrozenSet(iterable)
    if len(sorted_set) == 1:
        return single_item_range(sorted_set[0])
@@ -386,3 +393,85 @@ def hash_for_file(fh, *args):
        sha1.update(encoded_arg)
    digest = sha1.hexdigest()
    return digest
+
+
+def is_range_superset_of_range(superset_range, subset_range):
+    """Are all the elements of
+
+    """
+    if subset_range.start not in superset_range:
+        return False
+    if subset_range.step % superset_range.step != 0:
+        return False
+    if subset_range[-1] > superset_range[-1]:
+        return False
+    assert set(subset_range).issubset(set(superset_range))
+    return True
+
+
+def is_superset(superset, subset):
+    """A more general version of set.issuperset that is smart enough to work with ranges."""
+    if isinstance(subset, range) and isinstance(superset, range):
+        return is_range_superset_of_range(superset, subset)
+    if isinstance(superset, range):
+        return all(item in superset for item in subset)
+    if isinstance(superset, set):
+        return superset.issuperset(subset)
+    if isinstance(subset, set):
+        return subset.issubset(superset)
+    return set(superset).issuperset(subset)
+
+
+def ensure_superset(superset, subset):
+    """Ensure that one collection is a subset of another.
+
+    Args:
+        all_items: A sequence containing all items.
+
+        subset: Subset must either be a collection the elements of which are a subset of
+            all_items, or a slice object, in which case the subset items will be sliced
+            from all_items.
+    Returns:
+        A sorted, distinct collection which is a subset of all_items.
+
+    Raises:
+        ValueError: If the items in subset are not a subset of the items in all_items.
+    """
+    if subset is None:
+        return superset
+    elif isinstance(subset, slice):
+        return superset[subset]
+    else:
+        subset = make_sorted_distinct_sequence(subset)
+        if not is_superset(superset, subset):
+            raise ValueError("subset_or_slice {!r} is not a subset of all_items {!r}"
+                             .format(subset, superset))
+        return subset
+
+def true(*args, **kwargs):
+    return True
+
+
+def collect_attributes(derived_class, base_class=object, predicate=None):
+    """
+
+    Args:
+        derived_class: The class at which to start searching.
+        base_class: The class at which to stop searching
+        predicate: A predicate which accepts
+
+    Returns:
+        A generator of items containing the (class, attribute_name)
+    """
+    # TODO: Consider using the inspect module to do this
+    if predicate is None:
+        predicate = true
+
+    for cls in derived_class.__mro__:
+        for key, value in vars(cls).items():
+            if predicate(key, value):
+                yield cls, key, value
+        if cls is base_class:
+            break
+
+