From a50c947a9555a3430b5c722a201b8dcade595bdb Mon Sep 17 00:00:00 2001 From: Robert Smallshire Date: Wed, 6 May 2015 16:27:45 +0200 Subject: [PATCH] Removes cdp_range(), inline_range() and xline_range() methods which could have misleading results. Introduces cdp_numbers(), inline_numbers() and xline_numbers() which are always accurate. --- segpy/catalog.py | 3 +- segpy/reader.py | 96 ++++------ segpy/sorted_set.py | 71 ++++++++ segpy/util.py | 24 ++- test/test_sorted_set.py | 378 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 512 insertions(+), 60 deletions(-) create mode 100644 segpy/sorted_set.py create mode 100644 test/test_sorted_set.py diff --git a/segpy/catalog.py b/segpy/catalog.py index a5e7719..eb7d8ef 100644 --- a/segpy/catalog.py +++ b/segpy/catalog.py @@ -2,6 +2,7 @@ from abc import abstractmethod, ABCMeta from collections import Mapping, Sequence, OrderedDict from fractions import Fraction import reprlib +from segpy.sorted_set import SortedFrozenSet from segpy.util import contains_duplicates, measure_stride, minmax @@ -399,7 +400,7 @@ class ConstantCatalog(Catalog): value: A value associated with all keys. """ super(ConstantCatalog, self).__init__(value_min=value, value_max=value) - self._items = frozenset(keys) + self._items = SortedFrozenSet(keys) def __getitem__(self, key): if key not in self: diff --git a/segpy/reader.py b/segpy/reader.py index 3b0b58a..074e9f4 100644 --- a/segpy/reader.py +++ b/segpy/reader.py @@ -1,9 +1,10 @@ from __future__ import print_function from segpy.encoding import ASCII from segpy.packer import make_header_packer +from segpy.sorted_set import SortedFrozenSet from segpy.trace_header import TraceHeaderRev1 -from segpy.util import file_length, filename_from_handle +from segpy.util import file_length, filename_from_handle, make_sorted_distinct_sequence from segpy.datatypes import DATA_SAMPLE_FORMAT_TO_SEG_Y_TYPE, SEG_Y_TYPE_DESCRIPTION, SEG_Y_TYPE_TO_CTYPE, size_in_bytes from segpy.toolkit import (extract_revision, bytes_per_sample, @@ -413,67 +414,50 @@ class SegYReader3D(SegYReader): trace_offset_catalog, trace_length_catalog, trace_header_format, encoding, endian) self._line_catalog = line_catalog - self._num_inlines = None - self._num_xlines = None + self._inline_numbers = None + self._xline_numbers = None def _dimensionality(self): return 3 - def inline_range(self): - """A range encompassing inline numbers. + def inline_numbers(self): + """A sorted immutable collection of inline numbers. - The number of inlines within this range can be found with len(reader.inline_range()). + Test for membership in this collection to determine if a particular inline + exists or iterate over this collection to generate all inline numbers in + order. Returns: - A range() object with start set to the first inline number and stop set to - one beyond the last inline number. The range always has a step of one, although - this should not be taken as meaning that any intermediate inline number generated - by the range is valid. + A sorted immutable collection of inline numbers which supports the + Sized, Iterable, Container and Sequence protocols. """ - start = self._line_catalog.key_min()[0] - stop = self._line_catalog.key_max()[0] + 1 - return range(start, stop) + if self._inline_numbers is None: + self._inline_numbers = make_sorted_distinct_sequence(i for i, j in self._line_catalog) + return self._inline_numbers def num_inlines(self): - """The number of distinct inlines in the survey. + """The number of distinct inlines in the survey.""" + return len(self.inline_numbers()) - This number is not necessarily the same as the value returned by - len(reader.inline_range()) as there may be missing inlines within the range. - """ - if self._num_inlines is None: - try: - self._num_inlines = self._line_catalog.i_max - self._line_catalog.i_min + 1 - except AttributeError: - self._num_inlines = len(set(i for i, j in self._line_catalog)) - return self._num_inlines + def xline_numbers(self): + """A sorted immutable collection of crossline numbers. - def xline_range(self): - """A range encompassing crossline numbers. - - The number of crosslines within this range can be found with len(reader.crossline_range()). + Test for membership in this collection to determine if a particular crossline + exists or iterate over this collection to generate all crossline numbers in + order. Returns: - A range() object with start set to the first crossline number and stop set to - one beyond the last crossline number. The range always has a step of one, although - this should not be taken as meaning that any intermediate crossline number generated - by the range is valid. + A sorted immutable collection of crossline numbers which supports the + Sized, Iterable, Container and Sequence protocols. """ - start = self._line_catalog.key_min()[1] - stop = self._line_catalog.key_max()[1] + 1 - return range(start, stop) + if self._xline_numbers is None: + self._xline_numbers = make_sorted_distinct_sequence(j for i, j in self._line_catalog) + return self._xline_numbers def num_xlines(self): - """The number of distinct crosslines in the survey. + """The number of distinct crosslines in the survey.""" + return len(self.xline_numbers()) - This number is not necessarily the same as the value returned by - len(reader.xline_range()) as there may be missing crosslines within the range. - """ - if self._num_xlines is None: - try: - self._num_xlines = self._line_catalog.j_max - self._line_catalog.j_min + 1 - except AttributeError: - self._num_xlines = len(set(j for i, j in self._line_catalog)) - return self._num_xlines def inline_xline_numbers(self): """An iterator over all (inline_number, xline_number) tuples @@ -557,29 +541,25 @@ class SegYReader2D(SegYReader): trace_offset_catalog, trace_length_catalog, trace_header_format, encoding, endian) self._cdp_catalog = cdp_catalog + self._cdp_numbers = None def _dimensionality(self): return 2 def cdp_numbers(self): - """An iterator over all cdp numbers corresponding to traces. - """ - return iter(self._cdp_catalog) + """A sorted immutable collection of CDP numbers. - def cdp_range(self): - """A range encompassing CDP numbers. - - The number of CDPs within this range can be found with len(reader.cdp_range()). + Test for membership in this collection to determine if a particular CDP + exists or iterate over this collection to generate all CDP numbers in + order. Returns: - A range() object with start set to the first CDP number and stop set to - one beyond the last CDP number. The range always has a step of one, although - this should not be taken as meaning that any intermediate CDP number generated - by the range is valid. + A sorted immutable collection of CDP numbers which supports the + Sized, Iterable, Container and Sequence protocols. """ - start = self._cdp_catalog.value_min() - stop = self._cdp_catalog.value_max() + 1 - return range(start, stop) + if self._cdp_numbers is None: + self._cdp_numbers = make_sorted_distinct_sequence(self._cdp_catalog.keys()) + return self._cdp_numbers def num_cdps(self): """The number of distinct CDPs. diff --git a/segpy/sorted_set.py b/segpy/sorted_set.py new file mode 100644 index 0000000..507ae93 --- /dev/null +++ b/segpy/sorted_set.py @@ -0,0 +1,71 @@ + +from bisect import bisect_left +from collections.abc import Sequence, Set +from itertools import chain + + +class SortedFrozenSet(Sequence, Set): + + def __init__(self, items=None): + self._items = sorted(set(items)) if items is not None else [] + + def __contains__(self, item): + try: + self.index(item) + return True + except ValueError: + return False + + def __len__(self): + return len(self._items) + + def __iter__(self): + return iter(self._items) + + def __getitem__(self, index): + result = self._items[index] + return SortedFrozenSet(result) if isinstance(index, slice) else result + + def __repr__(self): + return "SortedFrozenSet({})".format(repr(self._items) if self._items else '') + + def __eq__(self, rhs): + if not isinstance(rhs, SortedFrozenSet): + return False + return self._items == rhs._items + + def index(self, item): + index = bisect_left(self._items, item) + if (index != len(self._items)) and self._items[index] == item: + return index + raise ValueError("{} not found".format(repr(item))) + + def count(self, item): + return int(item in self._items) + + def __add__(self, rhs): + return SortedFrozenSet(chain(self._items, rhs._items)) + + def __mul__(self, rhs): + return SortedFrozenSet(self) if rhs > 0 else SortedFrozenSet() + + def __rmul__(self, lhs): + return self * lhs + + def issubset(self, iterable): + return self <= SortedFrozenSet(iterable) + + def issuperset(self, iterable): + return self >= SortedFrozenSet(iterable) + + def intersection(self, iterable): + return self & SortedFrozenSet(iterable) + + def union(self, iterable): + return self | SortedFrozenSet(iterable) + + def symmetric_difference(self, iterable): + return self ^ SortedFrozenSet(iterable) + + def difference(self, iterable): + return self - SortedFrozenSet(iterable) diff --git a/segpy/util.py b/segpy/util.py index 28e0dec..f481d95 100644 --- a/segpy/util.py +++ b/segpy/util.py @@ -4,6 +4,7 @@ import os import sys from itertools import (islice, cycle, tee, chain, repeat) +from segpy.sorted_set import SortedFrozenSet NATIVE_ENDIANNESS = '<' if sys.byteorder == 'little' else '>' @@ -325,4 +326,25 @@ def flatten(sequence_of_sequences): def four_bytes(byte_str): a, b, c, d = byte_str[:4] - return a, b, c, d \ No newline at end of file + return a, b, c, d + + +def make_sorted_distinct_sequence(iterable): + """Create a sorted immutable sequence from an iterable series. + + Args: + iterable: An iterable series of comparable values. + + Returns: + An immutable collection which supports the Sized, Iterable, + Container and Sequence protocols. + """ + sorted_set = SortedFrozenSet(iterable) + if len(sorted_set) == 1: + return sorted_set + stride = measure_stride(sorted_set) + if stride is not None: + start = sorted_set[0] + stop = sorted_set[-1] + stride + return range(start, stop, stride) + return sorted_set \ No newline at end of file diff --git a/test/test_sorted_set.py b/test/test_sorted_set.py new file mode 100644 index 0000000..1fbd7be --- /dev/null +++ b/test/test_sorted_set.py @@ -0,0 +1,378 @@ +import unittest +from collections.abc import (Container, Sized, Iterable, Sequence) + +from segpy.sorted_set import SortedFrozenSet + + +class TestConstruction(unittest.TestCase): + + def test_empty(self): + s = SortedFrozenSet() + + def test_from_sequence(self): + s = SortedFrozenSet([7, 8, 3, 1]) + + def test_with_duplicates(self): + s = SortedFrozenSet([8, 8, 8]) + + def test_from_iterable(self): + def gen6842(): + yield 6 + yield 8 + yield 4 + yield 2 + g = gen6842() + s = SortedFrozenSet(g) + + def test_default_empty(self): + s = SortedFrozenSet() + + + +class TestContainerProtocol(unittest.TestCase): + + def setUp(self): + self.s = SortedFrozenSet([6, 7, 3, 9]) + + def test_positive_contained(self): + self.assertTrue(6 in self.s) + + def test_negative_contained(self): + self.assertFalse(2 in self.s) + + def test_positive_not_contained(self): + self.assertTrue(5 not in self.s) + + def test_negative_not_contained(self): + self.assertFalse(9 not in self.s) + + def test_sequence_protocol(self): + self.assertTrue(issubclass(SortedFrozenSet, Container)) + + +class TestSizedProtocol(unittest.TestCase): + + def test_empty(self): + s = SortedFrozenSet() + self.assertEqual(len(s), 0) + + def test_one(self): + s = SortedFrozenSet([42]) + self.assertEqual(len(s), 1) + + def test_ten(self): + s = SortedFrozenSet(range(10)) + self.assertEqual(len(s), 10) + + def test_with_duplicates(self): + s = SortedFrozenSet([5, 5, 5]) + self.assertEqual(len(s), 1) + + def test_protocol(self): + self.assertTrue(issubclass(SortedFrozenSet, Sized)) + + +class TestIterableProtocol(unittest.TestCase): + + def setUp(self): + self.s = SortedFrozenSet([7, 2, 1, 1, 9]) + + def test_iter(self): + i = iter(self.s) + self.assertEqual(next(i), 1) + self.assertEqual(next(i), 2) + self.assertEqual(next(i), 7) + self.assertEqual(next(i), 9) + self.assertRaises(StopIteration, lambda: next(i)) + + def test_for_loop(self): + index = 0 + expected = [1, 2, 7, 9] + for item in self.s: + self.assertEqual(item, expected[index]) + index += 1 + + def test_protocol(self): + self.assertTrue(issubclass(SortedFrozenSet, Iterable)) + +class TestSequenceProtocol(unittest.TestCase): + + def setUp(self): + self.s = SortedFrozenSet([1, 4, 9, 13, 15]) + + def test_index_zero(self): + self.assertEqual(self.s[0], 1) + + def test_index_four(self): + self.assertEqual(self.s[4], 15) + + def test_index_one_beyond_the_end(self): + self.assertRaises(IndexError, lambda: self.s[5]) + + def test_index_minus_one(self): + self.assertEqual(self.s[-1], 15) + + def test_index_minus_five(self): + self.assertEqual(self.s[-5], 1) + + def test_index_one_before_the_beginning(self): + self.assertRaises(IndexError, lambda: self.s[-6]) + + def test_slice_from_start(self): + self.assertEqual(self.s[:3], SortedFrozenSet([1, 4, 9])) + + def test_slice_to_end(self): + self.assertEqual(self.s[3:], SortedFrozenSet([13, 15])) + + def test_slice_empty(self): + self.assertEqual(self.s[10:], SortedFrozenSet()) + + def test_slice_arbitrary(self): + self.assertEqual(self.s[2:4], SortedFrozenSet([9, 13])) + + def test_slice_full(self): + self.assertEqual(self.s[:], self.s) + + def test_reversed(self): + s = SortedFrozenSet([1, 3, 5, 7]) + r = reversed(s) + self.assertEqual(next(r), 7) + self.assertEqual(next(r), 5) + self.assertEqual(next(r), 3) + self.assertEqual(next(r), 1) + self.assertRaises(StopIteration, lambda: next(r)) + + def test_index_positive(self): + s = SortedFrozenSet([1, 5, 8, 9]) + self.assertEqual(s.index(8), 2) + + def test_index_negative(self): + s = SortedFrozenSet([1, 5, 8, 9]) + self.assertRaises(ValueError, lambda: s.index(15)) + + def test_count_zero(self): + s = SortedFrozenSet([1, 5, 7, 9]) + self.assertEqual(s.count(11), 0) + + def test_count_one(self): + s = SortedFrozenSet([1, 5, 7, 9]) + self.assertEqual(s.count(7), 1) + + def test_protocol(self): + self.assertTrue(issubclass(SortedFrozenSet, Sequence)) + + def test_concatenate_disjoint(self): + s = SortedFrozenSet([1, 2, 3]) + t = SortedFrozenSet([4, 5, 6]) + self.assertEqual(s + t, SortedFrozenSet([1, 2, 3, 4, 5, 6])) + + def test_concatenate_equal(self): + s = SortedFrozenSet([2, 4, 6]) + self.assertEqual(s + s, s) + + def test_concatenate_intersecting(self): + s = SortedFrozenSet([1, 2, 3]) + t = SortedFrozenSet([3, 4, 5]) + self.assertEqual(s + t, SortedFrozenSet([1, 2, 3, 4, 5])) + + def test_repetition_zero_lhs(self): + s = SortedFrozenSet([4, 5, 6]) + self.assertEquals(0 * s, SortedFrozenSet()) + + def test_repetition_zero_rhs(self): + s = SortedFrozenSet([4, 5, 6]) + self.assertEquals(s * 0, SortedFrozenSet()) + + def test_repetition_nonzero_lhs(self): + s = SortedFrozenSet([4, 5, 6]) + self.assertEquals(100 * s, s) + + def test_repetition_nonzero_rhs(self): + s = SortedFrozenSet([4, 5, 6]) + self.assertEquals(s * 100, s) + +class TestReprProtocol(unittest.TestCase): + + def test_repr_empty(self): + s = SortedFrozenSet() + self.assertEqual(repr(s), "SortedFrozenSet()") + + def test_repr_one(self): + s = SortedFrozenSet([42, 40, 19]) + self.assertEqual(repr(s), "SortedFrozenSet([19, 40, 42])") + + +class TestEqualityProtocol(unittest.TestCase): + + def test_positive_equal(self): + self.assertTrue(SortedFrozenSet([4, 5, 6]) == SortedFrozenSet([6, 5, 4])) + + def test_negative_equal(self): + self.assertFalse(SortedFrozenSet([4, 5, 6]) == SortedFrozenSet([1, 2, 3])) + + def test_type_mismatch(self): + self.assertFalse(SortedFrozenSet([4, 5, 6]) == [4, 5, 6]) + + def test_identical(self): + s = SortedFrozenSet([10, 11, 12]) + self.assertTrue(s == s) + + +class TestInequalityProtocol(unittest.TestCase): + + def test_positive_inequal(self): + self.assertTrue(SortedFrozenSet([4, 5, 6]) != SortedFrozenSet([1, 2, 3])) + + def test_negative_inequal(self): + self.assertFalse(SortedFrozenSet([4, 5, 6]) != SortedFrozenSet([6, 5, 4])) + + def test_type_mismatch(self): + self.assertTrue(SortedFrozenSet([1, 2, 3]) != [1, 2, 3]) + + def test_identical(self): + s = SortedFrozenSet([10, 11, 12]) + self.assertFalse(s != s) + + +class TestRelationalSetProtocol(unittest.TestCase): + + def test_lt_positive(self): + s = SortedFrozenSet({1, 2}) + t = SortedFrozenSet({1, 2, 3}) + self.assertTrue(s < t) + + def test_lt_negative(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({1, 2, 3}) + self.assertFalse(s < t) + + def test_le_lt_positive(self): + s = SortedFrozenSet({1, 2}) + t = SortedFrozenSet({1, 2, 3}) + self.assertTrue(s <= t) + + def test_le_eq_positive(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({1, 2, 3}) + self.assertTrue(s <= t) + + def test_le_negative(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({1, 2}) + self.assertFalse(s <= t) + + def test_gt_positive(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({1, 2}) + self.assertTrue(s > t) + + def test_gt_negative(self): + s = SortedFrozenSet({1, 2}) + t = SortedFrozenSet({1, 2, 3}) + self.assertFalse(s > t) + + def test_ge_gt_positive(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({1, 2}) + self.assertTrue(s > t) + + def test_ge_eq_positive(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({1, 2, 3}) + self.assertTrue(s >= t) + + def test_ge_negative(self): + s = SortedFrozenSet({1, 2}) + t = SortedFrozenSet({1, 2, 3}) + self.assertFalse(s >= t) + + +class TestSetRelationalMethods(unittest.TestCase): + + def test_issubset_proper_positive(self): + s = SortedFrozenSet({1, 2}) + t = [1, 2, 3] + self.assertTrue(s.issubset(t)) + + def test_issubset_positive(self): + s = SortedFrozenSet({1, 2, 3}) + t = [1, 2, 3] + self.assertTrue(s.issubset(t)) + + def test_issubset_negative(self): + s = SortedFrozenSet({1, 2, 3}) + t = [1, 2] + self.assertFalse(s.issubset(t)) + + def test_issuperset_proper_positive(self): + s = SortedFrozenSet({1, 2, 3}) + t = [1, 2] + self.assertTrue(s.issuperset(t)) + + def test_issuperset_positive(self): + s = SortedFrozenSet({1, 2, 3}) + t = [1, 2, 3] + self.assertTrue(s.issuperset(t)) + + def test_issuperset_negative(self): + s = SortedFrozenSet({1, 2}) + t = [1, 2, 3] + self.assertFalse(s.issuperset(t)) + + def test_isdisjoint_positive(self): + s = SortedFrozenSet({1, 2, 3}) + t = [4, 5, 6] + self.assertTrue(s.isdisjoint(t)) + + def test_isdisjoint_negative(self): + s = SortedFrozenSet({1, 2, 3}) + t = [3, 4, 5] + self.assertFalse(s.isdisjoint(t)) + + +class TestOperationsSetProtocol(unittest.TestCase): + + def test_intersection(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({2, 3, 4}) + self.assertEqual(s & t, SortedFrozenSet({2, 3})) + + def test_union(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({2, 3, 4}) + self.assertEqual(s | t, SortedFrozenSet({1, 2, 3, 4})) + + def test_symmetric_difference(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({2, 3, 4}) + self.assertEqual(s ^ t, SortedFrozenSet({1, 4})) + + def test_difference(self): + s = SortedFrozenSet({1, 2, 3}) + t = SortedFrozenSet({2, 3, 4}) + self.assertEqual(s - t, SortedFrozenSet({1})) + +class TestSetOperationsMethods(unittest.TestCase): + + def test_intersection(self): + s = SortedFrozenSet({1, 2, 3}) + t = [2, 3, 4] + self.assertEqual(s.intersection(t), SortedFrozenSet({2, 3})) + + def test_union(self): + s = SortedFrozenSet({1, 2, 3}) + t = [2, 3, 4] + self.assertEqual(s.union(t), SortedFrozenSet({1, 2, 3, 4})) + + def test_symmetric_difference(self): + s = SortedFrozenSet({1, 2, 3}) + t = [2, 3, 4] + self.assertEqual(s.symmetric_difference(t), SortedFrozenSet({1, 4})) + + def test_difference(self): + s = SortedFrozenSet({1, 2, 3}) + t = [2, 3, 4] + self.assertEqual(s.difference(t), SortedFrozenSet({1})) + +if __name__ == '__main__': + unittest.main()