diff --git a/examples/loadsave.py b/examples/loadsave.py new file mode 100644 index 0000000..4577c1c --- /dev/null +++ b/examples/loadsave.py @@ -0,0 +1,50 @@ +"""A simple example which loads a SEG Y file and saves it again. + +Usage: + + loadsave.py <in-filename> <out-filename> + +""" +import os + +import sys +import traceback + +from segpy.reader import create_reader +from segpy.writer import write_segy + + +def load_save(in_filename, out_filename): + with open(in_filename, 'rb') as in_file, \ + open(out_filename, 'wb') as out_file: + + segy_reader = create_reader(in_file) + write_segy(out_file, segy_reader) + + +def main(argv=None): + if argv is None: + argv = sys.argv[1:] + + try: + in_filename = argv[0] + out_filename = argv[1] + except IndexError: + print(globals()['__doc__'], file=sys.stderr) + return os.EX_USAGE + + try: + load_save(in_filename, out_filename) + except (FileNotFoundError, IsADirectoryError) as e: + print(e, file=sys.stderr) + return os.EX_NOINPUT + except PermissionError as e: + print(e, file=sys.stderr) + return os.EX_NOPERM + except Exception as e: + traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr) + return os.EX_SOFTWARE + return os.EX_OK + +if __name__ == '__main__': + sys.exit(main()) diff --git a/segpy/reader.py b/segpy/reader.py index 38e3585..7302ada 100644 --- a/segpy/reader.py +++ b/segpy/reader.py @@ -13,7 +13,8 @@ from segpy.toolkit import (extract_revision, REEL_HEADER_NUM_BYTES, TRACE_HEADER_NUM_BYTES, read_textual_reel_header, - read_extended_textual_headers) + read_extended_textual_headers, + guess_textual_header_encoding) def create_reader(fh, encoding=None, endian='>', progress=None): @@ -84,9 +85,13 @@ def create_reader(fh, encoding=None, endian='>', progress=None): "SEG Y file {!r} of {} bytes is too short".format( filename_from_handle(fh), num_file_bytes)) + if endian not in ('<', '>'): raise ValueError("Unrecognised endian value {!r}".format(endian)) + if encoding is None: + encoding = guess_textual_header_encoding(fh) + 
textual_reel_header = read_textual_reel_header(fh, encoding) binary_reel_header = read_binary_reel_header(fh, endian) extended_textual_header = read_extended_textual_headers(fh, binary_reel_header, encoding) @@ -97,14 +102,14 @@ def create_reader(fh, encoding=None, endian='>', progress=None): if cdp_catalog is not None and line_catalog is None: return SegYReader2D(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog, - trace_length_catalog, cdp_catalog) + trace_length_catalog, cdp_catalog, encoding, endian) if cdp_catalog is None and line_catalog is not None: return SegYReader3D(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog, - trace_length_catalog, line_catalog) + trace_length_catalog, line_catalog, encoding, endian) - return SegYReader(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog, trace_length_catalog, - endian) + return SegYReader(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog, + trace_length_catalog, encoding, endian) class SegYReader(object): @@ -121,6 +126,7 @@ class SegYReader(object): extended_textual_headers, trace_offset_catalog, trace_length_catalog, + encoding, endian='>'): """Initialize a SegYReader around a file-like-object. @@ -145,12 +151,15 @@ class SegYReader(object): trace_length_catalog: A mapping from zero-based trace_samples index to the number of samples in that trace_samples. + encoding: Either ASCII or EBCDIC. 
+ endian: '>' for big-endian data (the standard and default), '<' for little-endian (non-standard) """ self._fh = fh self._endian = endian + self._encoding = encoding self._trace_header_format = compile_trace_header_format(self._endian) self._textual_reel_header = textual_reel_header @@ -329,6 +338,18 @@ class SegYReader(object): """ return CTYPE_DESCRIPTION[CTYPES[self.data_sample_format]] + @property + def encoding(self): + """The encoding of the data in the underlying file. Either ASCII ('ascii'), + EBCDIC ('cp037') or None.""" + return self._encoding + + @property + def endian(self): + """The endianness of the data in the underlying file. Either '>' for big-endian or '<' for + little-endian or None.""" + return self._endian + class SegYReader3D(SegYReader): """A reader for 3D seismic data. @@ -346,6 +367,7 @@ class SegYReader3D(SegYReader): trace_offset_catalog, trace_length_catalog, line_catalog, + encoding, endian='>'): """Initialize a SegYReader3D around a file-like-object. @@ -368,11 +390,13 @@ class SegYReader3D(SegYReader): line_catalog: A mapping from (xline, inline) tuples to trace_indexes. + encoding: Either ASCII or EBCDIC. + endian: '>' for big-endian data (the standard and default), '<' for little-endian (non-standard) """ super(SegYReader3D, self).__init__(fh, textual_reel_header, binary_reel_header, extended_textual_headers, - trace_offset_catalog, trace_length_catalog, endian) + trace_offset_catalog, trace_length_catalog, encoding, endian) self._line_catalog = line_catalog self._num_inlines = None self._num_xlines = None @@ -483,7 +507,9 @@ class SegYReader2D(SegYReader): extended_textual_headers, trace_offset_catalog, trace_length_catalog, - cdp_catalog, endian='>'): + cdp_catalog, + encoding, + endian='>'): """Initialize a SegYReader2D around a file-like-object. Note: @@ -504,11 +530,13 @@ class SegYReader2D(SegYReader): cdp_catalog: A mapping from CDP numbers to trace_indexes. + encoding: Either ASCII or EBCDIC. 
+ endian: '>' for big-endian data (the standard and default), '<' for little-endian (non-standard) """ super(SegYReader2D, self).__init__(fh, textual_reel_header, binary_reel_header, extended_textual_headers, - trace_offset_catalog, trace_length_catalog, endian) + trace_offset_catalog, trace_length_catalog, encoding, endian) self._cdp_catalog = cdp_catalog def _dimensionality(self): diff --git a/segpy/toolkit.py b/segpy/toolkit.py index dd8bb79..a3223ff 100644 --- a/segpy/toolkit.py +++ b/segpy/toolkit.py @@ -117,21 +117,27 @@ def trace_length_bytes(binary_reel_header, bps): return samples_per_trace(binary_reel_header) * bps + TRACE_HEADER_NUM_BYTES -def read_textual_reel_header(fh, encoding=None): +def guess_textual_header_encoding(fh): + fh.seek(0) + raw_header = fh.read(TEXTUAL_HEADER_NUM_BYTES) + encoding = guess_encoding(raw_header) + return encoding + + +def read_textual_reel_header(fh, encoding): """Read the SEG Y card image header, also known as the textual header Args: fh: A file-like object open in binary mode positioned such that the beginning of the textual header will be the next byte to read. - encoding: Optional encoding of the header in the file. If None (the - default) a reliable heuristic will be used to guess the encoding. - Either 'cp037' for EBCDIC or 'ascii' for ASCII. + encoding: Either 'cp037' for EBCDIC or 'ascii' for ASCII. Returns: A tuple of forty Unicode strings (Python 2: unicode, Python 3: str) containing the transcoded header data. 
""" + fh.seek(0) raw_header = fh.read(TEXTUAL_HEADER_NUM_BYTES) num_bytes_read = len(raw_header) @@ -139,9 +145,6 @@ def read_textual_reel_header(fh, encoding=None): raise EOFError("Only {} bytes of {} byte textual reel header could be read" .format(num_bytes_read, TEXTUAL_HEADER_NUM_BYTES)) - if encoding is None: - encoding = guess_encoding(raw_header) - lines = tuple(bytes(raw_line).decode(encoding) for raw_line in batched(raw_header, CARD_LENGTH)) return lines @@ -156,6 +159,7 @@ def read_binary_reel_header(fh, endian='>'): endian: '>' for big-endian data (the standard and default), '<' for little-endian (non-standard) """ + fh.seek(TEXTUAL_HEADER_NUM_BYTES) reel_header = {} for key in HEADER_DEF: pos = HEADER_DEF[key]['pos'] @@ -590,14 +594,28 @@ def write_textual_reel_header(fh, lines, encoding): or omitted lines will be padded with spaces. encoding: Typically 'cp037' for EBCDIC or 'ascii' for ASCII. + + Post-condition: + The file pointer in fh will be positioned at the first byte following the textual + header. + + Raises: + UnsupportedEncodingError: If encoding is neither EBCDIC nor ASCII. + UnicodeError: If the data provided in lines cannot be encoded with the encoding. """ - # TODO: Seek + if not is_supported_encoding(encoding): + raise UnsupportedEncodingError("Writing textual reel header", encoding) + + fh.seek(0) + padded_lines = [line.encode(encoding).ljust(CARD_LENGTH, ' '.encode(encoding))[:CARD_LENGTH] for line in pad(lines, padding='', size=CARDS_PER_HEADER)] - header = ''.join(padded_lines) + header = b''.join(padded_lines) assert len(header) == 3200 fh.write(header) + fh.seek(TEXTUAL_HEADER_NUM_BYTES) + def write_binary_reel_header(fh, binary_reel_header, endian='>'): """Write the binary_reel_header to the given file-like object. 
@@ -608,14 +626,21 @@ def write_binary_reel_header(fh, binary_reel_header, endian='>'): binary_reel_header: A dictionary of values using a subset of the keys in binary_reel_header_definition.HEADER_DEF associated with compatible values. + + Post-condition: + The file pointer for fh will be positioned at the first byte following + the binary reel header. """ - # TODO: Seek + + for key in HEADER_DEF: pos = HEADER_DEF[key]['pos'] ctype = HEADER_DEF[key]['type'] value = binary_reel_header[key] if key in binary_reel_header else HEADER_DEF[key]['def'] write_binary_values(fh, [value], ctype, pos) + + fh.seek(REEL_HEADER_NUM_BYTES) + def format_extended_textual_header(text, encoding, include_text_stop=False): """Format a string into pages and line suitable for an extended textual header. @@ -673,16 +698,21 @@ def write_extended_textual_headers(fh, pages, encoding): encoding: Either 'cp037' for EBCDIC or 'ascii' for ASCII. + Post-condition: + The file pointer in fh will be positioned at the first byte after the extended textual headers, which is + also the first byte of the first trace-header. + Raises: ValueError: If the provided header data has the wrong shape. UnicodeError: If the textual data could not be encoded into the specified encoding. """ - # TODO: Seek if not is_supported_encoding(encoding): raise UnsupportedEncodingError("Writing extended textual header", encoding) + fh.seek(REEL_HEADER_NUM_BYTES) + encoded_pages = [] for page_index, page in enumerate(pages): encoded_page = [] @@ -726,12 +756,8 @@ def write_trace_header(fh, trace_header, trace_header_format, pos=None): fh.write(buf) -def write_trace_samples(fh, values, ctype='l', pos=None): - write_binary_values(fh, values, ctype, pos) - - -def write_binary_values(fh, values, ctype='l', pos=None): - """Write a series on values to a file. +def write_trace_samples(fh, samples, ctype='l', pos=None, endian='>'): + """Write trace samples to a file. Args: fh: A file-like-object open for writing in binary mode. 
@@ -742,6 +768,28 @@ def write_binary_values(fh, values, ctype='l', pos=None): pos: An optional offset from the beginning of the file. If omitted, any writing is done at the current file position. + + endian: '>' for big-endian data (the standard and default), '<' + for little-endian (non-standard) + """ + write_binary_values(fh, samples, ctype, pos, endian) + + +def write_binary_values(fh, values, ctype='l', pos=None, endian='>'): + """Write a series of values to a file. + + Args: + fh: A file-like-object open for writing in binary mode. + + values: An iterable series of values. + + ctype: The SEG Y data type. + + pos: An optional offset from the beginning of the file. If omitted, + any writing is done at the current file position. + + endian: '>' for big-endian data (the standard and default), '<' + for little-endian (non-standard) """ fmt = CTYPES[ctype] @@ -750,7 +798,7 @@ def write_binary_values(fh, values, ctype='l', pos=None): buf = (pack_ibm_floats(values) if fmt == 'ibm' - else pack_values(values, fmt)) + else pack_values(values, fmt, endian)) fh.write(buf) diff --git a/segpy/writer.py b/segpy/writer.py index 6f0fbf5..db97d69 100644 --- a/segpy/writer.py +++ b/segpy/writer.py @@ -50,9 +50,9 @@ def write_segy(fh, trace_header_format = compile_trace_header_format(endian) - for trace_index in seg_y_data.trace_indexes: + for trace_index in seg_y_data.trace_indexes(): write_trace_header(fh, seg_y_data.trace_header(trace_index), trace_header_format) - write_trace_samples(fh, seg_y_data.trace_samples(trace_index), seg_y_data.data_sample_format) + write_trace_samples(fh, seg_y_data.trace_samples(trace_index), seg_y_data.data_sample_format, endian=endian)