Fixes issues revealed by the new loadsave.py example program.

This commit is contained in:
Robert Smallshire
2015-01-29 13:26:25 +01:00
parent ee1e05a9ff
commit e74f4498aa
4 changed files with 154 additions and 28 deletions
+50
View File
@@ -0,0 +1,50 @@
"""A simple example which loads a SEG Y file and saves it again.
Usage:
loadsave.py <in.segy> <out.segy>
"""
import os
import sys
import traceback
from segpy.reader import create_reader
from segpy.writer import write_segy
def load_save(in_filename, out_filename):
    """Read the SEG Y file at in_filename and write it back out to out_filename.

    Args:
        in_filename: Path of the file to read; opened in binary read mode.
        out_filename: Path of the file to write; opened in binary write mode.
    """
    with open(in_filename, 'rb') as source:
        with open(out_filename, 'wb') as destination:
            # The reader is handed straight to the writer as the data source.
            write_segy(destination, create_reader(source))
def main(argv=None):
    """Run the load/save example and return a process exit status.

    Args:
        argv: Optional sequence of command-line arguments, excluding the
            program name. If None (the default), sys.argv[1:] is used. The
            first item is the input filename and the second is the output
            filename; any further items are ignored.

    Returns:
        An integer exit status: the EX_USAGE code if fewer than two arguments
        were supplied (the module docstring is printed to stderr as usage
        text), EX_NOINPUT if load_save() raised FileNotFoundError or
        IsADirectoryError, EX_NOPERM if it raised PermissionError,
        EX_SOFTWARE for any other exception (a traceback is printed to
        stderr), and EX_OK on success.
    """
    # Apart from EX_OK, the os.EX_* constants are only defined on Unix. Fall
    # back to the conventional sysexits.h values so that the error paths
    # return a status elsewhere instead of raising AttributeError.
    exit_ok = getattr(os, 'EX_OK', 0)
    exit_usage = getattr(os, 'EX_USAGE', 64)
    exit_noinput = getattr(os, 'EX_NOINPUT', 66)
    exit_software = getattr(os, 'EX_SOFTWARE', 70)
    exit_noperm = getattr(os, 'EX_NOPERM', 77)

    if argv is None:
        argv = sys.argv[1:]

    try:
        in_filename = argv[0]
        out_filename = argv[1]
    except IndexError:
        # Inside a function, __doc__ resolves to the module-level docstring,
        # which holds the usage text.
        print(__doc__, file=sys.stderr)
        return exit_usage

    try:
        load_save(in_filename, out_filename)
    except (FileNotFoundError, IsADirectoryError) as e:
        print(e, file=sys.stderr)
        return exit_noinput
    except PermissionError as e:
        print(e, file=sys.stderr)
        return exit_noperm
    except Exception as e:
        # Top-level boundary: report the full traceback and signal failure
        # through the exit status instead of propagating.
        traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
        return exit_software
    return exit_ok
# When executed as a script, use main()'s return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
+36 -8
View File
@@ -13,7 +13,8 @@ from segpy.toolkit import (extract_revision,
REEL_HEADER_NUM_BYTES,
TRACE_HEADER_NUM_BYTES,
read_textual_reel_header,
read_extended_textual_headers)
read_extended_textual_headers,
guess_textual_header_encoding)
def create_reader(fh, encoding=None, endian='>', progress=None):
@@ -84,9 +85,13 @@ def create_reader(fh, encoding=None, endian='>', progress=None):
"SEG Y file {!r} of {} bytes is too short".format(
filename_from_handle(fh),
num_file_bytes))
if endian not in ('<', '>'):
raise ValueError("Unrecognised endian value {!r}".format(endian))
if encoding is None:
encoding = guess_textual_header_encoding(fh)
textual_reel_header = read_textual_reel_header(fh, encoding)
binary_reel_header = read_binary_reel_header(fh, endian)
extended_textual_header = read_extended_textual_headers(fh, binary_reel_header, encoding)
@@ -97,14 +102,14 @@ def create_reader(fh, encoding=None, endian='>', progress=None):
if cdp_catalog is not None and line_catalog is None:
return SegYReader2D(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog,
trace_length_catalog, cdp_catalog)
trace_length_catalog, cdp_catalog, encoding, endian)
if cdp_catalog is None and line_catalog is not None:
return SegYReader3D(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog,
trace_length_catalog, line_catalog)
trace_length_catalog, line_catalog, encoding, endian)
return SegYReader(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog, trace_length_catalog,
endian)
return SegYReader(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog,
trace_length_catalog, encoding, endian)
class SegYReader(object):
@@ -121,6 +126,7 @@ class SegYReader(object):
extended_textual_headers,
trace_offset_catalog,
trace_length_catalog,
encoding,
endian='>'):
"""Initialize a SegYReader around a file-like-object.
@@ -145,12 +151,15 @@ class SegYReader(object):
trace_length_catalog: A mapping from zero-based trace_samples index to the
number of samples in that trace_samples.
encoding: Either ASCII or EBCDIC.
endian: '>' for big-endian data (the standard and default), '<' for
little-endian (non-standard)
"""
self._fh = fh
self._endian = endian
self._encoding = encoding
self._trace_header_format = compile_trace_header_format(self._endian)
self._textual_reel_header = textual_reel_header
@@ -329,6 +338,18 @@ class SegYReader(object):
"""
return CTYPE_DESCRIPTION[CTYPES[self.data_sample_format]]
@property
def encoding(self):
"""The encoding of the data in the underlying file.

Either ASCII ('ascii'), EBCDIC ('cp037') or None. This is the value
supplied when the reader was constructed.
"""
return self._encoding
@property
def endian(self):
"""The endianness of the data in the underlying file.

Either '>' for big-endian, '<' for little-endian, or None. This is the
value supplied when the reader was constructed.
"""
return self._endian
class SegYReader3D(SegYReader):
"""A reader for 3D seismic data.
@@ -346,6 +367,7 @@ class SegYReader3D(SegYReader):
trace_offset_catalog,
trace_length_catalog,
line_catalog,
encoding,
endian='>'):
"""Initialize a SegYReader3D around a file-like-object.
@@ -368,11 +390,13 @@ class SegYReader3D(SegYReader):
line_catalog: A mapping from (xline, inline) tuples to
trace_indexes.
encoding: Either ASCII or EBCDIC.
endian: '>' for big-endian data (the standard and default), '<' for
little-endian (non-standard)
"""
super(SegYReader3D, self).__init__(fh, textual_reel_header, binary_reel_header, extended_textual_headers,
trace_offset_catalog, trace_length_catalog, endian)
trace_offset_catalog, trace_length_catalog, encoding, endian)
self._line_catalog = line_catalog
self._num_inlines = None
self._num_xlines = None
@@ -483,7 +507,9 @@ class SegYReader2D(SegYReader):
extended_textual_headers,
trace_offset_catalog,
trace_length_catalog,
cdp_catalog, endian='>'):
cdp_catalog,
encoding,
endian='>'):
"""Initialize a SegYReader2D around a file-like-object.
Note:
@@ -504,11 +530,13 @@ class SegYReader2D(SegYReader):
cdp_catalog: A mapping from CDP numbers to trace_indexes.
encoding: Either ASCII or EBCDIC.
endian: '>' for big-endian data (the standard and default), '<' for
little-endian (non-standard)
"""
super(SegYReader2D, self).__init__(fh, textual_reel_header, binary_reel_header, extended_textual_headers,
trace_offset_catalog, trace_length_catalog, endian)
trace_offset_catalog, trace_length_catalog, encoding, endian)
self._cdp_catalog = cdp_catalog
def _dimensionality(self):
+66 -18
View File
@@ -117,21 +117,27 @@ def trace_length_bytes(binary_reel_header, bps):
return samples_per_trace(binary_reel_header) * bps + TRACE_HEADER_NUM_BYTES
def read_textual_reel_header(fh, encoding=None):
def guess_textual_header_encoding(fh):
    """Guess the encoding of the textual header at the start of a file.

    Args:
        fh: A file-like object supporting seek() and read(). It is rewound
            to the beginning before reading, so on return the file pointer
            is left just past the bytes that were read.

    Returns:
        Whatever guess_encoding() reports for the first
        TEXTUAL_HEADER_NUM_BYTES bytes of the file.
    """
    fh.seek(0)
    return guess_encoding(fh.read(TEXTUAL_HEADER_NUM_BYTES))
def read_textual_reel_header(fh, encoding):
"""Read the SEG Y card image header, also known as the textual header
Args:
fh: A file-like object open in binary mode positioned such that the
beginning of the textual header will be the next byte to read.
encoding: Optional encoding of the header in the file. If None (the
default) a reliable heuristic will be used to guess the encoding.
Either 'cp037' for EBCDIC or 'ascii' for ASCII.
encoding: Either 'cp037' for EBCDIC or 'ascii' for ASCII.
Returns:
A tuple of forty Unicode strings (Python 2: unicode, Python 3: str)
containing the transcoded header data.
"""
fh.seek(0)
raw_header = fh.read(TEXTUAL_HEADER_NUM_BYTES)
num_bytes_read = len(raw_header)
@@ -139,9 +145,6 @@ def read_textual_reel_header(fh, encoding=None):
raise EOFError("Only {} bytes of {} byte textual reel header could be read"
.format(num_bytes_read, TEXTUAL_HEADER_NUM_BYTES))
if encoding is None:
encoding = guess_encoding(raw_header)
lines = tuple(bytes(raw_line).decode(encoding) for raw_line in batched(raw_header, CARD_LENGTH))
return lines
@@ -156,6 +159,7 @@ def read_binary_reel_header(fh, endian='>'):
endian: '>' for big-endian data (the standard and default), '<' for
little-endian (non-standard)
"""
fh.seek(TEXTUAL_HEADER_NUM_BYTES)
reel_header = {}
for key in HEADER_DEF:
pos = HEADER_DEF[key]['pos']
@@ -590,14 +594,28 @@ def write_textual_reel_header(fh, lines, encoding):
or omitted lines will be padded with spaces.
encoding: Typically 'cp037' for EBCDIC or 'ascii' for ASCII.
Post-condition:
The file pointer in fh will be positioned at the first byte following the textual
header.
Raises:
UnsupportedEncodingError: If encoding is neither EBCDIC nor ASCII.
UnicodeError: If the data provided in lines cannot be encoded with the encoding.
"""
# TODO: Seek
if not is_supported_encoding(encoding):
raise UnsupportedEncodingError("Writing textual reel header", encoding)
fh.seek(0)
padded_lines = [line.encode(encoding).ljust(CARD_LENGTH, ' '.encode(encoding))[:CARD_LENGTH]
for line in pad(lines, padding='', size=CARDS_PER_HEADER)]
header = ''.join(padded_lines)
header = b''.join(padded_lines)
assert len(header) == 3200
fh.write(header)
fh.seek(TEXTUAL_HEADER_NUM_BYTES)
def write_binary_reel_header(fh, binary_reel_header, endian='>'):
"""Write the binary_reel_header to the given file-like object.
@@ -608,14 +626,21 @@ def write_binary_reel_header(fh, binary_reel_header, endian='>'):
binary_reel_header: A dictionary of values using a subset of the keys
in binary_reel_header_definition.HEADER_DEF associated with
compatible values.
Post-condition:
The file pointer for fh will be positioned at the first byte following
the binary reel header.
"""
# TODO: Seek
for key in HEADER_DEF:
pos = HEADER_DEF[key]['pos']
ctype = HEADER_DEF[key]['type']
value = binary_reel_header[key] if key in binary_reel_header else HEADER_DEF[key]['def']
write_binary_values(fh, [value], ctype, pos)
fh.seek(REEL_HEADER_NUM_BYTES)
def format_extended_textual_header(text, encoding, include_text_stop=False):
"""Format a string into pages and line suitable for an extended textual header.
@@ -673,16 +698,21 @@ def write_extended_textual_headers(fh, pages, encoding):
encoding: Either 'cp037' for EBCDIC or 'ascii' for ASCII.
Post-condition:
The file pointer in fh will be positioned at the first byte after the extended textual headers, which is
also the first byte of the first trace-header.
Raises:
ValueError: If the provided header data has the wrong shape.
UnicodeError: If the textual data could not be encoded into the specified encoding.
"""
# TODO: Seek
if not is_supported_encoding(encoding):
raise UnsupportedEncodingError("Writing extended textual header", encoding)
fh.seek(REEL_HEADER_NUM_BYTES)
encoded_pages = []
for page_index, page in enumerate(pages):
encoded_page = []
@@ -726,12 +756,8 @@ def write_trace_header(fh, trace_header, trace_header_format, pos=None):
fh.write(buf)
def write_trace_samples(fh, values, ctype='l', pos=None):
write_binary_values(fh, values, ctype, pos)
def write_binary_values(fh, values, ctype='l', pos=None):
"""Write a series on values to a file.
def write_trace_samples(fh, samples, ctype='l', pos=None, endian='>'):
"""Write trace samples to a file.
Args:
fh: A file-like-object open for writing in binary mode.
@@ -742,6 +768,28 @@ def write_binary_values(fh, values, ctype='l', pos=None):
pos: An optional offset from the beginning of the file. If omitted,
any writing is done at the current file position.
endian: '>' for big-endian data (the standard and default), '<'
for little-endian (non-standard)
"""
write_binary_values(fh, samples, ctype, pos, endian)
def write_binary_values(fh, values, ctype='l', pos=None, endian='>'):
"""Write a series of values to a file.
Args:
fh: A file-like-object open for writing in binary mode.
values: An iterable series of values.
ctype: The SEG Y data type.
pos: An optional offset from the beginning of the file. If omitted,
any writing is done at the current file position.
endian: '>' for big-endian data (the standard and default), '<'
for little-endian (non-standard)
"""
fmt = CTYPES[ctype]
@@ -750,7 +798,7 @@ def write_binary_values(fh, values, ctype='l', pos=None):
buf = (pack_ibm_floats(values)
if fmt == 'ibm'
else pack_values(values, fmt))
else pack_values(values, fmt, endian))
fh.write(buf)
+2 -2
View File
@@ -50,9 +50,9 @@ def write_segy(fh,
trace_header_format = compile_trace_header_format(endian)
for trace_index in seg_y_data.trace_indexes:
for trace_index in seg_y_data.trace_indexes():
write_trace_header(fh, seg_y_data.trace_header(trace_index), trace_header_format)
write_trace_samples(fh, seg_y_data.trace_samples(trace_index), seg_y_data.data_sample_format)
write_trace_samples(fh, seg_y_data.trace_samples(trace_index), seg_y_data.data_sample_format, endian=endian)