Support for reading textual (normal and extended) headers. Support for alternative inline/xline numbering in 3D seismic.

This commit is contained in:
Robert Smallshire
2014-10-23 11:39:16 +02:00
parent d50dc0b097
commit d2f1beac9d
+213 -26
View File
@@ -6,39 +6,55 @@ import struct
from catalog import CatalogBuilder
from datatypes import CTYPES, size_in_bytes
from reel_header_definition import HEADER_DEF
from encoding import guess_encoding
from binary_reel_header_definition import HEADER_DEF
from ibm_float import ibm2ieee, ieee2ibm
from revisions import canonicalize_revision
from trace_header_definition import TRACE_HEADER_DEF
from util import file_length
from util import file_length, batched
from portability import EMPTY_BYTE_STRING
# NOTE(review): this literal is immediately superseded by the computed
# REEL_HEADER_NUM_BYTES a few lines below (same value, 3200 + 400); it looks
# like the pre-change definition - confirm it can be dropped.
REEL_HEADER_NUM_BYTES = 3600
# Size in bytes of one textual header record; read_textual_reel_header()
# reads exactly this many bytes per call.
TEXTUAL_HEADER_NUM_BYTES = 3200
# Size in bytes of the binary reel header.
BINARY_HEADER_NUM_BYTES = 400
# Combined size of the textual and binary headers. read_extended_textual_headers()
# seeks to this offset before reading any extended textual headers.
REEL_HEADER_NUM_BYTES = TEXTUAL_HEADER_NUM_BYTES + BINARY_HEADER_NUM_BYTES
# Size in bytes of a trace header; trace_length_bytes() adds this to the
# sample data size.
TRACE_HEADER_NUM_BYTES = 240
def extract_revision(binary_reel_header):
    """Return the canonical SEG Y revision declared in the binary reel header.

    The raw 'SegyFormatRevisionNumber' entry of the header is passed through
    canonicalize_revision().

    Args:
        binary_reel_header: A dictionary containing a reel header, such as
            obtained from read_binary_reel_header()

    Returns:
        One of the constants revisions.SEGY_REVISION_0 or
        revisions.SEGY_REVISION_1
    """
    return canonicalize_revision(binary_reel_header['SegyFormatRevisionNumber'])
def bytes_per_sample(reel_header, revision):
def num_extended_textual_headers(binary_reel_header):
    """Return the declared number of 3200 byte extended textual file headers.

    The value is taken verbatim from the 'NumberOfExtTextualHeaders' entry of
    the binary reel header and is interpreted as follows:

      * zero: there are no Extended Textual File Header records;
      * -1: there is a variable number of records, and the end of the
        Extended Textual File Header is denoted by an ((SEG: EndText)) stanza
        in the final record;
      * positive: there are exactly that many records.

    Args:
        binary_reel_header: A dictionary containing a reel header, such as
            obtained from read_binary_reel_header()

    Returns:
        The value stored under 'NumberOfExtTextualHeaders'.
    """
    return binary_reel_header['NumberOfExtTextualHeaders']
def bytes_per_sample(binary_reel_header, revision):
    """Look up the number of bytes per sample for this file.

    The 'DataSampleFormat' code from the reel header is resolved through the
    per-revision "bps" table held in HEADER_DEF["DataSampleFormat"].

    Args:
        binary_reel_header: A dictionary containing a reel header, such as
            obtained from read_binary_reel_header()

        revision: One of the constants revisions.SEGY_REVISION_0 or
            revisions.SEGY_REVISION_1

    Returns:
        An integer number of bytes per sample.
    """
    sample_format = binary_reel_header['DataSampleFormat']
    bps_by_revision = HEADER_DEF["DataSampleFormat"]["bps"]
    return bps_by_revision[revision][sample_format]
def samples_per_trace(binary_reel_header):
    """Return the number of samples per trace declared in the reel header.

    Note: There is no requirement for all traces to be of the same length.
        This function only reports the single 'ns' figure stored in the reel
        header; per-trace information lives in the trace headers (presumably
        including each trace's own sample count - TODO confirm).

    Args:
        binary_reel_header: A dictionary containing a reel header, such as
            obtained from read_binary_reel_header()

    Returns:
        An integer number of samples per trace
    """
    num_samples = binary_reel_header['ns']
    return num_samples
def trace_length_bytes(binary_reel_header, bps):
    """Compute the trace length in bytes implied by the reel header.

    The result is the size of the sample data (samples per trace multiplied
    by bytes per sample) plus the fixed TRACE_HEADER_NUM_BYTES.

    Note: There is no requirement for all traces to be of the same length.
        The figure returned here is derived from the reel header alone;
        per-trace information lives in the trace headers.

    Args:
        binary_reel_header: A dictionary containing a reel header, such as
            obtained from read_binary_reel_header()

        bps: The number of bytes per sample, such as obtained from a call to
            bytes_per_sample()

    Returns:
        The length in bytes of one trace, header included.
    """
    sample_data_num_bytes = samples_per_trace(binary_reel_header) * bps
    return sample_data_num_bytes + TRACE_HEADER_NUM_BYTES
def read_reel_header(fh, endian='>'):
"""Read the SEG Y reel header, also known as the binary header.
def read_textual_reel_header(fh, encoding=None):
    """Read the SEG Y card image header, also known as the textual header.

    Exactly TEXTUAL_HEADER_NUM_BYTES bytes are consumed from the current
    position of fh and split into consecutive 80 byte "cards", each of which
    is decoded separately.

    Args:
        fh: A file-like object open in binary mode positioned such that the
            beginning of the textual header will be the next byte to read.

        encoding: Optional encoding of the header in the file. If None (the
            default) guess_encoding() is applied to the raw header bytes to
            choose one. Typically 'cp037' for EBCDIC or 'ascii' for ASCII.

    Returns:
        A tuple of forty Unicode strings (Python 2: unicode, Python 3: str)
        containing the transcoded header data.

    Raises:
        EOFError: If fewer than TEXTUAL_HEADER_NUM_BYTES bytes could be read.
    """
    raw_header = fh.read(TEXTUAL_HEADER_NUM_BYTES)

    num_bytes_read = len(raw_header)
    if num_bytes_read < TEXTUAL_HEADER_NUM_BYTES:
        raise EOFError("Only {} bytes of {} byte textual reel header could be read"
                       .format(num_bytes_read, TEXTUAL_HEADER_NUM_BYTES))

    if encoding is None:
        encoding = guess_encoding(raw_header)

    # Slice the buffer directly rather than regrouping it one byte at a time:
    # a slice of a byte string is itself a byte string, so no reconstruction
    # step is needed and the result does not depend on what iterating over a
    # byte string yields.
    card_num_bytes = 80
    return tuple(raw_header[start:start + card_num_bytes].decode(encoding)
                 for start in range(0, TEXTUAL_HEADER_NUM_BYTES, card_num_bytes))
def read_binary_reel_header(fh, endian='>'):
"""Read the SEG Y binary reel header.
Args:
fh: A file-like object open in binary mode. Binary header is assumed to
be at an offset of 3200 bytes from the beginning of the file.
endian: '>' for big-endian data (the standard and default), '<' for
little-endian (non-standard)
@@ -109,6 +154,114 @@ def read_reel_header(fh, endian='>'):
return reel_header
def has_end_text_stanza(ext_header):
    """Determine whether the header is the end text stanza.

    Only the first line of the header is inspected for the
    "((SEG: EndText))" marker.

    Args:
        ext_header: A sequence of forty 80 character Unicode strings. An
            empty sequence is accepted and is never an end-text header.

    Returns:
        True if the header is the SEG Y Revision 1 end text header,
        otherwise False.
    """
    if not ext_header:
        # Nothing to inspect; avoid an IndexError on the first-line lookup.
        return False
    return "((SEG: EndText))" in ext_header[0]
def read_extended_headers_until_end(fh, encoding=None):
    """Read an unspecified number of extended textual headers, until the end-text header is found.

    Headers are read one after another from the current position of fh. The
    end-text header itself is consumed from the file but is not included in
    the result.

    Args:
        fh: A file-like object open in binary mode. The first of any extended textual headers
            is assumed to be at an offset of 3600 bytes from the beginning of the file
            (immediately following the binary reel header).

        encoding: Optional encoding of the header in the file. If None (the
            default) the encoding is guessed by read_textual_reel_header()
            separately for each header. Typically 'cp037' for EBCDIC or
            'ascii' for ASCII.

    Returns:
        A list of tuples each containing forty 80-character Unicode strings.

    Raises:
        EOFError: Propagated from read_textual_reel_header() if the file ends
            before an end-text header has been found.
    """
    extended_headers = []
    while True:
        ext_header = read_textual_reel_header(fh, encoding)
        if has_end_text_stanza(ext_header):
            return extended_headers
        extended_headers.append(ext_header)
def read_extended_headers_counted(fh, num_expected, encoding=None):
    """Read a specified number of extended textual headers.

    If an end-text stanza is located prematurely (in anything other than the last expected header)
    reading will be terminated and a warning logged. An end-text header is
    never included in the result, whether premature or not.

    Args:
        fh: A file-like object open in binary mode. The first of any extended textual headers
            is assumed to be at an offset of 3600 bytes from the beginning of the file
            (immediately following the binary reel header).

        num_expected: A non-negative integer of headers.

        encoding: Optional encoding of the header in the file. If None (the
            default) the encoding is guessed by read_textual_reel_header()
            separately for each header. Typically 'cp037' for EBCDIC or
            'ascii' for ASCII.

    Returns:
        A list of tuples each containing forty 80-character Unicode strings.

    Raises:
        ValueError: If num_expected is negative.
        EOFError: Propagated from read_textual_reel_header() if the file ends
            before the expected headers have been read.
    """
    # Imported locally so this function does not rely on a module-level import.
    import logging

    # Validate explicitly: an assert would be stripped when running with -O.
    if num_expected < 0:
        raise ValueError("num_expected must be non-negative, got {!r}".format(num_expected))

    extended_headers = []
    last_index = num_expected - 1
    for i in range(num_expected):
        ext_header = read_textual_reel_header(fh, encoding)
        if has_end_text_stanza(ext_header):
            if i != last_index:
                logging.getLogger(__name__).warning(
                    "Unexpected end-text extended header (header %d of %d expected)",
                    i + 1, num_expected)
            break
        extended_headers.append(ext_header)
    return extended_headers
def read_extended_textual_headers(fh, binary_reel_header, encoding=None):
    """Read any extended textual reel headers.

    The file is first repositioned to REEL_HEADER_NUM_BYTES. The number of
    extended headers declared in the binary reel header then selects the
    reading strategy: -1 reads until an end-text header is found, a positive
    value reads at most that many headers, and any other value (zero, or
    less than -1) reads nothing.

    Args:
        fh: A file-like object open in binary mode. The first of any extended textual headers
            is assumed to be at an offset of 3600 bytes from the beginning of the file
            (immediately following the binary reel header).

        binary_reel_header: A dictionary containing data read from the binary
            reel header by the read_binary_reel_header() function.

        encoding: Optional encoding of the header in the file. If None (the
            default) the encoding is guessed when each header is read.
            Typically 'cp037' for EBCDIC or 'ascii' for ASCII.

    Returns:
        A Unicode string containing the concatenated contents of any extended headers,
        with leading and trailing spaces removed. If there were no extended
        headers, the string will be empty.

    Postcondition:
        As a post-condition to this function, the file-pointer of fh will be
        positioned immediately after the last extended textual header read,
        which should be the start of the first trace header.
    """
    fh.seek(REEL_HEADER_NUM_BYTES)

    declared_num_ext_headers = num_extended_textual_headers(binary_reel_header)

    if declared_num_ext_headers == -1:
        extended_headers = read_extended_headers_until_end(fh, encoding)
    elif declared_num_ext_headers > 0:
        extended_headers = read_extended_headers_counted(fh, declared_num_ext_headers, encoding)
    else:
        extended_headers = []

    # Concatenate every line of every extended header into one string.
    joined = ''.join(line for header in extended_headers for line in header)
    return joined.strip(' ')
_READ_PROPORTION = 0.75 # The proportion of time spent in catalog_traces
# reading the file. Determined empirically.
@@ -134,7 +287,8 @@ def catalog_traces(fh, bps, endian='>', progress=None):
trace index.
Args:
fh: A file-like-object open in binary mode.
fh: A file-like-object open in binary mode, positioned at the
start of the first trace header.
bps: The number of bytes per sample, such as obtained by a call
to bytes_per_sample()
@@ -164,11 +318,12 @@ def catalog_traces(fh, bps, endian='>', progress=None):
length = file_length(fh)
pos_begin = REEL_HEADER_NUM_BYTES
pos_begin = fh.tell()
trace_offset_catalog_builder = CatalogBuilder()
trace_length_catalog_builder = CatalogBuilder()
line_catalog_builder = CatalogBuilder()
alt_line_catalog_builder = CatalogBuilder()
cdp_catalog_builder = CatalogBuilder()
for trace_number in itertools.count():
@@ -186,6 +341,9 @@ def catalog_traces(fh, bps, endian='>', progress=None):
line_catalog_builder.add((trace_header.Inline3D,
trace_header.Crossline3D),
trace_number)
alt_line_catalog_builder.add((trace_header.TraceSequenceFile,
trace_header.cdp),
trace_number)
cdp_catalog_builder.add(trace_header.cdp, trace_number)
pos_end = pos_begin + TRACE_HEADER_NUM_BYTES + samples_bytes
pos_begin = pos_end
@@ -202,6 +360,12 @@ def catalog_traces(fh, bps, endian='>', progress=None):
progress_callback(_READ_PROPORTION + (_READ_PROPORTION * 3 / 4))
line_catalog = line_catalog_builder.create()
if line_catalog is None:
# Some 3D files put Inline and Crossline numbers in (TraceSequenceFile, cdp) pair
line_catalog = alt_line_catalog_builder.create()
progress_callback(1)
return (trace_offset_catalog,
@@ -209,6 +373,7 @@ def catalog_traces(fh, bps, endian='>', progress=None):
cdp_catalog,
line_catalog)
def read_trace_header(fh, trace_header_format, pos=None):
"""Read a trace header.
@@ -307,6 +472,21 @@ def unpack_values(buf, count, item_size, fmt, endian='>'):
# swapping ourselves.
def write_binary_reel_header(fh, binary_reel_header, endian='>'):
    """Write the binary_reel_header to the given file-like object.

    Every field defined in HEADER_DEF is written with write_binary_values()
    at the position and with the type given by its definition. Fields absent
    from binary_reel_header are written with the definition's 'def' value.

    Args:
        fh: A file-like object open in binary mode for writing.

        binary_reel_header: A dictionary of header values keyed by the field
            names used in HEADER_DEF. Keys not present in HEADER_DEF are
            ignored.

        endian: Accepted for symmetry with the other read/write functions.
            NOTE(review): this value is not forwarded to
            write_binary_values() - confirm whether that is intended.
    """
    for field_name, field_def in HEADER_DEF.items():
        field_pos = field_def['pos']
        field_ctype = field_def['type']
        if field_name in binary_reel_header:
            field_value = binary_reel_header[field_name]
        else:
            field_value = field_def['def']
        write_binary_values(fh, [field_value], field_ctype, field_pos)
def write_trace_header(fh, trace_header, trace_header_format, pos=None):
"""Write a TraceHeader to file.
@@ -385,6 +565,11 @@ def pack_values(values, fmt, endian='>'):
return struct.pack(c_format, *values)
# TODO: Consider generalising the below to also produce a ReelHeader record. Then modify
# read_binary_reel_header() to return such a record, and write_binary_reel_header() to accept such
# a record.
_TraceAttributeSpec = namedtuple('Record', ['name', 'pos', 'type'])
@@ -440,3 +625,5 @@ def _compile_trace_header_record():
TraceHeader = _compile_trace_header_record()