mirror of
https://github.com/wassname/segpy.git
synced 2026-07-02 22:47:36 +08:00
Support for reading textual (normal and extended) headers. Support for alternative inline/xline numbering in 3D seismic.
This commit is contained in:
+213
-26
@@ -6,39 +6,55 @@ import struct
|
||||
|
||||
from catalog import CatalogBuilder
|
||||
from datatypes import CTYPES, size_in_bytes
|
||||
from reel_header_definition import HEADER_DEF
|
||||
from encoding import guess_encoding
|
||||
from binary_reel_header_definition import HEADER_DEF
|
||||
from ibm_float import ibm2ieee, ieee2ibm
|
||||
from revisions import canonicalize_revision
|
||||
from trace_header_definition import TRACE_HEADER_DEF
|
||||
from util import file_length
|
||||
from util import file_length, batched
|
||||
from portability import EMPTY_BYTE_STRING
|
||||
|
||||
|
||||
REEL_HEADER_NUM_BYTES = 3600
|
||||
TEXTUAL_HEADER_NUM_BYTES = 3200
|
||||
BINARY_HEADER_NUM_BYTES = 400
|
||||
REEL_HEADER_NUM_BYTES = TEXTUAL_HEADER_NUM_BYTES + BINARY_HEADER_NUM_BYTES
|
||||
TRACE_HEADER_NUM_BYTES = 240
|
||||
|
||||
|
||||
def extract_revision(reel_header):
|
||||
def extract_revision(binary_reel_header):
|
||||
"""Obtain the SEG Y revision from the reel header.
|
||||
|
||||
Args:
|
||||
reel_header: A dictionary containing a reel header, such as obtained
|
||||
from read_reel_header()
|
||||
binary_reel_header: A dictionary containing a reel header, such as obtained
|
||||
from read_binary_reel_header()
|
||||
|
||||
Returns:
|
||||
One of the constants revisions.SEGY_REVISION_0 or
|
||||
revisions.SEGY_REVISION_1
|
||||
"""
|
||||
raw_revision = reel_header['SegyFormatRevisionNumber']
|
||||
raw_revision = binary_reel_header['SegyFormatRevisionNumber']
|
||||
return canonicalize_revision(raw_revision)
|
||||
|
||||
|
||||
def bytes_per_sample(reel_header, revision):
|
||||
def num_extended_textual_headers(binary_reel_header):
|
||||
"""Obtain the number of 3200 byte extended textual file headers.
|
||||
|
||||
A value of zero indicates there are no Extended Textual File Header records
|
||||
(i.e. this file has no Extended Textual File Header(s)). A value of -1 indicates
|
||||
that there are a variable number of Extended Textual File Header records and the
|
||||
end of the Extended Textual File Header is denoted by an ((SEG: EndText)) stanza
|
||||
in the final record. A positive value indicates that there are exactly that many
|
||||
Extended Textual File Header records.
|
||||
"""
|
||||
num_ext_headers = binary_reel_header['NumberOfExtTextualHeaders']
|
||||
return num_ext_headers
|
||||
|
||||
|
||||
def bytes_per_sample(binary_reel_header, revision):
|
||||
"""Determine the number of bytes per sample from the reel header.
|
||||
|
||||
Args:
|
||||
reel_header: A dictionary containing a reel header, such as obtained
|
||||
from read_reel_header()
|
||||
binary_reel_header: A dictionary containing a reel header, such as obtained
|
||||
from read_binary_reel_header()
|
||||
|
||||
revision: One of the constants revisions.SEGY_REVISION_0 or
|
||||
revisions.SEGY_REVISION_1
|
||||
@@ -46,12 +62,12 @@ def bytes_per_sample(reel_header, revision):
|
||||
Returns:
|
||||
An integer number of bytes per sample.
|
||||
"""
|
||||
dsf = reel_header['DataSampleFormat']
|
||||
dsf = binary_reel_header['DataSampleFormat']
|
||||
bps = HEADER_DEF["DataSampleFormat"]["bps"][revision][dsf]
|
||||
return bps
|
||||
|
||||
|
||||
def samples_per_trace(reel_header):
|
||||
def samples_per_trace(binary_reel_header):
|
||||
"""Determine the number of samples per trace from the reel header.
|
||||
|
||||
Note: There is no requirement for all traces to be of the same length,
|
||||
@@ -61,16 +77,16 @@ def samples_per_trace(reel_header):
|
||||
trace headers.
|
||||
|
||||
Args:
|
||||
reel_header: A dictionary containing a reel header, such as obtained
|
||||
from read_reel_header()
|
||||
binary_reel_header: A dictionary containing a reel header, such as obtained
|
||||
from read_binary_reel_header()
|
||||
|
||||
Returns:
|
||||
An integer number of samples per trace
|
||||
"""
|
||||
return reel_header['ns']
|
||||
return binary_reel_header['ns']
|
||||
|
||||
|
||||
def trace_length_bytes(reel_header, bps):
|
||||
def trace_length_bytes(binary_reel_header, bps):
|
||||
"""Determine the trace length in bytes from the reel header.
|
||||
|
||||
Note: There is no requirement for all traces to be of the same length,
|
||||
@@ -80,22 +96,51 @@ def trace_length_bytes(reel_header, bps):
|
||||
trace headers.
|
||||
|
||||
Args:
|
||||
reel_header: A dictionary containing a reel header, such as obtained
|
||||
from read_reel_header()
|
||||
binary_reel_header: A dictionary containing a reel header, such as obtained
|
||||
from read_binary_reel_header()
|
||||
|
||||
bps: The number of bytes per sample, such as obtained from a call to
|
||||
bytes_per_sample()
|
||||
|
||||
"""
|
||||
return samples_per_trace(reel_header) * bps + TRACE_HEADER_NUM_BYTES
|
||||
return samples_per_trace(binary_reel_header) * bps + TRACE_HEADER_NUM_BYTES
|
||||
|
||||
|
||||
def read_reel_header(fh, endian='>'):
|
||||
"""Read the SEG Y reel header, also known as the binary header.
|
||||
def read_textual_reel_header(fh, encoding=None):
|
||||
"""Read the SEG Y card image header, also known as the textual header
|
||||
|
||||
Args:
|
||||
fh: A file-like-object open in binary mode positioned such that the
|
||||
beginning of the reel header will be the next byte to be read.
|
||||
fh: A file-like object open in binary mode positioned such that the
|
||||
beginning of the textual header will be the next byte to read.
|
||||
|
||||
encoding: Optional encoding of the header in the file. If None (the
|
||||
default) a reliable heuristic will be used to guess the encoding.
|
||||
Typically 'cp037' for EBCDIC or 'ascii' for ASCII.
|
||||
|
||||
Returns:
|
||||
A tuple of forty Unicode strings (Python 2: unicode, Python 3: str)
|
||||
containing the transcoded header data.
|
||||
"""
|
||||
raw_header = fh.read(TEXTUAL_HEADER_NUM_BYTES)
|
||||
|
||||
num_bytes_read = len(raw_header)
|
||||
if num_bytes_read < TEXTUAL_HEADER_NUM_BYTES:
|
||||
raise EOFError("Only {} bytes of {} byte textual reel header could be read"
|
||||
.format(num_bytes_read, TEXTUAL_HEADER_NUM_BYTES))
|
||||
|
||||
if encoding is None:
|
||||
encoding = guess_encoding(raw_header)
|
||||
|
||||
lines = tuple(bytes(raw_line).decode(encoding) for raw_line in batched(raw_header, 80))
|
||||
return lines
|
||||
|
||||
|
||||
def read_binary_reel_header(fh, endian='>'):
|
||||
"""Read the SEG Y binary reel header.
|
||||
|
||||
Args:
|
||||
fh: A file-like object open in binary mode. Binary header is assumed to
|
||||
be at an offset of 3200 bytes from the beginning of the file.
|
||||
|
||||
endian: '>' for big-endian data (the standard and default), '<' for
|
||||
little-endian (non-standard)
|
||||
@@ -109,6 +154,114 @@ def read_reel_header(fh, endian='>'):
|
||||
return reel_header
|
||||
|
||||
|
||||
|
||||
def has_end_text_stanza(ext_header):
|
||||
"""Determine whether the header is the end text stanza.
|
||||
|
||||
Args:
|
||||
ext_header: A sequence of forty 80 character Unicode strings.
|
||||
|
||||
Returns:
|
||||
True if the header is the SEG Y Revision 1 end text header,
|
||||
otherwise False.
|
||||
"""
|
||||
return "((SEG: EndText))" in ext_header[0]
|
||||
|
||||
|
||||
def read_extended_headers_until_end(fh, encoding):
|
||||
"""Read an unspecified number of extended textual headers, until the end-text header is found.
|
||||
|
||||
Args:
|
||||
fh: A file-like object open in binary mode. The first of any extended textual headers
|
||||
is assumed to be at an offset of 3600 bytes from the beginning of the file
|
||||
(immediately following the binary reel header).
|
||||
|
||||
encoding: Optional encoding of the header in the file. If None (the
|
||||
default) a reliable heuristic will be used to guess the encoding.
|
||||
Typically 'cp037' for EBCDIC or 'ascii' for ASCII.
|
||||
|
||||
Returns:
|
||||
A list of tuples each containing forty 80-character Unicode strings.
|
||||
"""
|
||||
extended_headers = []
|
||||
while True:
|
||||
ext_header = read_textual_reel_header(fh, encoding)
|
||||
if has_end_text_stanza(ext_header):
|
||||
break
|
||||
extended_headers.append(ext_header)
|
||||
return extended_headers
|
||||
|
||||
|
||||
def read_extended_headers_counted(fh, num_expected, encoding):
|
||||
"""Read a specified number of extended textual headers.
|
||||
|
||||
If an end-text stanza is located prematurely (in anything other than the last expected header)
|
||||
reading will be terminated and a warning logged.
|
||||
|
||||
Args:
|
||||
fh: A file-like object open in binary mode. The first of any extended textual headers
|
||||
is assumed to be at an offset of 3600 bytes from the beginning of the file
|
||||
(immediately following the binary reel header).
|
||||
|
||||
num_expected: A non-negative integer of headers.
|
||||
|
||||
encoding: Optional encoding of the header in the file. If None (the
|
||||
default) a reliable heuristic will be used to guess the encoding.
|
||||
Typically 'cp037' for EBCDIC or 'ascii' for ASCII.
|
||||
|
||||
Returns:
|
||||
A list of tuples each containing forty 80-character Unicode strings.
|
||||
"""
|
||||
assert num_expected >= 0
|
||||
extended_headers = []
|
||||
for i in range(num_expected):
|
||||
ext_header = read_textual_reel_header(fh, encoding)
|
||||
if has_end_text_stanza(ext_header):
|
||||
if i != num_expected - 1:
|
||||
print("Unexpected end-text extended header") # TODO: Log this
|
||||
break
|
||||
extended_headers.append(ext_header)
|
||||
|
||||
return extended_headers
|
||||
|
||||
|
||||
def read_extended_textual_headers(fh, binary_reel_header, encoding):
|
||||
"""Read any extended textual reel headers.
|
||||
|
||||
Args:
|
||||
fh: A file-like object open in binary mode. The first of any extended textual headers
|
||||
is assumed to be at an offset of 3600 bytes from the beginning of the file
|
||||
(immediately following the binary reel header).
|
||||
|
||||
binary_reel_header: A dictionary containing data read from the binary
|
||||
reel header by the read_binary_reel_header() function.
|
||||
|
||||
encoding: Optional encoding of the header in the file. If None (the
|
||||
default) a reliable heuristic will be used to guess the encoding.
|
||||
Typically 'cp037' for EBCDIC or 'ascii' for ASCII.
|
||||
|
||||
Returns:
|
||||
A Unicode string containing the concatenated contents of any extended headers. If there
|
||||
were no extended headers, the string will be empty.
|
||||
|
||||
Postcondition:
|
||||
As a post-condition to this function, the file-pointer of fh will be
|
||||
positioned immediately after the last extended textual header, which
|
||||
should be the start of the first trace header.
|
||||
"""
|
||||
fh.seek(REEL_HEADER_NUM_BYTES)
|
||||
declared_num_ext_headers = num_extended_textual_headers(binary_reel_header)
|
||||
extended_headers = []
|
||||
if declared_num_ext_headers == -1:
|
||||
extended_headers.extend(read_extended_headers_until_end(fh, encoding))
|
||||
elif declared_num_ext_headers > 0:
|
||||
extended_headers.extend(read_extended_headers_counted(fh, declared_num_ext_headers, encoding))
|
||||
|
||||
# Concatenate the extended headers
|
||||
extended_textual_header = ''.join(line for header in extended_headers for line in header).strip(' ')
|
||||
return extended_textual_header
|
||||
|
||||
|
||||
_READ_PROPORTION = 0.75 # The proportion of time spent in catalog_traces
|
||||
# reading the file. Determined empirically.
|
||||
|
||||
@@ -134,7 +287,8 @@ def catalog_traces(fh, bps, endian='>', progress=None):
|
||||
trace index.
|
||||
|
||||
Args:
|
||||
fh: A file-like-object open in binary mode.
|
||||
fh: A file-like-object open in binary mode, positioned at the
|
||||
start of the first trace header.
|
||||
|
||||
bps: The number of bytes per sample, such as obtained by a call
|
||||
to bytes_per_sample()
|
||||
@@ -164,11 +318,12 @@ def catalog_traces(fh, bps, endian='>', progress=None):
|
||||
|
||||
length = file_length(fh)
|
||||
|
||||
pos_begin = REEL_HEADER_NUM_BYTES
|
||||
pos_begin = fh.tell()
|
||||
|
||||
trace_offset_catalog_builder = CatalogBuilder()
|
||||
trace_length_catalog_builder = CatalogBuilder()
|
||||
line_catalog_builder = CatalogBuilder()
|
||||
alt_line_catalog_builder = CatalogBuilder()
|
||||
cdp_catalog_builder = CatalogBuilder()
|
||||
|
||||
for trace_number in itertools.count():
|
||||
@@ -186,6 +341,9 @@ def catalog_traces(fh, bps, endian='>', progress=None):
|
||||
line_catalog_builder.add((trace_header.Inline3D,
|
||||
trace_header.Crossline3D),
|
||||
trace_number)
|
||||
alt_line_catalog_builder.add((trace_header.TraceSequenceFile,
|
||||
trace_header.cdp),
|
||||
trace_number)
|
||||
cdp_catalog_builder.add(trace_header.cdp, trace_number)
|
||||
pos_end = pos_begin + TRACE_HEADER_NUM_BYTES + samples_bytes
|
||||
pos_begin = pos_end
|
||||
@@ -202,6 +360,12 @@ def catalog_traces(fh, bps, endian='>', progress=None):
|
||||
progress_callback(_READ_PROPORTION + (_READ_PROPORTION * 3 / 4))
|
||||
|
||||
line_catalog = line_catalog_builder.create()
|
||||
|
||||
if line_catalog is None:
|
||||
# Some 3D files put Inline and Crossline numbers in (TraceSequenceFile, cdp) pair
|
||||
line_catalog = alt_line_catalog_builder.create()
|
||||
|
||||
|
||||
progress_callback(1)
|
||||
|
||||
return (trace_offset_catalog,
|
||||
@@ -209,6 +373,7 @@ def catalog_traces(fh, bps, endian='>', progress=None):
|
||||
cdp_catalog,
|
||||
line_catalog)
|
||||
|
||||
|
||||
def read_trace_header(fh, trace_header_format, pos=None):
|
||||
"""Read a trace header.
|
||||
|
||||
@@ -307,6 +472,21 @@ def unpack_values(buf, count, item_size, fmt, endian='>'):
|
||||
# swapping ourselves.
|
||||
|
||||
|
||||
def write_binary_reel_header(fh, binary_reel_header, endian='>'):
|
||||
"""Write the binary_reel_header to the given file-like object.
|
||||
|
||||
Args:
|
||||
fh: A file-like object open in binary mode for writing.
|
||||
|
||||
binary_reel_header: A dictionary of
|
||||
"""
|
||||
for key in HEADER_DEF:
|
||||
pos = HEADER_DEF[key]['pos']
|
||||
ctype = HEADER_DEF[key]['type']
|
||||
value = binary_reel_header[key] if key in binary_reel_header else HEADER_DEF[key]['def']
|
||||
write_binary_values(fh, [value], ctype, pos)
|
||||
|
||||
|
||||
def write_trace_header(fh, trace_header, trace_header_format, pos=None):
|
||||
"""Write a TraceHeader to file.
|
||||
|
||||
@@ -385,6 +565,11 @@ def pack_values(values, fmt, endian='>'):
|
||||
return struct.pack(c_format, *values)
|
||||
|
||||
|
||||
# TODO: Consider generalising the below to also produce a ReelHeader record. Then modify
|
||||
# read_binary_reel_header() to return such a record, and write_binary_reel_header() to accept such
|
||||
# a record.
|
||||
|
||||
|
||||
_TraceAttributeSpec = namedtuple('Record', ['name', 'pos', 'type'])
|
||||
|
||||
|
||||
@@ -440,3 +625,5 @@ def _compile_trace_header_record():
|
||||
|
||||
|
||||
TraceHeader = _compile_trace_header_record()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user