mirror of
https://github.com/wassname/seq2seq-time.git
synced 2026-06-27 16:46:54 +08:00
tidy data
This commit is contained in:
@@ -8,7 +8,7 @@ PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
|
||||
BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')
|
||||
PROFILE = default
|
||||
PROJECT_NAME = seq2seq-time
|
||||
PYTHON_INTERPRETER = seq2seq-time
|
||||
PYTHON_INTERPRETER = python
|
||||
|
||||
#################################################################################
|
||||
# COMMANDS #
|
||||
@@ -70,7 +70,7 @@ test:
|
||||
doc_reqs:
|
||||
conda env export --no-builds --from-history --name $(PROJECT_NAME) > requirements/environment.min.yaml
|
||||
conda env export --name $(PROJECT_NAME) > requirements/environment.max.yaml
|
||||
$(PYTHON_INTERPRETER) -m pip freeze > requirements/requirements.txt --name
|
||||
$(PYTHON_INTERPRETER) -m pip freeze > requirements/requirements.txt
|
||||
|
||||
#################################################################################
|
||||
# Self Documenting Commands #
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,725 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here I download and preprocess current data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"see\n",
|
||||
"- from https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/ae86e2f5-eaaf-459e-a405-e654d85adb9c\n",
|
||||
"- http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/WA/WATR20/Velocity/catalog.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T03:39:41.682896Z",
|
||||
"start_time": "2020-10-26T03:39:40.104951Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"import xarray as xr\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from urllib import request\n",
|
||||
"import os, shutil\n",
|
||||
"from matplotlib import pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-25T07:01:29.004548Z",
|
||||
"start_time": "2020-10-25T07:01:29.001734Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T03:39:42.553735Z",
|
||||
"start_time": "2020-10-26T03:39:41.685439Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from torchvision.datasets.utils import download_url, extract_archive, download_and_extract_archive"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T03:39:42.560919Z",
|
||||
"start_time": "2020-10-26T03:39:42.556898Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uptide\n",
|
||||
"\n",
|
||||
"# https://en.wikipedia.org/wiki/Theory_of_tides#Harmonic_analysis\n",
|
||||
"default_tidal_constituents = [\n",
|
||||
" 'M2', 'S2', 'N2', 'K2', # Semi-diurnal\n",
|
||||
" 'K1', 'O1', 'P1', 'Q1', # Diurnal\n",
|
||||
" 'M4', 'M6', 'S4', 'MK3', # Short period\n",
|
||||
" 'MM', 'SSA', 'SA' # Long period\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"def generate_tidal_periods(t:pd.Series, constituents:list=default_tidal_constituents):\n",
|
||||
" tide = uptide.Tides(constituents)\n",
|
||||
" t0 = t[0]\n",
|
||||
" td = t-t0\n",
|
||||
" td = td.dt.total_seconds().to_numpy().astype(int)\n",
|
||||
" tide.set_initial_time(t0)\n",
|
||||
"\n",
|
||||
" # calc tides\n",
|
||||
" amplitudes=np.ones_like(td)\n",
|
||||
" phases=np.zeros_like(td)\n",
|
||||
" eta = {}\n",
|
||||
" for name, f, amplitude, omega, phase, phi, u in zip(tide.constituents, tide.f, amplitudes, tide.omega,\n",
|
||||
" phases, tide.phi, tide.u):\n",
|
||||
" eta[name] = f*amplitude*np.cos(omega*td-phase+phi+u)\n",
|
||||
" df_eta = pd.DataFrame(eta, index=t)\n",
|
||||
" return df_eta"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T03:59:16.671092Z",
|
||||
"start_time": "2020-10-26T03:59:16.655680Z"
|
||||
},
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 'ANMN Two Rocks, WA, 204m mooring, Jul2009 - Dec2009. Preprocessed with DepthPP.'\n",
|
||||
"\n",
|
||||
"def get_current_timeseries(\n",
|
||||
" cache_folder=Path(\"../data/raw/IMOS_ANMN/\"), \n",
|
||||
" outfile=Path('../data/processed/currents/MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc')\n",
|
||||
" ):\n",
|
||||
" if not outfile.exists():\n",
|
||||
"\n",
|
||||
" files = [\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20090715T080000Z_WATR20_FV01_WATR20-0907-Continental-194_END-20090716T181317Z_C-20191122T052830Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20100409T080000Z_WATR20_FV01_WATR20-1004-Continental-194_END-20100430T084500Z_C-20191122T053845Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20101222T080000Z_WATR20_FV01_WATR20-1012-Continental-194_END-20110518T051500Z_C-20200916T020035Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20110608T080000Z_WATR20_FV01_WATR20-1106-Continental-194_END-20111122T035000Z_C-20200916T025619Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20111221T060300Z_WATR20_FV01_WATR20-1112-Continental-194_END-20120704T050500Z_C-20200916T043212Z.nc\", \n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20120726T044000Z_WATR20_FV01_WATR20-1207-Continental-194_END-20130204T044000Z_C-20200916T032027Z.nc\",\n",
|
||||
"\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20130221T080000Z_WATR20_FV01_WATR20-1302-Continental-194_END-20131003T035000Z_C-20180529T020609Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20131111T080000Z_WATR20_FV01_WATR20-1311-Continental-194_END-20140519T035000Z_C-20200114T033335Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20140710T080000Z_WATR20_FV01_WATR20-1407-Continental-194_END-20150121T021500Z_C-20180529T055902Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20150213T080000Z_WATR20_FV01_WATR20-1502-Continental-194_END-20150424T134002Z_C-20200114T035347Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20150914T080000Z_WATR20_FV01_WATR20-1509-Continental-194_END-20160331T043000Z_C-20180601T013623Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20160427T080000Z_WATR20_FV01_WATR20-1604-Continental-194_END-20160531T021800Z_C-20180531T071709Z.nc\",\n",
|
||||
" # \"IMOS_ANMN-WA_AETVZ_20170512T080000Z_WATR20_FV01_WATR20-1705-Continental-194_END-20170717T014558Z_C-20190805T004647Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20171204T080000Z_WATR20_FV01_WATR20-1712-Continental-194_END-20180618T030000Z_C-20180620T233149Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20180802T080000Z_WATR20_FV01_WATR20-1807-Continental-194_END-20190225T054500Z_C-20190227T001343Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20190307T080000Z_WATR20_FV01_WATR20-1903-Continental-194_END-20190911T003144Z_C-20200114T045053Z.nc\",\n",
|
||||
" \"IMOS_ANMN-WA_AETVZ_20190926T080000Z_WATR20_FV01_WATR20-1909-Continental-194_END-20200326T030000Z_C-20200420T064334Z.nc\",\n",
|
||||
" ]\n",
|
||||
" base=\"http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/\"\n",
|
||||
"\n",
|
||||
" # Download files\n",
|
||||
" [download_url(base+f, cache_folder) for f in files]\n",
|
||||
"\n",
|
||||
" # load and merge\n",
|
||||
" xds=[xr.open_dataset(cache_folder/f) for f in files]\n",
|
||||
" vars=['VCUR', 'UCUR', 'WCUR', 'TEMP', 'PRES_REL', 'DEPTH', 'ROLL', 'PITCH']\n",
|
||||
" xds2= [x[vars].isel(HEIGHT_ABOVE_SENSOR=18) for x in xds]\n",
|
||||
" xd = xr.concat(xds2, dim='TIME')\n",
|
||||
" xd = xd.where(xd.DEPTH>150) # remove outliers\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" xd['TIME'] = xd['TIME'].dt.round('10T')\n",
|
||||
" xd = xd.dropna(dim='TIME', subset=['VCUR', 'UCUR', 'WCUR'])\n",
|
||||
" # xd = xd.resample(TIME='30T').first()\n",
|
||||
" # Add tides, these are features that can be forecast\n",
|
||||
"\n",
|
||||
" # Generate tidal freqs\n",
|
||||
" t = xd.TIME.to_series()\n",
|
||||
" df_eta = generate_tidal_periods(t)\n",
|
||||
"\n",
|
||||
" # Add tidal freqs\n",
|
||||
" xd = xd.merge(df_eta)\n",
|
||||
"\n",
|
||||
" # Cache to nc\n",
|
||||
" xd.to_netcdf(outfile)\n",
|
||||
" print(f'wrote \"{outfile}\" with size {outfile.stat().st_size*1e-6:2.2f} MB')\n",
|
||||
" return outfile"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T04:04:08.230047Z",
|
||||
"start_time": "2020-10-26T04:04:08.099310Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>VCUR</th>\n",
|
||||
" <th>UCUR</th>\n",
|
||||
" <th>WCUR</th>\n",
|
||||
" <th>TEMP</th>\n",
|
||||
" <th>PRES_REL</th>\n",
|
||||
" <th>DEPTH</th>\n",
|
||||
" <th>ROLL</th>\n",
|
||||
" <th>PITCH</th>\n",
|
||||
" <th>LATITUDE</th>\n",
|
||||
" <th>LONGITUDE</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>O1</th>\n",
|
||||
" <th>P1</th>\n",
|
||||
" <th>Q1</th>\n",
|
||||
" <th>M4</th>\n",
|
||||
" <th>M6</th>\n",
|
||||
" <th>S4</th>\n",
|
||||
" <th>MK3</th>\n",
|
||||
" <th>MM</th>\n",
|
||||
" <th>SSA</th>\n",
|
||||
" <th>SA</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>TIME</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>2009-07-15 08:00:00</th>\n",
|
||||
" <td>-0.396391</td>\n",
|
||||
" <td>0.089687</td>\n",
|
||||
" <td>-0.009671</td>\n",
|
||||
" <td>18.549999</td>\n",
|
||||
" <td>205.076004</td>\n",
|
||||
" <td>203.550812</td>\n",
|
||||
" <td>4.6</td>\n",
|
||||
" <td>-3.4</td>\n",
|
||||
" <td>-31.728650</td>\n",
|
||||
" <td>115.037217</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.286288</td>\n",
|
||||
" <td>0.116457</td>\n",
|
||||
" <td>-1.014973</td>\n",
|
||||
" <td>-0.146817</td>\n",
|
||||
" <td>-0.801534</td>\n",
|
||||
" <td>-0.500000</td>\n",
|
||||
" <td>0.370082</td>\n",
|
||||
" <td>0.132683</td>\n",
|
||||
" <td>-0.686775</td>\n",
|
||||
" <td>-0.395743</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2009-07-15 08:10:00</th>\n",
|
||||
" <td>-0.407620</td>\n",
|
||||
" <td>0.085398</td>\n",
|
||||
" <td>-0.019875</td>\n",
|
||||
" <td>18.650000</td>\n",
|
||||
" <td>205.078003</td>\n",
|
||||
" <td>203.552795</td>\n",
|
||||
" <td>4.6</td>\n",
|
||||
" <td>-2.4</td>\n",
|
||||
" <td>-31.728650</td>\n",
|
||||
" <td>115.037217</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.242810</td>\n",
|
||||
" <td>0.159551</td>\n",
|
||||
" <td>-1.031149</td>\n",
|
||||
" <td>-0.304345</td>\n",
|
||||
" <td>-0.900573</td>\n",
|
||||
" <td>-0.642788</td>\n",
|
||||
" <td>0.494417</td>\n",
|
||||
" <td>0.134147</td>\n",
|
||||
" <td>-0.686601</td>\n",
|
||||
" <td>-0.395853</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2009-07-15 08:20:00</th>\n",
|
||||
" <td>-0.365314</td>\n",
|
||||
" <td>0.104038</td>\n",
|
||||
" <td>0.000991</td>\n",
|
||||
" <td>18.730000</td>\n",
|
||||
" <td>205.076996</td>\n",
|
||||
" <td>203.551788</td>\n",
|
||||
" <td>4.8</td>\n",
|
||||
" <td>-2.7</td>\n",
|
||||
" <td>-31.728650</td>\n",
|
||||
" <td>115.037217</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.198932</td>\n",
|
||||
" <td>0.202343</td>\n",
|
||||
" <td>-1.045759</td>\n",
|
||||
" <td>-0.453239</td>\n",
|
||||
" <td>-0.942304</td>\n",
|
||||
" <td>-0.766044</td>\n",
|
||||
" <td>0.610654</td>\n",
|
||||
" <td>0.135610</td>\n",
|
||||
" <td>-0.686427</td>\n",
|
||||
" <td>-0.395963</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2009-07-15 08:30:00</th>\n",
|
||||
" <td>-0.406632</td>\n",
|
||||
" <td>0.119376</td>\n",
|
||||
" <td>-0.003729</td>\n",
|
||||
" <td>18.799999</td>\n",
|
||||
" <td>205.067001</td>\n",
|
||||
" <td>203.541901</td>\n",
|
||||
" <td>4.7</td>\n",
|
||||
" <td>-2.4</td>\n",
|
||||
" <td>-31.728650</td>\n",
|
||||
" <td>115.037217</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.154727</td>\n",
|
||||
" <td>0.244751</td>\n",
|
||||
" <td>-1.058780</td>\n",
|
||||
" <td>-0.589276</td>\n",
|
||||
" <td>-0.924071</td>\n",
|
||||
" <td>-0.866025</td>\n",
|
||||
" <td>0.716890</td>\n",
|
||||
" <td>0.137073</td>\n",
|
||||
" <td>-0.686253</td>\n",
|
||||
" <td>-0.396072</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2009-07-15 08:40:00</th>\n",
|
||||
" <td>-0.383744</td>\n",
|
||||
" <td>0.090066</td>\n",
|
||||
" <td>-0.008921</td>\n",
|
||||
" <td>18.860001</td>\n",
|
||||
" <td>205.065994</td>\n",
|
||||
" <td>203.540894</td>\n",
|
||||
" <td>4.9</td>\n",
|
||||
" <td>-2.9</td>\n",
|
||||
" <td>-31.728650</td>\n",
|
||||
" <td>115.037217</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.110268</td>\n",
|
||||
" <td>0.286697</td>\n",
|
||||
" <td>-1.070194</td>\n",
|
||||
" <td>-0.708598</td>\n",
|
||||
" <td>-0.847034</td>\n",
|
||||
" <td>-0.939693</td>\n",
|
||||
" <td>0.811384</td>\n",
|
||||
" <td>0.138535</td>\n",
|
||||
" <td>-0.686080</td>\n",
|
||||
" <td>-0.396182</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2020-03-26 01:00:00</th>\n",
|
||||
" <td>-0.436635</td>\n",
|
||||
" <td>-0.784922</td>\n",
|
||||
" <td>-0.012147</td>\n",
|
||||
" <td>16.610001</td>\n",
|
||||
" <td>197.384003</td>\n",
|
||||
" <td>195.919662</td>\n",
|
||||
" <td>-2.9</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>-31.728717</td>\n",
|
||||
" <td>115.042133</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>-0.734741</td>\n",
|
||||
" <td>0.190139</td>\n",
|
||||
" <td>0.964792</td>\n",
|
||||
" <td>0.882484</td>\n",
|
||||
" <td>0.770444</td>\n",
|
||||
" <td>0.505439</td>\n",
|
||||
" <td>1.028587</td>\n",
|
||||
" <td>-0.881951</td>\n",
|
||||
" <td>0.990514</td>\n",
|
||||
" <td>0.997626</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2020-03-26 01:30:00</th>\n",
|
||||
" <td>-0.355067</td>\n",
|
||||
" <td>-0.845100</td>\n",
|
||||
" <td>-0.005201</td>\n",
|
||||
" <td>16.629999</td>\n",
|
||||
" <td>197.408005</td>\n",
|
||||
" <td>195.943497</td>\n",
|
||||
" <td>-2.7</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>-31.728717</td>\n",
|
||||
" <td>115.042133</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>-0.629257</td>\n",
|
||||
" <td>0.316317</td>\n",
|
||||
" <td>0.895545</td>\n",
|
||||
" <td>0.957914</td>\n",
|
||||
" <td>0.933774</td>\n",
|
||||
" <td>0.006292</td>\n",
|
||||
" <td>0.851981</td>\n",
|
||||
" <td>-0.880483</td>\n",
|
||||
" <td>0.990416</td>\n",
|
||||
" <td>0.997601</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2020-03-26 02:00:00</th>\n",
|
||||
" <td>-0.568277</td>\n",
|
||||
" <td>-0.816935</td>\n",
|
||||
" <td>-0.024944</td>\n",
|
||||
" <td>16.660000</td>\n",
|
||||
" <td>197.412994</td>\n",
|
||||
" <td>195.948425</td>\n",
|
||||
" <td>-2.6</td>\n",
|
||||
" <td>2.9</td>\n",
|
||||
" <td>-31.728717</td>\n",
|
||||
" <td>115.042133</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>-0.514470</td>\n",
|
||||
" <td>0.437113</td>\n",
|
||||
" <td>0.814067</td>\n",
|
||||
" <td>0.793395</td>\n",
|
||||
" <td>0.584762</td>\n",
|
||||
" <td>-0.494541</td>\n",
|
||||
" <td>0.551159</td>\n",
|
||||
" <td>-0.878996</td>\n",
|
||||
" <td>0.990316</td>\n",
|
||||
" <td>0.997576</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2020-03-26 02:30:00</th>\n",
|
||||
" <td>-0.306141</td>\n",
|
||||
" <td>-0.773147</td>\n",
|
||||
" <td>-0.028096</td>\n",
|
||||
" <td>16.719999</td>\n",
|
||||
" <td>197.419006</td>\n",
|
||||
" <td>195.954407</td>\n",
|
||||
" <td>-2.6</td>\n",
|
||||
" <td>2.7</td>\n",
|
||||
" <td>-31.728717</td>\n",
|
||||
" <td>115.042133</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>-0.392074</td>\n",
|
||||
" <td>0.550470</td>\n",
|
||||
" <td>0.721473</td>\n",
|
||||
" <td>0.430136</td>\n",
|
||||
" <td>-0.085096</td>\n",
|
||||
" <td>-0.862862</td>\n",
|
||||
" <td>0.169980</td>\n",
|
||||
" <td>-0.877489</td>\n",
|
||||
" <td>0.990217</td>\n",
|
||||
" <td>0.997551</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2020-03-26 03:00:00</th>\n",
|
||||
" <td>-0.218563</td>\n",
|
||||
" <td>-0.757217</td>\n",
|
||||
" <td>0.013233</td>\n",
|
||||
" <td>16.790001</td>\n",
|
||||
" <td>197.429001</td>\n",
|
||||
" <td>195.964340</td>\n",
|
||||
" <td>-2.8</td>\n",
|
||||
" <td>2.8</td>\n",
|
||||
" <td>-31.728717</td>\n",
|
||||
" <td>115.042133</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>-0.263881</td>\n",
|
||||
" <td>0.654460</td>\n",
|
||||
" <td>0.619026</td>\n",
|
||||
" <td>-0.040868</td>\n",
|
||||
" <td>-0.708264</td>\n",
|
||||
" <td>-0.999980</td>\n",
|
||||
" <td>-0.235982</td>\n",
|
||||
" <td>-0.875962</td>\n",
|
||||
" <td>0.990116</td>\n",
|
||||
" <td>0.997526</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>239075 rows × 25 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" VCUR UCUR WCUR TEMP PRES_REL \\\n",
|
||||
"TIME \n",
|
||||
"2009-07-15 08:00:00 -0.396391 0.089687 -0.009671 18.549999 205.076004 \n",
|
||||
"2009-07-15 08:10:00 -0.407620 0.085398 -0.019875 18.650000 205.078003 \n",
|
||||
"2009-07-15 08:20:00 -0.365314 0.104038 0.000991 18.730000 205.076996 \n",
|
||||
"2009-07-15 08:30:00 -0.406632 0.119376 -0.003729 18.799999 205.067001 \n",
|
||||
"2009-07-15 08:40:00 -0.383744 0.090066 -0.008921 18.860001 205.065994 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"2020-03-26 01:00:00 -0.436635 -0.784922 -0.012147 16.610001 197.384003 \n",
|
||||
"2020-03-26 01:30:00 -0.355067 -0.845100 -0.005201 16.629999 197.408005 \n",
|
||||
"2020-03-26 02:00:00 -0.568277 -0.816935 -0.024944 16.660000 197.412994 \n",
|
||||
"2020-03-26 02:30:00 -0.306141 -0.773147 -0.028096 16.719999 197.419006 \n",
|
||||
"2020-03-26 03:00:00 -0.218563 -0.757217 0.013233 16.790001 197.429001 \n",
|
||||
"\n",
|
||||
" DEPTH ROLL PITCH LATITUDE LONGITUDE ... \\\n",
|
||||
"TIME ... \n",
|
||||
"2009-07-15 08:00:00 203.550812 4.6 -3.4 -31.728650 115.037217 ... \n",
|
||||
"2009-07-15 08:10:00 203.552795 4.6 -2.4 -31.728650 115.037217 ... \n",
|
||||
"2009-07-15 08:20:00 203.551788 4.8 -2.7 -31.728650 115.037217 ... \n",
|
||||
"2009-07-15 08:30:00 203.541901 4.7 -2.4 -31.728650 115.037217 ... \n",
|
||||
"2009-07-15 08:40:00 203.540894 4.9 -2.9 -31.728650 115.037217 ... \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"2020-03-26 01:00:00 195.919662 -2.9 3.0 -31.728717 115.042133 ... \n",
|
||||
"2020-03-26 01:30:00 195.943497 -2.7 3.0 -31.728717 115.042133 ... \n",
|
||||
"2020-03-26 02:00:00 195.948425 -2.6 2.9 -31.728717 115.042133 ... \n",
|
||||
"2020-03-26 02:30:00 195.954407 -2.6 2.7 -31.728717 115.042133 ... \n",
|
||||
"2020-03-26 03:00:00 195.964340 -2.8 2.8 -31.728717 115.042133 ... \n",
|
||||
"\n",
|
||||
" O1 P1 Q1 M4 M6 \\\n",
|
||||
"TIME \n",
|
||||
"2009-07-15 08:00:00 0.286288 0.116457 -1.014973 -0.146817 -0.801534 \n",
|
||||
"2009-07-15 08:10:00 0.242810 0.159551 -1.031149 -0.304345 -0.900573 \n",
|
||||
"2009-07-15 08:20:00 0.198932 0.202343 -1.045759 -0.453239 -0.942304 \n",
|
||||
"2009-07-15 08:30:00 0.154727 0.244751 -1.058780 -0.589276 -0.924071 \n",
|
||||
"2009-07-15 08:40:00 0.110268 0.286697 -1.070194 -0.708598 -0.847034 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"2020-03-26 01:00:00 -0.734741 0.190139 0.964792 0.882484 0.770444 \n",
|
||||
"2020-03-26 01:30:00 -0.629257 0.316317 0.895545 0.957914 0.933774 \n",
|
||||
"2020-03-26 02:00:00 -0.514470 0.437113 0.814067 0.793395 0.584762 \n",
|
||||
"2020-03-26 02:30:00 -0.392074 0.550470 0.721473 0.430136 -0.085096 \n",
|
||||
"2020-03-26 03:00:00 -0.263881 0.654460 0.619026 -0.040868 -0.708264 \n",
|
||||
"\n",
|
||||
" S4 MK3 MM SSA SA \n",
|
||||
"TIME \n",
|
||||
"2009-07-15 08:00:00 -0.500000 0.370082 0.132683 -0.686775 -0.395743 \n",
|
||||
"2009-07-15 08:10:00 -0.642788 0.494417 0.134147 -0.686601 -0.395853 \n",
|
||||
"2009-07-15 08:20:00 -0.766044 0.610654 0.135610 -0.686427 -0.395963 \n",
|
||||
"2009-07-15 08:30:00 -0.866025 0.716890 0.137073 -0.686253 -0.396072 \n",
|
||||
"2009-07-15 08:40:00 -0.939693 0.811384 0.138535 -0.686080 -0.396182 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"2020-03-26 01:00:00 0.505439 1.028587 -0.881951 0.990514 0.997626 \n",
|
||||
"2020-03-26 01:30:00 0.006292 0.851981 -0.880483 0.990416 0.997601 \n",
|
||||
"2020-03-26 02:00:00 -0.494541 0.551159 -0.878996 0.990316 0.997576 \n",
|
||||
"2020-03-26 02:30:00 -0.862862 0.169980 -0.877489 0.990217 0.997551 \n",
|
||||
"2020-03-26 03:00:00 -0.999980 -0.235982 -0.875962 0.990116 0.997526 \n",
|
||||
"\n",
|
||||
"[239075 rows x 25 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"xd.to_dataframe().drop(columns=['HEIGHT_ABOVE_SENSOR', 'NOMINAL_DEPTH'])#.columns#[['VCUR', 'UCUR', 'WCUR', 'TEMP', 'PRES_REL', 'DEPTH', 'ROLL', 'PITCH']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T03:44:41.020269Z",
|
||||
"start_time": "2020-10-26T03:44:41.017322Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# for x in xds:\n",
|
||||
"# x.DEPTH.plot()\n",
|
||||
"# plt.ylim(190, 210)\n",
|
||||
"\n",
|
||||
"# plt.show()\n",
|
||||
"# for x in xds:\n",
|
||||
"# x.plot.scatter('LONGITUDE', 'LONGITUDE')\n",
|
||||
"# plt.show()\n",
|
||||
"\n",
|
||||
"# xd['VCUR'].plot(alpha=0.5)\n",
|
||||
"# xd['UCUR'].plot(alpha=0.5)\n",
|
||||
"# xd['WCUR'].plot(alpha=0.5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T03:51:16.821117Z",
|
||||
"start_time": "2020-10-26T03:51:16.606212Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T03:51:17.614829Z",
|
||||
"start_time": "2020-10-26T03:51:17.204376Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"PosixPath('../data/processed/currents/MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc')"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-10-26T03:51:18.335001Z",
|
||||
"start_time": "2020-10-26T03:51:18.328504Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"43.107293"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "seq2seq-time",
|
||||
"language": "python",
|
||||
"name": "seq2seq-time"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.8"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": 1,
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "Table of Contents",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": false,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "219.011px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,190 @@
|
||||
absl-py @ file:///home/conda/feedstock_root/build_artifacts/absl-py_1602289403781/work
|
||||
aiohttp @ file:///tmp/build/80754af9/aiohttp_1602530305083/work
|
||||
appdirs==1.4.4
|
||||
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1602546578258/work
|
||||
async-generator==1.10
|
||||
async-timeout==3.0.1
|
||||
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1599308529326/work
|
||||
awscli @ file:///home/conda/feedstock_root/build_artifacts/awscli_1602890549104/work
|
||||
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
|
||||
backports.functools-lru-cache==1.6.1
|
||||
black @ file:///home/conda/feedstock_root/build_artifacts/black-recipe_1599478779128/work
|
||||
bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1600454382015/work
|
||||
blinker==1.4
|
||||
bokeh @ file:///home/conda/feedstock_root/build_artifacts/bokeh_1602690186583/work
|
||||
botocore @ file:///home/conda/feedstock_root/build_artifacts/botocore_1602884371056/work
|
||||
bpe==1.0
|
||||
brotlipy==0.7.0
|
||||
cachetools @ file:///home/conda/feedstock_root/build_artifacts/cachetools_1593420445823/work
|
||||
certifi==2020.6.20
|
||||
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1602537219008/work
|
||||
cftime @ file:///home/conda/feedstock_root/build_artifacts/cftime_1602504440833/work
|
||||
chardet @ file:///home/conda/feedstock_root/build_artifacts/chardet_1602255309768/work
|
||||
click==7.1.2
|
||||
cloudpickle @ file:///home/conda/feedstock_root/build_artifacts/cloudpickle_1598400192773/work
|
||||
colorama==0.4.3
|
||||
colorcet==2.0.2
|
||||
confuse @ file:///home/conda/feedstock_root/build_artifacts/confuse_1593279073800/work
|
||||
cryptography @ file:///home/conda/feedstock_root/build_artifacts/cryptography_1602614063317/work
|
||||
cycler==0.10.0
|
||||
cytoolz==0.11.0
|
||||
dask @ file:///home/conda/feedstock_root/build_artifacts/dask-core_1602029610262/work
|
||||
datashader @ file:///home/conda/feedstock_root/build_artifacts/datashader_1597664023361/work
|
||||
datashape==0.5.4
|
||||
decorator==4.4.2
|
||||
defusedxml==0.6.0
|
||||
distributed @ file:///home/conda/feedstock_root/build_artifacts/distributed_1602493186453/work
|
||||
docutils==0.15.2
|
||||
entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1602701733603/work/dist/entrypoints-0.3-py2.py3-none-any.whl
|
||||
fastparquet @ file:///home/conda/feedstock_root/build_artifacts/fastparquet_1594909864671/work
|
||||
fsspec @ file:///home/conda/feedstock_root/build_artifacts/fsspec_1602700749102/work
|
||||
future @ file:///home/conda/feedstock_root/build_artifacts/future_1602538316704/work
|
||||
google-auth @ file:///tmp/build/80754af9/google-auth_1601995530934/work
|
||||
google-auth-oauthlib==0.4.1
|
||||
grpcio @ file:///home/conda/feedstock_root/build_artifacts/grpcio_1596715635580/work
|
||||
HeapDict==1.0.1
|
||||
holoviews @ file:///home/conda/feedstock_root/build_artifacts/holoviews_1600439907620/work
|
||||
htmlmin==0.1.12
|
||||
hypothesis==4.32.3
|
||||
idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1593328102638/work
|
||||
ImageHash @ file:///home/conda/feedstock_root/build_artifacts/imagehash_1588182723834/work
|
||||
importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1600910428305/work
|
||||
iniconfig @ file:///tmp/build/80754af9/iniconfig_1602780191262/work
|
||||
ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1602682802500/work/dist/ipykernel-5.3.4-py3-none-any.whl
|
||||
ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1602640393953/work
|
||||
ipython-genutils==0.2.0
|
||||
ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1599554010055/work
|
||||
jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1602395235501/work
|
||||
Jinja2==2.11.2
|
||||
jmespath @ file:///home/conda/feedstock_root/build_artifacts/jmespath_1589369830981/work
|
||||
joblib @ file:///home/conda/feedstock_root/build_artifacts/joblib_1601671685479/work
|
||||
jsonschema==3.2.0
|
||||
jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1598486169312/work
|
||||
jupyter-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1602537277085/work
|
||||
jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1601375948261/work
|
||||
kiwisolver @ file:///home/conda/feedstock_root/build_artifacts/kiwisolver_1602517221725/work
|
||||
llvmlite==0.34.0
|
||||
locket==0.2.0
|
||||
Markdown @ file:///home/conda/feedstock_root/build_artifacts/markdown_1602544730470/work
|
||||
MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1602267316845/work
|
||||
matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-suite_1602600750896/work
|
||||
mccabe==0.6.1
|
||||
missingno==0.4.2
|
||||
mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1602381812692/work
|
||||
more-itertools @ file:///home/conda/feedstock_root/build_artifacts/more-itertools_1598643641143/work
|
||||
msgpack @ file:///home/conda/feedstock_root/build_artifacts/msgpack-python_1602380760823/work
|
||||
multidict @ file:///tmp/build/80754af9/multidict_1600456400975/work
|
||||
multipledispatch==0.6.0
|
||||
mypy @ file:///home/conda/feedstock_root/build_artifacts/mypy_1602270162469/work
|
||||
mypy-extensions==0.4.3
|
||||
nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1602859080374/work
|
||||
nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert_1602715396354/work
|
||||
nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1602732862338/work
|
||||
nc-time-axis==1.2.0
|
||||
nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1601342677072/work
|
||||
netCDF4 @ file:///home/conda/feedstock_root/build_artifacts/netcdf4_1602508544050/work
|
||||
networkx @ file:///home/conda/feedstock_root/build_artifacts/networkx_1598210780226/work
|
||||
notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1602720128568/work
|
||||
numba @ file:///home/conda/feedstock_root/build_artifacts/numba_1599084798687/work
|
||||
numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1602429044575/work
|
||||
oauthlib==3.1.0
|
||||
olefile @ file:///home/conda/feedstock_root/build_artifacts/olefile_1602866521163/work
|
||||
packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1589925210001/work
|
||||
pandas @ file:///home/conda/feedstock_root/build_artifacts/pandas_1602502751364/work
|
||||
pandas-profiling @ file:///home/conda/feedstock_root/build_artifacts/pandas-profiling_1599137999474/work
|
||||
pandocfilters==1.4.2
|
||||
panel @ file:///home/conda/feedstock_root/build_artifacts/panel_1592920888719/work
|
||||
param==1.9.3
|
||||
parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1595548966091/work
|
||||
partd==1.1.0
|
||||
pathspec==0.8.0
|
||||
patsy==0.5.1
|
||||
pexpect==4.8.0
|
||||
phik @ file:///home/conda/feedstock_root/build_artifacts/phik_1590331950347/work
|
||||
pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602535628301/work
|
||||
Pillow @ file:///home/conda/feedstock_root/build_artifacts/pillow_1602708615436/work
|
||||
pluggy @ file:///home/conda/feedstock_root/build_artifacts/pluggy_1602337415071/work
|
||||
prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1590412252446/work
|
||||
prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1602524994744/work
|
||||
protobuf==3.13.0
|
||||
psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1602264040045/work
|
||||
ptyprocess==0.6.0
|
||||
py @ file:///home/conda/feedstock_root/build_artifacts/py_1593088446458/work
|
||||
pyasn1==0.4.8
|
||||
pyasn1-modules==0.2.8
|
||||
pycodestyle @ file:///home/conda/feedstock_root/build_artifacts/pycodestyle_1589305246696/work
|
||||
pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1593275161868/work
|
||||
pyct @ file:///tmp/build/80754af9/pyct_1600458283986/work
|
||||
pydocstyle @ file:///home/conda/feedstock_root/build_artifacts/pydocstyle_1598747747227/work
|
||||
pyflakes==2.2.0
|
||||
Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1600347314331/work
|
||||
PyJWT==1.7.1
|
||||
pylama==7.7.1
|
||||
pyOpenSSL==19.1.0
|
||||
pyparsing==2.4.7
|
||||
PyQt5==5.12.3
|
||||
PyQt5-sip==4.19.18
|
||||
PyQtChart==5.12
|
||||
PyQtWebEngine==5.12.1
|
||||
pyrsistent @ file:///home/conda/feedstock_root/build_artifacts/pyrsistent_1602259985647/work
|
||||
PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1602326924965/work
|
||||
pytest==6.1.1
|
||||
python-dateutil==2.8.1
|
||||
pytorch-fast-transformers==0.3.0
|
||||
pytorch-lightning @ file:///home/conda/feedstock_root/build_artifacts/pytorch-lightning_1602786328955/work
|
||||
pytorch-lightning-bolts==0.2.5
|
||||
pytz==2020.1
|
||||
pyviz-comms @ file:///home/conda/feedstock_root/build_artifacts/pyviz_comms_1594121601757/work
|
||||
PyWavelets @ file:///home/conda/feedstock_root/build_artifacts/pywavelets_1602504439149/work
|
||||
PyYAML==5.3.1
|
||||
pyzmq==19.0.2
|
||||
regex @ file:///home/conda/feedstock_root/build_artifacts/regex_1602771401882/work
|
||||
requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1592425495151/work
|
||||
requests-oauthlib @ file:///home/conda/feedstock_root/build_artifacts/requests-oauthlib_1595492159598/work
|
||||
rsa @ file:///home/conda/feedstock_root/build_artifacts/rsa_1591990902901/work
|
||||
s3transfer @ file:///home/conda/feedstock_root/build_artifacts/s3transfer_1602631002642/work
|
||||
scikit-learn @ file:///home/conda/feedstock_root/build_artifacts/scikit-learn_1596546074663/work
|
||||
scipy @ file:///home/conda/feedstock_root/build_artifacts/scipy_1602862657152/work
|
||||
seaborn @ file:///home/conda/feedstock_root/build_artifacts/seaborn-base_1599592695803/work
|
||||
Send2Trash==1.5.0
|
||||
# Editable Git install with no remote (seq2seq-time==0.1.0)
|
||||
-e /media/wassname/Storage5/projects2/3ST/seq2seq-time
|
||||
six @ file:///home/conda/feedstock_root/build_artifacts/six_1590081179328/work
|
||||
sklearn==0.0
|
||||
sklearn-pandas==2.0.2
|
||||
snowballstemmer==2.0.0
|
||||
sortedcontainers @ file:///home/conda/feedstock_root/build_artifacts/sortedcontainers_1591999956871/work
|
||||
statsmodels @ file:///home/conda/feedstock_root/build_artifacts/statsmodels_1602599914091/work
|
||||
tangled-up-in-unicode @ file:///home/conda/feedstock_root/build_artifacts/tangled-up-in-unicode_1589363771888/work
|
||||
tblib @ file:///tmp/build/80754af9/tblib_1597928476713/work
|
||||
tensorboard @ file:///home/conda/feedstock_root/build_artifacts/tensorboard_1595378845776/work/tensorboard-2.3.0-py3-none-any.whl
|
||||
tensorboard-plugin-wit @ file:///home/conda/feedstock_root/build_artifacts/tensorboard-plugin-wit_1592816951245/work/tensorboard_plugin_wit-1.6.0.post3-py3-none-any.whl
|
||||
terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1602679584439/work
|
||||
testpath==0.4.4
|
||||
threadpoolctl @ file:///tmp/tmp79xdzxkt/threadpoolctl-2.1.0-py3-none-any.whl
|
||||
thrift==0.11.0
|
||||
toml @ file:///home/conda/feedstock_root/build_artifacts/toml_1589469402899/work
|
||||
toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1600973991856/work
|
||||
torch==1.6.0
|
||||
torchsummaryX==1.3.0
|
||||
torchvision==0.7.0
|
||||
tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1602488893411/work
|
||||
tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1602171507552/work
|
||||
traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1602771532708/work
|
||||
typed-ast==1.4.1
|
||||
typing-extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1602702424206/work
|
||||
ucimlr @ git+https://github.com/isacarnekvist/ucimlr@329ed0586effeb2d57f179f3abb0da9862feed01
|
||||
unlzw==0.1.1
|
||||
uptide==1.0
|
||||
urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1595434816409/work
|
||||
visions @ file:///home/conda/feedstock_root/build_artifacts/visions_1597645571032/work
|
||||
wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1600965781394/work
|
||||
webencodings==0.5.1
|
||||
Werkzeug==1.0.1
|
||||
widgetsnbextension @ file:///home/conda/feedstock_root/build_artifacts/widgetsnbextension_1602769155190/work
|
||||
xarray @ file:///home/conda/feedstock_root/build_artifacts/xarray_1600638299066/work
|
||||
xlrd==1.2.0
|
||||
yapf @ file:///home/conda/feedstock_root/build_artifacts/yapf_1595950469082/work
|
||||
yarl @ file:///home/conda/feedstock_root/build_artifacts/yarl_1602671471836/work
|
||||
zict==2.0.0
|
||||
zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1602852756910/work
|
||||
|
||||
@@ -0,0 +1,309 @@
|
||||
from typing import List, Tuple
|
||||
from torchvision.datasets.utils import download_url, extract_archive, download_and_extract_archive
|
||||
import os
|
||||
from tqdm.auto import tqdm
|
||||
from pathlib import Path
|
||||
from sklearn_pandas import DataFrameMapper
|
||||
import xarray as xr
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from .dataset import Seq2SeqDataSet
|
||||
from .util import normalize_encode_dataframe, timeseries_split
|
||||
from .tidal import generate_tidal_periods
|
||||
|
||||
|
||||
class RegressionForecastData:
    """Base class for time-series regression/forecasting datasets.

    Subclasses set ``columns_target`` and ``columns_forecast`` and implement
    ``download``. Constructing an instance runs the whole pipeline:
    download -> normalize -> train/test split -> sanity checks.
    """

    # Input columns which can be included in the future window
    # (e.g. week, or a weather forecast).
    columns_forecast = None
    # Target columns
    columns_target = None

    def __init__(self, datasets_root):
        """Download, normalize, split and check the data.

        Args:
            datasets_root: directory where raw files are stored. It must
                support the ``/`` operator (e.g. ``pathlib.Path``).
        """
        self.datasets_root = datasets_root

        # Process data
        self.df = self.download()
        self.df_norm, self.scaler = self.normalize(self.df)
        # Transformer of the first scaler feature whose column is a target
        # column; kept so callers can reach the target's fitted scaler.
        self.output_scaler = next(
            filter(lambda r: r[0][0] in self.columns_target, self.scaler.features)
        )[-1]
        self.df_train, self.df_test = self.split(self.df_norm)

        # Check processing
        self.check()

    def download(self) -> pd.DataFrame:
        """Implement this method to download data and return raw df"""
        raise NotImplementedError()

    def normalize(self, df) -> Tuple[pd.DataFrame, "DataFrameMapper"]:
        """Return the normalized/encoded frame and the fitted mapper."""
        df_norm, scaler = normalize_encode_dataframe(df)
        return df_norm, scaler

    def split(self, df_norm: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Split the normalized frame into (train, test) frames."""
        df_train, df_test = timeseries_split(df_norm)
        return df_train, df_test

    def check(self) -> None:
        """Check the resulting dataframe.

        Raises:
            AssertionError: if the index is not a ``DatetimeIndex`` with a
                frequency, if the column lists are unset, if a target column
                is also listed as a forecast column, or if a listed column
                is missing from ``self.df``.
        """
        assert isinstance(self.df.index, pd.DatetimeIndex), 'index must be datetime'
        assert self.df.index.freq is not None, 'df must have freq'
        assert self.columns_forecast is not None
        assert self.columns_target is not None
        # The previous `~set(...).issubset(...)` was always truthy (bitwise
        # NOT of a bool is -1 or -2), so this check could never fail.
        assert set(self.columns_target).isdisjoint(self.columns_forecast), 'target columns should not be in forecast'
        assert set(self.columns_forecast).issubset(set(self.df.columns)), 'columns_forecast must be in df'
        assert set(self.columns_target).issubset(set(self.df.columns)), 'columns_target must be in df'

    def to_datasets(self, window_past: int, window_future: int, valid: bool = False) -> Tuple["Seq2SeqDataSet", "Seq2SeqDataSet"]:
        """Convert the train and test frames to torch datasets.

        Args:
            window_past: passed to ``Seq2SeqDataSet`` as ``window_past``.
            window_future: passed to ``Seq2SeqDataSet`` as ``window_future``.
            valid: currently unused.

        Returns:
            ``(ds_train, ds_test)``.
        """
        columns_past = getattr(self, 'columns_past', None)
        if columns_past is None:
            # NOTE(review): no `columns_past` is defined on this class, so it
            # is derived here as every input column that is neither a target
            # nor a forecast column - confirm this matches the intent of
            # Seq2SeqDataSet's `columns_past` argument.
            known = set(self.columns_target) | set(self.columns_forecast)
            columns_past = [c for c in self.df_train.columns if c not in known]

        ds_train = Seq2SeqDataSet(
            self.df_train,
            window_past=window_past,
            window_future=window_future,
            columns_target=self.columns_target,
            columns_past=columns_past,
        )
        ds_test = Seq2SeqDataSet(
            self.df_test,
            window_past=window_past,
            window_future=window_future,
            columns_target=self.columns_target,
            columns_past=columns_past,
        )
        return ds_train, ds_test

    def __repr__(self):
        # getattr: stay printable even if download() failed before df was set.
        df = getattr(self, 'df', None)
        return f'<{type(self).__name__} {df.shape if (df is not None) else None}>'
|
||||
|
||||
class GasSensor(RegressionForecastData):
    """
    Gas sensor array temperature modulation dataset.

    See: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+array+temperature+modulation
    """

    columns_target = ['R1 (MOhm)']
    columns_forecast = ['Flow rate (mL/min)', 'Heater voltage (V)']

    def download(self):
        """Download (if needed) and load all CSV files into one dataframe.

        Returns:
            DataFrame indexed by timestamp (file start time taken from the
            file name plus the ``Time (s)`` column), restricted to six
            columns and resampled to 0.3 second bins.
        """
        url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00487/gas-sensor-array-temperature-modulation.zip'

        # download if needed
        extract_path = self.datasets_root/'GasSensor'
        files = sorted(extract_path.glob('*.csv'))
        if len(files) < 13:
            print('download_and_extract_archive')
            download_and_extract_archive(url, self.datasets_root, extract_path)

        # Load csv's
        files = sorted(extract_path.glob('*.csv'))
        frames = []
        for f in files:
            # The file stem encodes the recording start time.
            now = pd.to_datetime(f.stem, format='%Y%m%d_%H%M%S')
            frame = pd.read_csv(f)
            frame.index = pd.to_timedelta(frame['Time (s)'], unit='s') + now
            frames.append(frame)

        # Use the concatenation of ALL files. Previously it was stored on
        # self.df and discarded, so only the last CSV was returned.
        df = pd.concat(frames).dropna(subset=self.columns_target)

        df = df[['CO (ppm)', 'Humidity (%r.h.)', 'Temperature (C)',
                 'Flow rate (mL/min)', 'Heater voltage (V)', 'R1 (MOhm)']]
        df = df.resample('0.3S').first()

        return df
|
||||
|
||||
|
||||
class MetroInterstateTraffic(RegressionForecastData):
    """
    Metro Interstate Traffic Volume dataset, resampled to hourly bins.

    See: https://archive.ics.uci.edu/ml/datasets/Metro+Interstate+Traffic+Volume
    """

    columns_target = ['traffic_volume']
    columns_forecast = ['holiday', 'month', 'day', 'week', 'hour',
                        'minute', 'dayofweek']

    def download(self):
        """Fetch the CSV if it is not cached, then build the raw dataframe."""
        url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00492/Metro_Interstate_Traffic_Volume.csv.gz'

        # Only hit the network when the file is not already on disk
        filename = '00492_Metro_Interstate_Traffic_Volume.csv.gz'
        local_path = self.datasets_root/filename
        if not local_path.exists():
            download_url(url, self.datasets_root, filename)

        raw = pd.read_csv(local_path, index_col='date_time', parse_dates=['date_time'])
        raw = raw.dropna(subset=self.columns_target)
        df = raw.resample('1H').first()

        # Holiday becomes a boolean flag: True wherever a value is present
        is_holiday = ~df['holiday'].isna()
        df['holiday'] = is_holiday
        # Missing weather labels get an explicit 'none' category
        for col in ('weather_main', 'weather_description'):
            df[col] = df[col].fillna('none')

        # Calendar features derived from the index
        stamps = df.index.to_series()
        df = df.assign(
            month=stamps.dt.month,
            day=stamps.dt.day,
            week=stamps.dt.isocalendar().week,
            hour=stamps.dt.hour,
            minute=stamps.dt.minute,
            dayofweek=stamps.dt.dayofweek,
        )

        return df
|
||||
|
||||
class AppliancesEnergyPrediction(RegressionForecastData):
    """
    Appliances energy prediction dataset, resampled to 10 minute bins.

    See: https://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction
    """

    columns_target = ['log_Appliances']
    columns_forecast = ['month', 'day', 'week', 'hour',
                        'minute', 'dayofweek']

    def download(self):
        """Fetch the CSV if it is not cached, then build the raw dataframe."""
        url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv'

        # Only hit the network when the file is not already on disk
        filename = '00374_AppliancesEnergyPrediction.csv'
        local_path = self.datasets_root/filename
        if not local_path.exists():
            download_url(url, self.datasets_root, filename)
        raw = pd.read_csv(local_path, index_col='date', parse_dates=['date'])

        # Replace the target with its log (small offset keeps log finite at 0)
        log_target = np.log(raw['Appliances'] + 1e-5)
        df = raw.drop(columns=['Appliances'])
        df['log_Appliances'] = log_target
        df = df.dropna(subset=self.columns_target)
        df = df.resample('10T').first()

        # Calendar features derived from the index
        stamps = df.index.to_series()
        df = df.assign(
            month=stamps.dt.month,
            day=stamps.dt.day,
            week=stamps.dt.isocalendar().week,
            hour=stamps.dt.hour,
            minute=stamps.dt.minute,
            dayofweek=stamps.dt.dayofweek,
        )

        return df
|
||||
|
||||
class BejingPM25(RegressionForecastData):
    """
    Beijing PM2.5 dataset, resampled to hourly bins.

    See: http://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data
    """

    columns_target = ['log_pm2.5']
    columns_forecast = ['month', 'day', 'week', 'hour',
                        'minute', 'dayofweek']

    def download(self):
        """Fetch the CSV if it is not cached, then build the raw dataframe."""
        url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00381/PRSA_data_2010.1.1-2014.12.31.csv'

        # Only hit the network when the file is not already on disk
        filename = '00381_BejingPM25.csv'
        local_path = self.datasets_root/filename
        if not local_path.exists():
            download_url(url, self.datasets_root, filename)
        raw = pd.read_csv(local_path)

        # Build a timezone-aware index from the separate date-part columns
        date_parts = ['year', 'month', 'day', 'hour']
        raw.index = pd.to_datetime(raw[date_parts]).dt.tz_localize('Asia/Shanghai')
        df = raw.drop(columns=['year', 'month', 'day', 'hour', 'No'])

        # Replace the target with its log (small offset keeps log finite at 0)
        log_target = np.log(df['pm2.5'] + 1e-5)
        df['log_pm2.5'] = log_target
        df = df.drop(columns=['pm2.5'])

        df = df.dropna(subset=self.columns_target)
        df = df.resample('1H').first()

        # Missing wind-direction labels get an explicit 'none' category
        df['cbwd'] = df['cbwd'].fillna('none')

        # Calendar features derived from the index
        stamps = df.index.to_series()
        df = df.assign(
            month=stamps.dt.month,
            day=stamps.dt.day,
            week=stamps.dt.isocalendar().week,
            hour=stamps.dt.hour,
            minute=stamps.dt.minute,
            dayofweek=stamps.dt.dayofweek,
        )

        return df
|
||||
|
||||
def get_current_timeseries(
        cache_folder=Path("../data/raw/IMOS_ANMN/"),
        outfile=Path(
            '../data/processed/currents/MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc'
        )):
    """
    Download Current data from the IMOS and pre-process.

    If ``outfile`` already exists nothing is downloaded or recomputed.

    Args:
        cache_folder: directory the raw NetCDF files are downloaded into.
        outfile: path of the merged, pre-processed NetCDF file to write.

    Returns:
        ``outfile`` as a ``Path``.
    """
    # Accept plain strings as well as Path objects.
    cache_folder = Path(cache_folder)
    outfile = Path(outfile)

    if outfile.exists():
        return outfile

    files = [
        "IMOS_ANMN-WA_AETVZ_20090715T080000Z_WATR20_FV01_WATR20-0907-Continental-194_END-20090716T181317Z_C-20191122T052830Z.nc",
        "IMOS_ANMN-WA_AETVZ_20100409T080000Z_WATR20_FV01_WATR20-1004-Continental-194_END-20100430T084500Z_C-20191122T053845Z.nc",
        "IMOS_ANMN-WA_AETVZ_20101222T080000Z_WATR20_FV01_WATR20-1012-Continental-194_END-20110518T051500Z_C-20200916T020035Z.nc",
        "IMOS_ANMN-WA_AETVZ_20110608T080000Z_WATR20_FV01_WATR20-1106-Continental-194_END-20111122T035000Z_C-20200916T025619Z.nc",
        "IMOS_ANMN-WA_AETVZ_20111221T060300Z_WATR20_FV01_WATR20-1112-Continental-194_END-20120704T050500Z_C-20200916T043212Z.nc",
        "IMOS_ANMN-WA_AETVZ_20120726T044000Z_WATR20_FV01_WATR20-1207-Continental-194_END-20130204T044000Z_C-20200916T032027Z.nc",
        "IMOS_ANMN-WA_AETVZ_20130221T080000Z_WATR20_FV01_WATR20-1302-Continental-194_END-20131003T035000Z_C-20180529T020609Z.nc",
        "IMOS_ANMN-WA_AETVZ_20131111T080000Z_WATR20_FV01_WATR20-1311-Continental-194_END-20140519T035000Z_C-20200114T033335Z.nc",
        "IMOS_ANMN-WA_AETVZ_20140710T080000Z_WATR20_FV01_WATR20-1407-Continental-194_END-20150121T021500Z_C-20180529T055902Z.nc",
        "IMOS_ANMN-WA_AETVZ_20150213T080000Z_WATR20_FV01_WATR20-1502-Continental-194_END-20150424T134002Z_C-20200114T035347Z.nc",
        "IMOS_ANMN-WA_AETVZ_20150914T080000Z_WATR20_FV01_WATR20-1509-Continental-194_END-20160331T043000Z_C-20180601T013623Z.nc",
        "IMOS_ANMN-WA_AETVZ_20160427T080000Z_WATR20_FV01_WATR20-1604-Continental-194_END-20160531T021800Z_C-20180531T071709Z.nc",
        # "IMOS_ANMN-WA_AETVZ_20170512T080000Z_WATR20_FV01_WATR20-1705-Continental-194_END-20170717T014558Z_C-20190805T004647Z.nc",
        "IMOS_ANMN-WA_AETVZ_20171204T080000Z_WATR20_FV01_WATR20-1712-Continental-194_END-20180618T030000Z_C-20180620T233149Z.nc",
        "IMOS_ANMN-WA_AETVZ_20180802T080000Z_WATR20_FV01_WATR20-1807-Continental-194_END-20190225T054500Z_C-20190227T001343Z.nc",
        "IMOS_ANMN-WA_AETVZ_20190307T080000Z_WATR20_FV01_WATR20-1903-Continental-194_END-20190911T003144Z_C-20200114T045053Z.nc",
        "IMOS_ANMN-WA_AETVZ_20190926T080000Z_WATR20_FV01_WATR20-1909-Continental-194_END-20200326T030000Z_C-20200420T064334Z.nc",
    ]
    base = "http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/"

    # Download files (plain loop: the calls are made for their side effect)
    for f in files:
        download_url(base + f, cache_folder)

    # load and merge
    xds = [xr.open_dataset(cache_folder / f) for f in files]
    variables = [
        'VCUR', 'UCUR', 'WCUR', 'TEMP', 'PRES_REL', 'DEPTH', 'ROLL',
        'PITCH'
    ]
    # Keep a single HEIGHT_ABOVE_SENSOR bin (index 18) from every deployment.
    xds2 = [x[variables].isel(HEIGHT_ABOVE_SENSOR=18) for x in xds]
    xd = xr.concat(xds2, dim='TIME')
    xd = xd.where(xd.DEPTH > 150)  # remove outliers

    # Snap timestamps to a 10 minute grid, then drop steps without velocity
    xd['TIME'] = xd['TIME'].dt.round('10T')
    xd = xd.dropna(dim='TIME', subset=['VCUR', 'UCUR', 'WCUR'])

    # Generate tidal freqs
    t = xd.TIME.to_series()
    df_eta = generate_tidal_periods(t)

    # Add tidal freqs
    xd = xd.merge(df_eta)

    # Cache to nc; make sure the target directory exists first
    outfile.parent.mkdir(parents=True, exist_ok=True)
    xd.to_netcdf(outfile)
    print(
        f'wrote "{outfile}" with size {outfile.stat().st_size*1e-6:2.2f} MB'
    )
    return outfile
|
||||
|
||||
|
||||
class IMOSCurrentsVel(RegressionForecastData):
    """

    Current Speed at ANMN Two Rocks, WA, 204m mooring

    see:
    - http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/
    from https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/ae86e2f5-eaaf-459e-a405-e654d85adb9c
    and http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/WA/WATR20/Velocity/catalog.html
    And https://en.wikipedia.org/wiki/Theory_of_tides
    """

    columns_target = ['SPD']
    columns_forecast = [
        'M2', 'S2', 'N2', 'K2', 'K1', 'O1', 'P1', 'Q1', 'M4', 'M6', 'S4',
        'MK3', 'MM', 'SSA', 'SA'
    ]

    def download(self):
        """Build (or reuse) the merged NetCDF file and load it as a dataframe."""
        nc_name = 'MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc'
        outfile = self.datasets_root / nc_name
        # Writes the file on first use; does nothing if it already exists
        get_current_timeseries(outfile=outfile)

        dataset = xr.load_dataset(outfile)
        unused = ['HEIGHT_ABOVE_SENSOR', 'NOMINAL_DEPTH']
        df = dataset.to_dataframe().drop(columns=unused)

        # Speed is the magnitude of the (VCUR, UCUR) velocity components
        squared = df.VCUR**2 + df.UCUR**2
        df['SPD'] = np.sqrt(squared)
        df = df.dropna(subset=self.columns_target)

        return df.resample('30T').first()
|
||||
@@ -20,11 +20,11 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
|
||||
Returns x_past, y_past, x_future, etc.
|
||||
"""
|
||||
|
||||
def __init__(self, df: pd.DataFrame, window_past=40, window_future=10, columns_target=['energy(kWh/hh)'], columns_blank=[],):
|
||||
def __init__(self, df: pd.DataFrame, window_past=40, window_future=10, columns_target=['energy(kWh/hh)'], columns_past=[],):
|
||||
"""
|
||||
Args:
|
||||
- df: DataFrame with time index, already scaled
|
||||
- columns_blank: The columns we will blank, in the future
|
||||
- columns_past: The columns we will blank, in the future
|
||||
"""
|
||||
super().__init__()
|
||||
assert isinstance(df.index, pd.DatetimeIndex), 'should have a datetime index'
|
||||
@@ -38,7 +38,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
|
||||
self.columns_target = columns_target
|
||||
|
||||
# For speed
|
||||
self._icol_blank = [df.drop(columns = columns_target).columns.tolist().index(n) for n in columns_blank]
|
||||
self._icol_blank = [df.drop(columns = columns_target).columns.tolist().index(n) for n in columns_past]
|
||||
self._x = self.df.drop(columns = self.columns_target).values
|
||||
self._y = self.df[columns_target].values
|
||||
|
||||
@@ -64,6 +64,8 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
|
||||
|
||||
# Stop it cheating by using future weather measurements. Fill in with last value
|
||||
x_future[:, self._icol_blank] = x_past[0, self._icol_blank]
|
||||
|
||||
# x_future[:, self._icol_blank] = 0
|
||||
return x_past, y_past, x_future, y_future
|
||||
|
||||
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import click
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from dotenv import find_dotenv, load_dotenv
|
||||
|
||||
|
||||
@click.command()
@click.argument('input_filepath', type=click.Path(exists=True))
@click.argument('output_filepath', type=click.Path())
def main(input_filepath, output_filepath):
    """ Runs data processing scripts to turn raw data from (../raw) into
        cleaned data ready to be analyzed (saved in ../processed).
    """
    # Stub: neither path argument is used yet; only a log line is emitted.
    logger = logging.getLogger(__name__)
    logger.info('making final data set from raw data')
|
||||
|
||||
|
||||
# Script entry point: configure logging, load .env variables, run the CLI.
if __name__ == '__main__':
    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_fmt)

    # not used in this stub but often useful for finding various files
    project_dir = Path(__file__).resolve().parents[2]

    # find .env automagically by walking up directories until it's found, then
    # load up the .env entries as environment variables
    load_dotenv(find_dotenv())

    main()
|
||||
@@ -0,0 +1,43 @@
|
||||
import uptide
|
||||
import pandas as pd
|
||||
|
||||
# https://en.wikipedia.org/wiki/Theory_of_tides#Harmonic_analysis
|
||||
# Constituent names used by default, grouped by period band.
default_tidal_constituents = (
    ['M2', 'S2', 'N2', 'K2']        # Semi-diurnal
    + ['K1', 'O1', 'P1', 'Q1']      # Diurnal
    + ['M4', 'M6', 'S4', 'MK3']     # Short period
    + ['MM', 'SSA', 'SA']           # Long period
)
|
||||
|
||||
|
||||
def generate_tidal_periods(t: pd.Series,
                           constituents: list = default_tidal_constituents):
    """Generate one unit-amplitude, zero-phase signal per tidal constituent.

    Args:
        t: series of timestamps; the first element is used as the initial
            time of the tide model.
        constituents: constituent names passed to ``uptide.Tides``.

    Returns:
        DataFrame indexed by ``t`` with one column per constituent, each
        holding ``f * cos(omega * seconds_since_t0 + phi + u)``.
    """
    # Local import: this module's import block does not bring numpy in.
    import numpy as np

    tide = uptide.Tides(constituents)
    # .iloc: take the first element by position, whatever the index labels are
    t0 = t.iloc[0]
    td = (t - t0).dt.total_seconds().to_numpy().astype(int)
    tide.set_initial_time(t0)

    # calc tides
    # Every constituent gets amplitude 1 and phase 0. These are scalars, not
    # per-timestep arrays: zipping arrays of len(t) against the constituents
    # silently dropped constituents whenever len(t) < len(constituents).
    amplitude = 1
    phase = 0
    eta = {}
    for name, f, omega, phi, u in zip(
            tide.constituents, tide.f, tide.omega, tide.phi, tide.u):
        eta[name] = f * amplitude * np.cos(omega * td - phase + phi + u)
    df_eta = pd.DataFrame(eta, index=t)
    return df_eta
|
||||
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
import sklearn
|
||||
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
|
||||
from sklearn_pandas import DataFrameMapper
|
||||
|
||||
def normalize_encode_dataframe(df, encoder=OrdinalEncoder):
    """Normalise numeric data, encode categorical data.

    Args:
        df: input dataframe.
        encoder: transformer class instantiated once per non-numeric column.

    Returns:
        ``(df_norm, scaler)``: the transformed dataframe and the fitted
        ``DataFrameMapper``.
    """
    # NOTE(review): `_get_numeric_data` is a private pandas API; kept so the
    # set of columns treated as numeric does not change.
    columns_input_numeric = list(df._get_numeric_data().columns)
    numeric = set(columns_input_numeric)
    # Keep the dataframe's own column order. A set difference gave an
    # arbitrary order that could differ between runs.
    columns_categorical = [c for c in df.columns if c not in numeric]

    transformers = [([n], StandardScaler()) for n in columns_input_numeric] + \
        [([n], encoder()) for n in columns_categorical]
    scaler = DataFrameMapper(transformers, df_out=True)
    df_norm = scaler.fit_transform(df)
    return df_norm, scaler
|
||||
|
||||
def timeseries_split(df, test_fraction=0.2):
    """Split timeseries data with test in the future.

    Args:
        df: dataframe ordered in time.
        test_fraction: fraction of rows (taken from the end) for the test set.

    Returns:
        ``(df_train, df_test)``: the leading rows and the trailing
        ``int(len(df) * test_fraction)`` rows.
    """
    n_test = int(len(df) * test_fraction)
    # Split point counted from the end, so `test_fraction` sizes the TEST
    # part; previously it sized the train part.
    i = len(df) - n_test
    return df.iloc[:i], df.iloc[i:]
|
||||
Reference in New Issue
Block a user