tidy data

2026-06-27 16:46:54 +08:00 · 2020-10-26 14:41:07 +08:00
parent 49fdfdc3ab
commit 3ee1c5bbb8
11 changed files with 2405 additions and 4369 deletions
@@ -8,7 +8,7 @@ PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
 BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')
 PROFILE = default
 PROJECT_NAME = seq2seq-time
-PYTHON_INTERPRETER = seq2seq-time
+PYTHON_INTERPRETER = python

 #################################################################################
 # COMMANDS                                                                      #
@@ -70,7 +70,7 @@ test:
 doc_reqs:
 	conda env export --no-builds --from-history --name $(PROJECT_NAME) > requirements/environment.min.yaml
 	conda env export  --name $(PROJECT_NAME) > requirements/environment.max.yaml
-	$(PYTHON_INTERPRETER) -m pip freeze > requirements/requirements.txt --name
+	$(PYTHON_INTERPRETER) -m pip freeze > requirements/requirements.txt

 #################################################################################
 # Self Documenting Commands                                                     #
@@ -1,725 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Here I download and preprocess current data\n",
-    "\n",
-    "\n",
-    "see\n",
-    "- from https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/ae86e2f5-eaaf-459e-a405-e654d85adb9c\n",
-    "- http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/WA/WATR20/Velocity/catalog.html"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T03:39:41.682896Z",
-     "start_time": "2020-10-26T03:39:40.104951Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "import xarray as xr\n",
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "from urllib import request\n",
-    "import os, shutil\n",
-    "from matplotlib import pyplot as plt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-25T07:01:29.004548Z",
-     "start_time": "2020-10-25T07:01:29.001734Z"
-    }
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T03:39:42.553735Z",
-     "start_time": "2020-10-26T03:39:41.685439Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from torchvision.datasets.utils import download_url, extract_archive, download_and_extract_archive"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T03:39:42.560919Z",
-     "start_time": "2020-10-26T03:39:42.556898Z"
-    }
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import uptide\n",
-    "\n",
-    "# https://en.wikipedia.org/wiki/Theory_of_tides#Harmonic_analysis\n",
-    "default_tidal_constituents = [\n",
-    "        'M2', 'S2', 'N2', 'K2', # Semi-diurnal\n",
-    "        'K1', 'O1', 'P1', 'Q1',  # Diurnal\n",
-    "        'M4', 'M6', 'S4', 'MK3', # Short period\n",
-    "        'MM', 'SSA', 'SA' # Long period\n",
-    "    ]\n",
-    "\n",
-    "def generate_tidal_periods(t:pd.Series, constituents:list=default_tidal_constituents):\n",
-    "    tide = uptide.Tides(constituents)\n",
-    "    t0 = t[0]\n",
-    "    td = t-t0\n",
-    "    td = td.dt.total_seconds().to_numpy().astype(int)\n",
-    "    tide.set_initial_time(t0)\n",
-    "\n",
-    "    # calc tides\n",
-    "    amplitudes=np.ones_like(td)\n",
-    "    phases=np.zeros_like(td)\n",
-    "    eta = {}\n",
-    "    for name, f, amplitude, omega, phase, phi, u in zip(tide.constituents, tide.f, amplitudes, tide.omega,\n",
-    "                                                  phases, tide.phi, tide.u):\n",
-    "        eta[name] = f*amplitude*np.cos(omega*td-phase+phi+u)\n",
-    "    df_eta = pd.DataFrame(eta, index=t)\n",
-    "    return df_eta"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T03:59:16.671092Z",
-     "start_time": "2020-10-26T03:59:16.655680Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "# 'ANMN Two Rocks, WA, 204m mooring, Jul2009 - Dec2009. Preprocessed with DepthPP.'\n",
-    "\n",
-    "def get_current_timeseries(\n",
-    "    cache_folder=Path(\"../data/raw/IMOS_ANMN/\"), \n",
-    "    outfile=Path('../data/processed/currents/MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc')\n",
-    "    ):\n",
-    "    if not outfile.exists():\n",
-    "\n",
-    "        files = [\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20090715T080000Z_WATR20_FV01_WATR20-0907-Continental-194_END-20090716T181317Z_C-20191122T052830Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20100409T080000Z_WATR20_FV01_WATR20-1004-Continental-194_END-20100430T084500Z_C-20191122T053845Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20101222T080000Z_WATR20_FV01_WATR20-1012-Continental-194_END-20110518T051500Z_C-20200916T020035Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20110608T080000Z_WATR20_FV01_WATR20-1106-Continental-194_END-20111122T035000Z_C-20200916T025619Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20111221T060300Z_WATR20_FV01_WATR20-1112-Continental-194_END-20120704T050500Z_C-20200916T043212Z.nc\", \n",
-    "            \"IMOS_ANMN-WA_AETVZ_20120726T044000Z_WATR20_FV01_WATR20-1207-Continental-194_END-20130204T044000Z_C-20200916T032027Z.nc\",\n",
-    "\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20130221T080000Z_WATR20_FV01_WATR20-1302-Continental-194_END-20131003T035000Z_C-20180529T020609Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20131111T080000Z_WATR20_FV01_WATR20-1311-Continental-194_END-20140519T035000Z_C-20200114T033335Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20140710T080000Z_WATR20_FV01_WATR20-1407-Continental-194_END-20150121T021500Z_C-20180529T055902Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20150213T080000Z_WATR20_FV01_WATR20-1502-Continental-194_END-20150424T134002Z_C-20200114T035347Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20150914T080000Z_WATR20_FV01_WATR20-1509-Continental-194_END-20160331T043000Z_C-20180601T013623Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20160427T080000Z_WATR20_FV01_WATR20-1604-Continental-194_END-20160531T021800Z_C-20180531T071709Z.nc\",\n",
-    "        #     \"IMOS_ANMN-WA_AETVZ_20170512T080000Z_WATR20_FV01_WATR20-1705-Continental-194_END-20170717T014558Z_C-20190805T004647Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20171204T080000Z_WATR20_FV01_WATR20-1712-Continental-194_END-20180618T030000Z_C-20180620T233149Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20180802T080000Z_WATR20_FV01_WATR20-1807-Continental-194_END-20190225T054500Z_C-20190227T001343Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20190307T080000Z_WATR20_FV01_WATR20-1903-Continental-194_END-20190911T003144Z_C-20200114T045053Z.nc\",\n",
-    "            \"IMOS_ANMN-WA_AETVZ_20190926T080000Z_WATR20_FV01_WATR20-1909-Continental-194_END-20200326T030000Z_C-20200420T064334Z.nc\",\n",
-    "        ]\n",
-    "        base=\"http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/\"\n",
-    "\n",
-    "        # Download files\n",
-    "        [download_url(base+f, cache_folder) for f in files]\n",
-    "\n",
-    "        # load and merge\n",
-    "        xds=[xr.open_dataset(cache_folder/f) for f in files]\n",
-    "        vars=['VCUR', 'UCUR', 'WCUR', 'TEMP', 'PRES_REL', 'DEPTH', 'ROLL', 'PITCH']\n",
-    "        xds2= [x[vars].isel(HEIGHT_ABOVE_SENSOR=18) for x in xds]\n",
-    "        xd = xr.concat(xds2, dim='TIME')\n",
-    "        xd = xd.where(xd.DEPTH>150) # remove outliers\n",
-    "\n",
-    "\n",
-    "        xd['TIME'] = xd['TIME'].dt.round('10T')\n",
-    "        xd = xd.dropna(dim='TIME', subset=['VCUR', 'UCUR', 'WCUR'])\n",
-    "        # xd = xd.resample(TIME='30T').first()\n",
-    "        # Add tides, these are features that can be forecast\n",
-    "\n",
-    "        # Generate tidal freqs\n",
-    "        t = xd.TIME.to_series()\n",
-    "        df_eta = generate_tidal_periods(t)\n",
-    "\n",
-    "        # Add tidal freqs\n",
-    "        xd = xd.merge(df_eta)\n",
-    "\n",
-    "        # Cache to nc\n",
-    "        xd.to_netcdf(outfile)\n",
-    "        print(f'wrote \"{outfile}\" with size {outfile.stat().st_size*1e-6:2.2f} MB')\n",
-    "    return outfile"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T04:04:08.230047Z",
-     "start_time": "2020-10-26T04:04:08.099310Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>VCUR</th>\n",
-       "      <th>UCUR</th>\n",
-       "      <th>WCUR</th>\n",
-       "      <th>TEMP</th>\n",
-       "      <th>PRES_REL</th>\n",
-       "      <th>DEPTH</th>\n",
-       "      <th>ROLL</th>\n",
-       "      <th>PITCH</th>\n",
-       "      <th>LATITUDE</th>\n",
-       "      <th>LONGITUDE</th>\n",
-       "      <th>...</th>\n",
-       "      <th>O1</th>\n",
-       "      <th>P1</th>\n",
-       "      <th>Q1</th>\n",
-       "      <th>M4</th>\n",
-       "      <th>M6</th>\n",
-       "      <th>S4</th>\n",
-       "      <th>MK3</th>\n",
-       "      <th>MM</th>\n",
-       "      <th>SSA</th>\n",
-       "      <th>SA</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>TIME</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2009-07-15 08:00:00</th>\n",
-       "      <td>-0.396391</td>\n",
-       "      <td>0.089687</td>\n",
-       "      <td>-0.009671</td>\n",
-       "      <td>18.549999</td>\n",
-       "      <td>205.076004</td>\n",
-       "      <td>203.550812</td>\n",
-       "      <td>4.6</td>\n",
-       "      <td>-3.4</td>\n",
-       "      <td>-31.728650</td>\n",
-       "      <td>115.037217</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.286288</td>\n",
-       "      <td>0.116457</td>\n",
-       "      <td>-1.014973</td>\n",
-       "      <td>-0.146817</td>\n",
-       "      <td>-0.801534</td>\n",
-       "      <td>-0.500000</td>\n",
-       "      <td>0.370082</td>\n",
-       "      <td>0.132683</td>\n",
-       "      <td>-0.686775</td>\n",
-       "      <td>-0.395743</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2009-07-15 08:10:00</th>\n",
-       "      <td>-0.407620</td>\n",
-       "      <td>0.085398</td>\n",
-       "      <td>-0.019875</td>\n",
-       "      <td>18.650000</td>\n",
-       "      <td>205.078003</td>\n",
-       "      <td>203.552795</td>\n",
-       "      <td>4.6</td>\n",
-       "      <td>-2.4</td>\n",
-       "      <td>-31.728650</td>\n",
-       "      <td>115.037217</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.242810</td>\n",
-       "      <td>0.159551</td>\n",
-       "      <td>-1.031149</td>\n",
-       "      <td>-0.304345</td>\n",
-       "      <td>-0.900573</td>\n",
-       "      <td>-0.642788</td>\n",
-       "      <td>0.494417</td>\n",
-       "      <td>0.134147</td>\n",
-       "      <td>-0.686601</td>\n",
-       "      <td>-0.395853</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2009-07-15 08:20:00</th>\n",
-       "      <td>-0.365314</td>\n",
-       "      <td>0.104038</td>\n",
-       "      <td>0.000991</td>\n",
-       "      <td>18.730000</td>\n",
-       "      <td>205.076996</td>\n",
-       "      <td>203.551788</td>\n",
-       "      <td>4.8</td>\n",
-       "      <td>-2.7</td>\n",
-       "      <td>-31.728650</td>\n",
-       "      <td>115.037217</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.198932</td>\n",
-       "      <td>0.202343</td>\n",
-       "      <td>-1.045759</td>\n",
-       "      <td>-0.453239</td>\n",
-       "      <td>-0.942304</td>\n",
-       "      <td>-0.766044</td>\n",
-       "      <td>0.610654</td>\n",
-       "      <td>0.135610</td>\n",
-       "      <td>-0.686427</td>\n",
-       "      <td>-0.395963</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2009-07-15 08:30:00</th>\n",
-       "      <td>-0.406632</td>\n",
-       "      <td>0.119376</td>\n",
-       "      <td>-0.003729</td>\n",
-       "      <td>18.799999</td>\n",
-       "      <td>205.067001</td>\n",
-       "      <td>203.541901</td>\n",
-       "      <td>4.7</td>\n",
-       "      <td>-2.4</td>\n",
-       "      <td>-31.728650</td>\n",
-       "      <td>115.037217</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.154727</td>\n",
-       "      <td>0.244751</td>\n",
-       "      <td>-1.058780</td>\n",
-       "      <td>-0.589276</td>\n",
-       "      <td>-0.924071</td>\n",
-       "      <td>-0.866025</td>\n",
-       "      <td>0.716890</td>\n",
-       "      <td>0.137073</td>\n",
-       "      <td>-0.686253</td>\n",
-       "      <td>-0.396072</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2009-07-15 08:40:00</th>\n",
-       "      <td>-0.383744</td>\n",
-       "      <td>0.090066</td>\n",
-       "      <td>-0.008921</td>\n",
-       "      <td>18.860001</td>\n",
-       "      <td>205.065994</td>\n",
-       "      <td>203.540894</td>\n",
-       "      <td>4.9</td>\n",
-       "      <td>-2.9</td>\n",
-       "      <td>-31.728650</td>\n",
-       "      <td>115.037217</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.110268</td>\n",
-       "      <td>0.286697</td>\n",
-       "      <td>-1.070194</td>\n",
-       "      <td>-0.708598</td>\n",
-       "      <td>-0.847034</td>\n",
-       "      <td>-0.939693</td>\n",
-       "      <td>0.811384</td>\n",
-       "      <td>0.138535</td>\n",
-       "      <td>-0.686080</td>\n",
-       "      <td>-0.396182</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2020-03-26 01:00:00</th>\n",
-       "      <td>-0.436635</td>\n",
-       "      <td>-0.784922</td>\n",
-       "      <td>-0.012147</td>\n",
-       "      <td>16.610001</td>\n",
-       "      <td>197.384003</td>\n",
-       "      <td>195.919662</td>\n",
-       "      <td>-2.9</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>-31.728717</td>\n",
-       "      <td>115.042133</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-0.734741</td>\n",
-       "      <td>0.190139</td>\n",
-       "      <td>0.964792</td>\n",
-       "      <td>0.882484</td>\n",
-       "      <td>0.770444</td>\n",
-       "      <td>0.505439</td>\n",
-       "      <td>1.028587</td>\n",
-       "      <td>-0.881951</td>\n",
-       "      <td>0.990514</td>\n",
-       "      <td>0.997626</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2020-03-26 01:30:00</th>\n",
-       "      <td>-0.355067</td>\n",
-       "      <td>-0.845100</td>\n",
-       "      <td>-0.005201</td>\n",
-       "      <td>16.629999</td>\n",
-       "      <td>197.408005</td>\n",
-       "      <td>195.943497</td>\n",
-       "      <td>-2.7</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>-31.728717</td>\n",
-       "      <td>115.042133</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-0.629257</td>\n",
-       "      <td>0.316317</td>\n",
-       "      <td>0.895545</td>\n",
-       "      <td>0.957914</td>\n",
-       "      <td>0.933774</td>\n",
-       "      <td>0.006292</td>\n",
-       "      <td>0.851981</td>\n",
-       "      <td>-0.880483</td>\n",
-       "      <td>0.990416</td>\n",
-       "      <td>0.997601</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2020-03-26 02:00:00</th>\n",
-       "      <td>-0.568277</td>\n",
-       "      <td>-0.816935</td>\n",
-       "      <td>-0.024944</td>\n",
-       "      <td>16.660000</td>\n",
-       "      <td>197.412994</td>\n",
-       "      <td>195.948425</td>\n",
-       "      <td>-2.6</td>\n",
-       "      <td>2.9</td>\n",
-       "      <td>-31.728717</td>\n",
-       "      <td>115.042133</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-0.514470</td>\n",
-       "      <td>0.437113</td>\n",
-       "      <td>0.814067</td>\n",
-       "      <td>0.793395</td>\n",
-       "      <td>0.584762</td>\n",
-       "      <td>-0.494541</td>\n",
-       "      <td>0.551159</td>\n",
-       "      <td>-0.878996</td>\n",
-       "      <td>0.990316</td>\n",
-       "      <td>0.997576</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2020-03-26 02:30:00</th>\n",
-       "      <td>-0.306141</td>\n",
-       "      <td>-0.773147</td>\n",
-       "      <td>-0.028096</td>\n",
-       "      <td>16.719999</td>\n",
-       "      <td>197.419006</td>\n",
-       "      <td>195.954407</td>\n",
-       "      <td>-2.6</td>\n",
-       "      <td>2.7</td>\n",
-       "      <td>-31.728717</td>\n",
-       "      <td>115.042133</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-0.392074</td>\n",
-       "      <td>0.550470</td>\n",
-       "      <td>0.721473</td>\n",
-       "      <td>0.430136</td>\n",
-       "      <td>-0.085096</td>\n",
-       "      <td>-0.862862</td>\n",
-       "      <td>0.169980</td>\n",
-       "      <td>-0.877489</td>\n",
-       "      <td>0.990217</td>\n",
-       "      <td>0.997551</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2020-03-26 03:00:00</th>\n",
-       "      <td>-0.218563</td>\n",
-       "      <td>-0.757217</td>\n",
-       "      <td>0.013233</td>\n",
-       "      <td>16.790001</td>\n",
-       "      <td>197.429001</td>\n",
-       "      <td>195.964340</td>\n",
-       "      <td>-2.8</td>\n",
-       "      <td>2.8</td>\n",
-       "      <td>-31.728717</td>\n",
-       "      <td>115.042133</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-0.263881</td>\n",
-       "      <td>0.654460</td>\n",
-       "      <td>0.619026</td>\n",
-       "      <td>-0.040868</td>\n",
-       "      <td>-0.708264</td>\n",
-       "      <td>-0.999980</td>\n",
-       "      <td>-0.235982</td>\n",
-       "      <td>-0.875962</td>\n",
-       "      <td>0.990116</td>\n",
-       "      <td>0.997526</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>239075 rows × 25 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                         VCUR      UCUR      WCUR       TEMP    PRES_REL  \\\n",
-       "TIME                                                                       \n",
-       "2009-07-15 08:00:00 -0.396391  0.089687 -0.009671  18.549999  205.076004   \n",
-       "2009-07-15 08:10:00 -0.407620  0.085398 -0.019875  18.650000  205.078003   \n",
-       "2009-07-15 08:20:00 -0.365314  0.104038  0.000991  18.730000  205.076996   \n",
-       "2009-07-15 08:30:00 -0.406632  0.119376 -0.003729  18.799999  205.067001   \n",
-       "2009-07-15 08:40:00 -0.383744  0.090066 -0.008921  18.860001  205.065994   \n",
-       "...                       ...       ...       ...        ...         ...   \n",
-       "2020-03-26 01:00:00 -0.436635 -0.784922 -0.012147  16.610001  197.384003   \n",
-       "2020-03-26 01:30:00 -0.355067 -0.845100 -0.005201  16.629999  197.408005   \n",
-       "2020-03-26 02:00:00 -0.568277 -0.816935 -0.024944  16.660000  197.412994   \n",
-       "2020-03-26 02:30:00 -0.306141 -0.773147 -0.028096  16.719999  197.419006   \n",
-       "2020-03-26 03:00:00 -0.218563 -0.757217  0.013233  16.790001  197.429001   \n",
-       "\n",
-       "                          DEPTH  ROLL  PITCH   LATITUDE   LONGITUDE  ...  \\\n",
-       "TIME                                                                 ...   \n",
-       "2009-07-15 08:00:00  203.550812   4.6   -3.4 -31.728650  115.037217  ...   \n",
-       "2009-07-15 08:10:00  203.552795   4.6   -2.4 -31.728650  115.037217  ...   \n",
-       "2009-07-15 08:20:00  203.551788   4.8   -2.7 -31.728650  115.037217  ...   \n",
-       "2009-07-15 08:30:00  203.541901   4.7   -2.4 -31.728650  115.037217  ...   \n",
-       "2009-07-15 08:40:00  203.540894   4.9   -2.9 -31.728650  115.037217  ...   \n",
-       "...                         ...   ...    ...        ...         ...  ...   \n",
-       "2020-03-26 01:00:00  195.919662  -2.9    3.0 -31.728717  115.042133  ...   \n",
-       "2020-03-26 01:30:00  195.943497  -2.7    3.0 -31.728717  115.042133  ...   \n",
-       "2020-03-26 02:00:00  195.948425  -2.6    2.9 -31.728717  115.042133  ...   \n",
-       "2020-03-26 02:30:00  195.954407  -2.6    2.7 -31.728717  115.042133  ...   \n",
-       "2020-03-26 03:00:00  195.964340  -2.8    2.8 -31.728717  115.042133  ...   \n",
-       "\n",
-       "                           O1        P1        Q1        M4        M6  \\\n",
-       "TIME                                                                    \n",
-       "2009-07-15 08:00:00  0.286288  0.116457 -1.014973 -0.146817 -0.801534   \n",
-       "2009-07-15 08:10:00  0.242810  0.159551 -1.031149 -0.304345 -0.900573   \n",
-       "2009-07-15 08:20:00  0.198932  0.202343 -1.045759 -0.453239 -0.942304   \n",
-       "2009-07-15 08:30:00  0.154727  0.244751 -1.058780 -0.589276 -0.924071   \n",
-       "2009-07-15 08:40:00  0.110268  0.286697 -1.070194 -0.708598 -0.847034   \n",
-       "...                       ...       ...       ...       ...       ...   \n",
-       "2020-03-26 01:00:00 -0.734741  0.190139  0.964792  0.882484  0.770444   \n",
-       "2020-03-26 01:30:00 -0.629257  0.316317  0.895545  0.957914  0.933774   \n",
-       "2020-03-26 02:00:00 -0.514470  0.437113  0.814067  0.793395  0.584762   \n",
-       "2020-03-26 02:30:00 -0.392074  0.550470  0.721473  0.430136 -0.085096   \n",
-       "2020-03-26 03:00:00 -0.263881  0.654460  0.619026 -0.040868 -0.708264   \n",
-       "\n",
-       "                           S4       MK3        MM       SSA        SA  \n",
-       "TIME                                                                   \n",
-       "2009-07-15 08:00:00 -0.500000  0.370082  0.132683 -0.686775 -0.395743  \n",
-       "2009-07-15 08:10:00 -0.642788  0.494417  0.134147 -0.686601 -0.395853  \n",
-       "2009-07-15 08:20:00 -0.766044  0.610654  0.135610 -0.686427 -0.395963  \n",
-       "2009-07-15 08:30:00 -0.866025  0.716890  0.137073 -0.686253 -0.396072  \n",
-       "2009-07-15 08:40:00 -0.939693  0.811384  0.138535 -0.686080 -0.396182  \n",
-       "...                       ...       ...       ...       ...       ...  \n",
-       "2020-03-26 01:00:00  0.505439  1.028587 -0.881951  0.990514  0.997626  \n",
-       "2020-03-26 01:30:00  0.006292  0.851981 -0.880483  0.990416  0.997601  \n",
-       "2020-03-26 02:00:00 -0.494541  0.551159 -0.878996  0.990316  0.997576  \n",
-       "2020-03-26 02:30:00 -0.862862  0.169980 -0.877489  0.990217  0.997551  \n",
-       "2020-03-26 03:00:00 -0.999980 -0.235982 -0.875962  0.990116  0.997526  \n",
-       "\n",
-       "[239075 rows x 25 columns]"
-      ]
-     },
-     "execution_count": 40,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "xd.to_dataframe().drop(columns=['HEIGHT_ABOVE_SENSOR', 'NOMINAL_DEPTH'])#.columns#[['VCUR', 'UCUR', 'WCUR', 'TEMP', 'PRES_REL', 'DEPTH', 'ROLL', 'PITCH']]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T03:44:41.020269Z",
-     "start_time": "2020-10-26T03:44:41.017322Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# for x in xds:\n",
-    "#     x.DEPTH.plot()\n",
-    "#     plt.ylim(190, 210)\n",
-    "\n",
-    "# plt.show()\n",
-    "# for x in xds:\n",
-    "#     x.plot.scatter('LONGITUDE', 'LONGITUDE')\n",
-    "# plt.show()\n",
-    "\n",
-    "# xd['VCUR'].plot(alpha=0.5)\n",
-    "# xd['UCUR'].plot(alpha=0.5)\n",
-    "# xd['WCUR'].plot(alpha=0.5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T03:51:16.821117Z",
-     "start_time": "2020-10-26T03:51:16.606212Z"
-    }
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T03:51:17.614829Z",
-     "start_time": "2020-10-26T03:51:17.204376Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "PosixPath('../data/processed/currents/MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc')"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-26T03:51:18.335001Z",
-     "start_time": "2020-10-26T03:51:18.328504Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "43.107293"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "seq2seq-time",
-   "language": "python",
-   "name": "seq2seq-time"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.8"
-  },
-  "toc": {
-   "base_numbering": 1,
-   "nav_menu": {},
-   "number_sections": true,
-   "sideBar": true,
-   "skip_h1_title": false,
-   "title_cell": "Table of Contents",
-   "title_sidebar": "Contents",
-   "toc_cell": false,
-   "toc_position": {
-    "height": "calc(100% - 180px)",
-    "left": "10px",
-    "top": "150px",
-    "width": "219.011px"
-   },
-   "toc_section_display": true,
-   "toc_window_display": true
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
@@ -0,0 +1,190 @@
+absl-py @ file:///home/conda/feedstock_root/build_artifacts/absl-py_1602289403781/work
+aiohttp @ file:///tmp/build/80754af9/aiohttp_1602530305083/work
+appdirs==1.4.4
+argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1602546578258/work
+async-generator==1.10
+async-timeout==3.0.1
+attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1599308529326/work
+awscli @ file:///home/conda/feedstock_root/build_artifacts/awscli_1602890549104/work
+backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
+backports.functools-lru-cache==1.6.1
+black @ file:///home/conda/feedstock_root/build_artifacts/black-recipe_1599478779128/work
+bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1600454382015/work
+blinker==1.4
+bokeh @ file:///home/conda/feedstock_root/build_artifacts/bokeh_1602690186583/work
+botocore @ file:///home/conda/feedstock_root/build_artifacts/botocore_1602884371056/work
+bpe==1.0
+brotlipy==0.7.0
+cachetools @ file:///home/conda/feedstock_root/build_artifacts/cachetools_1593420445823/work
+certifi==2020.6.20
+cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1602537219008/work
+cftime @ file:///home/conda/feedstock_root/build_artifacts/cftime_1602504440833/work
+chardet @ file:///home/conda/feedstock_root/build_artifacts/chardet_1602255309768/work
+click==7.1.2
+cloudpickle @ file:///home/conda/feedstock_root/build_artifacts/cloudpickle_1598400192773/work
+colorama==0.4.3
+colorcet==2.0.2
+confuse @ file:///home/conda/feedstock_root/build_artifacts/confuse_1593279073800/work
+cryptography @ file:///home/conda/feedstock_root/build_artifacts/cryptography_1602614063317/work
+cycler==0.10.0
+cytoolz==0.11.0
+dask @ file:///home/conda/feedstock_root/build_artifacts/dask-core_1602029610262/work
+datashader @ file:///home/conda/feedstock_root/build_artifacts/datashader_1597664023361/work
+datashape==0.5.4
+decorator==4.4.2
+defusedxml==0.6.0
+distributed @ file:///home/conda/feedstock_root/build_artifacts/distributed_1602493186453/work
+docutils==0.15.2
+entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1602701733603/work/dist/entrypoints-0.3-py2.py3-none-any.whl
+fastparquet @ file:///home/conda/feedstock_root/build_artifacts/fastparquet_1594909864671/work
+fsspec @ file:///home/conda/feedstock_root/build_artifacts/fsspec_1602700749102/work
+future @ file:///home/conda/feedstock_root/build_artifacts/future_1602538316704/work
+google-auth @ file:///tmp/build/80754af9/google-auth_1601995530934/work
+google-auth-oauthlib==0.4.1
+grpcio @ file:///home/conda/feedstock_root/build_artifacts/grpcio_1596715635580/work
+HeapDict==1.0.1
+holoviews @ file:///home/conda/feedstock_root/build_artifacts/holoviews_1600439907620/work
+htmlmin==0.1.12
+hypothesis==4.32.3
+idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1593328102638/work
+ImageHash @ file:///home/conda/feedstock_root/build_artifacts/imagehash_1588182723834/work
+importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1600910428305/work
+iniconfig @ file:///tmp/build/80754af9/iniconfig_1602780191262/work
+ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1602682802500/work/dist/ipykernel-5.3.4-py3-none-any.whl
+ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1602640393953/work
+ipython-genutils==0.2.0
+ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1599554010055/work
+jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1602395235501/work
+Jinja2==2.11.2
+jmespath @ file:///home/conda/feedstock_root/build_artifacts/jmespath_1589369830981/work
+joblib @ file:///home/conda/feedstock_root/build_artifacts/joblib_1601671685479/work
+jsonschema==3.2.0
+jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1598486169312/work
+jupyter-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1602537277085/work
+jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1601375948261/work
+kiwisolver @ file:///home/conda/feedstock_root/build_artifacts/kiwisolver_1602517221725/work
+llvmlite==0.34.0
+locket==0.2.0
+Markdown @ file:///home/conda/feedstock_root/build_artifacts/markdown_1602544730470/work
+MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1602267316845/work
+matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-suite_1602600750896/work
+mccabe==0.6.1
+missingno==0.4.2
+mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1602381812692/work
+more-itertools @ file:///home/conda/feedstock_root/build_artifacts/more-itertools_1598643641143/work
+msgpack @ file:///home/conda/feedstock_root/build_artifacts/msgpack-python_1602380760823/work
+multidict @ file:///tmp/build/80754af9/multidict_1600456400975/work
+multipledispatch==0.6.0
+mypy @ file:///home/conda/feedstock_root/build_artifacts/mypy_1602270162469/work
+mypy-extensions==0.4.3
+nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1602859080374/work
+nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert_1602715396354/work
+nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1602732862338/work
+nc-time-axis==1.2.0
+nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1601342677072/work
+netCDF4 @ file:///home/conda/feedstock_root/build_artifacts/netcdf4_1602508544050/work
+networkx @ file:///home/conda/feedstock_root/build_artifacts/networkx_1598210780226/work
+notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1602720128568/work
+numba @ file:///home/conda/feedstock_root/build_artifacts/numba_1599084798687/work
+numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1602429044575/work
+oauthlib==3.1.0
+olefile @ file:///home/conda/feedstock_root/build_artifacts/olefile_1602866521163/work
+packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1589925210001/work
+pandas @ file:///home/conda/feedstock_root/build_artifacts/pandas_1602502751364/work
+pandas-profiling @ file:///home/conda/feedstock_root/build_artifacts/pandas-profiling_1599137999474/work
+pandocfilters==1.4.2
+panel @ file:///home/conda/feedstock_root/build_artifacts/panel_1592920888719/work
+param==1.9.3
+parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1595548966091/work
+partd==1.1.0
+pathspec==0.8.0
+patsy==0.5.1
+pexpect==4.8.0
+phik @ file:///home/conda/feedstock_root/build_artifacts/phik_1590331950347/work
+pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602535628301/work
+Pillow @ file:///home/conda/feedstock_root/build_artifacts/pillow_1602708615436/work
+pluggy @ file:///home/conda/feedstock_root/build_artifacts/pluggy_1602337415071/work
+prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1590412252446/work
+prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1602524994744/work
+protobuf==3.13.0
+psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1602264040045/work
+ptyprocess==0.6.0
+py @ file:///home/conda/feedstock_root/build_artifacts/py_1593088446458/work
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycodestyle @ file:///home/conda/feedstock_root/build_artifacts/pycodestyle_1589305246696/work
+pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1593275161868/work
+pyct @ file:///tmp/build/80754af9/pyct_1600458283986/work
+pydocstyle @ file:///home/conda/feedstock_root/build_artifacts/pydocstyle_1598747747227/work
+pyflakes==2.2.0
+Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1600347314331/work
+PyJWT==1.7.1
+pylama==7.7.1
+pyOpenSSL==19.1.0
+pyparsing==2.4.7
+PyQt5==5.12.3
+PyQt5-sip==4.19.18
+PyQtChart==5.12
+PyQtWebEngine==5.12.1
+pyrsistent @ file:///home/conda/feedstock_root/build_artifacts/pyrsistent_1602259985647/work
+PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1602326924965/work
+pytest==6.1.1
+python-dateutil==2.8.1
+pytorch-fast-transformers==0.3.0
+pytorch-lightning @ file:///home/conda/feedstock_root/build_artifacts/pytorch-lightning_1602786328955/work
+pytorch-lightning-bolts==0.2.5
+pytz==2020.1
+pyviz-comms @ file:///home/conda/feedstock_root/build_artifacts/pyviz_comms_1594121601757/work
+PyWavelets @ file:///home/conda/feedstock_root/build_artifacts/pywavelets_1602504439149/work
+PyYAML==5.3.1
+pyzmq==19.0.2
+regex @ file:///home/conda/feedstock_root/build_artifacts/regex_1602771401882/work
+requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1592425495151/work
+requests-oauthlib @ file:///home/conda/feedstock_root/build_artifacts/requests-oauthlib_1595492159598/work
+rsa @ file:///home/conda/feedstock_root/build_artifacts/rsa_1591990902901/work
+s3transfer @ file:///home/conda/feedstock_root/build_artifacts/s3transfer_1602631002642/work
+scikit-learn @ file:///home/conda/feedstock_root/build_artifacts/scikit-learn_1596546074663/work
+scipy @ file:///home/conda/feedstock_root/build_artifacts/scipy_1602862657152/work
+seaborn @ file:///home/conda/feedstock_root/build_artifacts/seaborn-base_1599592695803/work
+Send2Trash==1.5.0
+# Editable Git install with no remote (seq2seq-time==0.1.0)
+-e /media/wassname/Storage5/projects2/3ST/seq2seq-time
+six @ file:///home/conda/feedstock_root/build_artifacts/six_1590081179328/work
+sklearn==0.0
+sklearn-pandas==2.0.2
+snowballstemmer==2.0.0
+sortedcontainers @ file:///home/conda/feedstock_root/build_artifacts/sortedcontainers_1591999956871/work
+statsmodels @ file:///home/conda/feedstock_root/build_artifacts/statsmodels_1602599914091/work
+tangled-up-in-unicode @ file:///home/conda/feedstock_root/build_artifacts/tangled-up-in-unicode_1589363771888/work
+tblib @ file:///tmp/build/80754af9/tblib_1597928476713/work
+tensorboard @ file:///home/conda/feedstock_root/build_artifacts/tensorboard_1595378845776/work/tensorboard-2.3.0-py3-none-any.whl
+tensorboard-plugin-wit @ file:///home/conda/feedstock_root/build_artifacts/tensorboard-plugin-wit_1592816951245/work/tensorboard_plugin_wit-1.6.0.post3-py3-none-any.whl
+terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1602679584439/work
+testpath==0.4.4
+threadpoolctl @ file:///tmp/tmp79xdzxkt/threadpoolctl-2.1.0-py3-none-any.whl
+thrift==0.11.0
+toml @ file:///home/conda/feedstock_root/build_artifacts/toml_1589469402899/work
+toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1600973991856/work
+torch==1.6.0
+torchsummaryX==1.3.0
+torchvision==0.7.0
+tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1602488893411/work
+tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1602171507552/work
+traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1602771532708/work
+typed-ast==1.4.1
+typing-extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1602702424206/work
+ucimlr @ git+https://github.com/isacarnekvist/ucimlr@329ed0586effeb2d57f179f3abb0da9862feed01
+unlzw==0.1.1
+uptide==1.0
+urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1595434816409/work
+visions @ file:///home/conda/feedstock_root/build_artifacts/visions_1597645571032/work
+wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1600965781394/work
+webencodings==0.5.1
+Werkzeug==1.0.1
+widgetsnbextension @ file:///home/conda/feedstock_root/build_artifacts/widgetsnbextension_1602769155190/work
+xarray @ file:///home/conda/feedstock_root/build_artifacts/xarray_1600638299066/work
+xlrd==1.2.0
+yapf @ file:///home/conda/feedstock_root/build_artifacts/yapf_1595950469082/work
+yarl @ file:///home/conda/feedstock_root/build_artifacts/yarl_1602671471836/work
+zict==2.0.0
+zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1602852756910/work
@@ -0,0 +1,309 @@
+from typing import List, Tuple
+from torchvision.datasets.utils import download_url, extract_archive, download_and_extract_archive
+import os
+from tqdm.auto import tqdm
+from pathlib import Path
+from sklearn_pandas import DataFrameMapper
+import xarray as xr
+import pandas as pd
+import numpy as np
+
+from .dataset import Seq2SeqDataSet
+from .util import normalize_encode_dataframe, timeseries_split
+from .tidal import generate_tidal_periods
+
+
+class RegressionForecastData:
+    """
+    Base class for a regression forecasting dataset.
+
+    Subclasses set `columns_target` and `columns_forecast` and implement
+    `download`. On construction the raw frame is downloaded, normalized,
+    split into train/test parts and sanity-checked.
+    """
+    columns_forecast = None  # The input columns which can be included in future (e.g. week or weather forecast)
+    columns_target = None  # Target columns
+    columns_past = None  # Columns blanked in the future window; derived from the data when left as None
+
+    def __init__(self, datasets_root):
+        """
+        Args:
+        - datasets_root: folder holding the raw/cached files. Subclasses join it
+          with `/`, so a `pathlib.Path` is expected.
+        """
+        self.datasets_root = datasets_root
+
+        # Process data
+        self.df = self.download()
+        self.df_norm, self.scaler = self.normalize(self.df)
+        # Last element of the first mapper feature whose column is a target
+        # (presumably the fitted transformer - confirm against the sklearn_pandas version in use)
+        self.output_scaler = next(filter(lambda r: r[0][0] in self.columns_target, self.scaler.features))[-1]
+        self.df_train, self.df_test = self.split(self.df_norm)
+
+        # Check processing
+        self.check()
+
+    def download(self) -> pd.DataFrame:
+        """Implement this method to download data and return raw df"""
+        raise NotImplementedError()
+
+    def normalize(self, df) -> Tuple[pd.DataFrame, DataFrameMapper]:
+        """Return the normalized/encoded frame and the fitted mapper."""
+        df_norm, scaler = normalize_encode_dataframe(df)
+        return df_norm, scaler
+
+    def split(self, df_norm: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
+        """Return (df_train, df_test) as produced by `timeseries_split`."""
+        df_train, df_test = timeseries_split(df_norm)
+        return df_train, df_test
+
+    def check(self) -> None:
+        """Check the resulting dataframe"""
+        assert isinstance(self.df.index, pd.DatetimeIndex), 'index must be datetime'
+        assert self.df.index.freq is not None, 'df must have freq'
+        assert self.columns_forecast is not None
+        assert self.columns_target is not None
+        # `~` on a bool gives -1 or -2 (always truthy), so test set disjointness directly
+        assert set(self.columns_target).isdisjoint(self.columns_forecast), 'target columns should not be in forecast'
+        assert set(self.columns_forecast).issubset(set(self.df.columns)), 'columns_forecast must be in df'
+        assert set(self.columns_target).issubset(set(self.df.columns)), 'columns_target must be in df'
+
+    def _get_columns_past(self) -> List[str]:
+        """Columns to blank in the future: `columns_past` if set, else every normalized column that is neither forecast nor target."""
+        if self.columns_past is not None:
+            return list(self.columns_past)
+        known = set(self.columns_forecast) | set(self.columns_target)
+        return [c for c in self.df_norm.columns if c not in known]
+
+    def to_datasets(self, window_past: int, window_future: int, valid:bool=False) -> Tuple[Seq2SeqDataSet, Seq2SeqDataSet]:
+        """
+        Convert to torch datasets
+
+        Args:
+        - window_past: passed to Seq2SeqDataSet as `window_past`
+        - window_future: passed to Seq2SeqDataSet as `window_future`
+        - valid: currently unused
+
+        Returns: (ds_train, ds_test) built from `self.df_train` and `self.df_test`
+        """
+        columns_past = self._get_columns_past()
+        ds_train = Seq2SeqDataSet(self.df_train, window_past=window_past, window_future=window_future, columns_target=self.columns_target, columns_past=columns_past)
+        ds_test = Seq2SeqDataSet(self.df_test, window_past=window_past, window_future=window_future, columns_target=self.columns_target, columns_past=columns_past)
+        return ds_train, ds_test
+
+    def __repr__(self):
+        # getattr: `df` does not exist yet if repr is requested before download() finished
+        df = getattr(self, 'df', None)
+        return f'<{type(self).__name__} {df.shape if (df is not None) else None}>'
+
+class GasSensor(RegressionForecastData):
+    """
+    Gas sensor array under temperature modulation.
+
+    See: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+array+temperature+modulation
+    """
+
+    columns_target = ['R1 (MOhm)']
+    columns_forecast = ['Flow rate (mL/min)', 'Heater voltage (V)']
+
+    def download(self):
+        """
+        Download and extract the archive if needed, then load every csv.
+
+        Returns: one frame covering all csv files, indexed by absolute time,
+        restricted to the columns used here and resampled to 0.3 seconds.
+        """
+        url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00487/gas-sensor-array-temperature-modulation.zip'
+
+        # download if needed (fewer than 13 csv files is treated as an incomplete extraction)
+        extract_path = self.datasets_root/'GasSensor'
+        files = sorted(extract_path.glob('*.csv'))
+        if len(files)<13:
+            print('download_and_extract_archive')
+            download_and_extract_archive(url, self.datasets_root, extract_path)
+
+        # Load csv's
+        files = sorted(extract_path.glob('*.csv'))
+        dfs = []
+        for f in files:
+            # The file name encodes the start time; 'Time (s)' is the offset from it
+            now = pd.to_datetime(f.stem, format='%Y%m%d_%H%M%S')
+            df_file = pd.read_csv(f)
+            df_file.index = pd.to_timedelta(df_file['Time (s)'], unit='s') + now
+            dfs.append(df_file)
+
+        # Use the concatenation of all files, not just the last one read
+        df = pd.concat(dfs).dropna(subset=self.columns_target)
+
+        df = df[[ 'CO (ppm)', 'Humidity (%r.h.)', 'Temperature (C)',
+               'Flow rate (mL/min)', 'Heater voltage (V)', 'R1 (MOhm)']]
+        df = df.resample('0.3S').first()
+
+        return df
+
+
+class MetroInterstateTraffic(RegressionForecastData):
+    """
+    Hourly traffic volume.
+
+    See: https://archive.ics.uci.edu/ml/datasets/Metro+Interstate+Traffic+Volume
+    """
+
+    columns_target = ['traffic_volume']
+    columns_forecast = ['holiday', 'month', 'day', 'week', 'hour',
+       'minute', 'dayofweek']
+
+    def download(self):
+        """Fetch the csv unless it is cached, then return an hourly frame with calendar features."""
+        filename = '00492_Metro_Interstate_Traffic_Volume.csv.gz'
+        url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00492/Metro_Interstate_Traffic_Volume.csv.gz'
+
+        # Only hit the network when there is no local copy
+        local_path = self.datasets_root/filename
+        if not local_path.exists():
+            download_url(url, self.datasets_root, filename)
+
+        raw = pd.read_csv(local_path, index_col='date_time', parse_dates=['date_time'])
+        labelled = raw.dropna(subset=self.columns_target)
+        df = labelled.resample('1H').first()
+
+        # Holiday becomes a bool: True wherever a value is present
+        df['holiday'] = df['holiday'].notna()
+        for column in ('weather_main', 'weather_description'):
+            df[column] = df[column].fillna('none')
+
+        # Calendar features taken from the index
+        stamps = df.index.to_series()
+        for feature in ('month', 'day'):
+            df[feature] = getattr(stamps.dt, feature)
+        df['week'] = stamps.dt.isocalendar().week
+        for feature in ('hour', 'minute', 'dayofweek'):
+            df[feature] = getattr(stamps.dt, feature)
+
+        return df
+
+class AppliancesEnergyPrediction(RegressionForecastData):
+    """
+    Appliance energy use, 10 minute samples, with a log-transformed target.
+
+    See: https://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction
+    """
+
+    columns_target = ['log_Appliances']
+    columns_forecast = ['month', 'day', 'week', 'hour',
+       'minute', 'dayofweek']
+
+    def download(self):
+        """Fetch the csv unless it is cached, then return a 10 minute frame with calendar features."""
+        filename = '00374_AppliancesEnergyPrediction.csv'
+        url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv'
+
+        # Only hit the network when there is no local copy
+        local_path = self.datasets_root/filename
+        if not local_path.exists():
+            download_url(url, self.datasets_root, filename)
+        df = pd.read_csv(local_path, index_col='date', parse_dates=['date'])
+
+        # Swap the raw target for its log (the small offset keeps log(0) finite)
+        df['log_Appliances'] = np.log(df.pop('Appliances') + 1e-5)
+        labelled = df.dropna(subset=self.columns_target)
+        df = labelled.resample('10T').first()
+
+        # Calendar features taken from the index
+        stamps = df.index.to_series()
+        for feature in ('month', 'day'):
+            df[feature] = getattr(stamps.dt, feature)
+        df['week'] = stamps.dt.isocalendar().week
+        for feature in ('hour', 'minute', 'dayofweek'):
+            df[feature] = getattr(stamps.dt, feature)
+
+        return df
+
+class BejingPM25(RegressionForecastData):
+    """
+    Hourly PM2.5 readings with a log-transformed target.
+
+    See: http://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data
+    """
+
+    columns_target = ['log_pm2.5']
+    columns_forecast = ['month', 'day', 'week', 'hour',
+       'minute', 'dayofweek']
+
+    def download(self):
+        """Fetch the csv unless it is cached, then return an hourly frame with calendar features."""
+        filename = '00381_BejingPM25.csv'
+        url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00381/PRSA_data_2010.1.1-2014.12.31.csv'
+
+        # Only hit the network when there is no local copy
+        local_path = self.datasets_root/filename
+        if not local_path.exists():
+            download_url(url, self.datasets_root, filename)
+        df = pd.read_csv(local_path)
+
+        # Build a timezone-aware index from the date-part columns, then discard them
+        stamps = pd.to_datetime(df[['year', 'month', 'day', 'hour']])
+        df.index = stamps.dt.tz_localize('Asia/Shanghai')
+        df = df.drop(columns=['year', 'month', 'day', 'hour', 'No'])
+
+        # Swap the raw target for its log (the small offset keeps log(0) finite)
+        df['log_pm2.5'] = np.log(df.pop('pm2.5') + 1e-5)
+
+        df = df.dropna(subset=self.columns_target)
+        df = df.resample('1H').first()
+
+        df['cbwd'] = df['cbwd'].fillna('none')
+
+        # Calendar features taken from the index
+        stamps = df.index.to_series()
+        for feature in ('month', 'day'):
+            df[feature] = getattr(stamps.dt, feature)
+        df['week'] = stamps.dt.isocalendar().week
+        for feature in ('hour', 'minute', 'dayofweek'):
+            df[feature] = getattr(stamps.dt, feature)
+
+        return df
+
+def get_current_timeseries(
+        cache_folder=Path("../data/raw/IMOS_ANMN/"),
+        outfile=Path(
+            '../data/processed/currents/MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc'
+        )):
+    """
+    Download Current data from the IMOS and pre-process.
+
+    Nothing is done when `outfile` already exists. Otherwise the deployment
+    files are downloaded into `cache_folder`, merged along TIME, cleaned,
+    joined with tidal features and written to `outfile`.
+
+    Args:
+    - cache_folder: Path where the raw netcdf files are downloaded to
+    - outfile: Path of the processed netcdf file to write
+
+    Returns: `outfile`
+    """
+    if not outfile.exists():
+
+        files = [
+            "IMOS_ANMN-WA_AETVZ_20090715T080000Z_WATR20_FV01_WATR20-0907-Continental-194_END-20090716T181317Z_C-20191122T052830Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20100409T080000Z_WATR20_FV01_WATR20-1004-Continental-194_END-20100430T084500Z_C-20191122T053845Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20101222T080000Z_WATR20_FV01_WATR20-1012-Continental-194_END-20110518T051500Z_C-20200916T020035Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20110608T080000Z_WATR20_FV01_WATR20-1106-Continental-194_END-20111122T035000Z_C-20200916T025619Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20111221T060300Z_WATR20_FV01_WATR20-1112-Continental-194_END-20120704T050500Z_C-20200916T043212Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20120726T044000Z_WATR20_FV01_WATR20-1207-Continental-194_END-20130204T044000Z_C-20200916T032027Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20130221T080000Z_WATR20_FV01_WATR20-1302-Continental-194_END-20131003T035000Z_C-20180529T020609Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20131111T080000Z_WATR20_FV01_WATR20-1311-Continental-194_END-20140519T035000Z_C-20200114T033335Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20140710T080000Z_WATR20_FV01_WATR20-1407-Continental-194_END-20150121T021500Z_C-20180529T055902Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20150213T080000Z_WATR20_FV01_WATR20-1502-Continental-194_END-20150424T134002Z_C-20200114T035347Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20150914T080000Z_WATR20_FV01_WATR20-1509-Continental-194_END-20160331T043000Z_C-20180601T013623Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20160427T080000Z_WATR20_FV01_WATR20-1604-Continental-194_END-20160531T021800Z_C-20180531T071709Z.nc",
+            # Deliberately left out (reason not recorded):
+            #     "IMOS_ANMN-WA_AETVZ_20170512T080000Z_WATR20_FV01_WATR20-1705-Continental-194_END-20170717T014558Z_C-20190805T004647Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20171204T080000Z_WATR20_FV01_WATR20-1712-Continental-194_END-20180618T030000Z_C-20180620T233149Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20180802T080000Z_WATR20_FV01_WATR20-1807-Continental-194_END-20190225T054500Z_C-20190227T001343Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20190307T080000Z_WATR20_FV01_WATR20-1903-Continental-194_END-20190911T003144Z_C-20200114T045053Z.nc",
+            "IMOS_ANMN-WA_AETVZ_20190926T080000Z_WATR20_FV01_WATR20-1909-Continental-194_END-20200326T030000Z_C-20200420T064334Z.nc",
+        ]
+        base = "http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/"
+
+        # Download files
+        for f in files:
+            download_url(base + f, cache_folder)
+
+        # load and merge
+        xds = [xr.open_dataset(cache_folder / f) for f in files]
+        variables = [
+            'VCUR', 'UCUR', 'WCUR', 'TEMP', 'PRES_REL', 'DEPTH', 'ROLL',
+            'PITCH'
+        ]
+        # Keep a single bin (index 18) along HEIGHT_ABOVE_SENSOR
+        xds2 = [x[variables].isel(HEIGHT_ABOVE_SENSOR=18) for x in xds]
+        xd = xr.concat(xds2, dim='TIME')
+        xd = xd.where(xd.DEPTH > 150)  # remove outliers
+
+        xd['TIME'] = xd['TIME'].dt.round('10T')
+        xd = xd.dropna(dim='TIME', subset=['VCUR', 'UCUR', 'WCUR'])
+
+        # Generate tidal freqs
+        t = xd.TIME.to_series()
+        df_eta = generate_tidal_periods(t)
+
+        # Add tidal freqs
+        xd = xd.merge(df_eta)
+
+        # Cache to nc (create the output folder first, it may not exist yet)
+        outfile.parent.mkdir(parents=True, exist_ok=True)
+        xd.to_netcdf(outfile)
+        print(
+            f'wrote "{outfile}" with size {outfile.stat().st_size*1e-6:2.2f} MB'
+        )
+    return outfile
+
+
+class IMOSCurrentsVel(RegressionForecastData):
+    """
+    Current Speed at ANMN Two Rocks, WA, 204m mooring
+
+    The target is the horizontal speed; the forecast inputs are the tidal
+    constituent columns.
+
+    see:
+    - http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/
+    from https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/ae86e2f5-eaaf-459e-a405-e654d85adb9c
+    and http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/WA/WATR20/Velocity/catalog.html
+    And https://en.wikipedia.org/wiki/Theory_of_tides
+    """
+
+    columns_target = ['SPD']
+    columns_forecast = [
+        'M2', 'S2', 'N2', 'K2', 'K1', 'O1', 'P1', 'Q1', 'M4', 'M6', 'S4',
+        'MK3', 'MM', 'SSA', 'SA'
+    ]
+
+    def download(self):
+        """Build the processed netcdf file if needed and return it as a 30 minute frame with a speed column."""
+        filename = 'MOS_ANMN-WA_AETVZ_WATR20_FV01_WATR20-1909-Continental-194_currents.nc'
+        outfile = self.datasets_root / filename
+        get_current_timeseries(outfile=outfile)
+
+        # Read back the file written by get_current_timeseries
+        xd = xr.load_dataset(outfile)
+        frame = xd.to_dataframe()
+        df = frame.drop(columns=['HEIGHT_ABOVE_SENSOR', 'NOMINAL_DEPTH'])
+
+        # Speed from the two horizontal velocity components
+        df['SPD'] = np.sqrt(df['VCUR']**2 + df['UCUR']**2)
+        df = df.dropna(subset=self.columns_target)
+        return df.resample('30T').first()
@@ -20,11 +20,11 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
    Returns x_past, y_past, x_future, etc.
    """
    
-    def __init__(self, df: pd.DataFrame, window_past=40, window_future=10, columns_target=['energy(kWh/hh)'], columns_blank=[],):
+    def __init__(self, df: pd.DataFrame, window_past=40, window_future=10, columns_target=['energy(kWh/hh)'], columns_past=[],):
        """
        Args:
        - df: DataFrame with time index, already scaled
-        - columns_blank: The columns we will blank, in the future
+        - columns_past: The columns we will blank, in the future
        """
        super().__init__()
        assert isinstance(df.index, pd.DatetimeIndex), 'should have a datetime index'
@@ -38,7 +38,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
        self.columns_target = columns_target

        # For speed
-        self._icol_blank = [df.drop(columns = columns_target).columns.tolist().index(n) for n in columns_blank]
+        self._icol_blank = [df.drop(columns = columns_target).columns.tolist().index(n) for n in columns_past]
        self._x = self.df.drop(columns = self.columns_target).values
        self._y = self.df[columns_target].values

@@ -64,6 +64,8 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):

        # Stop it cheating by using future weather measurements. Fill in with last value
        x_future[:, self._icol_blank] = x_past[0, self._icol_blank]
+
+        # x_future[:, self._icol_blank] = 0
        return x_past, y_past, x_future, y_future


@@ -1,30 +0,0 @@
-# -*- coding: utf-8 -*-
-import click
-import logging
-from pathlib import Path
-from dotenv import find_dotenv, load_dotenv
-
-
-@click.command()
-@click.argument('input_filepath', type=click.Path(exists=True))
-@click.argument('output_filepath', type=click.Path())
-def main(input_filepath, output_filepath):
-    """ Runs data processing scripts to turn raw data from (../raw) into
-        cleaned data ready to be analyzed (saved in ../processed).
-    """
-    logger = logging.getLogger(__name__)
-    logger.info('making final data set from raw data')
-
-
-if __name__ == '__main__':
-    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-    logging.basicConfig(level=logging.INFO, format=log_fmt)
-
-    # not used in this stub but often useful for finding various files
-    project_dir = Path(__file__).resolve().parents[2]
-
-    # find .env automagically by walking up directories until it's found, then
-    # load up the .env entries as environment variables
-    load_dotenv(find_dotenv())
-
-    main()
@@ -0,0 +1,43 @@
+import numpy as np
+import pandas as pd
+import uptide
+
+# Constituents used by default, grouped by period.
+# https://en.wikipedia.org/wiki/Theory_of_tides#Harmonic_analysis
+default_tidal_constituents = [
+    # Semi-diurnal
+    'M2', 'S2', 'N2', 'K2',
+    # Diurnal
+    'K1', 'O1', 'P1', 'Q1',
+    # Short period
+    'M4', 'M6', 'S4', 'MK3',
+    # Long period
+    'MM', 'SSA', 'SA',
+]
+
+
+def generate_tidal_periods(t: pd.Series,
+                           constituents: list = default_tidal_constituents):
+    """
+    Generate one cosine signal per tidal constituent, for use as model features.
+
+    Every constituent gets unit amplitude and zero phase lag.
+
+    Args:
+    - t: Series of timestamps; its first element is used as the initial time
+    - constituents: constituent names passed to `uptide.Tides`
+
+    Returns: DataFrame indexed by `t` with one column per constituent
+    """
+    tide = uptide.Tides(constituents)
+    # Positional access: `t` is usually indexed by time, so `t[0]` would be a label lookup
+    t0 = t.iloc[0]
+    # Whole seconds elapsed since the first timestamp
+    td = (t - t0).dt.total_seconds().to_numpy().astype(int)
+    tide.set_initial_time(t0)
+
+    # calc tides
+    # Scalars, so the zip below is limited by the constituents only, never by len(t)
+    amplitude = 1
+    phase = 0
+    eta = {}
+    # f and u are presumably uptide's nodal corrections - confirm against the uptide docs
+    for name, f, omega, phi, u in zip(
+            tide.constituents, tide.f, tide.omega, tide.phi, tide.u):
+        eta[name] = f * amplitude * np.cos(omega * td - phase + phi + u)
+    df_eta = pd.DataFrame(eta, index=t)
+    return df_eta
+
+
@@ -0,0 +1,19 @@
+import sklearn
+from sklearn.preprocessing import StandardScaler, OrdinalEncoder
+from sklearn_pandas import DataFrameMapper
+
+def normalize_encode_dataframe(df, encoder=OrdinalEncoder):
+    """
+    Normalise numeric data, encode categorical data.
+
+    Args:
+    - df: DataFrame to transform
+    - encoder: class instantiated once per non-numeric column
+
+    Returns: (df_norm, scaler), the transformed frame and the fitted DataFrameMapper
+    """
+    columns_input_numeric = list(df._get_numeric_data().columns)
+    # Keep the frame's own column order. A set difference here would make the
+    # output column order differ between runs.
+    numeric = set(columns_input_numeric)
+    columns_categorical = list(dict.fromkeys(c for c in df.columns if c not in numeric))
+
+    # One transformer per column: numeric columns first, then the rest
+    transformers= [([n], StandardScaler()) for n in columns_input_numeric] + \
+                  [([n], encoder()) for n in columns_categorical]
+    scaler = DataFrameMapper(transformers, df_out=True)
+    df_norm = scaler.fit_transform(df)
+    return df_norm, scaler
+    
+def timeseries_split(df, test_fraction=0.2):
+    """
+    Split timeseries data with test in the future
+
+    Args:
+    - df: DataFrame ordered in time
+    - test_fraction: share of rows, taken from the end, that goes to the test part
+
+    Returns: (df_train, df_test), where df_test holds the last rows
+
+    Raises: ValueError if test_fraction is outside [0, 1]
+    """
+    if not 0 <= test_fraction <= 1:
+        raise ValueError(f'test_fraction must be between 0 and 1, got {test_fraction}')
+    # Count the test rows first so the train part is everything before them
+    n_test = int(len(df)*test_fraction)
+    i = len(df) - n_test
+    return df.iloc[:i], df.iloc[i:]